btrfs-progs: check: repair dir_item and inode_ref in lowmem mode
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phases shown by the progress indicator.
 *
 * The value is used as an index into task_position_string[] in
 * print_status_check(), which has one label per phase except
 * TASK_NOTHING; that value makes the indicator thread return at once.
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING, /* have to be the last element */
};
53
/* Context passed as the opaque pointer to print_status_check(). */
struct task_ctx {
	/* NOTE(review): not read in this part of the file - confirm its users */
	int progress_enabled;
	/* Phase whose label print_status_check() prints */
	enum task_position tp;

	/* Handle given to task_period_start() and task_period_wait() */
	struct task_info *info;
};
60
/*
 * File-scope counters, lists and option flags; all of them start out
 * zero or empty.
 * NOTE(review): the code that updates and reports them is outside this
 * part of the file - the names suggest running totals and command line
 * switches, confirm before relying on that.
 */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
/* Zero-initialised progress indicator context */
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Check implementation to run.
 *
 * parse_check_mode() maps "orig"/"original" to CHECK_MODE_ORIGINAL,
 * "lowmem" to CHECK_MODE_LOWMEM and anything else to
 * CHECK_MODE_UNKNOWN.  CHECK_MODE_DEFAULT is an alias of the original
 * mode, not a distinct value.
 */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Selected mode; starts as the default (original) mode */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
/*
 * Common head of a back reference, embedded as the first member of
 * struct data_backref and struct tree_backref.
 */
struct extent_backref {
	/* rb-tree linkage, compared by compare_extent_backref() */
	struct rb_node node;
	/* 1: container is a data_backref, 0: a tree_backref */
	unsigned int is_data:1;
	unsigned int found_extent_tree:1;
	/* Compared before the type specific keys in compare_extent_backref() */
	unsigned int full_backref:1;
	unsigned int found_ref:1;
	unsigned int broken:1;
};
95
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
97 {
98         return rb_entry(node, struct extent_backref, node);
99 }
100
/*
 * Back reference for a data extent.
 *
 * compare_data_backref() orders these by parent/root, then (unless
 * node.full_backref is set) by owner and offset, and finally by
 * disk_bytenr and bytes when both sides have a non-zero found_ref.
 */
struct data_backref {
	struct extent_backref node;
	/* Only one of the two is meaningful; they share storage */
	union {
		u64 parent;
		u64 root;
	};
	u64 owner;
	u64 offset;
	u64 disk_bytenr;
	u64 bytes;
	u64 ram_bytes;
	u32 num_refs;
	/* A counter, distinct from the one-bit node.found_ref flag */
	u32 found_ref;
};
115
/* Error bits for directory, inode, file extent and root reference checks */
#define ROOT_DIR_ERROR		(1<<1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1<<2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1<<3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1<<4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1<<5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1<<6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1<<7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1<<8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1<<9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1<<10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1<<11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1<<12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1<<13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1<<14)	/* no inode_item */
#define LAST_ITEM		(1<<15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1<<16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1<<17)	/* ROOT_REF found but not match */
#define DIR_INDEX_MISSING	(1<<18)	/* DIR_INDEX not found */
#define DIR_INDEX_MISMATCH	(1<<19)	/* DIR_INDEX found but not match */
135
136 static inline struct data_backref* to_data_backref(struct extent_backref *back)
137 {
138         return container_of(back, struct data_backref, node);
139 }
140
141 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
142 {
143         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
144         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
145         struct data_backref *back1 = to_data_backref(ext1);
146         struct data_backref *back2 = to_data_backref(ext2);
147
148         WARN_ON(!ext1->is_data);
149         WARN_ON(!ext2->is_data);
150
151         /* parent and root are a union, so this covers both */
152         if (back1->parent > back2->parent)
153                 return 1;
154         if (back1->parent < back2->parent)
155                 return -1;
156
157         /* This is a full backref and the parents match. */
158         if (back1->node.full_backref)
159                 return 0;
160
161         if (back1->owner > back2->owner)
162                 return 1;
163         if (back1->owner < back2->owner)
164                 return -1;
165
166         if (back1->offset > back2->offset)
167                 return 1;
168         if (back1->offset < back2->offset)
169                 return -1;
170
171         if (back1->found_ref && back2->found_ref) {
172                 if (back1->disk_bytenr > back2->disk_bytenr)
173                         return 1;
174                 if (back1->disk_bytenr < back2->disk_bytenr)
175                         return -1;
176
177                 if (back1->bytes > back2->bytes)
178                         return 1;
179                 if (back1->bytes < back2->bytes)
180                         return -1;
181         }
182
183         return 0;
184 }
185
/*
 * Much like data_backref, but without the undetermined members and
 * linked through a list_head instead of an rb-tree node.
 * During the extent scan it is stored in root->orphan_data_extent.
 * During the fs tree scan it is then moved to
 * inode_rec->orphan_data_extents.
 */
struct orphan_data_extent {
	struct list_head list;
	u64 root;
	u64 objectid;
	u64 offset;
	u64 disk_bytenr;
	u64 disk_len;
};
200
/*
 * Back reference for a tree block; compare_tree_backref() orders these
 * by the parent/root value only.
 */
struct tree_backref {
	struct extent_backref node;
	/* Only one of the two is meaningful; they share storage */
	union {
		u64 parent;
		u64 root;
	};
};
208
209 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
210 {
211         return container_of(back, struct tree_backref, node);
212 }
213
214 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
215 {
216         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
217         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
218         struct tree_backref *back1 = to_tree_backref(ext1);
219         struct tree_backref *back2 = to_tree_backref(ext2);
220
221         WARN_ON(ext1->is_data);
222         WARN_ON(ext2->is_data);
223
224         /* parent and root are a union, so this covers both */
225         if (back1->parent > back2->parent)
226                 return 1;
227         if (back1->parent < back2->parent)
228                 return -1;
229
230         return 0;
231 }
232
233 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
234 {
235         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
236         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
237
238         if (ext1->is_data > ext2->is_data)
239                 return 1;
240
241         if (ext1->is_data < ext2->is_data)
242                 return -1;
243
244         if (ext1->full_backref > ext2->full_backref)
245                 return 1;
246         if (ext1->full_backref < ext2->full_backref)
247                 return -1;
248
249         if (ext1->is_data)
250                 return compare_data_backref(node1, node2);
251         else
252                 return compare_tree_backref(node1, node2);
253 }
254
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * The value 2 needs both bits of that two-bit field.
 */
enum { FLAG_UNSET = 2 };
257
/*
 * Bookkeeping for one extent.
 * NOTE(review): the code filling this in is outside this part of the
 * file; only facts visible here are annotated.
 */
struct extent_record {
	struct list_head backrefs;
	struct list_head dups;
	struct rb_root backref_tree;
	/* Linkage that to_extent_record() converts back from */
	struct list_head list;
	struct cache_extent cache;
	struct btrfs_disk_key parent_key;
	u64 start;
	u64 max_size;
	u64 nr;
	u64 refs;
	u64 extent_item_refs;
	u64 generation;
	u64 parent_generation;
	u64 info_objectid;
	u32 num_duplicates;
	u8 info_level;
	/* Two bits wide so that it can hold FLAG_UNSET (2) */
	unsigned int flag_block_full_backref:2;
	unsigned int found_rec:1;
	unsigned int content_checked:1;
	unsigned int owner_ref_checked:1;
	unsigned int is_root:1;
	unsigned int metadata:1;
	unsigned int bad_full_backref:1;
	unsigned int crossing_stripes:1;
	unsigned int wrong_chunk_type:1;
};
285
286 static inline struct extent_record* to_extent_record(struct list_head *entry)
287 {
288         return container_of(entry, struct extent_record, list);
289 }
290
291 struct inode_backref {
292         struct list_head list;
293         unsigned int found_dir_item:1;
294         unsigned int found_dir_index:1;
295         unsigned int found_inode_ref:1;
296         u8 filetype;
297         u8 ref_type;
298         int errors;
299         u64 dir;
300         u64 index;
301         u16 namelen;
302         char name[0];
303 };
304
305 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
306 {
307         return list_entry(entry, struct inode_backref, list);
308 }
309
/*
 * Values recorded for one tree root.
 * NOTE(review): presumably copied from the on-disk root item; the code
 * filling this in is outside this part of the file - confirm.
 */
struct root_item_record {
	struct list_head list;
	u64 objectid;
	u64 bytenr;
	u64 last_snapshot;
	u8 level;
	u8 drop_level;
	struct btrfs_key drop_key;
};
319
/*
 * Error bits for back references; the first nine are decoded by
 * print_ref_error().
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
333
/*
 * One hole, covering the byte range [start, start + len).
 * Kept in an rb-tree ordered by compare_hole().
 */
struct file_extent_hole {
	struct rb_node node;
	u64 start;
	u64 len;
};
339
/*
 * What was collected about one inode.
 * NOTE(review): most fields are filled in outside this part of the
 * file; only facts visible here are annotated.
 */
struct inode_record {
	/* List of struct inode_backref */
	struct list_head backrefs;
	unsigned int checked:1;
	unsigned int merging:1;
	unsigned int found_inode_item:1;
	unsigned int found_dir_item:1;
	unsigned int found_file_extent:1;
	unsigned int found_csum_item:1;
	unsigned int some_csum_missing:1;
	unsigned int nodatasum:1;
	/* I_ERR_* bits, decoded by print_inode_error() */
	int errors;

	u64 ino;
	u32 nlink;
	u32 imode;
	u64 isize;
	u64 nbytes;

	u32 found_link;
	u64 found_size;
	u64 extent_start;
	u64 extent_end;
	/* rb-tree of struct file_extent_hole */
	struct rb_root holes;
	/* List of struct orphan_data_extent */
	struct list_head orphan_extents;

	/* clone_inode_rec() starts every copy at 1 */
	u32 refs;
};
367
/* Error bits for inode_record::errors, decoded by print_inode_error() */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
383
384 struct root_backref {
385         struct list_head list;
386         unsigned int found_dir_item:1;
387         unsigned int found_dir_index:1;
388         unsigned int found_back_ref:1;
389         unsigned int found_forward_ref:1;
390         unsigned int reachable:1;
391         int errors;
392         u64 ref_root;
393         u64 dir;
394         u64 index;
395         u16 namelen;
396         char name[0];
397 };
398
399 static inline struct root_backref* to_root_backref(struct list_head *entry)
400 {
401         return list_entry(entry, struct root_backref, list);
402 }
403
/*
 * What was collected about one tree root.
 * NOTE(review): presumably 'backrefs' holds struct root_backref entries
 * and 'cache' keys the record in a cache_tree - confirm with the users,
 * which are outside this part of the file.
 */
struct root_record {
	struct list_head backrefs;
	struct cache_extent cache;
	unsigned int found_root_item:1;
	u64 objectid;
	u32 found_ref;
};
411
/* A cache_extent entry that carries an opaque payload pointer. */
struct ptr_node {
	struct cache_extent cache;
	void *data;
};
416
/*
 * Per-node state referenced from walk_control::nodes[].
 * NOTE(review): the code using it is outside this part of the file.
 */
struct shared_node {
	struct cache_extent cache;
	struct cache_tree root_cache;
	struct cache_tree inode_cache;
	struct inode_record *current;
	u32 refs;
};
424
/* Start and size of one block (a 64-bit start and a 32-bit size). */
struct block_info {
	u64 start;
	u32 size;
};
429
/*
 * State kept while walking a tree: one shared_node slot per possible
 * tree level (BTRFS_MAX_LEVEL of them).
 * NOTE(review): the walker itself is outside this part of the file.
 */
struct walk_control {
	struct cache_tree shared;
	struct shared_node *nodes[BTRFS_MAX_LEVEL];
	int active_node;
	int root_level;
};
436
/*
 * A key in a given root, kept on a list.
 * NOTE(review): presumably queued on the delete_items list - confirm.
 */
struct bad_item {
	struct btrfs_key key;
	u64 root_id;
	struct list_head list;
};
442
/*
 * One (bytenr, bytes) candidate with a tally, kept on a list.
 * NOTE(review): the code using it is outside this part of the file.
 */
struct extent_entry {
	u64 bytenr;
	u64 bytes;
	int count;
	int broken;
	struct list_head list;
};
450
/*
 * Information gathered about the root node of one tree.
 * NOTE(review): presumably stored in roots_info_cache via cache_extent -
 * confirm with the users, which are outside this part of the file.
 */
struct root_item_info {
	/* level of the root */
	u8 level;
	/* number of nodes at this level, must be 1 for a root */
	int node_count;
	u64 bytenr;
	u64 gen;
	struct cache_extent cache_extent;
};
460
/*
 * Error bit for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * Every flag owns a distinct bit.  CROSSING_STRIPE_BOUNDARY used to share
 * bit 4 with REFERENCER_MISMATCH, which made the two conditions
 * impossible to tell apart; it now uses the first free bit.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8) /* Chunk type does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
477
478 static void *print_status_check(void *p)
479 {
480         struct task_ctx *priv = p;
481         const char work_indicator[] = { '.', 'o', 'O', 'o' };
482         uint32_t count = 0;
483         static char *task_position_string[] = {
484                 "checking extents",
485                 "checking free space cache",
486                 "checking fs roots",
487         };
488
489         task_period_start(priv->info, 1000 /* 1s */);
490
491         if (priv->tp == TASK_NOTHING)
492                 return NULL;
493
494         while (1) {
495                 printf("%s [%c]\r", task_position_string[priv->tp],
496                                 work_indicator[count % 4]);
497                 count++;
498                 fflush(stdout);
499                 task_period_wait(priv->info);
500         }
501         return NULL;
502 }
503
/*
 * Counterpart of print_status_check(): ends the "\r"-terminated status
 * line with a newline and flushes stdout.  @p is unused.  Returns 0.
 */
static int print_status_return(void *p)
{
	putchar('\n');
	fflush(stdout);

	return 0;
}
511
512 static enum btrfs_check_mode parse_check_mode(const char *str)
513 {
514         if (strcmp(str, "lowmem") == 0)
515                 return CHECK_MODE_LOWMEM;
516         if (strcmp(str, "orig") == 0)
517                 return CHECK_MODE_ORIGINAL;
518         if (strcmp(str, "original") == 0)
519                 return CHECK_MODE_ORIGINAL;
520
521         return CHECK_MODE_UNKNOWN;
522 }
523
524 /* Compatible function to allow reuse of old codes */
525 static u64 first_extent_gap(struct rb_root *holes)
526 {
527         struct file_extent_hole *hole;
528
529         if (RB_EMPTY_ROOT(holes))
530                 return (u64)-1;
531
532         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
533         return hole->start;
534 }
535
536 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
537 {
538         struct file_extent_hole *hole1;
539         struct file_extent_hole *hole2;
540
541         hole1 = rb_entry(node1, struct file_extent_hole, node);
542         hole2 = rb_entry(node2, struct file_extent_hole, node);
543
544         if (hole1->start > hole2->start)
545                 return -1;
546         if (hole1->start < hole2->start)
547                 return 1;
548         /* Now hole1->start == hole2->start */
549         if (hole1->len >= hole2->len)
550                 /*
551                  * Hole 1 will be merge center
552                  * Same hole will be merged later
553                  */
554                 return -1;
555         /* Hole 2 will be merge center */
556         return 1;
557 }
558
559 /*
560  * Add a hole to the record
561  *
562  * This will do hole merge for copy_file_extent_holes(),
563  * which will ensure there won't be continuous holes.
564  */
565 static int add_file_extent_hole(struct rb_root *holes,
566                                 u64 start, u64 len)
567 {
568         struct file_extent_hole *hole;
569         struct file_extent_hole *prev = NULL;
570         struct file_extent_hole *next = NULL;
571
572         hole = malloc(sizeof(*hole));
573         if (!hole)
574                 return -ENOMEM;
575         hole->start = start;
576         hole->len = len;
577         /* Since compare will not return 0, no -EEXIST will happen */
578         rb_insert(holes, &hole->node, compare_hole);
579
580         /* simple merge with previous hole */
581         if (rb_prev(&hole->node))
582                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
583                                 node);
584         if (prev && prev->start + prev->len >= hole->start) {
585                 hole->len = hole->start + hole->len - prev->start;
586                 hole->start = prev->start;
587                 rb_erase(&prev->node, holes);
588                 free(prev);
589                 prev = NULL;
590         }
591
592         /* iterate merge with next holes */
593         while (1) {
594                 if (!rb_next(&hole->node))
595                         break;
596                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
597                                         node);
598                 if (hole->start + hole->len >= next->start) {
599                         if (hole->start + hole->len <= next->start + next->len)
600                                 hole->len = next->start + next->len -
601                                             hole->start;
602                         rb_erase(&next->node, holes);
603                         free(next);
604                         next = NULL;
605                 } else
606                         break;
607         }
608         return 0;
609 }
610
611 static int compare_hole_range(struct rb_node *node, void *data)
612 {
613         struct file_extent_hole *hole;
614         u64 start;
615
616         hole = (struct file_extent_hole *)data;
617         start = hole->start;
618
619         hole = rb_entry(node, struct file_extent_hole, node);
620         if (start < hole->start)
621                 return -1;
622         if (start >= hole->start && start < hole->start + hole->len)
623                 return 0;
624         return 1;
625 }
626
627 /*
628  * Delete a hole in the record
629  *
630  * This will do the hole split and is much restrict than add.
631  */
632 static int del_file_extent_hole(struct rb_root *holes,
633                                 u64 start, u64 len)
634 {
635         struct file_extent_hole *hole;
636         struct file_extent_hole tmp;
637         u64 prev_start = 0;
638         u64 prev_len = 0;
639         u64 next_start = 0;
640         u64 next_len = 0;
641         struct rb_node *node;
642         int have_prev = 0;
643         int have_next = 0;
644         int ret = 0;
645
646         tmp.start = start;
647         tmp.len = len;
648         node = rb_search(holes, &tmp, compare_hole_range, NULL);
649         if (!node)
650                 return -EEXIST;
651         hole = rb_entry(node, struct file_extent_hole, node);
652         if (start + len > hole->start + hole->len)
653                 return -EEXIST;
654
655         /*
656          * Now there will be no overlap, delete the hole and re-add the
657          * split(s) if they exists.
658          */
659         if (start > hole->start) {
660                 prev_start = hole->start;
661                 prev_len = start - hole->start;
662                 have_prev = 1;
663         }
664         if (hole->start + hole->len > start + len) {
665                 next_start = start + len;
666                 next_len = hole->start + hole->len - start - len;
667                 have_next = 1;
668         }
669         rb_erase(node, holes);
670         free(hole);
671         if (have_prev) {
672                 ret = add_file_extent_hole(holes, prev_start, prev_len);
673                 if (ret < 0)
674                         return ret;
675         }
676         if (have_next) {
677                 ret = add_file_extent_hole(holes, next_start, next_len);
678                 if (ret < 0)
679                         return ret;
680         }
681         return 0;
682 }
683
684 static int copy_file_extent_holes(struct rb_root *dst,
685                                   struct rb_root *src)
686 {
687         struct file_extent_hole *hole;
688         struct rb_node *node;
689         int ret = 0;
690
691         node = rb_first(src);
692         while (node) {
693                 hole = rb_entry(node, struct file_extent_hole, node);
694                 ret = add_file_extent_hole(dst, hole->start, hole->len);
695                 if (ret)
696                         break;
697                 node = rb_next(node);
698         }
699         return ret;
700 }
701
702 static void free_file_extent_holes(struct rb_root *holes)
703 {
704         struct rb_node *node;
705         struct file_extent_hole *hole;
706
707         node = rb_first(holes);
708         while (node) {
709                 hole = rb_entry(node, struct file_extent_hole, node);
710                 rb_erase(node, holes);
711                 free(hole);
712                 node = rb_first(holes);
713         }
714 }
715
716 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
717
718 static void record_root_in_trans(struct btrfs_trans_handle *trans,
719                                  struct btrfs_root *root)
720 {
721         if (root->last_trans != trans->transid) {
722                 root->track_dirty = 1;
723                 root->last_trans = trans->transid;
724                 root->commit_root = root->node;
725                 extent_buffer_get(root->node);
726         }
727 }
728
729 static u8 imode_to_type(u32 imode)
730 {
731 #define S_SHIFT 12
732         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
733                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
734                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
735                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
736                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
737                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
738                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
739                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
740         };
741
742         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
743 #undef S_SHIFT
744 }
745
746 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
747 {
748         struct device_record *rec1;
749         struct device_record *rec2;
750
751         rec1 = rb_entry(node1, struct device_record, node);
752         rec2 = rb_entry(node2, struct device_record, node);
753         if (rec1->devid > rec2->devid)
754                 return -1;
755         else if (rec1->devid < rec2->devid)
756                 return 1;
757         else
758                 return 0;
759 }
760
761 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
762 {
763         struct inode_record *rec;
764         struct inode_backref *backref;
765         struct inode_backref *orig;
766         struct inode_backref *tmp;
767         struct orphan_data_extent *src_orphan;
768         struct orphan_data_extent *dst_orphan;
769         struct rb_node *rb;
770         size_t size;
771         int ret;
772
773         rec = malloc(sizeof(*rec));
774         if (!rec)
775                 return ERR_PTR(-ENOMEM);
776         memcpy(rec, orig_rec, sizeof(*rec));
777         rec->refs = 1;
778         INIT_LIST_HEAD(&rec->backrefs);
779         INIT_LIST_HEAD(&rec->orphan_extents);
780         rec->holes = RB_ROOT;
781
782         list_for_each_entry(orig, &orig_rec->backrefs, list) {
783                 size = sizeof(*orig) + orig->namelen + 1;
784                 backref = malloc(size);
785                 if (!backref) {
786                         ret = -ENOMEM;
787                         goto cleanup;
788                 }
789                 memcpy(backref, orig, size);
790                 list_add_tail(&backref->list, &rec->backrefs);
791         }
792         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
793                 dst_orphan = malloc(sizeof(*dst_orphan));
794                 if (!dst_orphan) {
795                         ret = -ENOMEM;
796                         goto cleanup;
797                 }
798                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
799                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
800         }
801         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
802         if (ret < 0)
803                 goto cleanup_rb;
804
805         return rec;
806
807 cleanup_rb:
808         rb = rb_first(&rec->holes);
809         while (rb) {
810                 struct file_extent_hole *hole;
811
812                 hole = rb_entry(rb, struct file_extent_hole, node);
813                 rb = rb_next(rb);
814                 free(hole);
815         }
816
817 cleanup:
818         if (!list_empty(&rec->backrefs))
819                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
820                         list_del(&orig->list);
821                         free(orig);
822                 }
823
824         if (!list_empty(&rec->orphan_extents))
825                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
826                         list_del(&orig->list);
827                         free(orig);
828                 }
829
830         free(rec);
831
832         return ERR_PTR(ret);
833 }
834
835 static void print_orphan_data_extents(struct list_head *orphan_extents,
836                                       u64 objectid)
837 {
838         struct orphan_data_extent *orphan;
839
840         if (list_empty(orphan_extents))
841                 return;
842         printf("The following data extent is lost in tree %llu:\n",
843                objectid);
844         list_for_each_entry(orphan, orphan_extents, list) {
845                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
846                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
847                        orphan->disk_len);
848         }
849 }
850
851 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
852 {
853         u64 root_objectid = root->root_key.objectid;
854         int errors = rec->errors;
855
856         if (!errors)
857                 return;
858         /* reloc root errors, we print its corresponding fs root objectid*/
859         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
860                 root_objectid = root->root_key.offset;
861                 fprintf(stderr, "reloc");
862         }
863         fprintf(stderr, "root %llu inode %llu errors %x",
864                 (unsigned long long) root_objectid,
865                 (unsigned long long) rec->ino, rec->errors);
866
867         if (errors & I_ERR_NO_INODE_ITEM)
868                 fprintf(stderr, ", no inode item");
869         if (errors & I_ERR_NO_ORPHAN_ITEM)
870                 fprintf(stderr, ", no orphan item");
871         if (errors & I_ERR_DUP_INODE_ITEM)
872                 fprintf(stderr, ", dup inode item");
873         if (errors & I_ERR_DUP_DIR_INDEX)
874                 fprintf(stderr, ", dup dir index");
875         if (errors & I_ERR_ODD_DIR_ITEM)
876                 fprintf(stderr, ", odd dir item");
877         if (errors & I_ERR_ODD_FILE_EXTENT)
878                 fprintf(stderr, ", odd file extent");
879         if (errors & I_ERR_BAD_FILE_EXTENT)
880                 fprintf(stderr, ", bad file extent");
881         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
882                 fprintf(stderr, ", file extent overlap");
883         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
884                 fprintf(stderr, ", file extent discount");
885         if (errors & I_ERR_DIR_ISIZE_WRONG)
886                 fprintf(stderr, ", dir isize wrong");
887         if (errors & I_ERR_FILE_NBYTES_WRONG)
888                 fprintf(stderr, ", nbytes wrong");
889         if (errors & I_ERR_ODD_CSUM_ITEM)
890                 fprintf(stderr, ", odd csum item");
891         if (errors & I_ERR_SOME_CSUM_MISSING)
892                 fprintf(stderr, ", some csum missing");
893         if (errors & I_ERR_LINK_COUNT_WRONG)
894                 fprintf(stderr, ", link count wrong");
895         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
896                 fprintf(stderr, ", orphan file extent");
897         fprintf(stderr, "\n");
898         /* Print the orphan extents if needed */
899         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
900                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
901
902         /* Print the holes if needed */
903         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
904                 struct file_extent_hole *hole;
905                 struct rb_node *node;
906                 int found = 0;
907
908                 node = rb_first(&rec->holes);
909                 fprintf(stderr, "Found file extent holes:\n");
910                 while (node) {
911                         found = 1;
912                         hole = rb_entry(node, struct file_extent_hole, node);
913                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
914                                 hole->start, hole->len);
915                         node = rb_next(node);
916                 }
917                 if (!found)
918                         fprintf(stderr, "\tstart: 0, len: %llu\n",
919                                 round_up(rec->isize,
920                                          root->fs_info->sectorsize));
921         }
922 }
923
924 static void print_ref_error(int errors)
925 {
926         if (errors & REF_ERR_NO_DIR_ITEM)
927                 fprintf(stderr, ", no dir item");
928         if (errors & REF_ERR_NO_DIR_INDEX)
929                 fprintf(stderr, ", no dir index");
930         if (errors & REF_ERR_NO_INODE_REF)
931                 fprintf(stderr, ", no inode ref");
932         if (errors & REF_ERR_DUP_DIR_ITEM)
933                 fprintf(stderr, ", dup dir item");
934         if (errors & REF_ERR_DUP_DIR_INDEX)
935                 fprintf(stderr, ", dup dir index");
936         if (errors & REF_ERR_DUP_INODE_REF)
937                 fprintf(stderr, ", dup inode ref");
938         if (errors & REF_ERR_INDEX_UNMATCH)
939                 fprintf(stderr, ", index mismatch");
940         if (errors & REF_ERR_FILETYPE_UNMATCH)
941                 fprintf(stderr, ", filetype mismatch");
942         if (errors & REF_ERR_NAME_TOO_LONG)
943                 fprintf(stderr, ", name too long");
944         if (errors & REF_ERR_NO_ROOT_REF)
945                 fprintf(stderr, ", no root ref");
946         if (errors & REF_ERR_NO_ROOT_BACKREF)
947                 fprintf(stderr, ", no root backref");
948         if (errors & REF_ERR_DUP_ROOT_REF)
949                 fprintf(stderr, ", dup root ref");
950         if (errors & REF_ERR_DUP_ROOT_BACKREF)
951                 fprintf(stderr, ", dup root backref");
952         fprintf(stderr, "\n");
953 }
954
955 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
956                                           u64 ino, int mod)
957 {
958         struct ptr_node *node;
959         struct cache_extent *cache;
960         struct inode_record *rec = NULL;
961         int ret;
962
963         cache = lookup_cache_extent(inode_cache, ino, 1);
964         if (cache) {
965                 node = container_of(cache, struct ptr_node, cache);
966                 rec = node->data;
967                 if (mod && rec->refs > 1) {
968                         node->data = clone_inode_rec(rec);
969                         if (IS_ERR(node->data))
970                                 return node->data;
971                         rec->refs--;
972                         rec = node->data;
973                 }
974         } else if (mod) {
975                 rec = calloc(1, sizeof(*rec));
976                 if (!rec)
977                         return ERR_PTR(-ENOMEM);
978                 rec->ino = ino;
979                 rec->extent_start = (u64)-1;
980                 rec->refs = 1;
981                 INIT_LIST_HEAD(&rec->backrefs);
982                 INIT_LIST_HEAD(&rec->orphan_extents);
983                 rec->holes = RB_ROOT;
984
985                 node = malloc(sizeof(*node));
986                 if (!node) {
987                         free(rec);
988                         return ERR_PTR(-ENOMEM);
989                 }
990                 node->cache.start = ino;
991                 node->cache.size = 1;
992                 node->data = rec;
993
994                 if (ino == BTRFS_FREE_INO_OBJECTID)
995                         rec->found_link = 1;
996
997                 ret = insert_cache_extent(inode_cache, &node->cache);
998                 if (ret)
999                         return ERR_PTR(-EEXIST);
1000         }
1001         return rec;
1002 }
1003
1004 static void free_orphan_data_extents(struct list_head *orphan_extents)
1005 {
1006         struct orphan_data_extent *orphan;
1007
1008         while (!list_empty(orphan_extents)) {
1009                 orphan = list_entry(orphan_extents->next,
1010                                     struct orphan_data_extent, list);
1011                 list_del(&orphan->list);
1012                 free(orphan);
1013         }
1014 }
1015
1016 static void free_inode_rec(struct inode_record *rec)
1017 {
1018         struct inode_backref *backref;
1019
1020         if (--rec->refs > 0)
1021                 return;
1022
1023         while (!list_empty(&rec->backrefs)) {
1024                 backref = to_inode_backref(rec->backrefs.next);
1025                 list_del(&backref->list);
1026                 free(backref);
1027         }
1028         free_orphan_data_extents(&rec->orphan_extents);
1029         free_file_extent_holes(&rec->holes);
1030         free(rec);
1031 }
1032
1033 static int can_free_inode_rec(struct inode_record *rec)
1034 {
1035         if (!rec->errors && rec->checked && rec->found_inode_item &&
1036             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1037                 return 1;
1038         return 0;
1039 }
1040
1041 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1042                                  struct inode_record *rec)
1043 {
1044         struct cache_extent *cache;
1045         struct inode_backref *tmp, *backref;
1046         struct ptr_node *node;
1047         u8 filetype;
1048
1049         if (!rec->found_inode_item)
1050                 return;
1051
1052         filetype = imode_to_type(rec->imode);
1053         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1054                 if (backref->found_dir_item && backref->found_dir_index) {
1055                         if (backref->filetype != filetype)
1056                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1057                         if (!backref->errors && backref->found_inode_ref &&
1058                             rec->nlink == rec->found_link) {
1059                                 list_del(&backref->list);
1060                                 free(backref);
1061                         }
1062                 }
1063         }
1064
1065         if (!rec->checked || rec->merging)
1066                 return;
1067
1068         if (S_ISDIR(rec->imode)) {
1069                 if (rec->found_size != rec->isize)
1070                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1071                 if (rec->found_file_extent)
1072                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1073         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1074                 if (rec->found_dir_item)
1075                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1076                 if (rec->found_size != rec->nbytes)
1077                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1078                 if (rec->nlink > 0 && !no_holes &&
1079                     (rec->extent_end < rec->isize ||
1080                      first_extent_gap(&rec->holes) < rec->isize))
1081                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1082         }
1083
1084         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1085                 if (rec->found_csum_item && rec->nodatasum)
1086                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1087                 if (rec->some_csum_missing && !rec->nodatasum)
1088                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1089         }
1090
1091         BUG_ON(rec->refs != 1);
1092         if (can_free_inode_rec(rec)) {
1093                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1094                 node = container_of(cache, struct ptr_node, cache);
1095                 BUG_ON(node->data != rec);
1096                 remove_cache_extent(inode_cache, &node->cache);
1097                 free(node);
1098                 free_inode_rec(rec);
1099         }
1100 }
1101
1102 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1103 {
1104         struct btrfs_path path;
1105         struct btrfs_key key;
1106         int ret;
1107
1108         key.objectid = BTRFS_ORPHAN_OBJECTID;
1109         key.type = BTRFS_ORPHAN_ITEM_KEY;
1110         key.offset = ino;
1111
1112         btrfs_init_path(&path);
1113         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1114         btrfs_release_path(&path);
1115         if (ret > 0)
1116                 ret = -ENOENT;
1117         return ret;
1118 }
1119
1120 static int process_inode_item(struct extent_buffer *eb,
1121                               int slot, struct btrfs_key *key,
1122                               struct shared_node *active_node)
1123 {
1124         struct inode_record *rec;
1125         struct btrfs_inode_item *item;
1126
1127         rec = active_node->current;
1128         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1129         if (rec->found_inode_item) {
1130                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1131                 return 1;
1132         }
1133         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1134         rec->nlink = btrfs_inode_nlink(eb, item);
1135         rec->isize = btrfs_inode_size(eb, item);
1136         rec->nbytes = btrfs_inode_nbytes(eb, item);
1137         rec->imode = btrfs_inode_mode(eb, item);
1138         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1139                 rec->nodatasum = 1;
1140         rec->found_inode_item = 1;
1141         if (rec->nlink == 0)
1142                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1143         maybe_free_inode_rec(&active_node->inode_cache, rec);
1144         return 0;
1145 }
1146
1147 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1148                                                 const char *name,
1149                                                 int namelen, u64 dir)
1150 {
1151         struct inode_backref *backref;
1152
1153         list_for_each_entry(backref, &rec->backrefs, list) {
1154                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1155                         break;
1156                 if (backref->dir != dir || backref->namelen != namelen)
1157                         continue;
1158                 if (memcmp(name, backref->name, namelen))
1159                         continue;
1160                 return backref;
1161         }
1162
1163         backref = malloc(sizeof(*backref) + namelen + 1);
1164         if (!backref)
1165                 return NULL;
1166         memset(backref, 0, sizeof(*backref));
1167         backref->dir = dir;
1168         backref->namelen = namelen;
1169         memcpy(backref->name, name, namelen);
1170         backref->name[namelen] = '\0';
1171         list_add_tail(&backref->list, &rec->backrefs);
1172         return backref;
1173 }
1174
1175 static int add_inode_backref(struct cache_tree *inode_cache,
1176                              u64 ino, u64 dir, u64 index,
1177                              const char *name, int namelen,
1178                              u8 filetype, u8 itemtype, int errors)
1179 {
1180         struct inode_record *rec;
1181         struct inode_backref *backref;
1182
1183         rec = get_inode_rec(inode_cache, ino, 1);
1184         BUG_ON(IS_ERR(rec));
1185         backref = get_inode_backref(rec, name, namelen, dir);
1186         BUG_ON(!backref);
1187         if (errors)
1188                 backref->errors |= errors;
1189         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1190                 if (backref->found_dir_index)
1191                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1192                 if (backref->found_inode_ref && backref->index != index)
1193                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1194                 if (backref->found_dir_item && backref->filetype != filetype)
1195                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1196
1197                 backref->index = index;
1198                 backref->filetype = filetype;
1199                 backref->found_dir_index = 1;
1200         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1201                 rec->found_link++;
1202                 if (backref->found_dir_item)
1203                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1204                 if (backref->found_dir_index && backref->filetype != filetype)
1205                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1206
1207                 backref->filetype = filetype;
1208                 backref->found_dir_item = 1;
1209         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1210                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1211                 if (backref->found_inode_ref)
1212                         backref->errors |= REF_ERR_DUP_INODE_REF;
1213                 if (backref->found_dir_index && backref->index != index)
1214                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1215                 else
1216                         backref->index = index;
1217
1218                 backref->ref_type = itemtype;
1219                 backref->found_inode_ref = 1;
1220         } else {
1221                 BUG_ON(1);
1222         }
1223
1224         maybe_free_inode_rec(inode_cache, rec);
1225         return 0;
1226 }
1227
1228 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1229                             struct cache_tree *dst_cache)
1230 {
1231         struct inode_backref *backref;
1232         u32 dir_count = 0;
1233         int ret = 0;
1234
1235         dst->merging = 1;
1236         list_for_each_entry(backref, &src->backrefs, list) {
1237                 if (backref->found_dir_index) {
1238                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1239                                         backref->index, backref->name,
1240                                         backref->namelen, backref->filetype,
1241                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1242                 }
1243                 if (backref->found_dir_item) {
1244                         dir_count++;
1245                         add_inode_backref(dst_cache, dst->ino,
1246                                         backref->dir, 0, backref->name,
1247                                         backref->namelen, backref->filetype,
1248                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1249                 }
1250                 if (backref->found_inode_ref) {
1251                         add_inode_backref(dst_cache, dst->ino,
1252                                         backref->dir, backref->index,
1253                                         backref->name, backref->namelen, 0,
1254                                         backref->ref_type, backref->errors);
1255                 }
1256         }
1257
1258         if (src->found_dir_item)
1259                 dst->found_dir_item = 1;
1260         if (src->found_file_extent)
1261                 dst->found_file_extent = 1;
1262         if (src->found_csum_item)
1263                 dst->found_csum_item = 1;
1264         if (src->some_csum_missing)
1265                 dst->some_csum_missing = 1;
1266         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1267                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1268                 if (ret < 0)
1269                         return ret;
1270         }
1271
1272         BUG_ON(src->found_link < dir_count);
1273         dst->found_link += src->found_link - dir_count;
1274         dst->found_size += src->found_size;
1275         if (src->extent_start != (u64)-1) {
1276                 if (dst->extent_start == (u64)-1) {
1277                         dst->extent_start = src->extent_start;
1278                         dst->extent_end = src->extent_end;
1279                 } else {
1280                         if (dst->extent_end > src->extent_start)
1281                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1282                         else if (dst->extent_end < src->extent_start) {
1283                                 ret = add_file_extent_hole(&dst->holes,
1284                                         dst->extent_end,
1285                                         src->extent_start - dst->extent_end);
1286                         }
1287                         if (dst->extent_end < src->extent_end)
1288                                 dst->extent_end = src->extent_end;
1289                 }
1290         }
1291
1292         dst->errors |= src->errors;
1293         if (src->found_inode_item) {
1294                 if (!dst->found_inode_item) {
1295                         dst->nlink = src->nlink;
1296                         dst->isize = src->isize;
1297                         dst->nbytes = src->nbytes;
1298                         dst->imode = src->imode;
1299                         dst->nodatasum = src->nodatasum;
1300                         dst->found_inode_item = 1;
1301                 } else {
1302                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1303                 }
1304         }
1305         dst->merging = 0;
1306
1307         return 0;
1308 }
1309
1310 static int splice_shared_node(struct shared_node *src_node,
1311                               struct shared_node *dst_node)
1312 {
1313         struct cache_extent *cache;
1314         struct ptr_node *node, *ins;
1315         struct cache_tree *src, *dst;
1316         struct inode_record *rec, *conflict;
1317         u64 current_ino = 0;
1318         int splice = 0;
1319         int ret;
1320
1321         if (--src_node->refs == 0)
1322                 splice = 1;
1323         if (src_node->current)
1324                 current_ino = src_node->current->ino;
1325
1326         src = &src_node->root_cache;
1327         dst = &dst_node->root_cache;
1328 again:
1329         cache = search_cache_extent(src, 0);
1330         while (cache) {
1331                 node = container_of(cache, struct ptr_node, cache);
1332                 rec = node->data;
1333                 cache = next_cache_extent(cache);
1334
1335                 if (splice) {
1336                         remove_cache_extent(src, &node->cache);
1337                         ins = node;
1338                 } else {
1339                         ins = malloc(sizeof(*ins));
1340                         BUG_ON(!ins);
1341                         ins->cache.start = node->cache.start;
1342                         ins->cache.size = node->cache.size;
1343                         ins->data = rec;
1344                         rec->refs++;
1345                 }
1346                 ret = insert_cache_extent(dst, &ins->cache);
1347                 if (ret == -EEXIST) {
1348                         conflict = get_inode_rec(dst, rec->ino, 1);
1349                         BUG_ON(IS_ERR(conflict));
1350                         merge_inode_recs(rec, conflict, dst);
1351                         if (rec->checked) {
1352                                 conflict->checked = 1;
1353                                 if (dst_node->current == conflict)
1354                                         dst_node->current = NULL;
1355                         }
1356                         maybe_free_inode_rec(dst, conflict);
1357                         free_inode_rec(rec);
1358                         free(ins);
1359                 } else {
1360                         BUG_ON(ret);
1361                 }
1362         }
1363
1364         if (src == &src_node->root_cache) {
1365                 src = &src_node->inode_cache;
1366                 dst = &dst_node->inode_cache;
1367                 goto again;
1368         }
1369
1370         if (current_ino > 0 && (!dst_node->current ||
1371             current_ino > dst_node->current->ino)) {
1372                 if (dst_node->current) {
1373                         dst_node->current->checked = 1;
1374                         maybe_free_inode_rec(dst, dst_node->current);
1375                 }
1376                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1377                 BUG_ON(IS_ERR(dst_node->current));
1378         }
1379         return 0;
1380 }
1381
1382 static void free_inode_ptr(struct cache_extent *cache)
1383 {
1384         struct ptr_node *node;
1385         struct inode_record *rec;
1386
1387         node = container_of(cache, struct ptr_node, cache);
1388         rec = node->data;
1389         free_inode_rec(rec);
1390         free(node);
1391 }
1392
1393 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1394
1395 static struct shared_node *find_shared_node(struct cache_tree *shared,
1396                                             u64 bytenr)
1397 {
1398         struct cache_extent *cache;
1399         struct shared_node *node;
1400
1401         cache = lookup_cache_extent(shared, bytenr, 1);
1402         if (cache) {
1403                 node = container_of(cache, struct shared_node, cache);
1404                 return node;
1405         }
1406         return NULL;
1407 }
1408
1409 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1410 {
1411         int ret;
1412         struct shared_node *node;
1413
1414         node = calloc(1, sizeof(*node));
1415         if (!node)
1416                 return -ENOMEM;
1417         node->cache.start = bytenr;
1418         node->cache.size = 1;
1419         cache_tree_init(&node->root_cache);
1420         cache_tree_init(&node->inode_cache);
1421         node->refs = refs;
1422
1423         ret = insert_cache_extent(shared, &node->cache);
1424
1425         return ret;
1426 }
1427
1428 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1429                              struct walk_control *wc, int level)
1430 {
1431         struct shared_node *node;
1432         struct shared_node *dest;
1433         int ret;
1434
1435         if (level == wc->active_node)
1436                 return 0;
1437
1438         BUG_ON(wc->active_node <= level);
1439         node = find_shared_node(&wc->shared, bytenr);
1440         if (!node) {
1441                 ret = add_shared_node(&wc->shared, bytenr, refs);
1442                 BUG_ON(ret);
1443                 node = find_shared_node(&wc->shared, bytenr);
1444                 wc->nodes[level] = node;
1445                 wc->active_node = level;
1446                 return 0;
1447         }
1448
1449         if (wc->root_level == wc->active_node &&
1450             btrfs_root_refs(&root->root_item) == 0) {
1451                 if (--node->refs == 0) {
1452                         free_inode_recs_tree(&node->root_cache);
1453                         free_inode_recs_tree(&node->inode_cache);
1454                         remove_cache_extent(&wc->shared, &node->cache);
1455                         free(node);
1456                 }
1457                 return 1;
1458         }
1459
1460         dest = wc->nodes[wc->active_node];
1461         splice_shared_node(node, dest);
1462         if (node->refs == 0) {
1463                 remove_cache_extent(&wc->shared, &node->cache);
1464                 free(node);
1465         }
1466         return 1;
1467 }
1468
1469 static int leave_shared_node(struct btrfs_root *root,
1470                              struct walk_control *wc, int level)
1471 {
1472         struct shared_node *node;
1473         struct shared_node *dest;
1474         int i;
1475
1476         if (level == wc->root_level)
1477                 return 0;
1478
1479         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1480                 if (wc->nodes[i])
1481                         break;
1482         }
1483         BUG_ON(i >= BTRFS_MAX_LEVEL);
1484
1485         node = wc->nodes[wc->active_node];
1486         wc->nodes[wc->active_node] = NULL;
1487         wc->active_node = i;
1488
1489         dest = wc->nodes[wc->active_node];
1490         if (wc->active_node < wc->root_level ||
1491             btrfs_root_refs(&root->root_item) > 0) {
1492                 BUG_ON(node->refs <= 1);
1493                 splice_shared_node(node, dest);
1494         } else {
1495                 BUG_ON(node->refs < 2);
1496                 node->refs--;
1497         }
1498         return 0;
1499 }
1500
1501 /*
1502  * Returns:
1503  * < 0 - on error
1504  * 1   - if the root with id child_root_id is a child of root parent_root_id
1505  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1506  *       has other root(s) as parent(s)
1507  * 2   - if the root child_root_id doesn't have any parent roots
1508  */
1509 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1510                          u64 child_root_id)
1511 {
1512         struct btrfs_path path;
1513         struct btrfs_key key;
1514         struct extent_buffer *leaf;
1515         int has_parent = 0;
1516         int ret;
1517
1518         btrfs_init_path(&path);
1519
1520         key.objectid = parent_root_id;
1521         key.type = BTRFS_ROOT_REF_KEY;
1522         key.offset = child_root_id;
1523         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1524                                 0, 0);
1525         if (ret < 0)
1526                 return ret;
1527         btrfs_release_path(&path);
1528         if (!ret)
1529                 return 1;
1530
1531         key.objectid = child_root_id;
1532         key.type = BTRFS_ROOT_BACKREF_KEY;
1533         key.offset = 0;
1534         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1535                                 0, 0);
1536         if (ret < 0)
1537                 goto out;
1538
1539         while (1) {
1540                 leaf = path.nodes[0];
1541                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1542                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1543                         if (ret)
1544                                 break;
1545                         leaf = path.nodes[0];
1546                 }
1547
1548                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1549                 if (key.objectid != child_root_id ||
1550                     key.type != BTRFS_ROOT_BACKREF_KEY)
1551                         break;
1552
1553                 has_parent = 1;
1554
1555                 if (key.offset == parent_root_id) {
1556                         btrfs_release_path(&path);
1557                         return 1;
1558                 }
1559
1560                 path.slots[0]++;
1561         }
1562 out:
1563         btrfs_release_path(&path);
1564         if (ret < 0)
1565                 return ret;
1566         return has_parent ? 0 : 2;
1567 }
1568
1569 static int process_dir_item(struct extent_buffer *eb,
1570                             int slot, struct btrfs_key *key,
1571                             struct shared_node *active_node)
1572 {
1573         u32 total;
1574         u32 cur = 0;
1575         u32 len;
1576         u32 name_len;
1577         u32 data_len;
1578         int error;
1579         int nritems = 0;
1580         u8 filetype;
1581         struct btrfs_dir_item *di;
1582         struct inode_record *rec;
1583         struct cache_tree *root_cache;
1584         struct cache_tree *inode_cache;
1585         struct btrfs_key location;
1586         char namebuf[BTRFS_NAME_LEN];
1587
1588         root_cache = &active_node->root_cache;
1589         inode_cache = &active_node->inode_cache;
1590         rec = active_node->current;
1591         rec->found_dir_item = 1;
1592
1593         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1594         total = btrfs_item_size_nr(eb, slot);
1595         while (cur < total) {
1596                 nritems++;
1597                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1598                 name_len = btrfs_dir_name_len(eb, di);
1599                 data_len = btrfs_dir_data_len(eb, di);
1600                 filetype = btrfs_dir_type(eb, di);
1601
1602                 rec->found_size += name_len;
1603                 if (cur + sizeof(*di) + name_len > total ||
1604                     name_len > BTRFS_NAME_LEN) {
1605                         error = REF_ERR_NAME_TOO_LONG;
1606
1607                         if (cur + sizeof(*di) > total)
1608                                 break;
1609                         len = min_t(u32, total - cur - sizeof(*di),
1610                                     BTRFS_NAME_LEN);
1611                 } else {
1612                         len = name_len;
1613                         error = 0;
1614                 }
1615
1616                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1617
1618                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1619                     key->offset != btrfs_name_hash(namebuf, len)) {
1620                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1621                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1622                         key->objectid, key->offset, namebuf, len, filetype,
1623                         key->offset, btrfs_name_hash(namebuf, len));
1624                 }
1625
1626                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1627                         add_inode_backref(inode_cache, location.objectid,
1628                                           key->objectid, key->offset, namebuf,
1629                                           len, filetype, key->type, error);
1630                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1631                         add_inode_backref(root_cache, location.objectid,
1632                                           key->objectid, key->offset,
1633                                           namebuf, len, filetype,
1634                                           key->type, error);
1635                 } else {
1636                         fprintf(stderr, "invalid location in dir item %u\n",
1637                                 location.type);
1638                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1639                                           key->objectid, key->offset, namebuf,
1640                                           len, filetype, key->type, error);
1641                 }
1642
1643                 len = sizeof(*di) + name_len + data_len;
1644                 di = (struct btrfs_dir_item *)((char *)di + len);
1645                 cur += len;
1646         }
1647         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1648                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1649
1650         return 0;
1651 }
1652
1653 static int process_inode_ref(struct extent_buffer *eb,
1654                              int slot, struct btrfs_key *key,
1655                              struct shared_node *active_node)
1656 {
1657         u32 total;
1658         u32 cur = 0;
1659         u32 len;
1660         u32 name_len;
1661         u64 index;
1662         int error;
1663         struct cache_tree *inode_cache;
1664         struct btrfs_inode_ref *ref;
1665         char namebuf[BTRFS_NAME_LEN];
1666
1667         inode_cache = &active_node->inode_cache;
1668
1669         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1670         total = btrfs_item_size_nr(eb, slot);
1671         while (cur < total) {
1672                 name_len = btrfs_inode_ref_name_len(eb, ref);
1673                 index = btrfs_inode_ref_index(eb, ref);
1674
1675                 /* inode_ref + namelen should not cross item boundary */
1676                 if (cur + sizeof(*ref) + name_len > total ||
1677                     name_len > BTRFS_NAME_LEN) {
1678                         if (total < cur + sizeof(*ref))
1679                                 break;
1680
1681                         /* Still try to read out the remaining part */
1682                         len = min_t(u32, total - cur - sizeof(*ref),
1683                                     BTRFS_NAME_LEN);
1684                         error = REF_ERR_NAME_TOO_LONG;
1685                 } else {
1686                         len = name_len;
1687                         error = 0;
1688                 }
1689
1690                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1691                 add_inode_backref(inode_cache, key->objectid, key->offset,
1692                                   index, namebuf, len, 0, key->type, error);
1693
1694                 len = sizeof(*ref) + name_len;
1695                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1696                 cur += len;
1697         }
1698         return 0;
1699 }
1700
1701 static int process_inode_extref(struct extent_buffer *eb,
1702                                 int slot, struct btrfs_key *key,
1703                                 struct shared_node *active_node)
1704 {
1705         u32 total;
1706         u32 cur = 0;
1707         u32 len;
1708         u32 name_len;
1709         u64 index;
1710         u64 parent;
1711         int error;
1712         struct cache_tree *inode_cache;
1713         struct btrfs_inode_extref *extref;
1714         char namebuf[BTRFS_NAME_LEN];
1715
1716         inode_cache = &active_node->inode_cache;
1717
1718         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1719         total = btrfs_item_size_nr(eb, slot);
1720         while (cur < total) {
1721                 name_len = btrfs_inode_extref_name_len(eb, extref);
1722                 index = btrfs_inode_extref_index(eb, extref);
1723                 parent = btrfs_inode_extref_parent(eb, extref);
1724                 if (name_len <= BTRFS_NAME_LEN) {
1725                         len = name_len;
1726                         error = 0;
1727                 } else {
1728                         len = BTRFS_NAME_LEN;
1729                         error = REF_ERR_NAME_TOO_LONG;
1730                 }
1731                 read_extent_buffer(eb, namebuf,
1732                                    (unsigned long)(extref + 1), len);
1733                 add_inode_backref(inode_cache, key->objectid, parent,
1734                                   index, namebuf, len, 0, key->type, error);
1735
1736                 len = sizeof(*extref) + name_len;
1737                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1738                 cur += len;
1739         }
1740         return 0;
1741
1742 }
1743
1744 static int count_csum_range(struct btrfs_root *root, u64 start,
1745                             u64 len, u64 *found)
1746 {
1747         struct btrfs_key key;
1748         struct btrfs_path path;
1749         struct extent_buffer *leaf;
1750         int ret;
1751         size_t size;
1752         *found = 0;
1753         u64 csum_end;
1754         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1755
1756         btrfs_init_path(&path);
1757
1758         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1759         key.offset = start;
1760         key.type = BTRFS_EXTENT_CSUM_KEY;
1761
1762         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1763                                 &key, &path, 0, 0);
1764         if (ret < 0)
1765                 goto out;
1766         if (ret > 0 && path.slots[0] > 0) {
1767                 leaf = path.nodes[0];
1768                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1769                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1770                     key.type == BTRFS_EXTENT_CSUM_KEY)
1771                         path.slots[0]--;
1772         }
1773
1774         while (len > 0) {
1775                 leaf = path.nodes[0];
1776                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1777                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1778                         if (ret > 0)
1779                                 break;
1780                         else if (ret < 0)
1781                                 goto out;
1782                         leaf = path.nodes[0];
1783                 }
1784
1785                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1786                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1787                     key.type != BTRFS_EXTENT_CSUM_KEY)
1788                         break;
1789
1790                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1791                 if (key.offset >= start + len)
1792                         break;
1793
1794                 if (key.offset > start)
1795                         start = key.offset;
1796
1797                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1798                 csum_end = key.offset + (size / csum_size) *
1799                            root->fs_info->sectorsize;
1800                 if (csum_end > start) {
1801                         size = min(csum_end - start, len);
1802                         len -= size;
1803                         start += size;
1804                         *found += size;
1805                 }
1806
1807                 path.slots[0]++;
1808         }
1809 out:
1810         btrfs_release_path(&path);
1811         if (ret < 0)
1812                 return ret;
1813         return 0;
1814 }
1815
1816 static int process_file_extent(struct btrfs_root *root,
1817                                 struct extent_buffer *eb,
1818                                 int slot, struct btrfs_key *key,
1819                                 struct shared_node *active_node)
1820 {
1821         struct inode_record *rec;
1822         struct btrfs_file_extent_item *fi;
1823         u64 num_bytes = 0;
1824         u64 disk_bytenr = 0;
1825         u64 extent_offset = 0;
1826         u64 mask = root->fs_info->sectorsize - 1;
1827         int extent_type;
1828         int ret;
1829
1830         rec = active_node->current;
1831         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1832         rec->found_file_extent = 1;
1833
1834         if (rec->extent_start == (u64)-1) {
1835                 rec->extent_start = key->offset;
1836                 rec->extent_end = key->offset;
1837         }
1838
1839         if (rec->extent_end > key->offset)
1840                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1841         else if (rec->extent_end < key->offset) {
1842                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1843                                            key->offset - rec->extent_end);
1844                 if (ret < 0)
1845                         return ret;
1846         }
1847
1848         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1849         extent_type = btrfs_file_extent_type(eb, fi);
1850
1851         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1852                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1853                 if (num_bytes == 0)
1854                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1855                 rec->found_size += num_bytes;
1856                 num_bytes = (num_bytes + mask) & ~mask;
1857         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1858                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1859                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1860                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1861                 extent_offset = btrfs_file_extent_offset(eb, fi);
1862                 if (num_bytes == 0 || (num_bytes & mask))
1863                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1864                 if (num_bytes + extent_offset >
1865                     btrfs_file_extent_ram_bytes(eb, fi))
1866                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1867                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1868                     (btrfs_file_extent_compression(eb, fi) ||
1869                      btrfs_file_extent_encryption(eb, fi) ||
1870                      btrfs_file_extent_other_encoding(eb, fi)))
1871                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1872                 if (disk_bytenr > 0)
1873                         rec->found_size += num_bytes;
1874         } else {
1875                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1876         }
1877         rec->extent_end = key->offset + num_bytes;
1878
1879         /*
1880          * The data reloc tree will copy full extents into its inode and then
1881          * copy the corresponding csums.  Because the extent it copied could be
1882          * a preallocated extent that hasn't been written to yet there may be no
1883          * csums to copy, ergo we won't have csums for our file extent.  This is
1884          * ok so just don't bother checking csums if the inode belongs to the
1885          * data reloc tree.
1886          */
1887         if (disk_bytenr > 0 &&
1888             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1889                 u64 found;
1890                 if (btrfs_file_extent_compression(eb, fi))
1891                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1892                 else
1893                         disk_bytenr += extent_offset;
1894
1895                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1896                 if (ret < 0)
1897                         return ret;
1898                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1899                         if (found > 0)
1900                                 rec->found_csum_item = 1;
1901                         if (found < num_bytes)
1902                                 rec->some_csum_missing = 1;
1903                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1904                         if (found > 0)
1905                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1906                 }
1907         }
1908         return 0;
1909 }
1910
1911 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1912                             struct walk_control *wc)
1913 {
1914         struct btrfs_key key;
1915         u32 nritems;
1916         int i;
1917         int ret = 0;
1918         struct cache_tree *inode_cache;
1919         struct shared_node *active_node;
1920
1921         if (wc->root_level == wc->active_node &&
1922             btrfs_root_refs(&root->root_item) == 0)
1923                 return 0;
1924
1925         active_node = wc->nodes[wc->active_node];
1926         inode_cache = &active_node->inode_cache;
1927         nritems = btrfs_header_nritems(eb);
1928         for (i = 0; i < nritems; i++) {
1929                 btrfs_item_key_to_cpu(eb, &key, i);
1930
1931                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1932                         continue;
1933                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1934                         continue;
1935
1936                 if (active_node->current == NULL ||
1937                     active_node->current->ino < key.objectid) {
1938                         if (active_node->current) {
1939                                 active_node->current->checked = 1;
1940                                 maybe_free_inode_rec(inode_cache,
1941                                                      active_node->current);
1942                         }
1943                         active_node->current = get_inode_rec(inode_cache,
1944                                                              key.objectid, 1);
1945                         BUG_ON(IS_ERR(active_node->current));
1946                 }
1947                 switch (key.type) {
1948                 case BTRFS_DIR_ITEM_KEY:
1949                 case BTRFS_DIR_INDEX_KEY:
1950                         ret = process_dir_item(eb, i, &key, active_node);
1951                         break;
1952                 case BTRFS_INODE_REF_KEY:
1953                         ret = process_inode_ref(eb, i, &key, active_node);
1954                         break;
1955                 case BTRFS_INODE_EXTREF_KEY:
1956                         ret = process_inode_extref(eb, i, &key, active_node);
1957                         break;
1958                 case BTRFS_INODE_ITEM_KEY:
1959                         ret = process_inode_item(eb, i, &key, active_node);
1960                         break;
1961                 case BTRFS_EXTENT_DATA_KEY:
1962                         ret = process_file_extent(root, eb, i, &key,
1963                                                   active_node);
1964                         break;
1965                 default:
1966                         break;
1967                 };
1968         }
1969         return ret;
1970 }
1971
1972 struct node_refs {
1973         u64 bytenr[BTRFS_MAX_LEVEL];
1974         u64 refs[BTRFS_MAX_LEVEL];
1975         int need_check[BTRFS_MAX_LEVEL];
1976 };
1977
1978 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1979                              struct node_refs *nrefs, u64 level);
1980 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1981                             unsigned int ext_ref);
1982
1983 /*
1984  * Returns >0  Found error, not fatal, should continue
1985  * Returns <0  Fatal error, must exit the whole check
1986  * Returns 0   No errors found
1987  */
1988 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1989                                struct node_refs *nrefs, int *level, int ext_ref)
1990 {
1991         struct extent_buffer *cur = path->nodes[0];
1992         struct btrfs_key key;
1993         u64 cur_bytenr;
1994         u32 nritems;
1995         u64 first_ino = 0;
1996         int root_level = btrfs_header_level(root->node);
1997         int i;
1998         int ret = 0; /* Final return value */
1999         int err = 0; /* Positive error bitmap */
2000
2001         cur_bytenr = cur->start;
2002
2003         /* skip to first inode item or the first inode number change */
2004         nritems = btrfs_header_nritems(cur);
2005         for (i = 0; i < nritems; i++) {
2006                 btrfs_item_key_to_cpu(cur, &key, i);
2007                 if (i == 0)
2008                         first_ino = key.objectid;
2009                 if (key.type == BTRFS_INODE_ITEM_KEY ||
2010                     (first_ino && first_ino != key.objectid))
2011                         break;
2012         }
2013         if (i == nritems) {
2014                 path->slots[0] = nritems;
2015                 return 0;
2016         }
2017         path->slots[0] = i;
2018
2019 again:
2020         err |= check_inode_item(root, path, ext_ref);
2021
2022         /* modify cur since check_inode_item may change path */
2023         cur = path->nodes[0];
2024
2025         if (err & LAST_ITEM)
2026                 goto out;
2027
2028         /* still have inode items in thie leaf */
2029         if (cur->start == cur_bytenr)
2030                 goto again;
2031
2032         /*
2033          * we have switched to another leaf, above nodes may
2034          * have changed, here walk down the path, if a node
2035          * or leaf is shared, check whether we can skip this
2036          * node or leaf.
2037          */
2038         for (i = root_level; i >= 0; i--) {
2039                 if (path->nodes[i]->start == nrefs->bytenr[i])
2040                         continue;
2041
2042                 ret = update_nodes_refs(root,
2043                                 path->nodes[i]->start,
2044                                 nrefs, i);
2045                 if (ret)
2046                         goto out;
2047
2048                 if (!nrefs->need_check[i]) {
2049                         *level += 1;
2050                         break;
2051                 }
2052         }
2053
2054         for (i = 0; i < *level; i++) {
2055                 free_extent_buffer(path->nodes[i]);
2056                 path->nodes[i] = NULL;
2057         }
2058 out:
2059         err &= ~LAST_ITEM;
2060         if (err && !ret)
2061                 ret = err;
2062         return ret;
2063 }
2064
2065 static void reada_walk_down(struct btrfs_root *root,
2066                             struct extent_buffer *node, int slot)
2067 {
2068         struct btrfs_fs_info *fs_info = root->fs_info;
2069         u64 bytenr;
2070         u64 ptr_gen;
2071         u32 nritems;
2072         int i;
2073         int level;
2074
2075         level = btrfs_header_level(node);
2076         if (level != 1)
2077                 return;
2078
2079         nritems = btrfs_header_nritems(node);
2080         for (i = slot; i < nritems; i++) {
2081                 bytenr = btrfs_node_blockptr(node, i);
2082                 ptr_gen = btrfs_node_ptr_generation(node, i);
2083                 readahead_tree_block(fs_info, bytenr, ptr_gen);
2084         }
2085 }
2086
2087 /*
2088  * Check the child node/leaf by the following condition:
2089  * 1. the first item key of the node/leaf should be the same with the one
2090  *    in parent.
2091  * 2. block in parent node should match the child node/leaf.
2092  * 3. generation of parent node and child's header should be consistent.
2093  *
2094  * Or the child node/leaf pointed by the key in parent is not valid.
2095  *
2096  * We hope to check leaf owner too, but since subvol may share leaves,
2097  * which makes leaf owner check not so strong, key check should be
2098  * sufficient enough for that case.
2099  */
2100 static int check_child_node(struct extent_buffer *parent, int slot,
2101                             struct extent_buffer *child)
2102 {
2103         struct btrfs_key parent_key;
2104         struct btrfs_key child_key;
2105         int ret = 0;
2106
2107         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2108         if (btrfs_header_level(child) == 0)
2109                 btrfs_item_key_to_cpu(child, &child_key, 0);
2110         else
2111                 btrfs_node_key_to_cpu(child, &child_key, 0);
2112
2113         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2114                 ret = -EINVAL;
2115                 fprintf(stderr,
2116                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2117                         parent_key.objectid, parent_key.type, parent_key.offset,
2118                         child_key.objectid, child_key.type, child_key.offset);
2119         }
2120         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2121                 ret = -EINVAL;
2122                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2123                         btrfs_node_blockptr(parent, slot),
2124                         btrfs_header_bytenr(child));
2125         }
2126         if (btrfs_node_ptr_generation(parent, slot) !=
2127             btrfs_header_generation(child)) {
2128                 ret = -EINVAL;
2129                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2130                         btrfs_header_generation(child),
2131                         btrfs_node_ptr_generation(parent, slot));
2132         }
2133         return ret;
2134 }
2135
2136 /*
2137  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2138  * in every fs or file tree check. Here we find its all root ids, and only check
2139  * it in the fs or file tree which has the smallest root id.
2140  */
2141 static int need_check(struct btrfs_root *root, struct ulist *roots)
2142 {
2143         struct rb_node *node;
2144         struct ulist_node *u;
2145
2146         if (roots->nnodes == 1)
2147                 return 1;
2148
2149         node = rb_first(&roots->root);
2150         u = rb_entry(node, struct ulist_node, rb_node);
2151         /*
2152          * current root id is not smallest, we skip it and let it be checked
2153          * in the fs or file tree who hash the smallest root id.
2154          */
2155         if (root->objectid != u->val)
2156                 return 0;
2157
2158         return 1;
2159 }
2160
2161 /*
2162  * for a tree node or leaf, we record its reference count, so later if we still
2163  * process this node or leaf, don't need to compute its reference count again.
2164  */
2165 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2166                              struct node_refs *nrefs, u64 level)
2167 {
2168         int check, ret;
2169         u64 refs;
2170         struct ulist *roots;
2171
2172         if (nrefs->bytenr[level] != bytenr) {
2173                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2174                                        level, 1, &refs, NULL);
2175                 if (ret < 0)
2176                         return ret;
2177
2178                 nrefs->bytenr[level] = bytenr;
2179                 nrefs->refs[level] = refs;
2180                 if (refs > 1) {
2181                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2182                                                    0, &roots);
2183                         if (ret)
2184                                 return -EIO;
2185
2186                         check = need_check(root, roots);
2187                         ulist_free(roots);
2188                         nrefs->need_check[level] = check;
2189                 } else {
2190                         nrefs->need_check[level] = 1;
2191                 }
2192         }
2193
2194         return 0;
2195 }
2196
2197 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2198                           struct walk_control *wc, int *level,
2199                           struct node_refs *nrefs)
2200 {
2201         enum btrfs_tree_block_status status;
2202         u64 bytenr;
2203         u64 ptr_gen;
2204         struct btrfs_fs_info *fs_info = root->fs_info;
2205         struct extent_buffer *next;
2206         struct extent_buffer *cur;
2207         int ret, err = 0;
2208         u64 refs;
2209
2210         WARN_ON(*level < 0);
2211         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2212
2213         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2214                 refs = nrefs->refs[*level];
2215                 ret = 0;
2216         } else {
2217                 ret = btrfs_lookup_extent_info(NULL, root,
2218                                        path->nodes[*level]->start,
2219                                        *level, 1, &refs, NULL);
2220                 if (ret < 0) {
2221                         err = ret;
2222                         goto out;
2223                 }
2224                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2225                 nrefs->refs[*level] = refs;
2226         }
2227
2228         if (refs > 1) {
2229                 ret = enter_shared_node(root, path->nodes[*level]->start,
2230                                         refs, wc, *level);
2231                 if (ret > 0) {
2232                         err = ret;
2233                         goto out;
2234                 }
2235         }
2236
2237         while (*level >= 0) {
2238                 WARN_ON(*level < 0);
2239                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2240                 cur = path->nodes[*level];
2241
2242                 if (btrfs_header_level(cur) != *level)
2243                         WARN_ON(1);
2244
2245                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2246                         break;
2247                 if (*level == 0) {
2248                         ret = process_one_leaf(root, cur, wc);
2249                         if (ret < 0)
2250                                 err = ret;
2251                         break;
2252                 }
2253                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2254                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2255
2256                 if (bytenr == nrefs->bytenr[*level - 1]) {
2257                         refs = nrefs->refs[*level - 1];
2258                 } else {
2259                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2260                                         *level - 1, 1, &refs, NULL);
2261                         if (ret < 0) {
2262                                 refs = 0;
2263                         } else {
2264                                 nrefs->bytenr[*level - 1] = bytenr;
2265                                 nrefs->refs[*level - 1] = refs;
2266                         }
2267                 }
2268
2269                 if (refs > 1) {
2270                         ret = enter_shared_node(root, bytenr, refs,
2271                                                 wc, *level - 1);
2272                         if (ret > 0) {
2273                                 path->slots[*level]++;
2274                                 continue;
2275                         }
2276                 }
2277
2278                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2279                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2280                         free_extent_buffer(next);
2281                         reada_walk_down(root, cur, path->slots[*level]);
2282                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2283                         if (!extent_buffer_uptodate(next)) {
2284                                 struct btrfs_key node_key;
2285
2286                                 btrfs_node_key_to_cpu(path->nodes[*level],
2287                                                       &node_key,
2288                                                       path->slots[*level]);
2289                                 btrfs_add_corrupt_extent_record(root->fs_info,
2290                                                 &node_key,
2291                                                 path->nodes[*level]->start,
2292                                                 root->fs_info->nodesize,
2293                                                 *level);
2294                                 err = -EIO;
2295                                 goto out;
2296                         }
2297                 }
2298
2299                 ret = check_child_node(cur, path->slots[*level], next);
2300                 if (ret) {
2301                         free_extent_buffer(next);
2302                         err = ret;
2303                         goto out;
2304                 }
2305
2306                 if (btrfs_is_leaf(next))
2307                         status = btrfs_check_leaf(root, NULL, next);
2308                 else
2309                         status = btrfs_check_node(root, NULL, next);
2310                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2311                         free_extent_buffer(next);
2312                         err = -EIO;
2313                         goto out;
2314                 }
2315
2316                 *level = *level - 1;
2317                 free_extent_buffer(path->nodes[*level]);
2318                 path->nodes[*level] = next;
2319                 path->slots[*level] = 0;
2320         }
2321 out:
2322         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2323         return err;
2324 }
2325
2326 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2327                             unsigned int ext_ref);
2328
2329 /*
2330  * Returns >0  Found error, should continue
2331  * Returns <0  Fatal error, must exit the whole check
2332  * Returns 0   No errors found
2333  */
2334 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2335                              int *level, struct node_refs *nrefs, int ext_ref)
2336 {
2337         enum btrfs_tree_block_status status;
2338         u64 bytenr;
2339         u64 ptr_gen;
2340         struct btrfs_fs_info *fs_info = root->fs_info;
2341         struct extent_buffer *next;
2342         struct extent_buffer *cur;
2343         int ret;
2344
2345         WARN_ON(*level < 0);
2346         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2347
2348         ret = update_nodes_refs(root, path->nodes[*level]->start,
2349                                 nrefs, *level);
2350         if (ret < 0)
2351                 return ret;
2352
2353         while (*level >= 0) {
2354                 WARN_ON(*level < 0);
2355                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2356                 cur = path->nodes[*level];
2357
2358                 if (btrfs_header_level(cur) != *level)
2359                         WARN_ON(1);
2360
2361                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2362                         break;
2363                 /* Don't forgot to check leaf/node validation */
2364                 if (*level == 0) {
2365                         ret = btrfs_check_leaf(root, NULL, cur);
2366                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2367                                 ret = -EIO;
2368                                 break;
2369                         }
2370                         ret = process_one_leaf_v2(root, path, nrefs,
2371                                                   level, ext_ref);
2372                         cur = path->nodes[*level];
2373                         break;
2374                 } else {
2375                         ret = btrfs_check_node(root, NULL, cur);
2376                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2377                                 ret = -EIO;
2378                                 break;
2379                         }
2380                 }
2381                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2382                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2383
2384                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2385                 if (ret)
2386                         break;
2387                 if (!nrefs->need_check[*level - 1]) {
2388                         path->slots[*level]++;
2389                         continue;
2390                 }
2391
2392                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2393                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2394                         free_extent_buffer(next);
2395                         reada_walk_down(root, cur, path->slots[*level]);
2396                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2397                         if (!extent_buffer_uptodate(next)) {
2398                                 struct btrfs_key node_key;
2399
2400                                 btrfs_node_key_to_cpu(path->nodes[*level],
2401                                                       &node_key,
2402                                                       path->slots[*level]);
2403                                 btrfs_add_corrupt_extent_record(fs_info,
2404                                                 &node_key,
2405                                                 path->nodes[*level]->start,
2406                                                 fs_info->nodesize,
2407                                                 *level);
2408                                 ret = -EIO;
2409                                 break;
2410                         }
2411                 }
2412
2413                 ret = check_child_node(cur, path->slots[*level], next);
2414                 if (ret < 0) 
2415                         break;
2416
2417                 if (btrfs_is_leaf(next))
2418                         status = btrfs_check_leaf(root, NULL, next);
2419                 else
2420                         status = btrfs_check_node(root, NULL, next);
2421                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2422                         free_extent_buffer(next);
2423                         ret = -EIO;
2424                         break;
2425                 }
2426
2427                 *level = *level - 1;
2428                 free_extent_buffer(path->nodes[*level]);
2429                 path->nodes[*level] = next;
2430                 path->slots[*level] = 0;
2431         }
2432         return ret;
2433 }
2434
2435 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2436                         struct walk_control *wc, int *level)
2437 {
2438         int i;
2439         struct extent_buffer *leaf;
2440
2441         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2442                 leaf = path->nodes[i];
2443                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2444                         path->slots[i]++;
2445                         *level = i;
2446                         return 0;
2447                 } else {
2448                         free_extent_buffer(path->nodes[*level]);
2449                         path->nodes[*level] = NULL;
2450                         BUG_ON(*level > wc->active_node);
2451                         if (*level == wc->active_node)
2452                                 leave_shared_node(root, wc, *level);
2453                         *level = i + 1;
2454                 }
2455         }
2456         return 1;
2457 }
2458
2459 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2460                            int *level)
2461 {
2462         int i;
2463         struct extent_buffer *leaf;
2464
2465         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2466                 leaf = path->nodes[i];
2467                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2468                         path->slots[i]++;
2469                         *level = i;
2470                         return 0;
2471                 } else {
2472                         free_extent_buffer(path->nodes[*level]);
2473                         path->nodes[*level] = NULL;
2474                         *level = i + 1;
2475                 }
2476         }
2477         return 1;
2478 }
2479
2480 static int check_root_dir(struct inode_record *rec)
2481 {
2482         struct inode_backref *backref;
2483         int ret = -1;
2484
2485         if (!rec->found_inode_item || rec->errors)
2486                 goto out;
2487         if (rec->nlink != 1 || rec->found_link != 0)
2488                 goto out;
2489         if (list_empty(&rec->backrefs))
2490                 goto out;
2491         backref = to_inode_backref(rec->backrefs.next);
2492         if (!backref->found_inode_ref)
2493                 goto out;
2494         if (backref->index != 0 || backref->namelen != 2 ||
2495             memcmp(backref->name, "..", 2))
2496                 goto out;
2497         if (backref->found_dir_index || backref->found_dir_item)
2498                 goto out;
2499         ret = 0;
2500 out:
2501         return ret;
2502 }
2503
2504 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2505                               struct btrfs_root *root, struct btrfs_path *path,
2506                               struct inode_record *rec)
2507 {
2508         struct btrfs_inode_item *ei;
2509         struct btrfs_key key;
2510         int ret;
2511
2512         key.objectid = rec->ino;
2513         key.type = BTRFS_INODE_ITEM_KEY;
2514         key.offset = (u64)-1;
2515
2516         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2517         if (ret < 0)
2518                 goto out;
2519         if (ret) {
2520                 if (!path->slots[0]) {
2521                         ret = -ENOENT;
2522                         goto out;
2523                 }
2524                 path->slots[0]--;
2525                 ret = 0;
2526         }
2527         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2528         if (key.objectid != rec->ino) {
2529                 ret = -ENOENT;
2530                 goto out;
2531         }
2532
2533         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2534                             struct btrfs_inode_item);
2535         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2536         btrfs_mark_buffer_dirty(path->nodes[0]);
2537         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2538         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2539                root->root_key.objectid);
2540 out:
2541         btrfs_release_path(path);
2542         return ret;
2543 }
2544
2545 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2546                                     struct btrfs_root *root,
2547                                     struct btrfs_path *path,
2548                                     struct inode_record *rec)
2549 {
2550         int ret;
2551
2552         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2553         btrfs_release_path(path);
2554         if (!ret)
2555                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2556         return ret;
2557 }
2558
2559 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2560                                struct btrfs_root *root,
2561                                struct btrfs_path *path,
2562                                struct inode_record *rec)
2563 {
2564         struct btrfs_inode_item *ei;
2565         struct btrfs_key key;
2566         int ret = 0;
2567
2568         key.objectid = rec->ino;
2569         key.type = BTRFS_INODE_ITEM_KEY;
2570         key.offset = 0;
2571
2572         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2573         if (ret) {
2574                 if (ret > 0)
2575                         ret = -ENOENT;
2576                 goto out;
2577         }
2578
2579         /* Since ret == 0, no need to check anything */
2580         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2581                             struct btrfs_inode_item);
2582         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2583         btrfs_mark_buffer_dirty(path->nodes[0]);
2584         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2585         printf("reset nbytes for ino %llu root %llu\n",
2586                rec->ino, root->root_key.objectid);
2587 out:
2588         btrfs_release_path(path);
2589         return ret;
2590 }
2591
2592 static int add_missing_dir_index(struct btrfs_root *root,
2593                                  struct cache_tree *inode_cache,
2594                                  struct inode_record *rec,
2595                                  struct inode_backref *backref)
2596 {
2597         struct btrfs_path path;
2598         struct btrfs_trans_handle *trans;
2599         struct btrfs_dir_item *dir_item;
2600         struct extent_buffer *leaf;
2601         struct btrfs_key key;
2602         struct btrfs_disk_key disk_key;
2603         struct inode_record *dir_rec;
2604         unsigned long name_ptr;
2605         u32 data_size = sizeof(*dir_item) + backref->namelen;
2606         int ret;
2607
2608         trans = btrfs_start_transaction(root, 1);
2609         if (IS_ERR(trans))
2610                 return PTR_ERR(trans);
2611
2612         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2613                 (unsigned long long)rec->ino);
2614
2615         btrfs_init_path(&path);
2616         key.objectid = backref->dir;
2617         key.type = BTRFS_DIR_INDEX_KEY;
2618         key.offset = backref->index;
2619         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2620         BUG_ON(ret);
2621
2622         leaf = path.nodes[0];
2623         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2624
2625         disk_key.objectid = cpu_to_le64(rec->ino);
2626         disk_key.type = BTRFS_INODE_ITEM_KEY;
2627         disk_key.offset = 0;
2628
2629         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2630         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2631         btrfs_set_dir_data_len(leaf, dir_item, 0);
2632         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2633         name_ptr = (unsigned long)(dir_item + 1);
2634         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2635         btrfs_mark_buffer_dirty(leaf);
2636         btrfs_release_path(&path);
2637         btrfs_commit_transaction(trans, root);
2638
2639         backref->found_dir_index = 1;
2640         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2641         BUG_ON(IS_ERR(dir_rec));
2642         if (!dir_rec)
2643                 return 0;
2644         dir_rec->found_size += backref->namelen;
2645         if (dir_rec->found_size == dir_rec->isize &&
2646             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2647                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2648         if (dir_rec->found_size != dir_rec->isize)
2649                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2650
2651         return 0;
2652 }
2653
2654 static int delete_dir_index(struct btrfs_root *root,
2655                             struct inode_backref *backref)
2656 {
2657         struct btrfs_trans_handle *trans;
2658         struct btrfs_dir_item *di;
2659         struct btrfs_path path;
2660         int ret = 0;
2661
2662         trans = btrfs_start_transaction(root, 1);
2663         if (IS_ERR(trans))
2664                 return PTR_ERR(trans);
2665
2666         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2667                 (unsigned long long)backref->dir,
2668                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2669                 (unsigned long long)root->objectid);
2670
2671         btrfs_init_path(&path);
2672         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2673                                     backref->name, backref->namelen,
2674                                     backref->index, -1);
2675         if (IS_ERR(di)) {
2676                 ret = PTR_ERR(di);
2677                 btrfs_release_path(&path);
2678                 btrfs_commit_transaction(trans, root);
2679                 if (ret == -ENOENT)
2680                         return 0;
2681                 return ret;
2682         }
2683
2684         if (!di)
2685                 ret = btrfs_del_item(trans, root, &path);
2686         else
2687                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2688         BUG_ON(ret);
2689         btrfs_release_path(&path);
2690         btrfs_commit_transaction(trans, root);
2691         return ret;
2692 }
2693
2694 static int __create_inode_item(struct btrfs_trans_handle *trans,
2695                                struct btrfs_root *root, u64 ino, u64 size,
2696                                u64 nbytes, u64 nlink, u32 mode)
2697 {
2698         struct btrfs_inode_item ii;
2699         time_t now = time(NULL);
2700         int ret;
2701
2702         btrfs_set_stack_inode_size(&ii, size);
2703         btrfs_set_stack_inode_nbytes(&ii, nbytes);
2704         btrfs_set_stack_inode_nlink(&ii, nlink);
2705         btrfs_set_stack_inode_mode(&ii, mode);
2706         btrfs_set_stack_inode_generation(&ii, trans->transid);
2707         btrfs_set_stack_timespec_nsec(&ii.atime, 0);
2708         btrfs_set_stack_timespec_sec(&ii.ctime, now);
2709         btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
2710         btrfs_set_stack_timespec_sec(&ii.mtime, now);
2711         btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
2712         btrfs_set_stack_timespec_sec(&ii.otime, 0);
2713         btrfs_set_stack_timespec_nsec(&ii.otime, 0);
2714
2715         ret = btrfs_insert_inode(trans, root, ino, &ii);
2716         ASSERT(!ret);
2717
2718         warning("root %llu inode %llu recreating inode item, this may "
2719                 "be incomplete, please check permissions and content after "
2720                 "the fsck completes.\n", (unsigned long long)root->objectid,
2721                 (unsigned long long)ino);
2722
2723         return 0;
2724 }
2725
2726 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2727                                     struct btrfs_root *root, u64 ino,
2728                                     u8 filetype)
2729 {
2730         u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2731
2732         return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
2733 }
2734
2735 static int create_inode_item(struct btrfs_root *root,
2736                              struct inode_record *rec, int root_dir)
2737 {
2738         struct btrfs_trans_handle *trans;
2739         u64 nlink = 0;
2740         u32 mode = 0;
2741         u64 size = 0;
2742         int ret;
2743
2744         trans = btrfs_start_transaction(root, 1);
2745         if (IS_ERR(trans)) {
2746                 ret = PTR_ERR(trans);
2747                 return ret;
2748         }
2749
2750         nlink = root_dir ? 1 : rec->found_link;
2751         if (rec->found_dir_item) {
2752                 if (rec->found_file_extent)
2753                         fprintf(stderr, "root %llu inode %llu has both a dir "
2754                                 "item and extents, unsure if it is a dir or a "
2755                                 "regular file so setting it as a directory\n",
2756                                 (unsigned long long)root->objectid,
2757                                 (unsigned long long)rec->ino);
2758                 mode = S_IFDIR | 0755;
2759                 size = rec->found_size;
2760         } else if (!rec->found_dir_item) {
2761                 size = rec->extent_end;
2762                 mode =  S_IFREG | 0755;
2763         }
2764
2765         ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
2766                                   nlink, mode);
2767         btrfs_commit_transaction(trans, root);
2768         return 0;
2769 }
2770
2771 static int repair_inode_backrefs(struct btrfs_root *root,
2772                                  struct inode_record *rec,
2773                                  struct cache_tree *inode_cache,
2774                                  int delete)
2775 {
2776         struct inode_backref *tmp, *backref;
2777         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2778         int ret = 0;
2779         int repaired = 0;
2780
2781         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2782                 if (!delete && rec->ino == root_dirid) {
2783                         if (!rec->found_inode_item) {
2784                                 ret = create_inode_item(root, rec, 1);
2785                                 if (ret)
2786                                         break;
2787                                 repaired++;
2788                         }
2789                 }
2790
2791                 /* Index 0 for root dir's are special, don't mess with it */
2792                 if (rec->ino == root_dirid && backref->index == 0)
2793                         continue;
2794
2795                 if (delete &&
2796                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2797                      (backref->found_dir_index && backref->found_inode_ref &&
2798                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2799                         ret = delete_dir_index(root, backref);
2800                         if (ret)
2801                                 break;
2802                         repaired++;
2803                         list_del(&backref->list);
2804                         free(backref);
2805                         continue;
2806                 }
2807
2808                 if (!delete && !backref->found_dir_index &&
2809                     backref->found_dir_item && backref->found_inode_ref) {
2810                         ret = add_missing_dir_index(root, inode_cache, rec,
2811                                                     backref);
2812                         if (ret)
2813                                 break;
2814                         repaired++;
2815                         if (backref->found_dir_item &&
2816                             backref->found_dir_index) {
2817                                 if (!backref->errors &&
2818                                     backref->found_inode_ref) {
2819                                         list_del(&backref->list);
2820                                         free(backref);
2821                                         continue;
2822                                 }
2823                         }
2824                 }
2825
2826                 if (!delete && (!backref->found_dir_index &&
2827                                 !backref->found_dir_item &&
2828                                 backref->found_inode_ref)) {
2829                         struct btrfs_trans_handle *trans;
2830                         struct btrfs_key location;
2831
2832                         ret = check_dir_conflict(root, backref->name,
2833                                                  backref->namelen,
2834                                                  backref->dir,
2835                                                  backref->index);
2836                         if (ret) {
2837                                 /*
2838                                  * let nlink fixing routine to handle it,
2839                                  * which can do it better.
2840                                  */
2841                                 ret = 0;
2842                                 break;
2843                         }
2844                         location.objectid = rec->ino;
2845                         location.type = BTRFS_INODE_ITEM_KEY;
2846                         location.offset = 0;
2847
2848                         trans = btrfs_start_transaction(root, 1);
2849                         if (IS_ERR(trans)) {
2850                                 ret = PTR_ERR(trans);
2851                                 break;
2852                         }
2853                         fprintf(stderr, "adding missing dir index/item pair "
2854                                 "for inode %llu\n",
2855                                 (unsigned long long)rec->ino);
2856                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2857                                                     backref->namelen,
2858                                                     backref->dir, &location,
2859                                                     imode_to_type(rec->imode),
2860                                                     backref->index);
2861                         BUG_ON(ret);
2862                         btrfs_commit_transaction(trans, root);
2863                         repaired++;
2864                 }
2865
2866                 if (!delete && (backref->found_inode_ref &&
2867                                 backref->found_dir_index &&
2868                                 backref->found_dir_item &&
2869                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2870                                 !rec->found_inode_item)) {
2871                         ret = create_inode_item(root, rec, 0);
2872                         if (ret)
2873                                 break;
2874                         repaired++;
2875                 }
2876
2877         }
2878         return ret ? ret : repaired;
2879 }
2880
2881 /*
2882  * To determine the file type for nlink/inode_item repair
2883  *
2884  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2885  * Return -ENOENT if file type is not found.
2886  */
2887 static int find_file_type(struct inode_record *rec, u8 *type)
2888 {
2889         struct inode_backref *backref;
2890
2891         /* For inode item recovered case */
2892         if (rec->found_inode_item) {
2893                 *type = imode_to_type(rec->imode);
2894                 return 0;
2895         }
2896
2897         list_for_each_entry(backref, &rec->backrefs, list) {
2898                 if (backref->found_dir_index || backref->found_dir_item) {
2899                         *type = backref->filetype;
2900                         return 0;
2901                 }
2902         }
2903         return -ENOENT;
2904 }
2905
2906 /*
2907  * To determine the file name for nlink repair
2908  *
2909  * Return 0 if file name is found, set name and namelen.
2910  * Return -ENOENT if file name is not found.
2911  */
2912 static int find_file_name(struct inode_record *rec,
2913                           char *name, int *namelen)
2914 {
2915         struct inode_backref *backref;
2916
2917         list_for_each_entry(backref, &rec->backrefs, list) {
2918                 if (backref->found_dir_index || backref->found_dir_item ||
2919                     backref->found_inode_ref) {
2920                         memcpy(name, backref->name, backref->namelen);
2921                         *namelen = backref->namelen;
2922                         return 0;
2923                 }
2924         }
2925         return -ENOENT;
2926 }
2927
2928 /* Reset the nlink of the inode to the correct one */
2929 static int reset_nlink(struct btrfs_trans_handle *trans,
2930                        struct btrfs_root *root,
2931                        struct btrfs_path *path,
2932                        struct inode_record *rec)
2933 {
2934         struct inode_backref *backref;
2935         struct inode_backref *tmp;
2936         struct btrfs_key key;
2937         struct btrfs_inode_item *inode_item;
2938         int ret = 0;
2939
2940         /* We don't believe this either, reset it and iterate backref */
2941         rec->found_link = 0;
2942
2943         /* Remove all backref including the valid ones */
2944         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2945                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2946                                    backref->index, backref->name,
2947                                    backref->namelen, 0);
2948                 if (ret < 0)
2949                         goto out;
2950
2951                 /* remove invalid backref, so it won't be added back */
2952                 if (!(backref->found_dir_index &&
2953                       backref->found_dir_item &&
2954                       backref->found_inode_ref)) {
2955                         list_del(&backref->list);
2956                         free(backref);
2957                 } else {
2958                         rec->found_link++;
2959                 }
2960         }
2961
2962         /* Set nlink to 0 */
2963         key.objectid = rec->ino;
2964         key.type = BTRFS_INODE_ITEM_KEY;
2965         key.offset = 0;
2966         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2967         if (ret < 0)
2968                 goto out;
2969         if (ret > 0) {
2970                 ret = -ENOENT;
2971                 goto out;
2972         }
2973         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2974                                     struct btrfs_inode_item);
2975         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2976         btrfs_mark_buffer_dirty(path->nodes[0]);
2977         btrfs_release_path(path);
2978
2979         /*
2980          * Add back valid inode_ref/dir_item/dir_index,
2981          * add_link() will handle the nlink inc, so new nlink must be correct
2982          */
2983         list_for_each_entry(backref, &rec->backrefs, list) {
2984                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2985                                      backref->name, backref->namelen,
2986                                      backref->filetype, &backref->index, 1, 0);
2987                 if (ret < 0)
2988                         goto out;
2989         }
2990 out:
2991         btrfs_release_path(path);
2992         return ret;
2993 }
2994
2995 static int get_highest_inode(struct btrfs_trans_handle *trans,
2996                                 struct btrfs_root *root,
2997                                 struct btrfs_path *path,
2998                                 u64 *highest_ino)
2999 {
3000         struct btrfs_key key, found_key;
3001         int ret;
3002
3003         btrfs_init_path(path);
3004         key.objectid = BTRFS_LAST_FREE_OBJECTID;
3005         key.offset = -1;
3006         key.type = BTRFS_INODE_ITEM_KEY;
3007         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3008         if (ret == 1) {
3009                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3010                                 path->slots[0] - 1);
3011                 *highest_ino = found_key.objectid;
3012                 ret = 0;
3013         }
3014         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3015                 ret = -EOVERFLOW;
3016         btrfs_release_path(path);
3017         return ret;
3018 }
3019
3020 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3021                                struct btrfs_root *root,
3022                                struct btrfs_path *path,
3023                                struct inode_record *rec)
3024 {
3025         char *dir_name = "lost+found";
3026         char namebuf[BTRFS_NAME_LEN] = {0};
3027         u64 lost_found_ino;
3028         u32 mode = 0700;
3029         u8 type = 0;
3030         int namelen = 0;
3031         int name_recovered = 0;
3032         int type_recovered = 0;
3033         int ret = 0;
3034
3035         /*
3036          * Get file name and type first before these invalid inode ref
3037          * are deleted by remove_all_invalid_backref()
3038          */
3039         name_recovered = !find_file_name(rec, namebuf, &namelen);
3040         type_recovered = !find_file_type(rec, &type);
3041
3042         if (!name_recovered) {
3043                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3044                        rec->ino, rec->ino);
3045                 namelen = count_digits(rec->ino);
3046                 sprintf(namebuf, "%llu", rec->ino);
3047                 name_recovered = 1;
3048         }
3049         if (!type_recovered) {
3050                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3051                        rec->ino);
3052                 type = BTRFS_FT_REG_FILE;
3053                 type_recovered = 1;
3054         }
3055
3056         ret = reset_nlink(trans, root, path, rec);
3057         if (ret < 0) {
3058                 fprintf(stderr,
3059                         "Failed to reset nlink for inode %llu: %s\n",
3060                         rec->ino, strerror(-ret));
3061                 goto out;
3062         }
3063
3064         if (rec->found_link == 0) {
3065                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3066                 if (ret < 0)
3067                         goto out;
3068                 lost_found_ino++;
3069                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3070                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3071                                   mode);
3072                 if (ret < 0) {
3073                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
3074                                 dir_name, strerror(-ret));
3075                         goto out;
3076                 }
3077                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
3078                                      namebuf, namelen, type, NULL, 1, 0);
3079                 /*
3080                  * Add ".INO" suffix several times to handle case where
3081                  * "FILENAME.INO" is already taken by another file.
3082                  */
3083                 while (ret == -EEXIST) {
3084                         /*
3085                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
3086                          */
3087                         if (namelen + count_digits(rec->ino) + 1 >
3088                             BTRFS_NAME_LEN) {
3089                                 ret = -EFBIG;
3090                                 goto out;
3091                         }
3092                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
3093                                  ".%llu", rec->ino);
3094                         namelen += count_digits(rec->ino) + 1;
3095                         ret = btrfs_add_link(trans, root, rec->ino,
3096                                              lost_found_ino, namebuf,
3097                                              namelen, type, NULL, 1, 0);
3098                 }
3099                 if (ret < 0) {
3100                         fprintf(stderr,
3101                                 "Failed to link the inode %llu to %s dir: %s\n",
3102                                 rec->ino, dir_name, strerror(-ret));
3103                         goto out;
3104                 }
3105                 /*
3106                  * Just increase the found_link, don't actually add the
3107                  * backref. This will make things easier and this inode
3108                  * record will be freed after the repair is done.
3109                  * So fsck will not report problem about this inode.
3110                  */
3111                 rec->found_link++;
3112                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3113                        namelen, namebuf, dir_name);
3114         }
3115         printf("Fixed the nlink of inode %llu\n", rec->ino);
3116 out:
3117         /*
3118          * Clear the flag anyway, or we will loop forever for the same inode
3119          * as it will not be removed from the bad inode list and the dead loop
3120          * happens.
3121          */
3122         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3123         btrfs_release_path(path);
3124         return ret;
3125 }
3126
3127 /*
3128  * Check if there is any normal(reg or prealloc) file extent for given
3129  * ino.
3130  * This is used to determine the file type when neither its dir_index/item or
3131  * inode_item exists.
3132  *
3133  * This will *NOT* report error, if any error happens, just consider it does
3134  * not have any normal file extent.
3135  */
3136 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3137 {
3138         struct btrfs_path path;
3139         struct btrfs_key key;
3140         struct btrfs_key found_key;
3141         struct btrfs_file_extent_item *fi;
3142         u8 type;
3143         int ret = 0;
3144
3145         btrfs_init_path(&path);
3146         key.objectid = ino;
3147         key.type = BTRFS_EXTENT_DATA_KEY;
3148         key.offset = 0;
3149
3150         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3151         if (ret < 0) {
3152                 ret = 0;
3153                 goto out;
3154         }
3155         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3156                 ret = btrfs_next_leaf(root, &path);
3157                 if (ret) {
3158                         ret = 0;
3159                         goto out;
3160                 }
3161         }
3162         while (1) {
3163                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3164                                       path.slots[0]);
3165                 if (found_key.objectid != ino ||
3166                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3167                         break;
3168                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3169                                     struct btrfs_file_extent_item);
3170                 type = btrfs_file_extent_type(path.nodes[0], fi);
3171                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3172                         ret = 1;
3173                         goto out;
3174                 }
3175         }
3176 out:
3177         btrfs_release_path(&path);
3178         return ret;
3179 }
3180
3181 static u32 btrfs_type_to_imode(u8 type)
3182 {
3183         static u32 imode_by_btrfs_type[] = {
3184                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3185                 [BTRFS_FT_DIR]          = S_IFDIR,
3186                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3187                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3188                 [BTRFS_FT_FIFO]         = S_IFIFO,
3189                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3190                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3191         };
3192
3193         return imode_by_btrfs_type[(type)];
3194 }
3195
3196 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3197                                 struct btrfs_root *root,
3198                                 struct btrfs_path *path,
3199                                 struct inode_record *rec)
3200 {
3201         u8 filetype;
3202         u32 mode = 0700;
3203         int type_recovered = 0;
3204         int ret = 0;
3205
3206         printf("Trying to rebuild inode:%llu\n", rec->ino);
3207
3208         type_recovered = !find_file_type(rec, &filetype);
3209
3210         /*
3211          * Try to determine inode type if type not found.
3212          *
3213          * For found regular file extent, it must be FILE.
3214          * For found dir_item/index, it must be DIR.
3215          *
3216          * For undetermined one, use FILE as fallback.
3217          *
3218          * TODO:
3219          * 1. If found backref(inode_index/item is already handled) to it,
3220          *    it must be DIR.
3221          *    Need new inode-inode ref structure to allow search for that.
3222          */
3223         if (!type_recovered) {
3224                 if (rec->found_file_extent &&
3225                     find_normal_file_extent(root, rec->ino)) {
3226                         type_recovered = 1;
3227                         filetype = BTRFS_FT_REG_FILE;
3228                 } else if (rec->found_dir_item) {
3229                         type_recovered = 1;
3230                         filetype = BTRFS_FT_DIR;
3231                 } else if (!list_empty(&rec->orphan_extents)) {
3232                         type_recovered = 1;
3233                         filetype = BTRFS_FT_REG_FILE;
3234                 } else{
3235                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3236                                rec->ino);
3237                         type_recovered = 1;
3238                         filetype = BTRFS_FT_REG_FILE;
3239                 }
3240         }
3241
3242         ret = btrfs_new_inode(trans, root, rec->ino,
3243                               mode | btrfs_type_to_imode(filetype));
3244         if (ret < 0)
3245                 goto out;
3246
3247         /*
3248          * Here inode rebuild is done, we only rebuild the inode item,
3249          * don't repair the nlink(like move to lost+found).
3250          * That is the job of nlink repair.
3251          *
3252          * We just fill the record and return
3253          */
3254         rec->found_dir_item = 1;
3255         rec->imode = mode | btrfs_type_to_imode(filetype);
3256         rec->nlink = 0;
3257         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3258         /* Ensure the inode_nlinks repair function will be called */
3259         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3260 out:
3261         return ret;
3262 }
3263
3264 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3265                                       struct btrfs_root *root,
3266                                       struct btrfs_path *path,
3267                                       struct inode_record *rec)
3268 {
3269         struct orphan_data_extent *orphan;
3270         struct orphan_data_extent *tmp;
3271         int ret = 0;
3272
3273         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3274                 /*
3275                  * Check for conflicting file extents
3276                  *
3277                  * Here we don't know whether the extents is compressed or not,
3278                  * so we can only assume it not compressed nor data offset,
3279                  * and use its disk_len as extent length.
3280                  */
3281                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3282                                        orphan->offset, orphan->disk_len, 0);
3283                 btrfs_release_path(path);
3284                 if (ret < 0)
3285                         goto out;
3286                 if (!ret) {
3287                         fprintf(stderr,
3288                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3289                                 orphan->disk_bytenr, orphan->disk_len);
3290                         ret = btrfs_free_extent(trans,
3291                                         root->fs_info->extent_root,
3292                                         orphan->disk_bytenr, orphan->disk_len,
3293                                         0, root->objectid, orphan->objectid,
3294                                         orphan->offset);
3295                         if (ret < 0)
3296                                 goto out;
3297                 }
3298                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3299                                 orphan->offset, orphan->disk_bytenr,
3300                                 orphan->disk_len, orphan->disk_len);
3301                 if (ret < 0)
3302                         goto out;
3303
3304                 /* Update file size info */
3305                 rec->found_size += orphan->disk_len;
3306                 if (rec->found_size == rec->nbytes)
3307                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3308
3309                 /* Update the file extent hole info too */
3310                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3311                                            orphan->disk_len);
3312                 if (ret < 0)
3313                         goto out;
3314                 if (RB_EMPTY_ROOT(&rec->holes))
3315                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3316
3317                 list_del(&orphan->list);
3318                 free(orphan);
3319         }
3320         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3321 out:
3322         return ret;
3323 }
3324
3325 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3326                                         struct btrfs_root *root,
3327                                         struct btrfs_path *path,
3328                                         struct inode_record *rec)
3329 {
3330         struct rb_node *node;
3331         struct file_extent_hole *hole;
3332         int found = 0;
3333         int ret = 0;
3334
3335         node = rb_first(&rec->holes);
3336
3337         while (node) {
3338                 found = 1;
3339                 hole = rb_entry(node, struct file_extent_hole, node);
3340                 ret = btrfs_punch_hole(trans, root, rec->ino,
3341                                        hole->start, hole->len);
3342                 if (ret < 0)
3343                         goto out;
3344                 ret = del_file_extent_hole(&rec->holes, hole->start,
3345                                            hole->len);
3346                 if (ret < 0)
3347                         goto out;
3348                 if (RB_EMPTY_ROOT(&rec->holes))
3349                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3350                 node = rb_first(&rec->holes);
3351         }
3352         /* special case for a file losing all its file extent */
3353         if (!found) {
3354                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3355                                        round_up(rec->isize,
3356                                                 root->fs_info->sectorsize));
3357                 if (ret < 0)
3358                         goto out;
3359         }
3360         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3361                rec->ino, root->objectid);
3362 out:
3363         return ret;
3364 }
3365
3366 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3367 {
3368         struct btrfs_trans_handle *trans;
3369         struct btrfs_path path;
3370         int ret = 0;
3371
3372         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3373                              I_ERR_NO_ORPHAN_ITEM |
3374                              I_ERR_LINK_COUNT_WRONG |
3375                              I_ERR_NO_INODE_ITEM |
3376                              I_ERR_FILE_EXTENT_ORPHAN |
3377                              I_ERR_FILE_EXTENT_DISCOUNT|
3378                              I_ERR_FILE_NBYTES_WRONG)))
3379                 return rec->errors;
3380
3381         /*
3382          * For nlink repair, it may create a dir and add link, so
3383          * 2 for parent(256)'s dir_index and dir_item
3384          * 2 for lost+found dir's inode_item and inode_ref
3385          * 1 for the new inode_ref of the file
3386          * 2 for lost+found dir's dir_index and dir_item for the file
3387          */
3388         trans = btrfs_start_transaction(root, 7);
3389         if (IS_ERR(trans))
3390                 return PTR_ERR(trans);
3391
3392         btrfs_init_path(&path);
3393         if (rec->errors & I_ERR_NO_INODE_ITEM)
3394                 ret = repair_inode_no_item(trans, root, &path, rec);
3395         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3396                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3397         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3398                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3399         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3400                 ret = repair_inode_isize(trans, root, &path, rec);
3401         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3402                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3403         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3404                 ret = repair_inode_nlinks(trans, root, &path, rec);
3405         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3406                 ret = repair_inode_nbytes(trans, root, &path, rec);
3407         btrfs_commit_transaction(trans, root);
3408         btrfs_release_path(&path);
3409         return ret;
3410 }
3411
3412 static int check_inode_recs(struct btrfs_root *root,
3413                             struct cache_tree *inode_cache)
3414 {
3415         struct cache_extent *cache;
3416         struct ptr_node *node;
3417         struct inode_record *rec;
3418         struct inode_backref *backref;
3419         int stage = 0;
3420         int ret = 0;
3421         int err = 0;
3422         u64 error = 0;
3423         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3424
3425         if (btrfs_root_refs(&root->root_item) == 0) {
3426                 if (!cache_tree_empty(inode_cache))
3427                         fprintf(stderr, "warning line %d\n", __LINE__);
3428                 return 0;
3429         }
3430
3431         /*
3432          * We need to repair backrefs first because we could change some of the
3433          * errors in the inode recs.
3434          *
3435          * We also need to go through and delete invalid backrefs first and then
3436          * add the correct ones second.  We do this because we may get EEXIST
3437          * when adding back the correct index because we hadn't yet deleted the
3438          * invalid index.
3439          *
3440          * For example, if we were missing a dir index then the directories
3441          * isize would be wrong, so if we fixed the isize to what we thought it
3442          * would be and then fixed the backref we'd still have a invalid fs, so
3443          * we need to add back the dir index and then check to see if the isize
3444          * is still wrong.
3445          */
3446         while (stage < 3) {
3447                 stage++;
3448                 if (stage == 3 && !err)
3449                         break;
3450
3451                 cache = search_cache_extent(inode_cache, 0);
3452                 while (repair && cache) {
3453                         node = container_of(cache, struct ptr_node, cache);
3454                         rec = node->data;
3455                         cache = next_cache_extent(cache);
3456
3457                         /* Need to free everything up and rescan */
3458                         if (stage == 3) {
3459                                 remove_cache_extent(inode_cache, &node->cache);
3460                                 free(node);
3461                                 free_inode_rec(rec);
3462                                 continue;
3463                         }
3464
3465                         if (list_empty(&rec->backrefs))
3466                                 continue;
3467
3468                         ret = repair_inode_backrefs(root, rec, inode_cache,
3469                                                     stage == 1);
3470                         if (ret < 0) {
3471                                 err = ret;
3472                                 stage = 2;
3473                                 break;
3474                         } if (ret > 0) {
3475                                 err = -EAGAIN;
3476                         }
3477                 }
3478         }
3479         if (err)
3480                 return err;
3481
3482         rec = get_inode_rec(inode_cache, root_dirid, 0);
3483         BUG_ON(IS_ERR(rec));
3484         if (rec) {
3485                 ret = check_root_dir(rec);
3486                 if (ret) {
3487                         fprintf(stderr, "root %llu root dir %llu error\n",
3488                                 (unsigned long long)root->root_key.objectid,
3489                                 (unsigned long long)root_dirid);
3490                         print_inode_error(root, rec);
3491                         error++;
3492                 }
3493         } else {
3494                 if (repair) {
3495                         struct btrfs_trans_handle *trans;
3496
3497                         trans = btrfs_start_transaction(root, 1);
3498                         if (IS_ERR(trans)) {
3499                                 err = PTR_ERR(trans);
3500                                 return err;
3501                         }
3502
3503                         fprintf(stderr,
3504                                 "root %llu missing its root dir, recreating\n",
3505                                 (unsigned long long)root->objectid);
3506
3507                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3508                         BUG_ON(ret);
3509
3510                         btrfs_commit_transaction(trans, root);
3511                         return -EAGAIN;
3512                 }
3513
3514                 fprintf(stderr, "root %llu root dir %llu not found\n",
3515                         (unsigned long long)root->root_key.objectid,
3516                         (unsigned long long)root_dirid);
3517         }
3518
3519         while (1) {
3520                 cache = search_cache_extent(inode_cache, 0);
3521                 if (!cache)
3522                         break;
3523                 node = container_of(cache, struct ptr_node, cache);
3524                 rec = node->data;
3525                 remove_cache_extent(inode_cache, &node->cache);
3526                 free(node);
3527                 if (rec->ino == root_dirid ||
3528                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3529                         free_inode_rec(rec);
3530                         continue;
3531                 }
3532
3533                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3534                         ret = check_orphan_item(root, rec->ino);
3535                         if (ret == 0)
3536                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3537                         if (can_free_inode_rec(rec)) {
3538                                 free_inode_rec(rec);
3539                                 continue;
3540                         }
3541                 }
3542
3543                 if (!rec->found_inode_item)
3544                         rec->errors |= I_ERR_NO_INODE_ITEM;
3545                 if (rec->found_link != rec->nlink)
3546                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3547                 if (repair) {
3548                         ret = try_repair_inode(root, rec);
3549                         if (ret == 0 && can_free_inode_rec(rec)) {
3550                                 free_inode_rec(rec);
3551                                 continue;
3552                         }
3553                         ret = 0;
3554                 }
3555
3556                 if (!(repair && ret == 0))
3557                         error++;
3558                 print_inode_error(root, rec);
3559                 list_for_each_entry(backref, &rec->backrefs, list) {
3560                         if (!backref->found_dir_item)
3561                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3562                         if (!backref->found_dir_index)
3563                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3564                         if (!backref->found_inode_ref)
3565                                 backref->errors |= REF_ERR_NO_INODE_REF;
3566                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3567                                 " namelen %u name %s filetype %d errors %x",
3568                                 (unsigned long long)backref->dir,
3569                                 (unsigned long long)backref->index,
3570                                 backref->namelen, backref->name,
3571                                 backref->filetype, backref->errors);
3572                         print_ref_error(backref->errors);
3573                 }
3574                 free_inode_rec(rec);
3575         }
3576         return (error > 0) ? -1 : 0;
3577 }
3578
3579 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3580                                         u64 objectid)
3581 {
3582         struct cache_extent *cache;
3583         struct root_record *rec = NULL;
3584         int ret;
3585
3586         cache = lookup_cache_extent(root_cache, objectid, 1);
3587         if (cache) {
3588                 rec = container_of(cache, struct root_record, cache);
3589         } else {
3590                 rec = calloc(1, sizeof(*rec));
3591                 if (!rec)
3592                         return ERR_PTR(-ENOMEM);
3593                 rec->objectid = objectid;
3594                 INIT_LIST_HEAD(&rec->backrefs);
3595                 rec->cache.start = objectid;
3596                 rec->cache.size = 1;
3597
3598                 ret = insert_cache_extent(root_cache, &rec->cache);
3599                 if (ret)
3600                         return ERR_PTR(-EEXIST);
3601         }
3602         return rec;
3603 }
3604
3605 static struct root_backref *get_root_backref(struct root_record *rec,
3606                                              u64 ref_root, u64 dir, u64 index,
3607                                              const char *name, int namelen)
3608 {
3609         struct root_backref *backref;
3610
3611         list_for_each_entry(backref, &rec->backrefs, list) {
3612                 if (backref->ref_root != ref_root || backref->dir != dir ||
3613                     backref->namelen != namelen)
3614                         continue;
3615                 if (memcmp(name, backref->name, namelen))
3616                         continue;
3617                 return backref;
3618         }
3619
3620         backref = calloc(1, sizeof(*backref) + namelen + 1);
3621         if (!backref)
3622                 return NULL;
3623         backref->ref_root = ref_root;
3624         backref->dir = dir;
3625         backref->index = index;
3626         backref->namelen = namelen;
3627         memcpy(backref->name, name, namelen);
3628         backref->name[namelen] = '\0';
3629         list_add_tail(&backref->list, &rec->backrefs);
3630         return backref;
3631 }
3632
3633 static void free_root_record(struct cache_extent *cache)
3634 {
3635         struct root_record *rec;
3636         struct root_backref *backref;
3637
3638         rec = container_of(cache, struct root_record, cache);
3639         while (!list_empty(&rec->backrefs)) {
3640                 backref = to_root_backref(rec->backrefs.next);
3641                 list_del(&backref->list);
3642                 free(backref);
3643         }
3644
3645         free(rec);
3646 }
3647
3648 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3649
3650 static int add_root_backref(struct cache_tree *root_cache,
3651                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3652                             const char *name, int namelen,
3653                             int item_type, int errors)
3654 {
3655         struct root_record *rec;
3656         struct root_backref *backref;
3657
3658         rec = get_root_rec(root_cache, root_id);
3659         BUG_ON(IS_ERR(rec));
3660         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3661         BUG_ON(!backref);
3662
3663         backref->errors |= errors;
3664
3665         if (item_type != BTRFS_DIR_ITEM_KEY) {
3666                 if (backref->found_dir_index || backref->found_back_ref ||
3667                     backref->found_forward_ref) {
3668                         if (backref->index != index)
3669                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3670                 } else {
3671                         backref->index = index;
3672                 }
3673         }
3674
3675         if (item_type == BTRFS_DIR_ITEM_KEY) {
3676                 if (backref->found_forward_ref)
3677                         rec->found_ref++;
3678                 backref->found_dir_item = 1;
3679         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3680                 backref->found_dir_index = 1;
3681         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3682                 if (backref->found_forward_ref)
3683                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3684                 else if (backref->found_dir_item)
3685                         rec->found_ref++;
3686                 backref->found_forward_ref = 1;
3687         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3688                 if (backref->found_back_ref)
3689                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3690                 backref->found_back_ref = 1;
3691         } else {
3692                 BUG_ON(1);
3693         }
3694
3695         if (backref->found_forward_ref && backref->found_dir_item)
3696                 backref->reachable = 1;
3697         return 0;
3698 }
3699
3700 static int merge_root_recs(struct btrfs_root *root,
3701                            struct cache_tree *src_cache,
3702                            struct cache_tree *dst_cache)
3703 {
3704         struct cache_extent *cache;
3705         struct ptr_node *node;
3706         struct inode_record *rec;
3707         struct inode_backref *backref;
3708         int ret = 0;
3709
3710         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3711                 free_inode_recs_tree(src_cache);
3712                 return 0;
3713         }
3714
3715         while (1) {
3716                 cache = search_cache_extent(src_cache, 0);
3717                 if (!cache)
3718                         break;
3719                 node = container_of(cache, struct ptr_node, cache);
3720                 rec = node->data;
3721                 remove_cache_extent(src_cache, &node->cache);
3722                 free(node);
3723
3724                 ret = is_child_root(root, root->objectid, rec->ino);
3725                 if (ret < 0)
3726                         break;
3727                 else if (ret == 0)
3728                         goto skip;
3729
3730                 list_for_each_entry(backref, &rec->backrefs, list) {
3731                         BUG_ON(backref->found_inode_ref);
3732                         if (backref->found_dir_item)
3733                                 add_root_backref(dst_cache, rec->ino,
3734                                         root->root_key.objectid, backref->dir,
3735                                         backref->index, backref->name,
3736                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3737                                         backref->errors);
3738                         if (backref->found_dir_index)
3739                                 add_root_backref(dst_cache, rec->ino,
3740                                         root->root_key.objectid, backref->dir,
3741                                         backref->index, backref->name,
3742                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3743                                         backref->errors);
3744                 }
3745 skip:
3746                 free_inode_rec(rec);
3747         }
3748         if (ret < 0)
3749                 return ret;
3750         return 0;
3751 }
3752
3753 static int check_root_refs(struct btrfs_root *root,
3754                            struct cache_tree *root_cache)
3755 {
3756         struct root_record *rec;
3757         struct root_record *ref_root;
3758         struct root_backref *backref;
3759         struct cache_extent *cache;
3760         int loop = 1;
3761         int ret;
3762         int error;
3763         int errors = 0;
3764
3765         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3766         BUG_ON(IS_ERR(rec));
3767         rec->found_ref = 1;
3768
3769         /* fixme: this can not detect circular references */
3770         while (loop) {
3771                 loop = 0;
3772                 cache = search_cache_extent(root_cache, 0);
3773                 while (1) {
3774                         if (!cache)
3775                                 break;
3776                         rec = container_of(cache, struct root_record, cache);
3777                         cache = next_cache_extent(cache);
3778
3779                         if (rec->found_ref == 0)
3780                                 continue;
3781
3782                         list_for_each_entry(backref, &rec->backrefs, list) {
3783                                 if (!backref->reachable)
3784                                         continue;
3785
3786                                 ref_root = get_root_rec(root_cache,
3787                                                         backref->ref_root);
3788                                 BUG_ON(IS_ERR(ref_root));
3789                                 if (ref_root->found_ref > 0)
3790                                         continue;
3791
3792                                 backref->reachable = 0;
3793                                 rec->found_ref--;
3794                                 if (rec->found_ref == 0)
3795                                         loop = 1;
3796                         }
3797                 }
3798         }
3799
3800         cache = search_cache_extent(root_cache, 0);
3801         while (1) {
3802                 if (!cache)
3803                         break;
3804                 rec = container_of(cache, struct root_record, cache);
3805                 cache = next_cache_extent(cache);
3806
3807                 if (rec->found_ref == 0 &&
3808                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3809                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3810                         ret = check_orphan_item(root->fs_info->tree_root,
3811                                                 rec->objectid);
3812                         if (ret == 0)
3813                                 continue;
3814
3815                         /*
3816                          * If we don't have a root item then we likely just have
3817                          * a dir item in a snapshot for this root but no actual
3818                          * ref key or anything so it's meaningless.
3819                          */
3820                         if (!rec->found_root_item)
3821                                 continue;
3822                         errors++;
3823                         fprintf(stderr, "fs tree %llu not referenced\n",
3824                                 (unsigned long long)rec->objectid);
3825                 }
3826
3827                 error = 0;
3828                 if (rec->found_ref > 0 && !rec->found_root_item)
3829                         error = 1;
3830                 list_for_each_entry(backref, &rec->backrefs, list) {
3831                         if (!backref->found_dir_item)
3832                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3833                         if (!backref->found_dir_index)
3834                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3835                         if (!backref->found_back_ref)
3836                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3837                         if (!backref->found_forward_ref)
3838                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3839                         if (backref->reachable && backref->errors)
3840                                 error = 1;
3841                 }
3842                 if (!error)
3843                         continue;
3844
3845                 errors++;
3846                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3847                         (unsigned long long)rec->objectid, rec->found_ref,
3848                          rec->found_root_item ? "" : "not found");
3849
3850                 list_for_each_entry(backref, &rec->backrefs, list) {
3851                         if (!backref->reachable)
3852                                 continue;
3853                         if (!backref->errors && rec->found_root_item)
3854                                 continue;
3855                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3856                                 " index %llu namelen %u name %s errors %x\n",
3857                                 (unsigned long long)backref->ref_root,
3858                                 (unsigned long long)backref->dir,
3859                                 (unsigned long long)backref->index,
3860                                 backref->namelen, backref->name,
3861                                 backref->errors);
3862                         print_ref_error(backref->errors);
3863                 }
3864         }
3865         return errors > 0 ? 1 : 0;
3866 }
3867
3868 static int process_root_ref(struct extent_buffer *eb, int slot,
3869                             struct btrfs_key *key,
3870                             struct cache_tree *root_cache)
3871 {
3872         u64 dirid;
3873         u64 index;
3874         u32 len;
3875         u32 name_len;
3876         struct btrfs_root_ref *ref;
3877         char namebuf[BTRFS_NAME_LEN];
3878         int error;
3879
3880         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3881
3882         dirid = btrfs_root_ref_dirid(eb, ref);
3883         index = btrfs_root_ref_sequence(eb, ref);
3884         name_len = btrfs_root_ref_name_len(eb, ref);
3885
3886         if (name_len <= BTRFS_NAME_LEN) {
3887                 len = name_len;
3888                 error = 0;
3889         } else {
3890                 len = BTRFS_NAME_LEN;
3891                 error = REF_ERR_NAME_TOO_LONG;
3892         }
3893         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3894
3895         if (key->type == BTRFS_ROOT_REF_KEY) {
3896                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3897                                  index, namebuf, len, key->type, error);
3898         } else {
3899                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3900                                  index, namebuf, len, key->type, error);
3901         }
3902         return 0;
3903 }
3904
3905 static void free_corrupt_block(struct cache_extent *cache)
3906 {
3907         struct btrfs_corrupt_block *corrupt;
3908
3909         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3910         free(corrupt);
3911 }
3912
3913 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3914
3915 /*
3916  * Repair the btree of the given root.
3917  *
3918  * The fix is to remove the node key in corrupt_blocks cache_tree.
3919  * and rebalance the tree.
3920  * After the fix, the btree should be writeable.
3921  */
3922 static int repair_btree(struct btrfs_root *root,
3923                         struct cache_tree *corrupt_blocks)
3924 {
3925         struct btrfs_trans_handle *trans;
3926         struct btrfs_path path;
3927         struct btrfs_corrupt_block *corrupt;
3928         struct cache_extent *cache;
3929         struct btrfs_key key;
3930         u64 offset;
3931         int level;
3932         int ret = 0;
3933
3934         if (cache_tree_empty(corrupt_blocks))
3935                 return 0;
3936
3937         trans = btrfs_start_transaction(root, 1);
3938         if (IS_ERR(trans)) {
3939                 ret = PTR_ERR(trans);
3940                 fprintf(stderr, "Error starting transaction: %s\n",
3941                         strerror(-ret));
3942                 return ret;
3943         }
3944         btrfs_init_path(&path);
3945         cache = first_cache_extent(corrupt_blocks);
3946         while (cache) {
3947                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3948                                        cache);
3949                 level = corrupt->level;
3950                 path.lowest_level = level;
3951                 key.objectid = corrupt->key.objectid;
3952                 key.type = corrupt->key.type;
3953                 key.offset = corrupt->key.offset;
3954
3955                 /*
3956                  * Here we don't want to do any tree balance, since it may
3957                  * cause a balance with corrupted brother leaf/node,
3958                  * so ins_len set to 0 here.
3959                  * Balance will be done after all corrupt node/leaf is deleted.
3960                  */
3961                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3962                 if (ret < 0)
3963                         goto out;
3964                 offset = btrfs_node_blockptr(path.nodes[level],
3965                                              path.slots[level]);
3966
3967                 /* Remove the ptr */
3968                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3969                 if (ret < 0)
3970                         goto out;
3971                 /*
3972                  * Remove the corresponding extent
3973                  * return value is not concerned.
3974                  */
3975                 btrfs_release_path(&path);
3976                 ret = btrfs_free_extent(trans, root, offset,
3977                                 root->fs_info->nodesize, 0,
3978                                 root->root_key.objectid, level - 1, 0);
3979                 cache = next_cache_extent(cache);
3980         }
3981
3982         /* Balance the btree using btrfs_search_slot() */
3983         cache = first_cache_extent(corrupt_blocks);
3984         while (cache) {
3985                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3986                                        cache);
3987                 memcpy(&key, &corrupt->key, sizeof(key));
3988                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3989                 if (ret < 0)
3990                         goto out;
3991                 /* return will always >0 since it won't find the item */
3992                 ret = 0;
3993                 btrfs_release_path(&path);
3994                 cache = next_cache_extent(cache);
3995         }
3996 out:
3997         btrfs_commit_transaction(trans, root);
3998         btrfs_release_path(&path);
3999         return ret;
4000 }
4001
4002 static int check_fs_root(struct btrfs_root *root,
4003                          struct cache_tree *root_cache,
4004                          struct walk_control *wc)
4005 {
4006         int ret = 0;
4007         int err = 0;
4008         int wret;
4009         int level;
4010         struct btrfs_path path;
4011         struct shared_node root_node;
4012         struct root_record *rec;
4013         struct btrfs_root_item *root_item = &root->root_item;
4014         struct cache_tree corrupt_blocks;
4015         struct orphan_data_extent *orphan;
4016         struct orphan_data_extent *tmp;
4017         enum btrfs_tree_block_status status;
4018         struct node_refs nrefs;
4019
4020         /*
4021          * Reuse the corrupt_block cache tree to record corrupted tree block
4022          *
4023          * Unlike the usage in extent tree check, here we do it in a per
4024          * fs/subvol tree base.
4025          */
4026         cache_tree_init(&corrupt_blocks);
4027         root->fs_info->corrupt_blocks = &corrupt_blocks;
4028
4029         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4030                 rec = get_root_rec(root_cache, root->root_key.objectid);
4031                 BUG_ON(IS_ERR(rec));
4032                 if (btrfs_root_refs(root_item) > 0)
4033                         rec->found_root_item = 1;
4034         }
4035
4036         btrfs_init_path(&path);
4037         memset(&root_node, 0, sizeof(root_node));
4038         cache_tree_init(&root_node.root_cache);
4039         cache_tree_init(&root_node.inode_cache);
4040         memset(&nrefs, 0, sizeof(nrefs));
4041
4042         /* Move the orphan extent record to corresponding inode_record */
4043         list_for_each_entry_safe(orphan, tmp,
4044                                  &root->orphan_data_extents, list) {
4045                 struct inode_record *inode;
4046
4047                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4048                                       1);
4049                 BUG_ON(IS_ERR(inode));
4050                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4051                 list_move(&orphan->list, &inode->orphan_extents);
4052         }
4053
4054         level = btrfs_header_level(root->node);
4055         memset(wc->nodes, 0, sizeof(wc->nodes));
4056         wc->nodes[level] = &root_node;
4057         wc->active_node = level;
4058         wc->root_level = level;
4059
4060         /* We may not have checked the root block, lets do that now */
4061         if (btrfs_is_leaf(root->node))
4062                 status = btrfs_check_leaf(root, NULL, root->node);
4063         else
4064                 status = btrfs_check_node(root, NULL, root->node);
4065         if (status != BTRFS_TREE_BLOCK_CLEAN)
4066                 return -EIO;
4067
4068         if (btrfs_root_refs(root_item) > 0 ||
4069             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4070                 path.nodes[level] = root->node;
4071                 extent_buffer_get(root->node);
4072                 path.slots[level] = 0;
4073         } else {
4074                 struct btrfs_key key;
4075                 struct btrfs_disk_key found_key;
4076
4077                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4078                 level = root_item->drop_level;
4079                 path.lowest_level = level;
4080                 if (level > btrfs_header_level(root->node) ||
4081                     level >= BTRFS_MAX_LEVEL) {
4082                         error("ignoring invalid drop level: %u", level);
4083                         goto skip_walking;
4084                 }
4085                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4086                 if (wret < 0)
4087                         goto skip_walking;
4088                 btrfs_node_key(path.nodes[level], &found_key,
4089                                 path.slots[level]);
4090                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4091                                         sizeof(found_key)));
4092         }
4093
4094         while (1) {
4095                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4096                 if (wret < 0)
4097                         ret = wret;
4098                 if (wret != 0)
4099                         break;
4100
4101                 wret = walk_up_tree(root, &path, wc, &level);
4102                 if (wret < 0)
4103                         ret = wret;
4104                 if (wret != 0)
4105                         break;
4106         }
4107 skip_walking:
4108         btrfs_release_path(&path);
4109
4110         if (!cache_tree_empty(&corrupt_blocks)) {
4111                 struct cache_extent *cache;
4112                 struct btrfs_corrupt_block *corrupt;
4113
4114                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4115                        root->root_key.objectid);
4116                 cache = first_cache_extent(&corrupt_blocks);
4117                 while (cache) {
4118                         corrupt = container_of(cache,
4119                                                struct btrfs_corrupt_block,
4120                                                cache);
4121                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4122                                cache->start, corrupt->level,
4123                                corrupt->key.objectid, corrupt->key.type,
4124                                corrupt->key.offset);
4125                         cache = next_cache_extent(cache);
4126                 }
4127                 if (repair) {
4128                         printf("Try to repair the btree for root %llu\n",
4129                                root->root_key.objectid);
4130                         ret = repair_btree(root, &corrupt_blocks);
4131                         if (ret < 0)
4132                                 fprintf(stderr, "Failed to repair btree: %s\n",
4133                                         strerror(-ret));
4134                         if (!ret)
4135                                 printf("Btree for root %llu is fixed\n",
4136                                        root->root_key.objectid);
4137                 }
4138         }
4139
4140         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4141         if (err < 0)
4142                 ret = err;
4143
4144         if (root_node.current) {
4145                 root_node.current->checked = 1;
4146                 maybe_free_inode_rec(&root_node.inode_cache,
4147                                 root_node.current);
4148         }
4149
4150         err = check_inode_recs(root, &root_node.inode_cache);
4151         if (!ret)
4152                 ret = err;
4153
4154         free_corrupt_blocks_tree(&corrupt_blocks);
4155         root->fs_info->corrupt_blocks = NULL;
4156         free_orphan_data_extents(&root->orphan_data_extents);
4157         return ret;
4158 }
4159
4160 static int fs_root_objectid(u64 objectid)
4161 {
4162         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4163             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4164                 return 1;
4165         return is_fstree(objectid);
4166 }
4167
4168 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4169                           struct cache_tree *root_cache)
4170 {
4171         struct btrfs_path path;
4172         struct btrfs_key key;
4173         struct walk_control wc;
4174         struct extent_buffer *leaf, *tree_node;
4175         struct btrfs_root *tmp_root;
4176         struct btrfs_root *tree_root = fs_info->tree_root;
4177         int ret;
4178         int err = 0;
4179
4180         if (ctx.progress_enabled) {
4181                 ctx.tp = TASK_FS_ROOTS;
4182                 task_start(ctx.info);
4183         }
4184
4185         /*
4186          * Just in case we made any changes to the extent tree that weren't
4187          * reflected into the free space cache yet.
4188          */
4189         if (repair)
4190                 reset_cached_block_groups(fs_info);
4191         memset(&wc, 0, sizeof(wc));
4192         cache_tree_init(&wc.shared);
4193         btrfs_init_path(&path);
4194
4195 again:
4196         key.offset = 0;
4197         key.objectid = 0;
4198         key.type = BTRFS_ROOT_ITEM_KEY;
4199         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4200         if (ret < 0) {
4201                 err = 1;
4202                 goto out;
4203         }
4204         tree_node = tree_root->node;
4205         while (1) {
4206                 if (tree_node != tree_root->node) {
4207                         free_root_recs_tree(root_cache);
4208                         btrfs_release_path(&path);
4209                         goto again;
4210                 }
4211                 leaf = path.nodes[0];
4212                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4213                         ret = btrfs_next_leaf(tree_root, &path);
4214                         if (ret) {
4215                                 if (ret < 0)
4216                                         err = 1;
4217                                 break;
4218                         }
4219                         leaf = path.nodes[0];
4220                 }
4221                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4222                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4223                     fs_root_objectid(key.objectid)) {
4224                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4225                                 tmp_root = btrfs_read_fs_root_no_cache(
4226                                                 fs_info, &key);
4227                         } else {
4228                                 key.offset = (u64)-1;
4229                                 tmp_root = btrfs_read_fs_root(
4230                                                 fs_info, &key);
4231                         }
4232                         if (IS_ERR(tmp_root)) {
4233                                 err = 1;
4234                                 goto next;
4235                         }
4236                         ret = check_fs_root(tmp_root, root_cache, &wc);
4237                         if (ret == -EAGAIN) {
4238                                 free_root_recs_tree(root_cache);
4239                                 btrfs_release_path(&path);
4240                                 goto again;
4241                         }
4242                         if (ret)
4243                                 err = 1;
4244                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4245                                 btrfs_free_fs_root(tmp_root);
4246                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4247                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4248                         process_root_ref(leaf, path.slots[0], &key,
4249                                          root_cache);
4250                 }
4251 next:
4252                 path.slots[0]++;
4253         }
4254 out:
4255         btrfs_release_path(&path);
4256         if (err)
4257                 free_extent_cache_tree(&wc.shared);
4258         if (!cache_tree_empty(&wc.shared))
4259                 fprintf(stderr, "warning line %d\n", __LINE__);
4260
4261         task_stop(ctx.info);
4262
4263         return err;
4264 }
4265
4266 /*
4267  * Find the @index according by @ino and name.
4268  * Notice:time efficiency is O(N)
4269  *
4270  * @root:       the root of the fs/file tree
4271  * @index_ret:  the index as return value
4272  * @namebuf:    the name to match
4273  * @name_len:   the length of name to match
4274  * @file_type:  the file_type of INODE_ITEM to match
4275  *
4276  * Returns 0 if found and *@index_ret will be modified with right value
4277  * Returns< 0 not found and *@index_ret will be (u64)-1
4278  */
4279 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4280                           u64 *index_ret, char *namebuf, u32 name_len,
4281                           u8 file_type)
4282 {
4283         struct btrfs_path path;
4284         struct extent_buffer *node;
4285         struct btrfs_dir_item *di;
4286         struct btrfs_key key;
4287         struct btrfs_key location;
4288         char name[BTRFS_NAME_LEN] = {0};
4289
4290         u32 total;
4291         u32 cur = 0;
4292         u32 len;
4293         u32 data_len;
4294         u8 filetype;
4295         int slot;
4296         int ret;
4297
4298         ASSERT(index_ret);
4299
4300         /* search from the last index */
4301         key.objectid = dirid;
4302         key.offset = (u64)-1;
4303         key.type = BTRFS_DIR_INDEX_KEY;
4304
4305         btrfs_init_path(&path);
4306         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4307         if (ret < 0)
4308                 return ret;
4309
4310 loop:
4311         ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4312         if (ret) {
4313                 ret = -ENOENT;
4314                 *index_ret = (64)-1;
4315                 goto out;
4316         }
4317         /* Check whether inode_id/filetype/name match */
4318         node = path.nodes[0];
4319         slot = path.slots[0];
4320         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4321         total = btrfs_item_size_nr(node, slot);
4322         while (cur < total) {
4323                 ret = -ENOENT;
4324                 len = btrfs_dir_name_len(node, di);
4325                 data_len = btrfs_dir_data_len(node, di);
4326
4327                 btrfs_dir_item_key_to_cpu(node, di, &location);
4328                 if (location.objectid != location_id ||
4329                     location.type != BTRFS_INODE_ITEM_KEY ||
4330                     location.offset != 0)
4331                         goto next;
4332
4333                 filetype = btrfs_dir_type(node, di);
4334                 if (file_type != filetype)
4335                         goto next;
4336
4337                 if (len > BTRFS_NAME_LEN)
4338                         len = BTRFS_NAME_LEN;
4339
4340                 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4341                 if (len != name_len || strncmp(namebuf, name, len))
4342                         goto next;
4343
4344                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4345                 *index_ret = key.offset;
4346                 ret = 0;
4347                 goto out;
4348 next:
4349                 len += sizeof(*di) + data_len;
4350                 di = (struct btrfs_dir_item *)((char *)di + len);
4351                 cur += len;
4352         }
4353         goto loop;
4354
4355 out:
4356         btrfs_release_path(&path);
4357         return ret;
4358 }
4359
4360 /*
4361  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4362  * INODE_REF/INODE_EXTREF match.
4363  *
4364  * @root:       the root of the fs/file tree
4365  * @key:        the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4366  *              value while find index
4367  * @location_key: location key of the struct btrfs_dir_item to match
4368  * @name:       the name to match
4369  * @namelen:    the length of name
4370  * @file_type:  the type of file to math
4371  *
4372  * Return 0 if no error occurred.
4373  * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4374  * DIR_ITEM/DIR_INDEX
4375  * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4376  * and DIR_ITEM/DIR_INDEX mismatch
4377  */
4378 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4379                          struct btrfs_key *location_key, char *name,
4380                          u32 namelen, u8 file_type)
4381 {
4382         struct btrfs_path path;
4383         struct extent_buffer *node;
4384         struct btrfs_dir_item *di;
4385         struct btrfs_key location;
4386         char namebuf[BTRFS_NAME_LEN] = {0};
4387         u32 total;
4388         u32 cur = 0;
4389         u32 len;
4390         u32 data_len;
4391         u8 filetype;
4392         int slot;
4393         int ret;
4394
4395         /* get the index by traversing all index */
4396         if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4397                 ret = find_dir_index(root, key->objectid,
4398                                      location_key->objectid, &key->offset,
4399                                      name, namelen, file_type);
4400                 if (ret)
4401                         ret = DIR_INDEX_MISSING;
4402                 return ret;
4403         }
4404
4405         btrfs_init_path(&path);
4406         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4407         if (ret) {
4408                 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4409                         DIR_INDEX_MISSING;
4410                 goto out;
4411         }
4412
4413         /* Check whether inode_id/filetype/name match */
4414         node = path.nodes[0];
4415         slot = path.slots[0];
4416         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4417         total = btrfs_item_size_nr(node, slot);
4418         while (cur < total) {
4419                 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4420                         DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4421
4422                 len = btrfs_dir_name_len(node, di);
4423                 data_len = btrfs_dir_data_len(node, di);
4424
4425                 btrfs_dir_item_key_to_cpu(node, di, &location);
4426                 if (location.objectid != location_key->objectid ||
4427                     location.type != location_key->type ||
4428                     location.offset != location_key->offset)
4429                         goto next;
4430
4431                 filetype = btrfs_dir_type(node, di);
4432                 if (file_type != filetype)
4433                         goto next;
4434
4435                 if (len > BTRFS_NAME_LEN) {
4436                         len = BTRFS_NAME_LEN;
4437                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4438                         root->objectid,
4439                         key->type == BTRFS_DIR_ITEM_KEY ?
4440                         "DIR_ITEM" : "DIR_INDEX",
4441                         key->objectid, key->offset, len);
4442                 }
4443                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4444                                    len);
4445                 if (len != namelen || strncmp(namebuf, name, len))
4446                         goto next;
4447
4448                 ret = 0;
4449                 goto out;
4450 next:
4451                 len += sizeof(*di) + data_len;
4452                 di = (struct btrfs_dir_item *)((char *)di + len);
4453                 cur += len;
4454         }
4455
4456 out:
4457         btrfs_release_path(&path);
4458         return ret;
4459 }
4460
4461 /*
4462  * Prints inode ref error message
4463  */
4464 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4465                                 u64 index, const char *namebuf, int name_len,
4466                                 u8 filetype, int err)
4467 {
4468         if (!err)
4469                 return;
4470
4471         /* root dir error */
4472         if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4473                 error(
4474         "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4475                       root->objectid, key->objectid, key->offset, namebuf);
4476                 return;
4477         }
4478
4479         /* normal error */
4480         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4481                 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4482                       root->objectid, key->offset,
4483                       btrfs_name_hash(namebuf, name_len),
4484                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4485                       namebuf, filetype);
4486         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4487                 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4488                       root->objectid, key->offset, index,
4489                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4490                       namebuf, filetype);
4491 }
4492
4493 /*
4494  * Insert the missing inode item.
4495  *
4496  * Returns 0 means success.
4497  * Returns <0 means error.
4498  */
4499 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4500                                      u8 filetype)
4501 {
4502         struct btrfs_key key;
4503         struct btrfs_trans_handle *trans;
4504         struct btrfs_path path;
4505         int ret;
4506
4507         key.objectid = ino;
4508         key.type = BTRFS_INODE_ITEM_KEY;
4509         key.offset = 0;
4510
4511         btrfs_init_path(&path);
4512         trans = btrfs_start_transaction(root, 1);
4513         if (IS_ERR(trans)) {
4514                 ret = -EIO;
4515                 goto out;
4516         }
4517
4518         ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4519         if (ret < 0 || !ret)
4520                 goto fail;
4521
4522         /* insert inode item */
4523         create_inode_item_lowmem(trans, root, ino, filetype);
4524         ret = 0;
4525 fail:
4526         btrfs_commit_transaction(trans, root);
4527 out:
4528         if (ret)
4529                 error("failed to repair root %llu INODE ITEM[%llu] missing",
4530                       root->objectid, ino);
4531         btrfs_release_path(&path);
4532         return ret;
4533 }
4534
4535 /*
4536  * The ternary means dir item, dir index and relative inode ref.
4537  * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4538  * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4539  * strategy:
4540  * If two of three is missing or mismatched, delete the existing one.
4541  * If one of three is missing or mismatched, add the missing one.
4542  *
4543  * returns 0 means success.
4544  * returns not 0 means on error;
4545  */
4546 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4547                           u64 index, char *name, int name_len, u8 filetype,
4548                           int err)
4549 {
4550         struct btrfs_trans_handle *trans;
4551         int stage = 0;
4552         int ret = 0;
4553
4554         /*
4555          * stage shall be one of following valild values:
4556          *      0: Fine, nothing to do.
4557          *      1: One of three is wrong, so add missing one.
4558          *      2: Two of three is wrong, so delete existed one.
4559          */
4560         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4561                 stage++;
4562         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4563                 stage++;
4564         if (err & (INODE_REF_MISSING))
4565                 stage++;
4566
4567         /* stage must be smllarer than 3 */
4568         ASSERT(stage < 3);
4569
4570         trans = btrfs_start_transaction(root, 1);
4571         if (stage == 2) {
4572                 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4573                                    name_len, 0);
4574                 goto out;
4575         }
4576         if (stage == 1) {
4577                 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4578                                filetype, &index, 1, 1);
4579                 goto out;
4580         }
4581 out:
4582         btrfs_commit_transaction(trans, root);
4583
4584         if (ret)
4585                 error("fail to repair inode %llu name %s filetype %u",
4586                       ino, name, filetype);
4587         else
4588                 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4589                        stage == 2 ? "Delete" : "Add",
4590                        ino, name, filetype);
4591
4592         return ret;
4593 }
4594
4595 /*
4596  * Traverse the given INODE_REF and call find_dir_item() to find related
4597  * DIR_ITEM/DIR_INDEX.
4598  *
4599  * @root:       the root of the fs/file tree
4600  * @ref_key:    the key of the INODE_REF
4601  * @path        the path provides node and slot
4602  * @refs:       the count of INODE_REF
4603  * @mode:       the st_mode of INODE_ITEM
4604  * @name_ret:   returns with the first ref's name
4605  * @name_len_ret:    len of the name_ret
4606  *
4607  * Return 0 if no error occurred.
4608  */
4609 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4610                            struct btrfs_path *path, char *name_ret,
4611                            u32 *namelen_ret, u64 *refs_ret, int mode)
4612 {
4613         struct btrfs_key key;
4614         struct btrfs_key location;
4615         struct btrfs_inode_ref *ref;
4616         struct extent_buffer *node;
4617         char namebuf[BTRFS_NAME_LEN] = {0};
4618         u32 total;
4619         u32 cur = 0;
4620         u32 len;
4621         u32 name_len;
4622         u64 index;
4623         int ret;
4624         int err = 0;
4625         int tmp_err;
4626         int slot;
4627         int need_research = 0;
4628         u64 refs;
4629
4630 begin:
4631         err = 0;
4632         cur = 0;
4633         refs = *refs_ret;
4634
4635         /* since after repair, path and the dir item may be changed */
4636         if (need_research) {
4637                 need_research = 0;
4638                 btrfs_release_path(path);
4639                 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4640                 /* the item was deleted, let path point to the last checked item */
4641                 if (ret > 0) {
4642                         if (path->slots[0] == 0)
4643                                 btrfs_prev_leaf(root, path);
4644                         else
4645                                 path->slots[0]--;
4646                 }
4647                 if (ret)
4648                         goto out;
4649         }
4650
4651         location.objectid = ref_key->objectid;
4652         location.type = BTRFS_INODE_ITEM_KEY;
4653         location.offset = 0;
4654         node = path->nodes[0];
4655         slot = path->slots[0];
4656
4657         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4658         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4659         total = btrfs_item_size_nr(node, slot);
4660
4661 next:
4662         /* Update inode ref count */
4663         refs++;
4664         tmp_err = 0;
4665         index = btrfs_inode_ref_index(node, ref);
4666         name_len = btrfs_inode_ref_name_len(node, ref);
4667
4668         if (name_len <= BTRFS_NAME_LEN) {
4669                 len = name_len;
4670         } else {
4671                 len = BTRFS_NAME_LEN;
4672                 warning("root %llu INODE_REF[%llu %llu] name too long",
4673                         root->objectid, ref_key->objectid, ref_key->offset);
4674         }
4675
4676         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4677
4678         /* copy the first name found to name_ret */
4679         if (refs == 1 && name_ret) {
4680                 memcpy(name_ret, namebuf, len);
4681                 *namelen_ret = len;
4682         }
4683
4684         /* Check root dir ref */
4685         if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4686                 if (index != 0 || len != strlen("..") ||
4687                     strncmp("..", namebuf, len) ||
4688                     ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4689                         /* set err bits then repair will delete the ref */
4690                         err |= DIR_INDEX_MISSING;
4691                         err |= DIR_ITEM_MISSING;
4692                 }
4693                 goto end;
4694         }
4695
4696         /* Find related DIR_INDEX */
4697         key.objectid = ref_key->offset;
4698         key.type = BTRFS_DIR_INDEX_KEY;
4699         key.offset = index;
4700         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4701                             imode_to_type(mode));
4702
4703         /* Find related dir_item */
4704         key.objectid = ref_key->offset;
4705         key.type = BTRFS_DIR_ITEM_KEY;
4706         key.offset = btrfs_name_hash(namebuf, len);
4707         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4708                             imode_to_type(mode));
4709 end:
4710         if (tmp_err && repair) {
4711                 ret = repair_ternary_lowmem(root, ref_key->offset,
4712                                             ref_key->objectid, index, namebuf,
4713                                             name_len, imode_to_type(mode),
4714                                             tmp_err);
4715                 if (!ret) {
4716                         need_research = 1;
4717                         goto begin;
4718                 }
4719         }
4720         print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4721                             imode_to_type(mode), tmp_err);
4722         err |= tmp_err;
4723         len = sizeof(*ref) + name_len;
4724         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4725         cur += len;
4726         if (cur < total)
4727                 goto next;
4728
4729 out:
4730         *refs_ret = refs;
4731         return err;
4732 }
4733
4734 /*
4735  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4736  * DIR_ITEM/DIR_INDEX.
4737  *
4738  * @root:       the root of the fs/file tree
4739  * @ref_key:    the key of the INODE_EXTREF
4740  * @refs:       the count of INODE_EXTREF
4741  * @mode:       the st_mode of INODE_ITEM
4742  *
4743  * Return 0 if no error occurred.
4744  */
4745 static int check_inode_extref(struct btrfs_root *root,
4746                               struct btrfs_key *ref_key,
4747                               struct extent_buffer *node, int slot, u64 *refs,
4748                               int mode)
4749 {
4750         struct btrfs_key key;
4751         struct btrfs_key location;
4752         struct btrfs_inode_extref *extref;
4753         char namebuf[BTRFS_NAME_LEN] = {0};
4754         u32 total;
4755         u32 cur = 0;
4756         u32 len;
4757         u32 name_len;
4758         u64 index;
4759         u64 parent;
4760         int ret;
4761         int err = 0;
4762
4763         location.objectid = ref_key->objectid;
4764         location.type = BTRFS_INODE_ITEM_KEY;
4765         location.offset = 0;
4766
4767         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4768         total = btrfs_item_size_nr(node, slot);
4769
4770 next:
4771         /* update inode ref count */
4772         (*refs)++;
4773         name_len = btrfs_inode_extref_name_len(node, extref);
4774         index = btrfs_inode_extref_index(node, extref);
4775         parent = btrfs_inode_extref_parent(node, extref);
4776         if (name_len <= BTRFS_NAME_LEN) {
4777                 len = name_len;
4778         } else {
4779                 len = BTRFS_NAME_LEN;
4780                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4781                         root->objectid, ref_key->objectid, ref_key->offset);
4782         }
4783         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4784
4785         /* Check root dir ref name */
4786         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4787                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4788                       root->objectid, ref_key->objectid, ref_key->offset,
4789                       namebuf);
4790                 err |= ROOT_DIR_ERROR;
4791         }
4792
4793         /* find related dir_index */
4794         key.objectid = parent;
4795         key.type = BTRFS_DIR_INDEX_KEY;
4796         key.offset = index;
4797         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4798         err |= ret;
4799
4800         /* find related dir_item */
4801         key.objectid = parent;
4802         key.type = BTRFS_DIR_ITEM_KEY;
4803         key.offset = btrfs_name_hash(namebuf, len);
4804         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4805         err |= ret;
4806
4807         len = sizeof(*extref) + name_len;
4808         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4809         cur += len;
4810
4811         if (cur < total)
4812                 goto next;
4813
4814         return err;
4815 }
4816
4817 /*
4818  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4819  * DIR_ITEM/DIR_INDEX match.
4820  * Return with @index_ret.
4821  *
4822  * @root:       the root of the fs/file tree
4823  * @key:        the key of the INODE_REF/INODE_EXTREF
4824  * @name:       the name in the INODE_REF/INODE_EXTREF
4825  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4826  * @index_ret:  the index in the INODE_REF/INODE_EXTREF,
4827  *              value (64)-1 means do not check index
4828  * @ext_ref:    the EXTENDED_IREF feature
4829  *
4830  * Return 0 if no error occurred.
4831  * Return >0 for error bitmap
4832  */
4833 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4834                           char *name, int namelen, u64 *index_ret,
4835                           unsigned int ext_ref)
4836 {
4837         struct btrfs_path path;
4838         struct btrfs_inode_ref *ref;
4839         struct btrfs_inode_extref *extref;
4840         struct extent_buffer *node;
4841         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4842         u32 total;
4843         u32 cur = 0;
4844         u32 len;
4845         u32 ref_namelen;
4846         u64 ref_index;
4847         u64 parent;
4848         u64 dir_id;
4849         int slot;
4850         int ret;
4851
4852         ASSERT(index_ret);
4853
4854         btrfs_init_path(&path);
4855         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4856         if (ret) {
4857                 ret = INODE_REF_MISSING;
4858                 goto extref;
4859         }
4860
4861         node = path.nodes[0];
4862         slot = path.slots[0];
4863
4864         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4865         total = btrfs_item_size_nr(node, slot);
4866
4867         /* Iterate all entry of INODE_REF */
4868         while (cur < total) {
4869                 ret = INODE_REF_MISSING;
4870
4871                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4872                 ref_index = btrfs_inode_ref_index(node, ref);
4873                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4874                         goto next_ref;
4875
4876                 if (cur + sizeof(*ref) + ref_namelen > total ||
4877                     ref_namelen > BTRFS_NAME_LEN) {
4878                         warning("root %llu INODE %s[%llu %llu] name too long",
4879                                 root->objectid,
4880                                 key->type == BTRFS_INODE_REF_KEY ?
4881                                         "REF" : "EXTREF",
4882                                 key->objectid, key->offset);
4883
4884                         if (cur + sizeof(*ref) > total)
4885                                 break;
4886                         len = min_t(u32, total - cur - sizeof(*ref),
4887                                     BTRFS_NAME_LEN);
4888                 } else {
4889                         len = ref_namelen;
4890                 }
4891
4892                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4893                                    len);
4894
4895                 if (len != namelen || strncmp(ref_namebuf, name, len))
4896                         goto next_ref;
4897
4898                 *index_ret = ref_index;
4899                 ret = 0;
4900                 goto out;
4901 next_ref:
4902                 len = sizeof(*ref) + ref_namelen;
4903                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4904                 cur += len;
4905         }
4906
4907 extref:
4908         /* Skip if not support EXTENDED_IREF feature */
4909         if (!ext_ref)
4910                 goto out;
4911
4912         btrfs_release_path(&path);
4913         btrfs_init_path(&path);
4914
4915         dir_id = key->offset;
4916         key->type = BTRFS_INODE_EXTREF_KEY;
4917         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4918
4919         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4920         if (ret) {
4921                 ret = INODE_REF_MISSING;
4922                 goto out;
4923         }
4924
4925         node = path.nodes[0];
4926         slot = path.slots[0];
4927
4928         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4929         cur = 0;
4930         total = btrfs_item_size_nr(node, slot);
4931
4932         /* Iterate all entry of INODE_EXTREF */
4933         while (cur < total) {
4934                 ret = INODE_REF_MISSING;
4935
4936                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4937                 ref_index = btrfs_inode_extref_index(node, extref);
4938                 parent = btrfs_inode_extref_parent(node, extref);
4939                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4940                         goto next_extref;
4941
4942                 if (parent != dir_id)
4943                         goto next_extref;
4944
4945                 if (ref_namelen <= BTRFS_NAME_LEN) {
4946                         len = ref_namelen;
4947                 } else {
4948                         len = BTRFS_NAME_LEN;
4949                         warning("root %llu INODE %s[%llu %llu] name too long",
4950                                 root->objectid,
4951                                 key->type == BTRFS_INODE_REF_KEY ?
4952                                         "REF" : "EXTREF",
4953                                 key->objectid, key->offset);
4954                 }
4955                 read_extent_buffer(node, ref_namebuf,
4956                                    (unsigned long)(extref + 1), len);
4957
4958                 if (len != namelen || strncmp(ref_namebuf, name, len))
4959                         goto next_extref;
4960
4961                 *index_ret = ref_index;
4962                 ret = 0;
4963                 goto out;
4964
4965 next_extref:
4966                 len = sizeof(*extref) + ref_namelen;
4967                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4968                 cur += len;
4969
4970         }
4971 out:
4972         btrfs_release_path(&path);
4973         return ret;
4974 }
4975
4976 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
4977                                u64 ino, u64 index, const char *namebuf,
4978                                int name_len, u8 filetype, int err)
4979 {
4980         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
4981                 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
4982                       root->objectid, key->objectid, key->offset, namebuf,
4983                       filetype,
4984                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
4985         }
4986
4987         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
4988                 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
4989                       root->objectid, key->objectid, index, namebuf, filetype,
4990                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
4991         }
4992
4993         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
4994                 error(
4995                 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
4996                       root->objectid, ino, index, namebuf, filetype,
4997                       err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
4998         }
4999
5000         if (err & INODE_REF_MISSING)
5001                 error(
5002                 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5003                       root->objectid, ino, key->objectid, namebuf, filetype);
5004
5005 }
5006
5007 /*
5008  * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5009  *
5010  * Returns error after repair
5011  */
5012 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5013                            u64 index, u8 filetype, char *namebuf, u32 name_len,
5014                            int err)
5015 {
5016         int ret;
5017
5018         if (err & INODE_ITEM_MISSING) {
5019                 ret = repair_inode_item_missing(root, ino, filetype);
5020                 if (!ret)
5021                         err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
5022         }
5023
5024         if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5025                 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5026                                             name_len, filetype, err);
5027                 if (!ret) {
5028                         err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5029                         err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5030                         err &= ~(INODE_REF_MISSING);
5031                 }
5032         }
5033         return err;
5034 }
5035
5036 /*
5037  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5038  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5039  *
5040  * @root:       the root of the fs/file tree
5041  * @key:        the key of the INODE_REF/INODE_EXTREF
5042  * @path:       the path
5043  * @size:       the st_size of the INODE_ITEM
5044  * @ext_ref:    the EXTENDED_IREF feature
5045  *
5046  * Return 0 if no error occurred.
5047  */
5048 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5049                           struct btrfs_path *path, u64 *size,
5050                           unsigned int ext_ref)
5051 {
5052         struct btrfs_dir_item *di;
5053         struct btrfs_inode_item *ii;
5054         struct btrfs_key key;
5055         struct btrfs_key location;
5056         struct extent_buffer *node;
5057         int slot;
5058         char namebuf[BTRFS_NAME_LEN] = {0};
5059         u32 total;
5060         u32 cur = 0;
5061         u32 len;
5062         u32 name_len;
5063         u32 data_len;
5064         u8 filetype;
5065         u32 mode = 0;
5066         u64 index;
5067         int ret;
5068         int err;
5069         int tmp_err;
5070         int need_research = 0;
5071
5072         /*
5073          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5074          * ignore index check.
5075          */
5076         if (di_key->type == BTRFS_DIR_INDEX_KEY)
5077                 index = di_key->offset;
5078         else
5079                 index = (u64)-1;
5080 begin:
5081         err = 0;
5082         cur = 0;
5083
5084         /* since after repair, path and the dir item may be changed */
5085         if (need_research) {
5086                 need_research = 0;
5087                 btrfs_release_path(path);
5088                 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5089                 /* the item was deleted, let path point the last checked item */
5090                 if (ret > 0) {
5091                         if (path->slots[0] == 0)
5092                                 btrfs_prev_leaf(root, path);
5093                         else
5094                                 path->slots[0]--;
5095                 }
5096                 if (ret)
5097                         goto out;
5098         }
5099
5100         node = path->nodes[0];
5101         slot = path->slots[0];
5102
5103         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5104         total = btrfs_item_size_nr(node, slot);
5105         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5106
5107         while (cur < total) {
5108                 data_len = btrfs_dir_data_len(node, di);
5109                 tmp_err = 0;
5110                 if (data_len)
5111                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5112                               root->objectid,
5113               di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5114                               di_key->objectid, di_key->offset, data_len);
5115
5116                 name_len = btrfs_dir_name_len(node, di);
5117                 if (name_len <= BTRFS_NAME_LEN) {
5118                         len = name_len;
5119                 } else {
5120                         len = BTRFS_NAME_LEN;
5121                         warning("root %llu %s[%llu %llu] name too long",
5122                                 root->objectid,
5123                 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5124                                 di_key->objectid, di_key->offset);
5125                 }
5126                 (*size) += name_len;
5127                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5128                                    len);
5129                 filetype = btrfs_dir_type(node, di);
5130
5131                 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5132                     di_key->offset != btrfs_name_hash(namebuf, len)) {
5133                         err |= -EIO;
5134                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5135                         root->objectid, di_key->objectid, di_key->offset,
5136                         namebuf, len, filetype, di_key->offset,
5137                         btrfs_name_hash(namebuf, len));
5138                 }
5139
5140                 btrfs_dir_item_key_to_cpu(node, di, &location);
5141                 /* Ignore related ROOT_ITEM check */
5142                 if (location.type == BTRFS_ROOT_ITEM_KEY)
5143                         goto next;
5144
5145                 btrfs_release_path(path);
5146                 /* Check relative INODE_ITEM(existence/filetype) */
5147                 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5148                 if (ret) {
5149                         tmp_err |= INODE_ITEM_MISSING;
5150                         goto next;
5151                 }
5152
5153                 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5154                                     struct btrfs_inode_item);
5155                 mode = btrfs_inode_mode(path->nodes[0], ii);
5156                 if (imode_to_type(mode) != filetype) {
5157                         tmp_err |= INODE_ITEM_MISMATCH;
5158                         goto next;
5159                 }
5160
5161                 /* Check relative INODE_REF/INODE_EXTREF */
5162                 key.objectid = location.objectid;
5163                 key.type = BTRFS_INODE_REF_KEY;
5164                 key.offset = di_key->objectid;
5165                 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5166                                           &index, ext_ref);
5167
5168                 /* check relative INDEX/ITEM */
5169                 key.objectid = di_key->objectid;
5170                 if (key.type == BTRFS_DIR_ITEM_KEY) {
5171                         key.type = BTRFS_DIR_INDEX_KEY;
5172                         key.offset = index;
5173                 } else {
5174                         key.type = BTRFS_DIR_ITEM_KEY;
5175                         key.offset = btrfs_name_hash(namebuf, name_len);
5176                 }
5177
5178                 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5179                                          name_len, filetype);
5180                 /* find_dir_item may find index */
5181                 if (key.type == BTRFS_DIR_INDEX_KEY)
5182                         index = key.offset;
5183 next:
5184
5185                 if (tmp_err && repair) {
5186                         ret = repair_dir_item(root, di_key->objectid,
5187                                               location.objectid, index,
5188                                               imode_to_type(mode), namebuf,
5189                                               name_len, tmp_err);
5190                         if (ret != tmp_err) {
5191                                 need_research = 1;
5192                                 goto begin;
5193                         }
5194                 }
5195                 btrfs_release_path(path);
5196                 print_dir_item_err(root, di_key, location.objectid, index,
5197                                    namebuf, name_len, filetype, tmp_err);
5198                 err |= tmp_err;
5199                 len = sizeof(*di) + name_len + data_len;
5200                 di = (struct btrfs_dir_item *)((char *)di + len);
5201                 cur += len;
5202
5203                 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5204                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5205                               root->objectid, di_key->objectid,
5206                               di_key->offset);
5207                         break;
5208                 }
5209         }
5210 out:
5211         /* research path */
5212         btrfs_release_path(path);
5213         ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5214         if (ret)
5215                 err |= ret > 0 ? -ENOENT : ret;
5216         return err;
5217 }
5218
5219 /*
5220  * Check file extent datasum/hole, update the size of the file extents,
5221  * check and update the last offset of the file extent.
5222  *
5223  * @root:       the root of fs/file tree.
5224  * @fkey:       the key of the file extent.
5225  * @nodatasum:  INODE_NODATASUM feature.
5226  * @size:       the sum of all EXTENT_DATA items size for this inode.
5227  * @end:        the offset of the last extent.
5228  *
5229  * Return 0 if no error occurred.
5230  */
5231 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5232                              struct extent_buffer *node, int slot,
5233                              unsigned int nodatasum, u64 *size, u64 *end)
5234 {
5235         struct btrfs_file_extent_item *fi;
5236         u64 disk_bytenr;
5237         u64 disk_num_bytes;
5238         u64 extent_num_bytes;
5239         u64 extent_offset;
5240         u64 csum_found;         /* In byte size, sectorsize aligned */
5241         u64 search_start;       /* Logical range start we search for csum */
5242         u64 search_len;         /* Logical range len we search for csum */
5243         unsigned int extent_type;
5244         unsigned int is_hole;
5245         int compressed = 0;
5246         int ret;
5247         int err = 0;
5248
5249         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5250
5251         /* Check inline extent */
5252         extent_type = btrfs_file_extent_type(node, fi);
5253         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5254                 struct btrfs_item *e = btrfs_item_nr(slot);
5255                 u32 item_inline_len;
5256
5257                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5258                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5259                 compressed = btrfs_file_extent_compression(node, fi);
5260                 if (extent_num_bytes == 0) {
5261                         error(
5262                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5263                                 root->objectid, fkey->objectid, fkey->offset);
5264                         err |= FILE_EXTENT_ERROR;
5265                 }
5266                 if (!compressed && extent_num_bytes != item_inline_len) {
5267                         error(
5268                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5269                                 root->objectid, fkey->objectid, fkey->offset,
5270                                 extent_num_bytes, item_inline_len);
5271                         err |= FILE_EXTENT_ERROR;
5272                 }
5273                 *end += extent_num_bytes;
5274                 *size += extent_num_bytes;
5275                 return err;
5276         }
5277
5278         /* Check extent type */
5279         if (extent_type != BTRFS_FILE_EXTENT_REG &&
5280                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5281                 err |= FILE_EXTENT_ERROR;
5282                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5283                       root->objectid, fkey->objectid, fkey->offset);
5284                 return err;
5285         }
5286
5287         /* Check REG_EXTENT/PREALLOC_EXTENT */
5288         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5289         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5290         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5291         extent_offset = btrfs_file_extent_offset(node, fi);
5292         compressed = btrfs_file_extent_compression(node, fi);
5293         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5294
5295         /*
5296          * Check EXTENT_DATA csum
5297          *
5298          * For plain (uncompressed) extent, we should only check the range
5299          * we're referring to, as it's possible that part of prealloc extent
5300          * has been written, and has csum:
5301          *
5302          * |<--- Original large preallocated extent A ---->|
5303          * |<- Prealloc File Extent ->|<- Regular Extent ->|
5304          *      No csum                         Has csum
5305          *
5306          * For compressed extent, we should check the whole range.
5307          */
5308         if (!compressed) {
5309                 search_start = disk_bytenr + extent_offset;
5310                 search_len = extent_num_bytes;
5311         } else {
5312                 search_start = disk_bytenr;
5313                 search_len = disk_num_bytes;
5314         }
5315         ret = count_csum_range(root, search_start, search_len, &csum_found);
5316         if (csum_found > 0 && nodatasum) {
5317                 err |= ODD_CSUM_ITEM;
5318                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5319                       root->objectid, fkey->objectid, fkey->offset);
5320         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5321                    !is_hole && (ret < 0 || csum_found < search_len)) {
5322                 err |= CSUM_ITEM_MISSING;
5323                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5324                       root->objectid, fkey->objectid, fkey->offset,
5325                       csum_found, search_len);
5326         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5327                 err |= ODD_CSUM_ITEM;
5328                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5329                       root->objectid, fkey->objectid, fkey->offset, csum_found);
5330         }
5331
5332         /* Check EXTENT_DATA hole */
5333         if (!no_holes && *end != fkey->offset) {
5334                 err |= FILE_EXTENT_ERROR;
5335                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
5336                       root->objectid, fkey->objectid, fkey->offset);
5337         }
5338
5339         *end += extent_num_bytes;
5340         if (!is_hole)
5341                 *size += extent_num_bytes;
5342
5343         return err;
5344 }
5345
5346 /*
5347  * Set inode item nbytes to @nbytes
5348  *
5349  * Returns  0     on success
5350  * Returns  != 0  on error
5351  */
5352 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5353                                       struct btrfs_path *path,
5354                                       u64 ino, u64 nbytes)
5355 {
5356         struct btrfs_trans_handle *trans;
5357         struct btrfs_inode_item *ii;
5358         struct btrfs_key key;
5359         struct btrfs_key research_key;
5360         int err = 0;
5361         int ret;
5362
5363         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5364
5365         key.objectid = ino;
5366         key.type = BTRFS_INODE_ITEM_KEY;
5367         key.offset = 0;
5368
5369         trans = btrfs_start_transaction(root, 1);
5370         if (IS_ERR(trans)) {
5371                 ret = PTR_ERR(trans);
5372                 err |= ret;
5373                 goto out;
5374         }
5375
5376         btrfs_release_path(path);
5377         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5378         if (ret > 0)
5379                 ret = -ENOENT;
5380         if (ret) {
5381                 err |= ret;
5382                 goto fail;
5383         }
5384
5385         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5386                             struct btrfs_inode_item);
5387         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5388         btrfs_mark_buffer_dirty(path->nodes[0]);
5389 fail:
5390         btrfs_commit_transaction(trans, root);
5391 out:
5392         if (ret)
5393                 error("failed to set nbytes in inode %llu root %llu",
5394                       ino, root->root_key.objectid);
5395         else
5396                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5397                        root->root_key.objectid, nbytes);
5398
5399         /* research path */
5400         btrfs_release_path(path);
5401         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5402         err |= ret;
5403
5404         return err;
5405 }
5406
5407 /*
5408  * Set directory inode isize to @isize.
5409  *
5410  * Returns 0     on success.
5411  * Returns != 0  on error.
5412  */
5413 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5414                                    struct btrfs_path *path,
5415                                    u64 ino, u64 isize)
5416 {
5417         struct btrfs_trans_handle *trans;
5418         struct btrfs_inode_item *ii;
5419         struct btrfs_key key;
5420         struct btrfs_key research_key;
5421         int ret;
5422         int err = 0;
5423
5424         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5425
5426         key.objectid = ino;
5427         key.type = BTRFS_INODE_ITEM_KEY;
5428         key.offset = 0;
5429
5430         trans = btrfs_start_transaction(root, 1);
5431         if (IS_ERR(trans)) {
5432                 ret = PTR_ERR(trans);
5433                 err |= ret;
5434                 goto out;
5435         }
5436
5437         btrfs_release_path(path);
5438         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5439         if (ret > 0)
5440                 ret = -ENOENT;
5441         if (ret) {
5442                 err |= ret;
5443                 goto fail;
5444         }
5445
5446         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5447                             struct btrfs_inode_item);
5448         btrfs_set_inode_size(path->nodes[0], ii, isize);
5449         btrfs_mark_buffer_dirty(path->nodes[0]);
5450 fail:
5451         btrfs_commit_transaction(trans, root);
5452 out:
5453         if (ret)
5454                 error("failed to set isize in inode %llu root %llu",
5455                       ino, root->root_key.objectid);
5456         else
5457                 printf("Set isize in inode %llu root %llu to %llu\n",
5458                        ino, root->root_key.objectid, isize);
5459
5460         btrfs_release_path(path);
5461         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5462         err |= ret;
5463
5464         return err;
5465 }
5466
5467 /*
5468  * Wrapper function for btrfs_add_orphan_item().
5469  *
5470  * Returns 0     on success.
5471  * Returns != 0  on error.
5472  */
5473 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5474                                            struct btrfs_path *path, u64 ino)
5475 {
5476         struct btrfs_trans_handle *trans;
5477         struct btrfs_key research_key;
5478         int ret;
5479         int err = 0;
5480
5481         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5482
5483         trans = btrfs_start_transaction(root, 1);
5484         if (IS_ERR(trans)) {
5485                 ret = PTR_ERR(trans);
5486                 err |= ret;
5487                 goto out;
5488         }
5489
5490         btrfs_release_path(path);
5491         ret = btrfs_add_orphan_item(trans, root, path, ino);
5492         err |= ret;
5493         btrfs_commit_transaction(trans, root);
5494 out:
5495         if (ret)
5496                 error("failed to add inode %llu as orphan item root %llu",
5497                       ino, root->root_key.objectid);
5498         else
5499                 printf("Added inode %llu as orphan item root %llu\n",
5500                        ino, root->root_key.objectid);
5501
5502         btrfs_release_path(path);
5503         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5504         err |= ret;
5505
5506         return err;
5507 }
5508
5509 /*
5510  * Check INODE_ITEM and related ITEMs (the same inode number)
5511  * 1. check link count
5512  * 2. check inode ref/extref
5513  * 3. check dir item/index
5514  *
5515  * @ext_ref:    the EXTENDED_IREF feature
5516  *
5517  * Return 0 if no error occurred.
5518  * Return >0 for error or hit the traversal is done(by error bitmap)
5519  */
5520 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5521                             unsigned int ext_ref)
5522 {
5523         struct extent_buffer *node;
5524         struct btrfs_inode_item *ii;
5525         struct btrfs_key key;
5526         u64 inode_id;
5527         u32 mode;
5528         u64 nlink;
5529         u64 nbytes;
5530         u64 isize;
5531         u64 size = 0;
5532         u64 refs = 0;
5533         u64 extent_end = 0;
5534         u64 extent_size = 0;
5535         unsigned int dir;
5536         unsigned int nodatasum;
5537         int slot;
5538         int ret;
5539         int err = 0;
5540         char namebuf[BTRFS_NAME_LEN] = {0};
5541         u32 name_len = 0;
5542
5543         node = path->nodes[0];
5544         slot = path->slots[0];
5545
5546         btrfs_item_key_to_cpu(node, &key, slot);
5547         inode_id = key.objectid;
5548
5549         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5550                 ret = btrfs_next_item(root, path);
5551                 if (ret > 0)
5552                         err |= LAST_ITEM;
5553                 return err;
5554         }
5555
5556         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5557         isize = btrfs_inode_size(node, ii);
5558         nbytes = btrfs_inode_nbytes(node, ii);
5559         mode = btrfs_inode_mode(node, ii);
5560         dir = imode_to_type(mode) == BTRFS_FT_DIR;
5561         nlink = btrfs_inode_nlink(node, ii);
5562         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5563
5564         while (1) {
5565                 ret = btrfs_next_item(root, path);
5566                 if (ret < 0) {
5567                         /* out will fill 'err' rusing current statistics */
5568                         goto out;
5569                 } else if (ret > 0) {
5570                         err |= LAST_ITEM;
5571                         goto out;
5572                 }
5573
5574                 node = path->nodes[0];
5575                 slot = path->slots[0];
5576                 btrfs_item_key_to_cpu(node, &key, slot);
5577                 if (key.objectid != inode_id)
5578                         goto out;
5579
5580                 switch (key.type) {
5581                 case BTRFS_INODE_REF_KEY:
5582                         ret = check_inode_ref(root, &key, path, namebuf,
5583                                               &name_len, &refs, mode);
5584                         err |= ret;
5585                         break;
5586                 case BTRFS_INODE_EXTREF_KEY:
5587                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5588                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
5589                                         root->objectid, key.objectid,
5590                                         key.offset);
5591                         ret = check_inode_extref(root, &key, node, slot, &refs,
5592                                                  mode);
5593                         err |= ret;
5594                         break;
5595                 case BTRFS_DIR_ITEM_KEY:
5596                 case BTRFS_DIR_INDEX_KEY:
5597                         if (!dir) {
5598                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5599                                         root->objectid, inode_id,
5600                                         imode_to_type(mode), key.objectid,
5601                                         key.offset);
5602                         }
5603                         ret = check_dir_item(root, &key, path, &size, ext_ref);
5604                         err |= ret;
5605                         break;
5606                 case BTRFS_EXTENT_DATA_KEY:
5607                         if (dir) {
5608                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5609                                         root->objectid, inode_id, key.objectid,
5610                                         key.offset);
5611                         }
5612                         ret = check_file_extent(root, &key, node, slot,
5613                                                 nodatasum, &extent_size,
5614                                                 &extent_end);
5615                         err |= ret;
5616                         break;
5617                 case BTRFS_XATTR_ITEM_KEY:
5618                         break;
5619                 default:
5620                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5621                               key.objectid, key.type, key.offset);
5622                 }
5623         }
5624
5625 out:
5626         /* verify INODE_ITEM nlink/isize/nbytes */
5627         if (dir) {
5628                 if (nlink != 1) {
5629                         err |= LINK_COUNT_ERROR;
5630                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5631                               root->objectid, inode_id, nlink);
5632                 }
5633
5634                 /*
5635                  * Just a warning, as dir inode nbytes is just an
5636                  * instructive value.
5637                  */
5638                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5639                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5640                                 root->objectid, inode_id,
5641                                 root->fs_info->nodesize);
5642                 }
5643
5644                 if (isize != size) {
5645                         if (repair)
5646                                 ret = repair_dir_isize_lowmem(root, path,
5647                                                               inode_id, size);
5648                         if (!repair || ret) {
5649                                 err |= ISIZE_ERROR;
5650                                 error(
5651                 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5652                                       root->objectid, inode_id, isize, size);
5653                         }
5654                 }
5655         } else {
5656                 if (nlink != refs) {
5657                         err |= LINK_COUNT_ERROR;
5658                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5659                               root->objectid, inode_id, nlink, refs);
5660                 } else if (!nlink) {
5661                         if (repair)
5662                                 ret = repair_inode_orphan_item_lowmem(root,
5663                                                               path, inode_id);
5664                         if (!repair || ret) {
5665                                 err |= ORPHAN_ITEM;
5666                                 error("root %llu INODE[%llu] is orphan item",
5667                                       root->objectid, inode_id);
5668                         }
5669                 }
5670
5671                 if (!nbytes && !no_holes && extent_end < isize) {
5672                         err |= NBYTES_ERROR;
5673                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5674                               root->objectid, inode_id, isize);
5675                 }
5676
5677                 if (nbytes != extent_size) {
5678                         if (repair)
5679                                 ret = repair_inode_nbytes_lowmem(root, path,
5680                                                          inode_id, extent_size);
5681                         if (!repair || ret) {
5682                                 err |= NBYTES_ERROR;
5683                                 error(
5684         "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5685                                       root->objectid, inode_id, nbytes,
5686                                       extent_size);
5687                         }
5688                 }
5689         }
5690
5691         return err;
5692 }
5693
5694 /*
5695  * Insert the missing inode item and inode ref.
5696  *
5697  * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
5698  * Root dir should be handled specially because root dir is the root of fs.
5699  *
5700  * returns err (>0 or 0) after repair
5701  */
5702 static int repair_fs_first_inode(struct btrfs_root *root, int err)
5703 {
5704         struct btrfs_trans_handle *trans;
5705         struct btrfs_key key;
5706         struct btrfs_path path;
5707         int filetype = BTRFS_FT_DIR;
5708         int ret = 0;
5709
5710         btrfs_init_path(&path);
5711
5712         if (err & INODE_REF_MISSING) {
5713                 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5714                 key.type = BTRFS_INODE_REF_KEY;
5715                 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5716
5717                 trans = btrfs_start_transaction(root, 1);
5718                 if (IS_ERR(trans)) {
5719                         ret = PTR_ERR(trans);
5720                         goto out;
5721                 }
5722
5723                 btrfs_release_path(&path);
5724                 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
5725                 if (ret)
5726                         goto trans_fail;
5727
5728                 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
5729                                              BTRFS_FIRST_FREE_OBJECTID,
5730                                              BTRFS_FIRST_FREE_OBJECTID, 0);
5731                 if (ret)
5732                         goto trans_fail;
5733
5734                 printf("Add INODE_REF[%llu %llu] name %s\n",
5735                        BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
5736                        "..");
5737                 err &= ~INODE_REF_MISSING;
5738 trans_fail:
5739                 if (ret)
5740                         error("fail to insert first inode's ref");
5741                 btrfs_commit_transaction(trans, root);
5742         }
5743
5744         if (err & INODE_ITEM_MISSING) {
5745                 ret = repair_inode_item_missing(root,
5746                                         BTRFS_FIRST_FREE_OBJECTID, filetype);
5747                 if (ret)
5748                         goto out;
5749                 err &= ~INODE_ITEM_MISSING;
5750         }
5751 out:
5752         if (ret)
5753                 error("fail to repair first inode");
5754         btrfs_release_path(&path);
5755         return err;
5756 }
5757
5758 /*
5759  * check first root dir's inode_item and inode_ref
5760  *
5761  * returns 0 means no error
5762  * returns >0 means error
5763  * returns <0 means fatal error
5764  */
5765 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5766 {
5767         struct btrfs_path path;
5768         struct btrfs_key key;
5769         struct btrfs_inode_item *ii;
5770         u64 index;
5771         u32 mode;
5772         int err = 0;
5773         int ret;
5774
5775         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5776         key.type = BTRFS_INODE_ITEM_KEY;
5777         key.offset = 0;
5778
5779         /* For root being dropped, we don't need to check first inode */
5780         if (btrfs_root_refs(&root->root_item) == 0 &&
5781             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5782             BTRFS_FIRST_FREE_OBJECTID)
5783                 return 0;
5784
5785         btrfs_init_path(&path);
5786         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5787         if (ret < 0)
5788                 goto out;
5789         if (ret > 0) {
5790                 ret = 0;
5791                 err |= INODE_ITEM_MISSING;
5792         } else {
5793                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
5794                                     struct btrfs_inode_item);
5795                 mode = btrfs_inode_mode(path.nodes[0], ii);
5796                 if (imode_to_type(mode) != BTRFS_FT_DIR)
5797                         err |= INODE_ITEM_MISMATCH;
5798         }
5799
5800         /* lookup first inode ref */
5801         key.offset = BTRFS_FIRST_FREE_OBJECTID;
5802         key.type = BTRFS_INODE_REF_KEY;
5803         /* special index value */
5804         index = 0;
5805
5806         ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
5807         if (ret < 0)
5808                 goto out;
5809         err |= ret;
5810
5811 out:
5812         btrfs_release_path(&path);
5813
5814         if (err && repair)
5815                 err = repair_fs_first_inode(root, err);
5816
5817         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
5818                 error("root dir INODE_ITEM is %s",
5819                       err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5820         if (err & INODE_REF_MISSING)
5821                 error("root dir INODE_REF is missing");
5822
5823         return ret < 0 ? ret : err;
5824 }
5825
5826 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5827                                                 u64 parent, u64 root)
5828 {
5829         struct rb_node *node;
5830         struct tree_backref *back = NULL;
5831         struct tree_backref match = {
5832                 .node = {
5833                         .is_data = 0,
5834                 },
5835         };
5836
5837         if (parent) {
5838                 match.parent = parent;
5839                 match.node.full_backref = 1;
5840         } else {
5841                 match.root = root;
5842         }
5843
5844         node = rb_search(&rec->backref_tree, &match.node.node,
5845                          (rb_compare_keys)compare_extent_backref, NULL);
5846         if (node)
5847                 back = to_tree_backref(rb_node_to_extent_backref(node));
5848
5849         return back;
5850 }
5851
5852 static struct data_backref *find_data_backref(struct extent_record *rec,
5853                                                 u64 parent, u64 root,
5854                                                 u64 owner, u64 offset,
5855                                                 int found_ref,
5856                                                 u64 disk_bytenr, u64 bytes)
5857 {
5858         struct rb_node *node;
5859         struct data_backref *back = NULL;
5860         struct data_backref match = {
5861                 .node = {
5862                         .is_data = 1,
5863                 },
5864                 .owner = owner,
5865                 .offset = offset,
5866                 .bytes = bytes,
5867                 .found_ref = found_ref,
5868                 .disk_bytenr = disk_bytenr,
5869         };
5870
5871         if (parent) {
5872                 match.parent = parent;
5873                 match.node.full_backref = 1;
5874         } else {
5875                 match.root = root;
5876         }
5877
5878         node = rb_search(&rec->backref_tree, &match.node.node,
5879                          (rb_compare_keys)compare_extent_backref, NULL);
5880         if (node)
5881                 back = to_data_backref(rb_node_to_extent_backref(node));
5882
5883         return back;
5884 }
5885 /*
5886  * Iterate all item on the tree and call check_inode_item() to check.
5887  *
5888  * @root:       the root of the tree to be checked.
5889  * @ext_ref:    the EXTENDED_IREF feature
5890  *
5891  * Return 0 if no error found.
5892  * Return <0 for error.
5893  */
5894 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5895 {
5896         struct btrfs_path path;
5897         struct node_refs nrefs;
5898         struct btrfs_root_item *root_item = &root->root_item;
5899         int ret;
5900         int level;
5901         int err = 0;
5902
5903         /*
5904          * We need to manually check the first inode item(256)
5905          * As the following traversal function will only start from
5906          * the first inode item in the leaf, if inode item(256) is missing
5907          * we will just skip it forever.
5908          */
5909         ret = check_fs_first_inode(root, ext_ref);
5910         if (ret < 0)
5911                 return ret;
5912         err |= !!ret;
5913
5914         memset(&nrefs, 0, sizeof(nrefs));
5915         level = btrfs_header_level(root->node);
5916         btrfs_init_path(&path);
5917
5918         if (btrfs_root_refs(root_item) > 0 ||
5919             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5920                 path.nodes[level] = root->node;
5921                 path.slots[level] = 0;
5922                 extent_buffer_get(root->node);
5923         } else {
5924                 struct btrfs_key key;
5925
5926                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5927                 level = root_item->drop_level;
5928                 path.lowest_level = level;
5929                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5930                 if (ret < 0)
5931                         goto out;
5932                 ret = 0;
5933         }
5934
5935         while (1) {
5936                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5937                 err |= !!ret;
5938
5939                 /* if ret is negative, walk shall stop */
5940                 if (ret < 0) {
5941                         ret = err;
5942                         break;
5943                 }
5944
5945                 ret = walk_up_tree_v2(root, &path, &level);
5946                 if (ret != 0) {
5947                         /* Normal exit, reset ret to err */
5948                         ret = err;
5949                         break;
5950                 }
5951         }
5952
5953 out:
5954         btrfs_release_path(&path);
5955         return ret;
5956 }
5957
5958 /*
5959  * Find the relative ref for root_ref and root_backref.
5960  *
5961  * @root:       the root of the root tree.
5962  * @ref_key:    the key of the root ref.
5963  *
5964  * Return 0 if no error occurred.
5965  */
5966 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5967                           struct extent_buffer *node, int slot)
5968 {
5969         struct btrfs_path path;
5970         struct btrfs_key key;
5971         struct btrfs_root_ref *ref;
5972         struct btrfs_root_ref *backref;
5973         char ref_name[BTRFS_NAME_LEN] = {0};
5974         char backref_name[BTRFS_NAME_LEN] = {0};
5975         u64 ref_dirid;
5976         u64 ref_seq;
5977         u32 ref_namelen;
5978         u64 backref_dirid;
5979         u64 backref_seq;
5980         u32 backref_namelen;
5981         u32 len;
5982         int ret;
5983         int err = 0;
5984
5985         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5986         ref_dirid = btrfs_root_ref_dirid(node, ref);
5987         ref_seq = btrfs_root_ref_sequence(node, ref);
5988         ref_namelen = btrfs_root_ref_name_len(node, ref);
5989
5990         if (ref_namelen <= BTRFS_NAME_LEN) {
5991                 len = ref_namelen;
5992         } else {
5993                 len = BTRFS_NAME_LEN;
5994                 warning("%s[%llu %llu] ref_name too long",
5995                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5996                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5997                         ref_key->offset);
5998         }
5999         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6000
6001         /* Find relative root_ref */
6002         key.objectid = ref_key->offset;
6003         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6004         key.offset = ref_key->objectid;
6005
6006         btrfs_init_path(&path);
6007         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6008         if (ret) {
6009                 err |= ROOT_REF_MISSING;
6010                 error("%s[%llu %llu] couldn't find relative ref",
6011                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6012                       "ROOT_REF" : "ROOT_BACKREF",
6013                       ref_key->objectid, ref_key->offset);
6014                 goto out;
6015         }
6016
6017         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6018                                  struct btrfs_root_ref);
6019         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6020         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6021         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6022
6023         if (backref_namelen <= BTRFS_NAME_LEN) {
6024                 len = backref_namelen;
6025         } else {
6026                 len = BTRFS_NAME_LEN;
6027                 warning("%s[%llu %llu] ref_name too long",
6028                         key.type == BTRFS_ROOT_REF_KEY ?
6029                         "ROOT_REF" : "ROOT_BACKREF",
6030                         key.objectid, key.offset);
6031         }
6032         read_extent_buffer(path.nodes[0], backref_name,
6033                            (unsigned long)(backref + 1), len);
6034
6035         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6036             ref_namelen != backref_namelen ||
6037             strncmp(ref_name, backref_name, len)) {
6038                 err |= ROOT_REF_MISMATCH;
6039                 error("%s[%llu %llu] mismatch relative ref",
6040                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6041                       "ROOT_REF" : "ROOT_BACKREF",
6042                       ref_key->objectid, ref_key->offset);
6043         }
6044 out:
6045         btrfs_release_path(&path);
6046         return err;
6047 }
6048
6049 /*
6050  * Check all fs/file tree in low_memory mode.
6051  *
6052  * 1. for fs tree root item, call check_fs_root_v2()
6053  * 2. for fs tree root ref/backref, call check_root_ref()
6054  *
6055  * Return 0 if no error occurred.
6056  */
6057 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6058 {
6059         struct btrfs_root *tree_root = fs_info->tree_root;
6060         struct btrfs_root *cur_root = NULL;
6061         struct btrfs_path path;
6062         struct btrfs_key key;
6063         struct extent_buffer *node;
6064         unsigned int ext_ref;
6065         int slot;
6066         int ret;
6067         int err = 0;
6068
6069         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6070
6071         btrfs_init_path(&path);
6072         key.objectid = BTRFS_FS_TREE_OBJECTID;
6073         key.offset = 0;
6074         key.type = BTRFS_ROOT_ITEM_KEY;
6075
6076         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6077         if (ret < 0) {
6078                 err = ret;
6079                 goto out;
6080         } else if (ret > 0) {
6081                 err = -ENOENT;
6082                 goto out;
6083         }
6084
6085         while (1) {
6086                 node = path.nodes[0];
6087                 slot = path.slots[0];
6088                 btrfs_item_key_to_cpu(node, &key, slot);
6089                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6090                         goto out;
6091                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6092                     fs_root_objectid(key.objectid)) {
6093                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6094                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6095                                                                        &key);
6096                         } else {
6097                                 key.offset = (u64)-1;
6098                                 cur_root = btrfs_read_fs_root(fs_info, &key);
6099                         }
6100
6101                         if (IS_ERR(cur_root)) {
6102                                 error("Fail to read fs/subvol tree: %lld",
6103                                       key.objectid);
6104                                 err = -EIO;
6105                                 goto next;
6106                         }
6107
6108                         ret = check_fs_root_v2(cur_root, ext_ref);
6109                         err |= ret;
6110
6111                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6112                                 btrfs_free_fs_root(cur_root);
6113                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6114                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
6115                         ret = check_root_ref(tree_root, &key, node, slot);
6116                         err |= ret;
6117                 }
6118 next:
6119                 ret = btrfs_next_item(tree_root, &path);
6120                 if (ret > 0)
6121                         goto out;
6122                 if (ret < 0) {
6123                         err = ret;
6124                         goto out;
6125                 }
6126         }
6127
6128 out:
6129         btrfs_release_path(&path);
6130         return err;
6131 }
6132
6133 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6134                           struct cache_tree *root_cache)
6135 {
6136         int ret;
6137
6138         if (!ctx.progress_enabled)
6139                 fprintf(stderr, "checking fs roots\n");
6140         if (check_mode == CHECK_MODE_LOWMEM)
6141                 ret = check_fs_roots_v2(fs_info);
6142         else
6143                 ret = check_fs_roots(fs_info, root_cache);
6144
6145         return ret;
6146 }
6147
6148 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6149 {
6150         struct extent_backref *back, *tmp;
6151         struct tree_backref *tback;
6152         struct data_backref *dback;
6153         u64 found = 0;
6154         int err = 0;
6155
6156         rbtree_postorder_for_each_entry_safe(back, tmp,
6157                                              &rec->backref_tree, node) {
6158                 if (!back->found_extent_tree) {
6159                         err = 1;
6160                         if (!print_errs)
6161                                 goto out;
6162                         if (back->is_data) {
6163                                 dback = to_data_backref(back);
6164                                 fprintf(stderr, "Data backref %llu %s %llu"
6165                                         " owner %llu offset %llu num_refs %lu"
6166                                         " not found in extent tree\n",
6167                                         (unsigned long long)rec->start,
6168                                         back->full_backref ?
6169                                         "parent" : "root",
6170                                         back->full_backref ?
6171                                         (unsigned long long)dback->parent:
6172                                         (unsigned long long)dback->root,
6173                                         (unsigned long long)dback->owner,
6174                                         (unsigned long long)dback->offset,
6175                                         (unsigned long)dback->num_refs);
6176                         } else {
6177                                 tback = to_tree_backref(back);
6178                                 fprintf(stderr, "Tree backref %llu parent %llu"
6179                                         " root %llu not found in extent tree\n",
6180                                         (unsigned long long)rec->start,
6181                                         (unsigned long long)tback->parent,
6182                                         (unsigned long long)tback->root);
6183                         }
6184                 }
6185                 if (!back->is_data && !back->found_ref) {
6186                         err = 1;
6187                         if (!print_errs)
6188                                 goto out;
6189                         tback = to_tree_backref(back);
6190                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6191                                 (unsigned long long)rec->start,
6192                                 back->full_backref ? "parent" : "root",
6193                                 back->full_backref ?
6194                                 (unsigned long long)tback->parent :
6195                                 (unsigned long long)tback->root, back);
6196                 }
6197                 if (back->is_data) {
6198                         dback = to_data_backref(back);
6199                         if (dback->found_ref != dback->num_refs) {
6200                                 err = 1;
6201                                 if (!print_errs)
6202                                         goto out;
6203                                 fprintf(stderr, "Incorrect local backref count"
6204                                         " on %llu %s %llu owner %llu"
6205                                         " offset %llu found %u wanted %u back %p\n",
6206                                         (unsigned long long)rec->start,
6207                                         back->full_backref ?
6208                                         "parent" : "root",
6209                                         back->full_backref ?
6210                                         (unsigned long long)dback->parent:
6211                                         (unsigned long long)dback->root,
6212                                         (unsigned long long)dback->owner,
6213                                         (unsigned long long)dback->offset,
6214                                         dback->found_ref, dback->num_refs, back);
6215                         }
6216                         if (dback->disk_bytenr != rec->start) {
6217                                 err = 1;
6218                                 if (!print_errs)
6219                                         goto out;
6220                                 fprintf(stderr, "Backref disk bytenr does not"
6221                                         " match extent record, bytenr=%llu, "
6222                                         "ref bytenr=%llu\n",
6223                                         (unsigned long long)rec->start,
6224                                         (unsigned long long)dback->disk_bytenr);
6225                         }
6226
6227                         if (dback->bytes != rec->nr) {
6228                                 err = 1;
6229                                 if (!print_errs)
6230                                         goto out;
6231                                 fprintf(stderr, "Backref bytes do not match "
6232                                         "extent backref, bytenr=%llu, ref "
6233                                         "bytes=%llu, backref bytes=%llu\n",
6234                                         (unsigned long long)rec->start,
6235                                         (unsigned long long)rec->nr,
6236                                         (unsigned long long)dback->bytes);
6237                         }
6238                 }
6239                 if (!back->is_data) {
6240                         found += 1;
6241                 } else {
6242                         dback = to_data_backref(back);
6243                         found += dback->found_ref;
6244                 }
6245         }
6246         if (found != rec->refs) {
6247                 err = 1;
6248                 if (!print_errs)
6249                         goto out;
6250                 fprintf(stderr, "Incorrect global backref count "
6251                         "on %llu found %llu wanted %llu\n",
6252                         (unsigned long long)rec->start,
6253                         (unsigned long long)found,
6254                         (unsigned long long)rec->refs);
6255         }
6256 out:
6257         return err;
6258 }
6259
/*
 * Node destructor handed to rb_free_nodes(): convert @node to its
 * extent_backref and free that object.
 */
static void __free_one_backref(struct rb_node *node)
{
        free(rb_node_to_extent_backref(node));
}
6266
6267 static void free_all_extent_backrefs(struct extent_record *rec)
6268 {
6269         rb_free_nodes(&rec->backref_tree, __free_one_backref);
6270 }
6271
6272 static void free_extent_record_cache(struct cache_tree *extent_cache)
6273 {
6274         struct cache_extent *cache;
6275         struct extent_record *rec;
6276
6277         while (1) {
6278                 cache = first_cache_extent(extent_cache);
6279                 if (!cache)
6280                         break;
6281                 rec = container_of(cache, struct extent_record, cache);
6282                 remove_cache_extent(extent_cache, cache);
6283                 free_all_extent_backrefs(rec);
6284                 free(rec);
6285         }
6286 }
6287
6288 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6289                                  struct extent_record *rec)
6290 {
6291         if (rec->content_checked && rec->owner_ref_checked &&
6292             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6293             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6294             !rec->bad_full_backref && !rec->crossing_stripes &&
6295             !rec->wrong_chunk_type) {
6296                 remove_cache_extent(extent_cache, &rec->cache);
6297                 free_all_extent_backrefs(rec);
6298                 list_del_init(&rec->list);
6299                 free(rec);
6300         }
6301         return 0;
6302 }
6303
6304 static int check_owner_ref(struct btrfs_root *root,
6305                             struct extent_record *rec,
6306                             struct extent_buffer *buf)
6307 {
6308         struct extent_backref *node, *tmp;
6309         struct tree_backref *back;
6310         struct btrfs_root *ref_root;
6311         struct btrfs_key key;
6312         struct btrfs_path path;
6313         struct extent_buffer *parent;
6314         int level;
6315         int found = 0;
6316         int ret;
6317
6318         rbtree_postorder_for_each_entry_safe(node, tmp,
6319                                              &rec->backref_tree, node) {
6320                 if (node->is_data)
6321                         continue;
6322                 if (!node->found_ref)
6323                         continue;
6324                 if (node->full_backref)
6325                         continue;
6326                 back = to_tree_backref(node);
6327                 if (btrfs_header_owner(buf) == back->root)
6328                         return 0;
6329         }
6330         BUG_ON(rec->is_root);
6331
6332         /* try to find the block by search corresponding fs tree */
6333         key.objectid = btrfs_header_owner(buf);
6334         key.type = BTRFS_ROOT_ITEM_KEY;
6335         key.offset = (u64)-1;
6336
6337         ref_root = btrfs_read_fs_root(root->fs_info, &key);
6338         if (IS_ERR(ref_root))
6339                 return 1;
6340
6341         level = btrfs_header_level(buf);
6342         if (level == 0)
6343                 btrfs_item_key_to_cpu(buf, &key, 0);
6344         else
6345                 btrfs_node_key_to_cpu(buf, &key, 0);
6346
6347         btrfs_init_path(&path);
6348         path.lowest_level = level + 1;
6349         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6350         if (ret < 0)
6351                 return 0;
6352
6353         parent = path.nodes[level + 1];
6354         if (parent && buf->start == btrfs_node_blockptr(parent,
6355                                                         path.slots[level + 1]))
6356                 found = 1;
6357
6358         btrfs_release_path(&path);
6359         return found ? 0 : 1;
6360 }
6361
6362 static int is_extent_tree_record(struct extent_record *rec)
6363 {
6364         struct extent_backref *node, *tmp;
6365         struct tree_backref *back;
6366         int is_extent = 0;
6367
6368         rbtree_postorder_for_each_entry_safe(node, tmp,
6369                                              &rec->backref_tree, node) {
6370                 if (node->is_data)
6371                         return 0;
6372                 back = to_tree_backref(node);
6373                 if (node->full_backref)
6374                         return 0;
6375                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
6376                         is_extent = 1;
6377         }
6378         return is_extent;
6379 }
6380
6381
6382 static int record_bad_block_io(struct btrfs_fs_info *info,
6383                                struct cache_tree *extent_cache,
6384                                u64 start, u64 len)
6385 {
6386         struct extent_record *rec;
6387         struct cache_extent *cache;
6388         struct btrfs_key key;
6389
6390         cache = lookup_cache_extent(extent_cache, start, len);
6391         if (!cache)
6392                 return 0;
6393
6394         rec = container_of(cache, struct extent_record, cache);
6395         if (!is_extent_tree_record(rec))
6396                 return 0;
6397
6398         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6399         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
6400 }
6401
6402 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6403                        struct extent_buffer *buf, int slot)
6404 {
6405         if (btrfs_header_level(buf)) {
6406                 struct btrfs_key_ptr ptr1, ptr2;
6407
6408                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6409                                    sizeof(struct btrfs_key_ptr));
6410                 read_extent_buffer(buf, &ptr2,
6411                                    btrfs_node_key_ptr_offset(slot + 1),
6412                                    sizeof(struct btrfs_key_ptr));
6413                 write_extent_buffer(buf, &ptr1,
6414                                     btrfs_node_key_ptr_offset(slot + 1),
6415                                     sizeof(struct btrfs_key_ptr));
6416                 write_extent_buffer(buf, &ptr2,
6417                                     btrfs_node_key_ptr_offset(slot),
6418                                     sizeof(struct btrfs_key_ptr));
6419                 if (slot == 0) {
6420                         struct btrfs_disk_key key;
6421                         btrfs_node_key(buf, &key, 0);
6422                         btrfs_fixup_low_keys(root, path, &key,
6423                                              btrfs_header_level(buf) + 1);
6424                 }
6425         } else {
6426                 struct btrfs_item *item1, *item2;
6427                 struct btrfs_key k1, k2;
6428                 char *item1_data, *item2_data;
6429                 u32 item1_offset, item2_offset, item1_size, item2_size;
6430
6431                 item1 = btrfs_item_nr(slot);
6432                 item2 = btrfs_item_nr(slot + 1);
6433                 btrfs_item_key_to_cpu(buf, &k1, slot);
6434                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6435                 item1_offset = btrfs_item_offset(buf, item1);
6436                 item2_offset = btrfs_item_offset(buf, item2);
6437                 item1_size = btrfs_item_size(buf, item1);
6438                 item2_size = btrfs_item_size(buf, item2);
6439
6440                 item1_data = malloc(item1_size);
6441                 if (!item1_data)
6442                         return -ENOMEM;
6443                 item2_data = malloc(item2_size);
6444                 if (!item2_data) {
6445                         free(item1_data);
6446                         return -ENOMEM;
6447                 }
6448
6449                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6450                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
6451
6452                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6453                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6454                 free(item1_data);
6455                 free(item2_data);
6456
6457                 btrfs_set_item_offset(buf, item1, item2_offset);
6458                 btrfs_set_item_offset(buf, item2, item1_offset);
6459                 btrfs_set_item_size(buf, item1, item2_size);
6460                 btrfs_set_item_size(buf, item2, item1_size);
6461
6462                 path->slots[0] = slot;
6463                 btrfs_set_item_key_unsafe(root, path, &k2);
6464                 path->slots[0] = slot + 1;
6465                 btrfs_set_item_key_unsafe(root, path, &k1);
6466         }
6467         return 0;
6468 }
6469
6470 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6471 {
6472         struct extent_buffer *buf;
6473         struct btrfs_key k1, k2;
6474         int i;
6475         int level = path->lowest_level;
6476         int ret = -EIO;
6477
6478         buf = path->nodes[level];
6479         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6480                 if (level) {
6481                         btrfs_node_key_to_cpu(buf, &k1, i);
6482                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
6483                 } else {
6484                         btrfs_item_key_to_cpu(buf, &k1, i);
6485                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
6486                 }
6487                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6488                         continue;
6489                 ret = swap_values(root, path, buf, i);
6490                 if (ret)
6491                         break;
6492                 btrfs_mark_buffer_dirty(buf);
6493                 i = 0;
6494         }
6495         return ret;
6496 }
6497
6498 static int delete_bogus_item(struct btrfs_root *root,
6499                              struct btrfs_path *path,
6500                              struct extent_buffer *buf, int slot)
6501 {
6502         struct btrfs_key key;
6503         int nritems = btrfs_header_nritems(buf);
6504
6505         btrfs_item_key_to_cpu(buf, &key, slot);
6506
6507         /* These are all the keys we can deal with missing. */
6508         if (key.type != BTRFS_DIR_INDEX_KEY &&
6509             key.type != BTRFS_EXTENT_ITEM_KEY &&
6510             key.type != BTRFS_METADATA_ITEM_KEY &&
6511             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6512             key.type != BTRFS_EXTENT_DATA_REF_KEY)
6513                 return -1;
6514
6515         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6516                (unsigned long long)key.objectid, key.type,
6517                (unsigned long long)key.offset, slot, buf->start);
6518         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6519                               btrfs_item_nr_offset(slot + 1),
6520                               sizeof(struct btrfs_item) *
6521                               (nritems - slot - 1));
6522         btrfs_set_header_nritems(buf, nritems - 1);
6523         if (slot == 0) {
6524                 struct btrfs_disk_key disk_key;
6525
6526                 btrfs_item_key(buf, &disk_key, 0);
6527                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6528         }
6529         btrfs_mark_buffer_dirty(buf);
6530         return 0;
6531 }
6532
6533 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6534 {
6535         struct extent_buffer *buf;
6536         int i;
6537         int ret = 0;
6538
6539         /* We should only get this for leaves */
6540         BUG_ON(path->lowest_level);
6541         buf = path->nodes[0];
6542 again:
6543         for (i = 0; i < btrfs_header_nritems(buf); i++) {
6544                 unsigned int shift = 0, offset;
6545
6546                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6547                     BTRFS_LEAF_DATA_SIZE(root)) {
6548                         if (btrfs_item_end_nr(buf, i) >
6549                             BTRFS_LEAF_DATA_SIZE(root)) {
6550                                 ret = delete_bogus_item(root, path, buf, i);
6551                                 if (!ret)
6552                                         goto again;
6553                                 fprintf(stderr, "item is off the end of the "
6554                                         "leaf, can't fix\n");
6555                                 ret = -EIO;
6556                                 break;
6557                         }
6558                         shift = BTRFS_LEAF_DATA_SIZE(root) -
6559                                 btrfs_item_end_nr(buf, i);
6560                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6561                            btrfs_item_offset_nr(buf, i - 1)) {
6562                         if (btrfs_item_end_nr(buf, i) >
6563                             btrfs_item_offset_nr(buf, i - 1)) {
6564                                 ret = delete_bogus_item(root, path, buf, i);
6565                                 if (!ret)
6566                                         goto again;
6567                                 fprintf(stderr, "items overlap, can't fix\n");
6568                                 ret = -EIO;
6569                                 break;
6570                         }
6571                         shift = btrfs_item_offset_nr(buf, i - 1) -
6572                                 btrfs_item_end_nr(buf, i);
6573                 }
6574                 if (!shift)
6575                         continue;
6576
6577                 printf("Shifting item nr %d by %u bytes in block %llu\n",
6578                        i, shift, (unsigned long long)buf->start);
6579                 offset = btrfs_item_offset_nr(buf, i);
6580                 memmove_extent_buffer(buf,
6581                                       btrfs_leaf_data(buf) + offset + shift,
6582                                       btrfs_leaf_data(buf) + offset,
6583                                       btrfs_item_size_nr(buf, i));
6584                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6585                                       offset + shift);
6586                 btrfs_mark_buffer_dirty(buf);
6587         }
6588
6589         /*
6590          * We may have moved things, in which case we want to exit so we don't
6591          * write those changes out.  Once we have proper abort functionality in
6592          * progs this can be changed to something nicer.
6593          */
6594         BUG_ON(ret);
6595         return ret;
6596 }
6597
6598 /*
6599  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
6600  * then just return -EIO.
6601  */
6602 static int try_to_fix_bad_block(struct btrfs_root *root,
6603                                 struct extent_buffer *buf,
6604                                 enum btrfs_tree_block_status status)
6605 {
6606         struct btrfs_trans_handle *trans;
6607         struct ulist *roots;
6608         struct ulist_node *node;
6609         struct btrfs_root *search_root;
6610         struct btrfs_path path;
6611         struct ulist_iterator iter;
6612         struct btrfs_key root_key, key;
6613         int ret;
6614
6615         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6616             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6617                 return -EIO;
6618
6619         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6620         if (ret)
6621                 return -EIO;
6622
6623         btrfs_init_path(&path);
6624         ULIST_ITER_INIT(&iter);
6625         while ((node = ulist_next(roots, &iter))) {
6626                 root_key.objectid = node->val;
6627                 root_key.type = BTRFS_ROOT_ITEM_KEY;
6628                 root_key.offset = (u64)-1;
6629
6630                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6631                 if (IS_ERR(root)) {
6632                         ret = -EIO;
6633                         break;
6634                 }
6635
6636
6637                 trans = btrfs_start_transaction(search_root, 0);
6638                 if (IS_ERR(trans)) {
6639                         ret = PTR_ERR(trans);
6640                         break;
6641                 }
6642
6643                 path.lowest_level = btrfs_header_level(buf);
6644                 path.skip_check_block = 1;
6645                 if (path.lowest_level)
6646                         btrfs_node_key_to_cpu(buf, &key, 0);
6647                 else
6648                         btrfs_item_key_to_cpu(buf, &key, 0);
6649                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6650                 if (ret) {
6651                         ret = -EIO;
6652                         btrfs_commit_transaction(trans, search_root);
6653                         break;
6654                 }
6655                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6656                         ret = fix_key_order(search_root, &path);
6657                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6658                         ret = fix_item_offset(search_root, &path);
6659                 if (ret) {
6660                         btrfs_commit_transaction(trans, search_root);
6661                         break;
6662                 }
6663                 btrfs_release_path(&path);
6664                 btrfs_commit_transaction(trans, search_root);
6665         }
6666         ulist_free(roots);
6667         btrfs_release_path(&path);
6668         return ret;
6669 }
6670
6671 static int check_block(struct btrfs_root *root,
6672                        struct cache_tree *extent_cache,
6673                        struct extent_buffer *buf, u64 flags)
6674 {
6675         struct extent_record *rec;
6676         struct cache_extent *cache;
6677         struct btrfs_key key;
6678         enum btrfs_tree_block_status status;
6679         int ret = 0;
6680         int level;
6681
6682         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6683         if (!cache)
6684                 return 1;
6685         rec = container_of(cache, struct extent_record, cache);
6686         rec->generation = btrfs_header_generation(buf);
6687
6688         level = btrfs_header_level(buf);
6689         if (btrfs_header_nritems(buf) > 0) {
6690
6691                 if (level == 0)
6692                         btrfs_item_key_to_cpu(buf, &key, 0);
6693                 else
6694                         btrfs_node_key_to_cpu(buf, &key, 0);
6695
6696                 rec->info_objectid = key.objectid;
6697         }
6698         rec->info_level = level;
6699
6700         if (btrfs_is_leaf(buf))
6701                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6702         else
6703                 status = btrfs_check_node(root, &rec->parent_key, buf);
6704
6705         if (status != BTRFS_TREE_BLOCK_CLEAN) {
6706                 if (repair)
6707                         status = try_to_fix_bad_block(root, buf, status);
6708                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6709                         ret = -EIO;
6710                         fprintf(stderr, "bad block %llu\n",
6711                                 (unsigned long long)buf->start);
6712                 } else {
6713                         /*
6714                          * Signal to callers we need to start the scan over
6715                          * again since we'll have cowed blocks.
6716                          */
6717                         ret = -EAGAIN;
6718                 }
6719         } else {
6720                 rec->content_checked = 1;
6721                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6722                         rec->owner_ref_checked = 1;
6723                 else {
6724                         ret = check_owner_ref(root, rec, buf);
6725                         if (!ret)
6726                                 rec->owner_ref_checked = 1;
6727                 }
6728         }
6729         if (!ret)
6730                 maybe_free_extent_rec(extent_cache, rec);
6731         return ret;
6732 }
6733
6734 #if 0
6735 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6736                                                 u64 parent, u64 root)
6737 {
6738         struct list_head *cur = rec->backrefs.next;
6739         struct extent_backref *node;
6740         struct tree_backref *back;
6741
6742         while(cur != &rec->backrefs) {
6743                 node = to_extent_backref(cur);
6744                 cur = cur->next;
6745                 if (node->is_data)
6746                         continue;
6747                 back = to_tree_backref(node);
6748                 if (parent > 0) {
6749                         if (!node->full_backref)
6750                                 continue;
6751                         if (parent == back->parent)
6752                                 return back;
6753                 } else {
6754                         if (node->full_backref)
6755                                 continue;
6756                         if (back->root == root)
6757                                 return back;
6758                 }
6759         }
6760         return NULL;
6761 }
6762 #endif
6763
6764 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6765                                                 u64 parent, u64 root)
6766 {
6767         struct tree_backref *ref = malloc(sizeof(*ref));
6768
6769         if (!ref)
6770                 return NULL;
6771         memset(&ref->node, 0, sizeof(ref->node));
6772         if (parent > 0) {
6773                 ref->parent = parent;
6774                 ref->node.full_backref = 1;
6775         } else {
6776                 ref->root = root;
6777                 ref->node.full_backref = 0;
6778         }
6779
6780         return ref;
6781 }
6782
6783 #if 0
6784 static struct data_backref *find_data_backref(struct extent_record *rec,
6785                                                 u64 parent, u64 root,
6786                                                 u64 owner, u64 offset,
6787                                                 int found_ref,
6788                                                 u64 disk_bytenr, u64 bytes)
6789 {
6790         struct list_head *cur = rec->backrefs.next;
6791         struct extent_backref *node;
6792         struct data_backref *back;
6793
6794         while(cur != &rec->backrefs) {
6795                 node = to_extent_backref(cur);
6796                 cur = cur->next;
6797                 if (!node->is_data)
6798                         continue;
6799                 back = to_data_backref(node);
6800                 if (parent > 0) {
6801                         if (!node->full_backref)
6802                                 continue;
6803                         if (parent == back->parent)
6804                                 return back;
6805                 } else {
6806                         if (node->full_backref)
6807                                 continue;
6808                         if (back->root == root && back->owner == owner &&
6809                             back->offset == offset) {
6810                                 if (found_ref && node->found_ref &&
6811                                     (back->bytes != bytes ||
6812                                     back->disk_bytenr != disk_bytenr))
6813                                         continue;
6814                                 return back;
6815                         }
6816                 }
6817         }
6818         return NULL;
6819 }
6820 #endif
6821
6822 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6823                                                 u64 parent, u64 root,
6824                                                 u64 owner, u64 offset,
6825                                                 u64 max_size)
6826 {
6827         struct data_backref *ref = malloc(sizeof(*ref));
6828
6829         if (!ref)
6830                 return NULL;
6831         memset(&ref->node, 0, sizeof(ref->node));
6832         ref->node.is_data = 1;
6833
6834         if (parent > 0) {
6835                 ref->parent = parent;
6836                 ref->owner = 0;
6837                 ref->offset = 0;
6838                 ref->node.full_backref = 1;
6839         } else {
6840                 ref->root = root;
6841                 ref->owner = owner;
6842                 ref->offset = offset;
6843                 ref->node.full_backref = 0;
6844         }
6845         ref->bytes = max_size;
6846         ref->found_ref = 0;
6847         ref->num_refs = 0;
6848         if (max_size > rec->max_size)
6849                 rec->max_size = max_size;
6850         return ref;
6851 }
6852
6853 /* Check if the type of extent matches with its chunk */
6854 static void check_extent_type(struct extent_record *rec)
6855 {
6856         struct btrfs_block_group_cache *bg_cache;
6857
6858         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6859         if (!bg_cache)
6860                 return;
6861
6862         /* data extent, check chunk directly*/
6863         if (!rec->metadata) {
6864                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6865                         rec->wrong_chunk_type = 1;
6866                 return;
6867         }
6868
6869         /* metadata extent, check the obvious case first */
6870         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6871                                  BTRFS_BLOCK_GROUP_METADATA))) {
6872                 rec->wrong_chunk_type = 1;
6873                 return;
6874         }
6875
6876         /*
6877          * Check SYSTEM extent, as it's also marked as metadata, we can only
6878          * make sure it's a SYSTEM extent by its backref
6879          */
6880         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
6881                 struct extent_backref *node;
6882                 struct tree_backref *tback;
6883                 u64 bg_type;
6884
6885                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
6886                 if (node->is_data) {
6887                         /* tree block shouldn't have data backref */
6888                         rec->wrong_chunk_type = 1;
6889                         return;
6890                 }
6891                 tback = container_of(node, struct tree_backref, node);
6892
6893                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6894                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6895                 else
6896                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6897                 if (!(bg_cache->flags & bg_type))
6898                         rec->wrong_chunk_type = 1;
6899         }
6900 }
6901
6902 /*
6903  * Allocate a new extent record, fill default values from @tmpl and insert int
6904  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6905  * the cache, otherwise it fails.
6906  */
6907 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6908                 struct extent_record *tmpl)
6909 {
6910         struct extent_record *rec;
6911         int ret = 0;
6912
6913         BUG_ON(tmpl->max_size == 0);
6914         rec = malloc(sizeof(*rec));
6915         if (!rec)
6916                 return -ENOMEM;
6917         rec->start = tmpl->start;
6918         rec->max_size = tmpl->max_size;
6919         rec->nr = max(tmpl->nr, tmpl->max_size);
6920         rec->found_rec = tmpl->found_rec;
6921         rec->content_checked = tmpl->content_checked;
6922         rec->owner_ref_checked = tmpl->owner_ref_checked;
6923         rec->num_duplicates = 0;
6924         rec->metadata = tmpl->metadata;
6925         rec->flag_block_full_backref = FLAG_UNSET;
6926         rec->bad_full_backref = 0;
6927         rec->crossing_stripes = 0;
6928         rec->wrong_chunk_type = 0;
6929         rec->is_root = tmpl->is_root;
6930         rec->refs = tmpl->refs;
6931         rec->extent_item_refs = tmpl->extent_item_refs;
6932         rec->parent_generation = tmpl->parent_generation;
6933         INIT_LIST_HEAD(&rec->backrefs);
6934         INIT_LIST_HEAD(&rec->dups);
6935         INIT_LIST_HEAD(&rec->list);
6936         rec->backref_tree = RB_ROOT;
6937         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6938         rec->cache.start = tmpl->start;
6939         rec->cache.size = tmpl->nr;
6940         ret = insert_cache_extent(extent_cache, &rec->cache);
6941         if (ret) {
6942                 free(rec);
6943                 return ret;
6944         }
6945         bytes_used += rec->nr;
6946
6947         if (tmpl->metadata)
6948                 rec->crossing_stripes = check_crossing_stripes(global_info,
6949                                 rec->start, global_info->nodesize);
6950         check_extent_type(rec);
6951         return ret;
6952 }
6953
6954 /*
6955  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6956  * some are hints:
6957  * - refs              - if found, increase refs
6958  * - is_root           - if found, set
6959  * - content_checked   - if found, set
6960  * - owner_ref_checked - if found, set
6961  *
6962  * If not found, create a new one, initialize and insert.
6963  */
6964 static int add_extent_rec(struct cache_tree *extent_cache,
6965                 struct extent_record *tmpl)
6966 {
6967         struct extent_record *rec;
6968         struct cache_extent *cache;
6969         int ret = 0;
6970         int dup = 0;
6971
6972         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6973         if (cache) {
6974                 rec = container_of(cache, struct extent_record, cache);
6975                 if (tmpl->refs)
6976                         rec->refs++;
6977                 if (rec->nr == 1)
6978                         rec->nr = max(tmpl->nr, tmpl->max_size);
6979
6980                 /*
6981                  * We need to make sure to reset nr to whatever the extent
6982                  * record says was the real size, this way we can compare it to
6983                  * the backrefs.
6984                  */
6985                 if (tmpl->found_rec) {
6986                         if (tmpl->start != rec->start || rec->found_rec) {
6987                                 struct extent_record *tmp;
6988
6989                                 dup = 1;
6990                                 if (list_empty(&rec->list))
6991                                         list_add_tail(&rec->list,
6992                                                       &duplicate_extents);
6993
6994                                 /*
6995                                  * We have to do this song and dance in case we
6996                                  * find an extent record that falls inside of
6997                                  * our current extent record but does not have
6998                                  * the same objectid.
6999                                  */
7000                                 tmp = malloc(sizeof(*tmp));
7001                                 if (!tmp)
7002                                         return -ENOMEM;
7003                                 tmp->start = tmpl->start;
7004                                 tmp->max_size = tmpl->max_size;
7005                                 tmp->nr = tmpl->nr;
7006                                 tmp->found_rec = 1;
7007                                 tmp->metadata = tmpl->metadata;
7008                                 tmp->extent_item_refs = tmpl->extent_item_refs;
7009                                 INIT_LIST_HEAD(&tmp->list);
7010                                 list_add_tail(&tmp->list, &rec->dups);
7011                                 rec->num_duplicates++;
7012                         } else {
7013                                 rec->nr = tmpl->nr;
7014                                 rec->found_rec = 1;
7015                         }
7016                 }
7017
7018                 if (tmpl->extent_item_refs && !dup) {
7019                         if (rec->extent_item_refs) {
7020                                 fprintf(stderr, "block %llu rec "
7021                                         "extent_item_refs %llu, passed %llu\n",
7022                                         (unsigned long long)tmpl->start,
7023                                         (unsigned long long)
7024                                                         rec->extent_item_refs,
7025                                         (unsigned long long)tmpl->extent_item_refs);
7026                         }
7027                         rec->extent_item_refs = tmpl->extent_item_refs;
7028                 }
7029                 if (tmpl->is_root)
7030                         rec->is_root = 1;
7031                 if (tmpl->content_checked)
7032                         rec->content_checked = 1;
7033                 if (tmpl->owner_ref_checked)
7034                         rec->owner_ref_checked = 1;
7035                 memcpy(&rec->parent_key, &tmpl->parent_key,
7036                                 sizeof(tmpl->parent_key));
7037                 if (tmpl->parent_generation)
7038                         rec->parent_generation = tmpl->parent_generation;
7039                 if (rec->max_size < tmpl->max_size)
7040                         rec->max_size = tmpl->max_size;
7041
7042                 /*
7043                  * A metadata extent can't cross stripe_len boundary, otherwise
7044                  * kernel scrub won't be able to handle it.
7045                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7046                  * it.
7047                  */
7048                 if (tmpl->metadata)
7049                         rec->crossing_stripes = check_crossing_stripes(
7050                                         global_info, rec->start,
7051                                         global_info->nodesize);
7052                 check_extent_type(rec);
7053                 maybe_free_extent_rec(extent_cache, rec);
7054                 return ret;
7055         }
7056
7057         ret = add_extent_rec_nolookup(extent_cache, tmpl);
7058
7059         return ret;
7060 }
7061
7062 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7063                             u64 parent, u64 root, int found_ref)
7064 {
7065         struct extent_record *rec;
7066         struct tree_backref *back;
7067         struct cache_extent *cache;
7068         int ret;
7069         bool insert = false;
7070
7071         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7072         if (!cache) {
7073                 struct extent_record tmpl;
7074
7075                 memset(&tmpl, 0, sizeof(tmpl));
7076                 tmpl.start = bytenr;
7077                 tmpl.nr = 1;
7078                 tmpl.metadata = 1;
7079                 tmpl.max_size = 1;
7080
7081                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7082                 if (ret)
7083                         return ret;
7084
7085                 /* really a bug in cache_extent implement now */
7086                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7087                 if (!cache)
7088                         return -ENOENT;
7089         }
7090
7091         rec = container_of(cache, struct extent_record, cache);
7092         if (rec->start != bytenr) {
7093                 /*
7094                  * Several cause, from unaligned bytenr to over lapping extents
7095                  */
7096                 return -EEXIST;
7097         }
7098
7099         back = find_tree_backref(rec, parent, root);
7100         if (!back) {
7101                 back = alloc_tree_backref(rec, parent, root);
7102                 if (!back)
7103                         return -ENOMEM;
7104                 insert = true;
7105         }
7106
7107         if (found_ref) {
7108                 if (back->node.found_ref) {
7109                         fprintf(stderr, "Extent back ref already exists "
7110                                 "for %llu parent %llu root %llu \n",
7111                                 (unsigned long long)bytenr,
7112                                 (unsigned long long)parent,
7113                                 (unsigned long long)root);
7114                 }
7115                 back->node.found_ref = 1;
7116         } else {
7117                 if (back->node.found_extent_tree) {
7118                         fprintf(stderr, "Extent back ref already exists "
7119                                 "for %llu parent %llu root %llu \n",
7120                                 (unsigned long long)bytenr,
7121                                 (unsigned long long)parent,
7122                                 (unsigned long long)root);
7123                 }
7124                 back->node.found_extent_tree = 1;
7125         }
7126         if (insert)
7127                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7128                         compare_extent_backref));
7129         check_extent_type(rec);
7130         maybe_free_extent_rec(extent_cache, rec);
7131         return 0;
7132 }
7133
7134 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7135                             u64 parent, u64 root, u64 owner, u64 offset,
7136                             u32 num_refs, int found_ref, u64 max_size)
7137 {
7138         struct extent_record *rec;
7139         struct data_backref *back;
7140         struct cache_extent *cache;
7141         int ret;
7142         bool insert = false;
7143
7144         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7145         if (!cache) {
7146                 struct extent_record tmpl;
7147
7148                 memset(&tmpl, 0, sizeof(tmpl));
7149                 tmpl.start = bytenr;
7150                 tmpl.nr = 1;
7151                 tmpl.max_size = max_size;
7152
7153                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7154                 if (ret)
7155                         return ret;
7156
7157                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7158                 if (!cache)
7159                         abort();
7160         }
7161
7162         rec = container_of(cache, struct extent_record, cache);
7163         if (rec->max_size < max_size)
7164                 rec->max_size = max_size;
7165
7166         /*
7167          * If found_ref is set then max_size is the real size and must match the
7168          * existing refs.  So if we have already found a ref then we need to
7169          * make sure that this ref matches the existing one, otherwise we need
7170          * to add a new backref so we can notice that the backrefs don't match
7171          * and we need to figure out who is telling the truth.  This is to
7172          * account for that awful fsync bug I introduced where we'd end up with
7173          * a btrfs_file_extent_item that would have its length include multiple
7174          * prealloc extents or point inside of a prealloc extent.
7175          */
7176         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7177                                  bytenr, max_size);
7178         if (!back) {
7179                 back = alloc_data_backref(rec, parent, root, owner, offset,
7180                                           max_size);
7181                 BUG_ON(!back);
7182                 insert = true;
7183         }
7184
7185         if (found_ref) {
7186                 BUG_ON(num_refs != 1);
7187                 if (back->node.found_ref)
7188                         BUG_ON(back->bytes != max_size);
7189                 back->node.found_ref = 1;
7190                 back->found_ref += 1;
7191                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7192                         back->bytes = max_size;
7193                         back->disk_bytenr = bytenr;
7194
7195                         /* Need to reinsert if not already in the tree */
7196                         if (!insert) {
7197                                 rb_erase(&back->node.node, &rec->backref_tree);
7198                                 insert = true;
7199                         }
7200                 }
7201                 rec->refs += 1;
7202                 rec->content_checked = 1;
7203                 rec->owner_ref_checked = 1;
7204         } else {
7205                 if (back->node.found_extent_tree) {
7206                         fprintf(stderr, "Extent back ref already exists "
7207                                 "for %llu parent %llu root %llu "
7208                                 "owner %llu offset %llu num_refs %lu\n",
7209                                 (unsigned long long)bytenr,
7210                                 (unsigned long long)parent,
7211                                 (unsigned long long)root,
7212                                 (unsigned long long)owner,
7213                                 (unsigned long long)offset,
7214                                 (unsigned long)num_refs);
7215                 }
7216                 back->num_refs = num_refs;
7217                 back->node.found_extent_tree = 1;
7218         }
7219         if (insert)
7220                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7221                         compare_extent_backref));
7222
7223         maybe_free_extent_rec(extent_cache, rec);
7224         return 0;
7225 }
7226
7227 static int add_pending(struct cache_tree *pending,
7228                        struct cache_tree *seen, u64 bytenr, u32 size)
7229 {
7230         int ret;
7231         ret = add_cache_extent(seen, bytenr, size);
7232         if (ret)
7233                 return ret;
7234         add_cache_extent(pending, bytenr, size);
7235         return 0;
7236 }
7237
7238 static int pick_next_pending(struct cache_tree *pending,
7239                         struct cache_tree *reada,
7240                         struct cache_tree *nodes,
7241                         u64 last, struct block_info *bits, int bits_nr,
7242                         int *reada_bits)
7243 {
7244         unsigned long node_start = last;
7245         struct cache_extent *cache;
7246         int ret;
7247
7248         cache = search_cache_extent(reada, 0);
7249         if (cache) {
7250                 bits[0].start = cache->start;
7251                 bits[0].size = cache->size;
7252                 *reada_bits = 1;
7253                 return 1;
7254         }
7255         *reada_bits = 0;
7256         if (node_start > 32768)
7257                 node_start -= 32768;
7258
7259         cache = search_cache_extent(nodes, node_start);
7260         if (!cache)
7261                 cache = search_cache_extent(nodes, 0);
7262
7263         if (!cache) {
7264                  cache = search_cache_extent(pending, 0);
7265                  if (!cache)
7266                          return 0;
7267                  ret = 0;
7268                  do {
7269                          bits[ret].start = cache->start;
7270                          bits[ret].size = cache->size;
7271                          cache = next_cache_extent(cache);
7272                          ret++;
7273                  } while (cache && ret < bits_nr);
7274                  return ret;
7275         }
7276
7277         ret = 0;
7278         do {
7279                 bits[ret].start = cache->start;
7280                 bits[ret].size = cache->size;
7281                 cache = next_cache_extent(cache);
7282                 ret++;
7283         } while (cache && ret < bits_nr);
7284
7285         if (bits_nr - ret > 8) {
7286                 u64 lookup = bits[0].start + bits[0].size;
7287                 struct cache_extent *next;
7288                 next = search_cache_extent(pending, lookup);
7289                 while(next) {
7290                         if (next->start - lookup > 32768)
7291                                 break;
7292                         bits[ret].start = next->start;
7293                         bits[ret].size = next->size;
7294                         lookup = next->start + next->size;
7295                         ret++;
7296                         if (ret == bits_nr)
7297                                 break;
7298                         next = next_cache_extent(next);
7299                         if (!next)
7300                                 break;
7301                 }
7302         }
7303         return ret;
7304 }
7305
7306 static void free_chunk_record(struct cache_extent *cache)
7307 {
7308         struct chunk_record *rec;
7309
7310         rec = container_of(cache, struct chunk_record, cache);
7311         list_del_init(&rec->list);
7312         list_del_init(&rec->dextents);
7313         free(rec);
7314 }
7315
7316 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7317 {
7318         cache_tree_free_extents(chunk_cache, free_chunk_record);
7319 }
7320
7321 static void free_device_record(struct rb_node *node)
7322 {
7323         struct device_record *rec;
7324
7325         rec = container_of(node, struct device_record, node);
7326         free(rec);
7327 }
7328
7329 FREE_RB_BASED_TREE(device_cache, free_device_record);
7330
7331 int insert_block_group_record(struct block_group_tree *tree,
7332                               struct block_group_record *bg_rec)
7333 {
7334         int ret;
7335
7336         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7337         if (ret)
7338                 return ret;
7339
7340         list_add_tail(&bg_rec->list, &tree->block_groups);
7341         return 0;
7342 }
7343
7344 static void free_block_group_record(struct cache_extent *cache)
7345 {
7346         struct block_group_record *rec;
7347
7348         rec = container_of(cache, struct block_group_record, cache);
7349         list_del_init(&rec->list);
7350         free(rec);
7351 }
7352
7353 void free_block_group_tree(struct block_group_tree *tree)
7354 {
7355         cache_tree_free_extents(&tree->tree, free_block_group_record);
7356 }
7357
7358 int insert_device_extent_record(struct device_extent_tree *tree,
7359                                 struct device_extent_record *de_rec)
7360 {
7361         int ret;
7362
7363         /*
7364          * Device extent is a bit different from the other extents, because
7365          * the extents which belong to the different devices may have the
7366          * same start and size, so we need use the special extent cache
7367          * search/insert functions.
7368          */
7369         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7370         if (ret)
7371                 return ret;
7372
7373         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7374         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
7375         return 0;
7376 }
7377
7378 static void free_device_extent_record(struct cache_extent *cache)
7379 {
7380         struct device_extent_record *rec;
7381
7382         rec = container_of(cache, struct device_extent_record, cache);
7383         if (!list_empty(&rec->chunk_list))
7384                 list_del_init(&rec->chunk_list);
7385         if (!list_empty(&rec->device_list))
7386                 list_del_init(&rec->device_list);
7387         free(rec);
7388 }
7389
7390 void free_device_extent_tree(struct device_extent_tree *tree)
7391 {
7392         cache_tree_free_extents(&tree->tree, free_device_extent_record);
7393 }
7394
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref in
 * @extent_cache.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is added as a tree
 * backref, anything else as a data backref carrying the v0 ref count.  In
 * both cases key.objectid is used as bytenr and key.offset as parent.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, key.objectid,
				key.offset, 0, 0);

	return add_data_backref(extent_cache, key.objectid, key.offset,
			0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
7415
7416 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7417                                             struct btrfs_key *key,
7418                                             int slot)
7419 {
7420         struct btrfs_chunk *ptr;
7421         struct chunk_record *rec;
7422         int num_stripes, i;
7423
7424         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7425         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7426
7427         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7428         if (!rec) {
7429                 fprintf(stderr, "memory allocation failed\n");
7430                 exit(-1);
7431         }
7432
7433         INIT_LIST_HEAD(&rec->list);
7434         INIT_LIST_HEAD(&rec->dextents);
7435         rec->bg_rec = NULL;
7436
7437         rec->cache.start = key->offset;
7438         rec->cache.size = btrfs_chunk_length(leaf, ptr);
7439
7440         rec->generation = btrfs_header_generation(leaf);
7441
7442         rec->objectid = key->objectid;
7443         rec->type = key->type;
7444         rec->offset = key->offset;
7445
7446         rec->length = rec->cache.size;
7447         rec->owner = btrfs_chunk_owner(leaf, ptr);
7448         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7449         rec->type_flags = btrfs_chunk_type(leaf, ptr);
7450         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7451         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7452         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7453         rec->num_stripes = num_stripes;
7454         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
7455
7456         for (i = 0; i < rec->num_stripes; ++i) {
7457                 rec->stripes[i].devid =
7458                         btrfs_stripe_devid_nr(leaf, ptr, i);
7459                 rec->stripes[i].offset =
7460                         btrfs_stripe_offset_nr(leaf, ptr, i);
7461                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7462                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
7463                                 BTRFS_UUID_SIZE);
7464         }
7465
7466         return rec;
7467 }
7468
7469 static int process_chunk_item(struct cache_tree *chunk_cache,
7470                               struct btrfs_key *key, struct extent_buffer *eb,
7471                               int slot)
7472 {
7473         struct chunk_record *rec;
7474         struct btrfs_chunk *chunk;
7475         int ret = 0;
7476
7477         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7478         /*
7479          * Do extra check for this chunk item,
7480          *
7481          * It's still possible one can craft a leaf with CHUNK_ITEM, with
7482          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7483          * and owner<->key_type check.
7484          */
7485         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7486                                       key->offset);
7487         if (ret < 0) {
7488                 error("chunk(%llu, %llu) is not valid, ignore it",
7489                       key->offset, btrfs_chunk_length(eb, chunk));
7490                 return 0;
7491         }
7492         rec = btrfs_new_chunk_record(eb, key, slot);
7493         ret = insert_cache_extent(chunk_cache, &rec->cache);
7494         if (ret) {
7495                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7496                         rec->offset, rec->length);
7497                 free(rec);
7498         }
7499
7500         return ret;
7501 }
7502
7503 static int process_device_item(struct rb_root *dev_cache,
7504                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7505 {
7506         struct btrfs_dev_item *ptr;
7507         struct device_record *rec;
7508         int ret = 0;
7509
7510         ptr = btrfs_item_ptr(eb,
7511                 slot, struct btrfs_dev_item);
7512
7513         rec = malloc(sizeof(*rec));
7514         if (!rec) {
7515                 fprintf(stderr, "memory allocation failed\n");
7516                 return -ENOMEM;
7517         }
7518
7519         rec->devid = key->offset;
7520         rec->generation = btrfs_header_generation(eb);
7521
7522         rec->objectid = key->objectid;
7523         rec->type = key->type;
7524         rec->offset = key->offset;
7525
7526         rec->devid = btrfs_device_id(eb, ptr);
7527         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7528         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7529
7530         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7531         if (ret) {
7532                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
7533                 free(rec);
7534         }
7535
7536         return ret;
7537 }
7538
7539 struct block_group_record *
7540 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7541                              int slot)
7542 {
7543         struct btrfs_block_group_item *ptr;
7544         struct block_group_record *rec;
7545
7546         rec = calloc(1, sizeof(*rec));
7547         if (!rec) {
7548                 fprintf(stderr, "memory allocation failed\n");
7549                 exit(-1);
7550         }
7551
7552         rec->cache.start = key->objectid;
7553         rec->cache.size = key->offset;
7554
7555         rec->generation = btrfs_header_generation(leaf);
7556
7557         rec->objectid = key->objectid;
7558         rec->type = key->type;
7559         rec->offset = key->offset;
7560
7561         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7562         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7563
7564         INIT_LIST_HEAD(&rec->list);
7565
7566         return rec;
7567 }
7568
7569 static int process_block_group_item(struct block_group_tree *block_group_cache,
7570                                     struct btrfs_key *key,
7571                                     struct extent_buffer *eb, int slot)
7572 {
7573         struct block_group_record *rec;
7574         int ret = 0;
7575
7576         rec = btrfs_new_block_group_record(eb, key, slot);
7577         ret = insert_block_group_record(block_group_cache, rec);
7578         if (ret) {
7579                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7580                         rec->objectid, rec->offset);
7581                 free(rec);
7582         }
7583
7584         return ret;
7585 }
7586
7587 struct device_extent_record *
7588 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7589                                struct btrfs_key *key, int slot)
7590 {
7591         struct device_extent_record *rec;
7592         struct btrfs_dev_extent *ptr;
7593
7594         rec = calloc(1, sizeof(*rec));
7595         if (!rec) {
7596                 fprintf(stderr, "memory allocation failed\n");
7597                 exit(-1);
7598         }
7599
7600         rec->cache.objectid = key->objectid;
7601         rec->cache.start = key->offset;
7602
7603         rec->generation = btrfs_header_generation(leaf);
7604
7605         rec->objectid = key->objectid;
7606         rec->type = key->type;
7607         rec->offset = key->offset;
7608
7609         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7610         rec->chunk_objecteid =
7611                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7612         rec->chunk_offset =
7613                 btrfs_dev_extent_chunk_offset(leaf, ptr);
7614         rec->length = btrfs_dev_extent_length(leaf, ptr);
7615         rec->cache.size = rec->length;
7616
7617         INIT_LIST_HEAD(&rec->chunk_list);
7618         INIT_LIST_HEAD(&rec->device_list);
7619
7620         return rec;
7621 }
7622
7623 static int
7624 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7625                            struct btrfs_key *key, struct extent_buffer *eb,
7626                            int slot)
7627 {
7628         struct device_extent_record *rec;
7629         int ret;
7630
7631         rec = btrfs_new_device_extent_record(eb, key, slot);
7632         ret = insert_device_extent_record(dev_extent_cache, rec);
7633         if (ret) {
7634                 fprintf(stderr,
7635                         "Device extent[%llu, %llu, %llu] existed.\n",
7636                         rec->objectid, rec->offset, rec->length);
7637                 free(rec);
7638         }
7639
7640         return ret;
7641 }
7642
7643 static int process_extent_item(struct btrfs_root *root,
7644                                struct cache_tree *extent_cache,
7645                                struct extent_buffer *eb, int slot)
7646 {
7647         struct btrfs_extent_item *ei;
7648         struct btrfs_extent_inline_ref *iref;
7649         struct btrfs_extent_data_ref *dref;
7650         struct btrfs_shared_data_ref *sref;
7651         struct btrfs_key key;
7652         struct extent_record tmpl;
7653         unsigned long end;
7654         unsigned long ptr;
7655         int ret;
7656         int type;
7657         u32 item_size = btrfs_item_size_nr(eb, slot);
7658         u64 refs = 0;
7659         u64 offset;
7660         u64 num_bytes;
7661         int metadata = 0;
7662
7663         btrfs_item_key_to_cpu(eb, &key, slot);
7664
7665         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7666                 metadata = 1;
7667                 num_bytes = root->fs_info->nodesize;
7668         } else {
7669                 num_bytes = key.offset;
7670         }
7671
7672         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7673                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7674                       key.objectid, root->fs_info->sectorsize);
7675                 return -EIO;
7676         }
7677         if (item_size < sizeof(*ei)) {
7678 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7679                 struct btrfs_extent_item_v0 *ei0;
7680                 BUG_ON(item_size != sizeof(*ei0));
7681                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7682                 refs = btrfs_extent_refs_v0(eb, ei0);
7683 #else
7684                 BUG();
7685 #endif
7686                 memset(&tmpl, 0, sizeof(tmpl));
7687                 tmpl.start = key.objectid;
7688                 tmpl.nr = num_bytes;
7689                 tmpl.extent_item_refs = refs;
7690                 tmpl.metadata = metadata;
7691                 tmpl.found_rec = 1;
7692                 tmpl.max_size = num_bytes;
7693
7694                 return add_extent_rec(extent_cache, &tmpl);
7695         }
7696
7697         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7698         refs = btrfs_extent_refs(eb, ei);
7699         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7700                 metadata = 1;
7701         else
7702                 metadata = 0;
7703         if (metadata && num_bytes != root->fs_info->nodesize) {
7704                 error("ignore invalid metadata extent, length %llu does not equal to %u",
7705                       num_bytes, root->fs_info->nodesize);
7706                 return -EIO;
7707         }
7708         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7709                 error("ignore invalid data extent, length %llu is not aligned to %u",
7710                       num_bytes, root->fs_info->sectorsize);
7711                 return -EIO;
7712         }
7713
7714         memset(&tmpl, 0, sizeof(tmpl));
7715         tmpl.start = key.objectid;
7716         tmpl.nr = num_bytes;
7717         tmpl.extent_item_refs = refs;
7718         tmpl.metadata = metadata;
7719         tmpl.found_rec = 1;
7720         tmpl.max_size = num_bytes;
7721         add_extent_rec(extent_cache, &tmpl);
7722
7723         ptr = (unsigned long)(ei + 1);
7724         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7725             key.type == BTRFS_EXTENT_ITEM_KEY)
7726                 ptr += sizeof(struct btrfs_tree_block_info);
7727
7728         end = (unsigned long)ei + item_size;
7729         while (ptr < end) {
7730                 iref = (struct btrfs_extent_inline_ref *)ptr;
7731                 type = btrfs_extent_inline_ref_type(eb, iref);
7732                 offset = btrfs_extent_inline_ref_offset(eb, iref);
7733                 switch (type) {
7734                 case BTRFS_TREE_BLOCK_REF_KEY:
7735                         ret = add_tree_backref(extent_cache, key.objectid,
7736                                         0, offset, 0);
7737                         if (ret < 0)
7738                                 error(
7739                         "add_tree_backref failed (extent items tree block): %s",
7740                                       strerror(-ret));
7741                         break;
7742                 case BTRFS_SHARED_BLOCK_REF_KEY:
7743                         ret = add_tree_backref(extent_cache, key.objectid,
7744                                         offset, 0, 0);
7745                         if (ret < 0)
7746                                 error(
7747                         "add_tree_backref failed (extent items shared block): %s",
7748                                       strerror(-ret));
7749                         break;
7750                 case BTRFS_EXTENT_DATA_REF_KEY:
7751                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7752                         add_data_backref(extent_cache, key.objectid, 0,
7753                                         btrfs_extent_data_ref_root(eb, dref),
7754                                         btrfs_extent_data_ref_objectid(eb,
7755                                                                        dref),
7756                                         btrfs_extent_data_ref_offset(eb, dref),
7757                                         btrfs_extent_data_ref_count(eb, dref),
7758                                         0, num_bytes);
7759                         break;
7760                 case BTRFS_SHARED_DATA_REF_KEY:
7761                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
7762                         add_data_backref(extent_cache, key.objectid, offset,
7763                                         0, 0, 0,
7764                                         btrfs_shared_data_ref_count(eb, sref),
7765                                         0, num_bytes);
7766                         break;
7767                 default:
7768                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7769                                 key.objectid, key.type, num_bytes);
7770                         goto out;
7771                 }
7772                 ptr += btrfs_extent_inline_ref_size(type);
7773         }
7774         WARN_ON(ptr > end);
7775 out:
7776         return 0;
7777 }
7778
7779 static int check_cache_range(struct btrfs_root *root,
7780                              struct btrfs_block_group_cache *cache,
7781                              u64 offset, u64 bytes)
7782 {
7783         struct btrfs_free_space *entry;
7784         u64 *logical;
7785         u64 bytenr;
7786         int stripe_len;
7787         int i, nr, ret;
7788
7789         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7790                 bytenr = btrfs_sb_offset(i);
7791                 ret = btrfs_rmap_block(root->fs_info,
7792                                        cache->key.objectid, bytenr, 0,
7793                                        &logical, &nr, &stripe_len);
7794                 if (ret)
7795                         return ret;
7796
7797                 while (nr--) {
7798                         if (logical[nr] + stripe_len <= offset)
7799                                 continue;
7800                         if (offset + bytes <= logical[nr])
7801                                 continue;
7802                         if (logical[nr] == offset) {
7803                                 if (stripe_len >= bytes) {
7804                                         free(logical);
7805                                         return 0;
7806                                 }
7807                                 bytes -= stripe_len;
7808                                 offset += stripe_len;
7809                         } else if (logical[nr] < offset) {
7810                                 if (logical[nr] + stripe_len >=
7811                                     offset + bytes) {
7812                                         free(logical);
7813                                         return 0;
7814                                 }
7815                                 bytes = (offset + bytes) -
7816                                         (logical[nr] + stripe_len);
7817                                 offset = logical[nr] + stripe_len;
7818                         } else {
7819                                 /*
7820                                  * Could be tricky, the super may land in the
7821                                  * middle of the area we're checking.  First
7822                                  * check the easiest case, it's at the end.
7823                                  */
7824                                 if (logical[nr] + stripe_len >=
7825                                     bytes + offset) {
7826                                         bytes = logical[nr] - offset;
7827                                         continue;
7828                                 }
7829
7830                                 /* Check the left side */
7831                                 ret = check_cache_range(root, cache,
7832                                                         offset,
7833                                                         logical[nr] - offset);
7834                                 if (ret) {
7835                                         free(logical);
7836                                         return ret;
7837                                 }
7838
7839                                 /* Now we continue with the right side */
7840                                 bytes = (offset + bytes) -
7841                                         (logical[nr] + stripe_len);
7842                                 offset = logical[nr] + stripe_len;
7843                         }
7844                 }
7845
7846                 free(logical);
7847         }
7848
7849         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7850         if (!entry) {
7851                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7852                         offset, offset+bytes);
7853                 return -EINVAL;
7854         }
7855
7856         if (entry->offset != offset) {
7857                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7858                         entry->offset);
7859                 return -EINVAL;
7860         }
7861
7862         if (entry->bytes != bytes) {
7863                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7864                         bytes, entry->bytes, offset);
7865                 return -EINVAL;
7866         }
7867
7868         unlink_free_space(cache->free_space_ctl, entry);
7869         free(entry);
7870         return 0;
7871 }
7872
7873 static int verify_space_cache(struct btrfs_root *root,
7874                               struct btrfs_block_group_cache *cache)
7875 {
7876         struct btrfs_path path;
7877         struct extent_buffer *leaf;
7878         struct btrfs_key key;
7879         u64 last;
7880         int ret = 0;
7881
7882         root = root->fs_info->extent_root;
7883
7884         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7885
7886         btrfs_init_path(&path);
7887         key.objectid = last;
7888         key.offset = 0;
7889         key.type = BTRFS_EXTENT_ITEM_KEY;
7890         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7891         if (ret < 0)
7892                 goto out;
7893         ret = 0;
7894         while (1) {
7895                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7896                         ret = btrfs_next_leaf(root, &path);
7897                         if (ret < 0)
7898                                 goto out;
7899                         if (ret > 0) {
7900                                 ret = 0;
7901                                 break;
7902                         }
7903                 }
7904                 leaf = path.nodes[0];
7905                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7906                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7907                         break;
7908                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7909                     key.type != BTRFS_METADATA_ITEM_KEY) {
7910                         path.slots[0]++;
7911                         continue;
7912                 }
7913
7914                 if (last == key.objectid) {
7915                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7916                                 last = key.objectid + key.offset;
7917                         else
7918                                 last = key.objectid + root->fs_info->nodesize;
7919                         path.slots[0]++;
7920                         continue;
7921                 }
7922
7923                 ret = check_cache_range(root, cache, last,
7924                                         key.objectid - last);
7925                 if (ret)
7926                         break;
7927                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7928                         last = key.objectid + key.offset;
7929                 else
7930                         last = key.objectid + root->fs_info->nodesize;
7931                 path.slots[0]++;
7932         }
7933
7934         if (last < cache->key.objectid + cache->key.offset)
7935                 ret = check_cache_range(root, cache, last,
7936                                         cache->key.objectid +
7937                                         cache->key.offset - last);
7938
7939 out:
7940         btrfs_release_path(&path);
7941
7942         if (!ret &&
7943             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7944                 fprintf(stderr, "There are still entries left in the space "
7945                         "cache\n");
7946                 ret = -EINVAL;
7947         }
7948
7949         return ret;
7950 }
7951
7952 static int check_space_cache(struct btrfs_root *root)
7953 {
7954         struct btrfs_block_group_cache *cache;
7955         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7956         int ret;
7957         int error = 0;
7958
7959         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7960             btrfs_super_generation(root->fs_info->super_copy) !=
7961             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7962                 printf("cache and super generation don't match, space cache "
7963                        "will be invalidated\n");
7964                 return 0;
7965         }
7966
7967         if (ctx.progress_enabled) {
7968                 ctx.tp = TASK_FREE_SPACE;
7969                 task_start(ctx.info);
7970         }
7971
7972         while (1) {
7973                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7974                 if (!cache)
7975                         break;
7976
7977                 start = cache->key.objectid + cache->key.offset;
7978                 if (!cache->free_space_ctl) {
7979                         if (btrfs_init_free_space_ctl(cache,
7980                                                 root->fs_info->sectorsize)) {
7981                                 ret = -ENOMEM;
7982                                 break;
7983                         }
7984                 } else {
7985                         btrfs_remove_free_space_cache(cache);
7986                 }
7987
7988                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7989                         ret = exclude_super_stripes(root, cache);
7990                         if (ret) {
7991                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7992                                         strerror(-ret));
7993                                 error++;
7994                                 continue;
7995                         }
7996                         ret = load_free_space_tree(root->fs_info, cache);
7997                         free_excluded_extents(root, cache);
7998                         if (ret < 0) {
7999                                 fprintf(stderr, "could not load free space tree: %s\n",
8000                                         strerror(-ret));
8001                                 error++;
8002                                 continue;
8003                         }
8004                         error += ret;
8005                 } else {
8006                         ret = load_free_space_cache(root->fs_info, cache);
8007                         if (!ret)
8008                                 continue;
8009                 }
8010
8011                 ret = verify_space_cache(root, cache);
8012                 if (ret) {
8013                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
8014                                 cache->key.objectid);
8015                         error++;
8016                 }
8017         }
8018
8019         task_stop(ctx.info);
8020
8021         return error ? -EINVAL : 0;
8022 }
8023
8024 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8025                         u64 num_bytes, unsigned long leaf_offset,
8026                         struct extent_buffer *eb) {
8027
8028         struct btrfs_fs_info *fs_info = root->fs_info;
8029         u64 offset = 0;
8030         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8031         char *data;
8032         unsigned long csum_offset;
8033         u32 csum;
8034         u32 csum_expected;
8035         u64 read_len;
8036         u64 data_checked = 0;
8037         u64 tmp;
8038         int ret = 0;
8039         int mirror;
8040         int num_copies;
8041
8042         if (num_bytes % fs_info->sectorsize)
8043                 return -EINVAL;
8044
8045         data = malloc(num_bytes);
8046         if (!data)
8047                 return -ENOMEM;
8048
8049         while (offset < num_bytes) {
8050                 mirror = 0;
8051 again:
8052                 read_len = num_bytes - offset;
8053                 /* read as much space once a time */
8054                 ret = read_extent_data(fs_info, data + offset,
8055                                 bytenr + offset, &read_len, mirror);
8056                 if (ret)
8057                         goto out;
8058                 data_checked = 0;
8059                 /* verify every 4k data's checksum */
8060                 while (data_checked < read_len) {
8061                         csum = ~(u32)0;
8062                         tmp = offset + data_checked;
8063
8064                         csum = btrfs_csum_data((char *)data + tmp,
8065                                                csum, fs_info->sectorsize);
8066                         btrfs_csum_final(csum, (u8 *)&csum);
8067
8068                         csum_offset = leaf_offset +
8069                                  tmp / fs_info->sectorsize * csum_size;
8070                         read_extent_buffer(eb, (char *)&csum_expected,
8071                                            csum_offset, csum_size);
8072                         /* try another mirror */
8073                         if (csum != csum_expected) {
8074                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8075                                                 mirror, bytenr + tmp,
8076                                                 csum, csum_expected);
8077                                 num_copies = btrfs_num_copies(root->fs_info,
8078                                                 bytenr, num_bytes);
8079                                 if (mirror < num_copies - 1) {
8080                                         mirror += 1;
8081                                         goto again;
8082                                 }
8083                         }
8084                         data_checked += fs_info->sectorsize;
8085                 }
8086                 offset += read_len;
8087         }
8088 out:
8089         free(data);
8090         return ret;
8091 }
8092
8093 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8094                                u64 num_bytes)
8095 {
8096         struct btrfs_path path;
8097         struct extent_buffer *leaf;
8098         struct btrfs_key key;
8099         int ret;
8100
8101         btrfs_init_path(&path);
8102         key.objectid = bytenr;
8103         key.type = BTRFS_EXTENT_ITEM_KEY;
8104         key.offset = (u64)-1;
8105
8106 again:
8107         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8108                                 0, 0);
8109         if (ret < 0) {
8110                 fprintf(stderr, "Error looking up extent record %d\n", ret);
8111                 btrfs_release_path(&path);
8112                 return ret;
8113         } else if (ret) {
8114                 if (path.slots[0] > 0) {
8115                         path.slots[0]--;
8116                 } else {
8117                         ret = btrfs_prev_leaf(root, &path);
8118                         if (ret < 0) {
8119                                 goto out;
8120                         } else if (ret > 0) {
8121                                 ret = 0;
8122                                 goto out;
8123                         }
8124                 }
8125         }
8126
8127         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8128
8129         /*
8130          * Block group items come before extent items if they have the same
8131          * bytenr, so walk back one more just in case.  Dear future traveller,
8132          * first congrats on mastering time travel.  Now if it's not too much
8133          * trouble could you go back to 2006 and tell Chris to make the
8134          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8135          * EXTENT_ITEM_KEY please?
8136          */
8137         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8138                 if (path.slots[0] > 0) {
8139                         path.slots[0]--;
8140                 } else {
8141                         ret = btrfs_prev_leaf(root, &path);
8142                         if (ret < 0) {
8143                                 goto out;
8144                         } else if (ret > 0) {
8145                                 ret = 0;
8146                                 goto out;
8147                         }
8148                 }
8149                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8150         }
8151
8152         while (num_bytes) {
8153                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8154                         ret = btrfs_next_leaf(root, &path);
8155                         if (ret < 0) {
8156                                 fprintf(stderr, "Error going to next leaf "
8157                                         "%d\n", ret);
8158                                 btrfs_release_path(&path);
8159                                 return ret;
8160                         } else if (ret) {
8161                                 break;
8162                         }
8163                 }
8164                 leaf = path.nodes[0];
8165                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8166                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8167                         path.slots[0]++;
8168                         continue;
8169                 }
8170                 if (key.objectid + key.offset < bytenr) {
8171                         path.slots[0]++;
8172                         continue;
8173                 }
8174                 if (key.objectid > bytenr + num_bytes)
8175                         break;
8176
8177                 if (key.objectid == bytenr) {
8178                         if (key.offset >= num_bytes) {
8179                                 num_bytes = 0;
8180                                 break;
8181                         }
8182                         num_bytes -= key.offset;
8183                         bytenr += key.offset;
8184                 } else if (key.objectid < bytenr) {
8185                         if (key.objectid + key.offset >= bytenr + num_bytes) {
8186                                 num_bytes = 0;
8187                                 break;
8188                         }
8189                         num_bytes = (bytenr + num_bytes) -
8190                                 (key.objectid + key.offset);
8191                         bytenr = key.objectid + key.offset;
8192                 } else {
8193                         if (key.objectid + key.offset < bytenr + num_bytes) {
8194                                 u64 new_start = key.objectid + key.offset;
8195                                 u64 new_bytes = bytenr + num_bytes - new_start;
8196
8197                                 /*
8198                                  * Weird case, the extent is in the middle of
8199                                  * our range, we'll have to search one side
8200                                  * and then the other.  Not sure if this happens
8201                                  * in real life, but no harm in coding it up
8202                                  * anyway just in case.
8203                                  */
8204                                 btrfs_release_path(&path);
8205                                 ret = check_extent_exists(root, new_start,
8206                                                           new_bytes);
8207                                 if (ret) {
8208                                         fprintf(stderr, "Right section didn't "
8209                                                 "have a record\n");
8210                                         break;
8211                                 }
8212                                 num_bytes = key.objectid - bytenr;
8213                                 goto again;
8214                         }
8215                         num_bytes = key.objectid - bytenr;
8216                 }
8217                 path.slots[0]++;
8218         }
8219         ret = 0;
8220
8221 out:
8222         if (num_bytes && !ret) {
8223                 fprintf(stderr, "There are no extents for csum range "
8224                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8225                 ret = 1;
8226         }
8227
8228         btrfs_release_path(&path);
8229         return ret;
8230 }
8231
8232 static int check_csums(struct btrfs_root *root)
8233 {
8234         struct btrfs_path path;
8235         struct extent_buffer *leaf;
8236         struct btrfs_key key;
8237         u64 offset = 0, num_bytes = 0;
8238         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8239         int errors = 0;
8240         int ret;
8241         u64 data_len;
8242         unsigned long leaf_offset;
8243
8244         root = root->fs_info->csum_root;
8245         if (!extent_buffer_uptodate(root->node)) {
8246                 fprintf(stderr, "No valid csum tree found\n");
8247                 return -ENOENT;
8248         }
8249
8250         btrfs_init_path(&path);
8251         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8252         key.type = BTRFS_EXTENT_CSUM_KEY;
8253         key.offset = 0;
8254         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8255         if (ret < 0) {
8256                 fprintf(stderr, "Error searching csum tree %d\n", ret);
8257                 btrfs_release_path(&path);
8258                 return ret;
8259         }
8260
8261         if (ret > 0 && path.slots[0])
8262                 path.slots[0]--;
8263         ret = 0;
8264
8265         while (1) {
8266                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8267                         ret = btrfs_next_leaf(root, &path);
8268                         if (ret < 0) {
8269                                 fprintf(stderr, "Error going to next leaf "
8270                                         "%d\n", ret);
8271                                 break;
8272                         }
8273                         if (ret)
8274                                 break;
8275                 }
8276                 leaf = path.nodes[0];
8277
8278                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8279                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
8280                         path.slots[0]++;
8281                         continue;
8282                 }
8283
8284                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8285                               csum_size) * root->fs_info->sectorsize;
8286                 if (!check_data_csum)
8287                         goto skip_csum_check;
8288                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8289                 ret = check_extent_csums(root, key.offset, data_len,
8290                                          leaf_offset, leaf);
8291                 if (ret)
8292                         break;
8293 skip_csum_check:
8294                 if (!num_bytes) {
8295                         offset = key.offset;
8296                 } else if (key.offset != offset + num_bytes) {
8297                         ret = check_extent_exists(root, offset, num_bytes);
8298                         if (ret) {
8299                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8300                                         "there is no extent record\n",
8301                                         offset, offset+num_bytes);
8302                                 errors++;
8303                         }
8304                         offset = key.offset;
8305                         num_bytes = 0;
8306                 }
8307                 num_bytes += data_len;
8308                 path.slots[0]++;
8309         }
8310
8311         btrfs_release_path(&path);
8312         return errors;
8313 }
8314
8315 static int is_dropped_key(struct btrfs_key *key,
8316                           struct btrfs_key *drop_key) {
8317         if (key->objectid < drop_key->objectid)
8318                 return 1;
8319         else if (key->objectid == drop_key->objectid) {
8320                 if (key->type < drop_key->type)
8321                         return 1;
8322                 else if (key->type == drop_key->type) {
8323                         if (key->offset < drop_key->offset)
8324                                 return 1;
8325                 }
8326         }
8327         return 0;
8328 }
8329
8330 /*
8331  * Here are the rules for FULL_BACKREF.
8332  *
8333  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8334  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8335  *      FULL_BACKREF set.
8336  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
8337  *    if it happened after the relocation occurred since we'll have dropped the
8338  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8339  *    have no real way to know for sure.
8340  *
8341  * We process the blocks one root at a time, and we start from the lowest root
8342  * objectid and go to the highest.  So we can just lookup the owner backref for
8343  * the record and if we don't find it then we know it doesn't exist and we have
8344  * a FULL BACKREF.
8345  *
8346  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
8347  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8348  * be set or not and then we can check later once we've gathered all the refs.
8349  */
8350 static int calc_extent_flag(struct cache_tree *extent_cache,
8351                            struct extent_buffer *buf,
8352                            struct root_item_record *ri,
8353                            u64 *flags)
8354 {
8355         struct extent_record *rec;
8356         struct cache_extent *cache;
8357         struct tree_backref *tback;
8358         u64 owner = 0;
8359
8360         cache = lookup_cache_extent(extent_cache, buf->start, 1);
8361         /* we have added this extent before */
8362         if (!cache)
8363                 return -ENOENT;
8364
8365         rec = container_of(cache, struct extent_record, cache);
8366
8367         /*
8368          * Except file/reloc tree, we can not have
8369          * FULL BACKREF MODE
8370          */
8371         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8372                 goto normal;
8373         /*
8374          * root node
8375          */
8376         if (buf->start == ri->bytenr)
8377                 goto normal;
8378
8379         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8380                 goto full_backref;
8381
8382         owner = btrfs_header_owner(buf);
8383         if (owner == ri->objectid)
8384                 goto normal;
8385
8386         tback = find_tree_backref(rec, 0, owner);
8387         if (!tback)
8388                 goto full_backref;
8389 normal:
8390         *flags = 0;
8391         if (rec->flag_block_full_backref != FLAG_UNSET &&
8392             rec->flag_block_full_backref != 0)
8393                 rec->bad_full_backref = 1;
8394         return 0;
8395 full_backref:
8396         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8397         if (rec->flag_block_full_backref != FLAG_UNSET &&
8398             rec->flag_block_full_backref != 1)
8399                 rec->bad_full_backref = 1;
8400         return 0;
8401 }
8402
8403 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8404 {
8405         fprintf(stderr, "Invalid key type(");
8406         print_key_type(stderr, 0, key_type);
8407         fprintf(stderr, ") found in root(");
8408         print_objectid(stderr, rootid, 0);
8409         fprintf(stderr, ")\n");
8410 }
8411
8412 /*
8413  * Check if the key is valid with its extent buffer.
8414  *
8415  * This is a early check in case invalid key exists in a extent buffer
8416  * This is not comprehensive yet, but should prevent wrong key/item passed
8417  * further
8418  */
8419 static int check_type_with_root(u64 rootid, u8 key_type)
8420 {
8421         switch (key_type) {
8422         /* Only valid in chunk tree */
8423         case BTRFS_DEV_ITEM_KEY:
8424         case BTRFS_CHUNK_ITEM_KEY:
8425                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8426                         goto err;
8427                 break;
8428         /* valid in csum and log tree */
8429         case BTRFS_CSUM_TREE_OBJECTID:
8430                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8431                       is_fstree(rootid)))
8432                         goto err;
8433                 break;
8434         case BTRFS_EXTENT_ITEM_KEY:
8435         case BTRFS_METADATA_ITEM_KEY:
8436         case BTRFS_BLOCK_GROUP_ITEM_KEY:
8437                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8438                         goto err;
8439                 break;
8440         case BTRFS_ROOT_ITEM_KEY:
8441                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8442                         goto err;
8443                 break;
8444         case BTRFS_DEV_EXTENT_KEY:
8445                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8446                         goto err;
8447                 break;
8448         }
8449         return 0;
8450 err:
8451         report_mismatch_key_root(key_type, rootid);
8452         return -EINVAL;
8453 }
8454
8455 static int run_next_block(struct btrfs_root *root,
8456                           struct block_info *bits,
8457                           int bits_nr,
8458                           u64 *last,
8459                           struct cache_tree *pending,
8460                           struct cache_tree *seen,
8461                           struct cache_tree *reada,
8462                           struct cache_tree *nodes,
8463                           struct cache_tree *extent_cache,
8464                           struct cache_tree *chunk_cache,
8465                           struct rb_root *dev_cache,
8466                           struct block_group_tree *block_group_cache,
8467                           struct device_extent_tree *dev_extent_cache,
8468                           struct root_item_record *ri)
8469 {
8470         struct btrfs_fs_info *fs_info = root->fs_info;
8471         struct extent_buffer *buf;
8472         struct extent_record *rec = NULL;
8473         u64 bytenr;
8474         u32 size;
8475         u64 parent;
8476         u64 owner;
8477         u64 flags;
8478         u64 ptr;
8479         u64 gen = 0;
8480         int ret = 0;
8481         int i;
8482         int nritems;
8483         struct btrfs_key key;
8484         struct cache_extent *cache;
8485         int reada_bits;
8486
8487         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8488                                     bits_nr, &reada_bits);
8489         if (nritems == 0)
8490                 return 1;
8491
8492         if (!reada_bits) {
8493                 for(i = 0; i < nritems; i++) {
8494                         ret = add_cache_extent(reada, bits[i].start,
8495                                                bits[i].size);
8496                         if (ret == -EEXIST)
8497                                 continue;
8498
8499                         /* fixme, get the parent transid */
8500                         readahead_tree_block(fs_info, bits[i].start, 0);
8501                 }
8502         }
8503         *last = bits[0].start;
8504         bytenr = bits[0].start;
8505         size = bits[0].size;
8506
8507         cache = lookup_cache_extent(pending, bytenr, size);
8508         if (cache) {
8509                 remove_cache_extent(pending, cache);
8510                 free(cache);
8511         }
8512         cache = lookup_cache_extent(reada, bytenr, size);
8513         if (cache) {
8514                 remove_cache_extent(reada, cache);
8515                 free(cache);
8516         }
8517         cache = lookup_cache_extent(nodes, bytenr, size);
8518         if (cache) {
8519                 remove_cache_extent(nodes, cache);
8520                 free(cache);
8521         }
8522         cache = lookup_cache_extent(extent_cache, bytenr, size);
8523         if (cache) {
8524                 rec = container_of(cache, struct extent_record, cache);
8525                 gen = rec->parent_generation;
8526         }
8527
8528         /* fixme, get the real parent transid */
8529         buf = read_tree_block(root->fs_info, bytenr, gen);
8530         if (!extent_buffer_uptodate(buf)) {
8531                 record_bad_block_io(root->fs_info,
8532                                     extent_cache, bytenr, size);
8533                 goto out;
8534         }
8535
8536         nritems = btrfs_header_nritems(buf);
8537
8538         flags = 0;
8539         if (!init_extent_tree) {
8540                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8541                                        btrfs_header_level(buf), 1, NULL,
8542                                        &flags);
8543                 if (ret < 0) {
8544                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8545                         if (ret < 0) {
8546                                 fprintf(stderr, "Couldn't calc extent flags\n");
8547                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8548                         }
8549                 }
8550         } else {
8551                 flags = 0;
8552                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8553                 if (ret < 0) {
8554                         fprintf(stderr, "Couldn't calc extent flags\n");
8555                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8556                 }
8557         }
8558
8559         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8560                 if (ri != NULL &&
8561                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8562                     ri->objectid == btrfs_header_owner(buf)) {
8563                         /*
8564                          * Ok we got to this block from it's original owner and
8565                          * we have FULL_BACKREF set.  Relocation can leave
8566                          * converted blocks over so this is altogether possible,
8567                          * however it's not possible if the generation > the
8568                          * last snapshot, so check for this case.
8569                          */
8570                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8571                             btrfs_header_generation(buf) > ri->last_snapshot) {
8572                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8573                                 rec->bad_full_backref = 1;
8574                         }
8575                 }
8576         } else {
8577                 if (ri != NULL &&
8578                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8579                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8580                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8581                         rec->bad_full_backref = 1;
8582                 }
8583         }
8584
8585         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8586                 rec->flag_block_full_backref = 1;
8587                 parent = bytenr;
8588                 owner = 0;
8589         } else {
8590                 rec->flag_block_full_backref = 0;
8591                 parent = 0;
8592                 owner = btrfs_header_owner(buf);
8593         }
8594
8595         ret = check_block(root, extent_cache, buf, flags);
8596         if (ret)
8597                 goto out;
8598
8599         if (btrfs_is_leaf(buf)) {
8600                 btree_space_waste += btrfs_leaf_free_space(root, buf);
8601                 for (i = 0; i < nritems; i++) {
8602                         struct btrfs_file_extent_item *fi;
8603                         btrfs_item_key_to_cpu(buf, &key, i);
8604                         /*
8605                          * Check key type against the leaf owner.
8606                          * Could filter quite a lot of early error if
8607                          * owner is correct
8608                          */
8609                         if (check_type_with_root(btrfs_header_owner(buf),
8610                                                  key.type)) {
8611                                 fprintf(stderr, "ignoring invalid key\n");
8612                                 continue;
8613                         }
8614                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8615                                 process_extent_item(root, extent_cache, buf,
8616                                                     i);
8617                                 continue;
8618                         }
8619                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8620                                 process_extent_item(root, extent_cache, buf,
8621                                                     i);
8622                                 continue;
8623                         }
8624                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8625                                 total_csum_bytes +=
8626                                         btrfs_item_size_nr(buf, i);
8627                                 continue;
8628                         }
8629                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8630                                 process_chunk_item(chunk_cache, &key, buf, i);
8631                                 continue;
8632                         }
8633                         if (key.type == BTRFS_DEV_ITEM_KEY) {
8634                                 process_device_item(dev_cache, &key, buf, i);
8635                                 continue;
8636                         }
8637                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8638                                 process_block_group_item(block_group_cache,
8639                                         &key, buf, i);
8640                                 continue;
8641                         }
8642                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
8643                                 process_device_extent_item(dev_extent_cache,
8644                                         &key, buf, i);
8645                                 continue;
8646
8647                         }
8648                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8649 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8650                                 process_extent_ref_v0(extent_cache, buf, i);
8651 #else
8652                                 BUG();
8653 #endif
8654                                 continue;
8655                         }
8656
8657                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8658                                 ret = add_tree_backref(extent_cache,
8659                                                 key.objectid, 0, key.offset, 0);
8660                                 if (ret < 0)
8661                                         error(
8662                                 "add_tree_backref failed (leaf tree block): %s",
8663                                               strerror(-ret));
8664                                 continue;
8665                         }
8666                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8667                                 ret = add_tree_backref(extent_cache,
8668                                                 key.objectid, key.offset, 0, 0);
8669                                 if (ret < 0)
8670                                         error(
8671                                 "add_tree_backref failed (leaf shared block): %s",
8672                                               strerror(-ret));
8673                                 continue;
8674                         }
8675                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8676                                 struct btrfs_extent_data_ref *ref;
8677                                 ref = btrfs_item_ptr(buf, i,
8678                                                 struct btrfs_extent_data_ref);
8679                                 add_data_backref(extent_cache,
8680                                         key.objectid, 0,
8681                                         btrfs_extent_data_ref_root(buf, ref),
8682                                         btrfs_extent_data_ref_objectid(buf,
8683                                                                        ref),
8684                                         btrfs_extent_data_ref_offset(buf, ref),
8685                                         btrfs_extent_data_ref_count(buf, ref),
8686                                         0, root->fs_info->sectorsize);
8687                                 continue;
8688                         }
8689                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8690                                 struct btrfs_shared_data_ref *ref;
8691                                 ref = btrfs_item_ptr(buf, i,
8692                                                 struct btrfs_shared_data_ref);
8693                                 add_data_backref(extent_cache,
8694                                         key.objectid, key.offset, 0, 0, 0,
8695                                         btrfs_shared_data_ref_count(buf, ref),
8696                                         0, root->fs_info->sectorsize);
8697                                 continue;
8698                         }
8699                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8700                                 struct bad_item *bad;
8701
8702                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8703                                         continue;
8704                                 if (!owner)
8705                                         continue;
8706                                 bad = malloc(sizeof(struct bad_item));
8707                                 if (!bad)
8708                                         continue;
8709                                 INIT_LIST_HEAD(&bad->list);
8710                                 memcpy(&bad->key, &key,
8711                                        sizeof(struct btrfs_key));
8712                                 bad->root_id = owner;
8713                                 list_add_tail(&bad->list, &delete_items);
8714                                 continue;
8715                         }
8716                         if (key.type != BTRFS_EXTENT_DATA_KEY)
8717                                 continue;
8718                         fi = btrfs_item_ptr(buf, i,
8719                                             struct btrfs_file_extent_item);
8720                         if (btrfs_file_extent_type(buf, fi) ==
8721                             BTRFS_FILE_EXTENT_INLINE)
8722                                 continue;
8723                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8724                                 continue;
8725
8726                         data_bytes_allocated +=
8727                                 btrfs_file_extent_disk_num_bytes(buf, fi);
8728                         if (data_bytes_allocated < root->fs_info->sectorsize) {
8729                                 abort();
8730                         }
8731                         data_bytes_referenced +=
8732                                 btrfs_file_extent_num_bytes(buf, fi);
8733                         add_data_backref(extent_cache,
8734                                 btrfs_file_extent_disk_bytenr(buf, fi),
8735                                 parent, owner, key.objectid, key.offset -
8736                                 btrfs_file_extent_offset(buf, fi), 1, 1,
8737                                 btrfs_file_extent_disk_num_bytes(buf, fi));
8738                 }
8739         } else {
8740                 int level;
8741                 struct btrfs_key first_key;
8742
8743                 first_key.objectid = 0;
8744
8745                 if (nritems > 0)
8746                         btrfs_item_key_to_cpu(buf, &first_key, 0);
8747                 level = btrfs_header_level(buf);
8748                 for (i = 0; i < nritems; i++) {
8749                         struct extent_record tmpl;
8750
8751                         ptr = btrfs_node_blockptr(buf, i);
8752                         size = root->fs_info->nodesize;
8753                         btrfs_node_key_to_cpu(buf, &key, i);
8754                         if (ri != NULL) {
8755                                 if ((level == ri->drop_level)
8756                                     && is_dropped_key(&key, &ri->drop_key)) {
8757                                         continue;
8758                                 }
8759                         }
8760
8761                         memset(&tmpl, 0, sizeof(tmpl));
8762                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8763                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8764                         tmpl.start = ptr;
8765                         tmpl.nr = size;
8766                         tmpl.refs = 1;
8767                         tmpl.metadata = 1;
8768                         tmpl.max_size = size;
8769                         ret = add_extent_rec(extent_cache, &tmpl);
8770                         if (ret < 0)
8771                                 goto out;
8772
8773                         ret = add_tree_backref(extent_cache, ptr, parent,
8774                                         owner, 1);
8775                         if (ret < 0) {
8776                                 error(
8777                                 "add_tree_backref failed (non-leaf block): %s",
8778                                       strerror(-ret));
8779                                 continue;
8780                         }
8781
8782                         if (level > 1) {
8783                                 add_pending(nodes, seen, ptr, size);
8784                         } else {
8785                                 add_pending(pending, seen, ptr, size);
8786                         }
8787                 }
8788                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
8789                                       nritems) * sizeof(struct btrfs_key_ptr);
8790         }
8791         total_btree_bytes += buf->len;
8792         if (fs_root_objectid(btrfs_header_owner(buf)))
8793                 total_fs_tree_bytes += buf->len;
8794         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8795                 total_extent_tree_bytes += buf->len;
8796 out:
8797         free_extent_buffer(buf);
8798         return ret;
8799 }
8800
8801 static int add_root_to_pending(struct extent_buffer *buf,
8802                                struct cache_tree *extent_cache,
8803                                struct cache_tree *pending,
8804                                struct cache_tree *seen,
8805                                struct cache_tree *nodes,
8806                                u64 objectid)
8807 {
8808         struct extent_record tmpl;
8809         int ret;
8810
8811         if (btrfs_header_level(buf) > 0)
8812                 add_pending(nodes, seen, buf->start, buf->len);
8813         else
8814                 add_pending(pending, seen, buf->start, buf->len);
8815
8816         memset(&tmpl, 0, sizeof(tmpl));
8817         tmpl.start = buf->start;
8818         tmpl.nr = buf->len;
8819         tmpl.is_root = 1;
8820         tmpl.refs = 1;
8821         tmpl.metadata = 1;
8822         tmpl.max_size = buf->len;
8823         add_extent_rec(extent_cache, &tmpl);
8824
8825         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8826             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8827                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8828                                 0, 1);
8829         else
8830                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8831                                 1);
8832         return ret;
8833 }
8834
8835 /* as we fix the tree, we might be deleting blocks that
8836  * we're tracking for repair.  This hook makes sure we
8837  * remove any backrefs for blocks as we are fixing them.
8838  */
8839 static int free_extent_hook(struct btrfs_trans_handle *trans,
8840                             struct btrfs_root *root,
8841                             u64 bytenr, u64 num_bytes, u64 parent,
8842                             u64 root_objectid, u64 owner, u64 offset,
8843                             int refs_to_drop)
8844 {
8845         struct extent_record *rec;
8846         struct cache_extent *cache;
8847         int is_data;
8848         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8849
8850         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8851         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8852         if (!cache)
8853                 return 0;
8854
8855         rec = container_of(cache, struct extent_record, cache);
8856         if (is_data) {
8857                 struct data_backref *back;
8858                 back = find_data_backref(rec, parent, root_objectid, owner,
8859                                          offset, 1, bytenr, num_bytes);
8860                 if (!back)
8861                         goto out;
8862                 if (back->node.found_ref) {
8863                         back->found_ref -= refs_to_drop;
8864                         if (rec->refs)
8865                                 rec->refs -= refs_to_drop;
8866                 }
8867                 if (back->node.found_extent_tree) {
8868                         back->num_refs -= refs_to_drop;
8869                         if (rec->extent_item_refs)
8870                                 rec->extent_item_refs -= refs_to_drop;
8871                 }
8872                 if (back->found_ref == 0)
8873                         back->node.found_ref = 0;
8874                 if (back->num_refs == 0)
8875                         back->node.found_extent_tree = 0;
8876
8877                 if (!back->node.found_extent_tree && back->node.found_ref) {
8878                         rb_erase(&back->node.node, &rec->backref_tree);
8879                         free(back);
8880                 }
8881         } else {
8882                 struct tree_backref *back;
8883                 back = find_tree_backref(rec, parent, root_objectid);
8884                 if (!back)
8885                         goto out;
8886                 if (back->node.found_ref) {
8887                         if (rec->refs)
8888                                 rec->refs--;
8889                         back->node.found_ref = 0;
8890                 }
8891                 if (back->node.found_extent_tree) {
8892                         if (rec->extent_item_refs)
8893                                 rec->extent_item_refs--;
8894                         back->node.found_extent_tree = 0;
8895                 }
8896                 if (!back->node.found_extent_tree && back->node.found_ref) {
8897                         rb_erase(&back->node.node, &rec->backref_tree);
8898                         free(back);
8899                 }
8900         }
8901         maybe_free_extent_rec(extent_cache, rec);
8902 out:
8903         return 0;
8904 }
8905
8906 static int delete_extent_records(struct btrfs_trans_handle *trans,
8907                                  struct btrfs_root *root,
8908                                  struct btrfs_path *path,
8909                                  u64 bytenr)
8910 {
8911         struct btrfs_key key;
8912         struct btrfs_key found_key;
8913         struct extent_buffer *leaf;
8914         int ret;
8915         int slot;
8916
8917
8918         key.objectid = bytenr;
8919         key.type = (u8)-1;
8920         key.offset = (u64)-1;
8921
8922         while(1) {
8923                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8924                                         &key, path, 0, 1);
8925                 if (ret < 0)
8926                         break;
8927
8928                 if (ret > 0) {
8929                         ret = 0;
8930                         if (path->slots[0] == 0)
8931                                 break;
8932                         path->slots[0]--;
8933                 }
8934                 ret = 0;
8935
8936                 leaf = path->nodes[0];
8937                 slot = path->slots[0];
8938
8939                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8940                 if (found_key.objectid != bytenr)
8941                         break;
8942
8943                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8944                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8945                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8946                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8947                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8948                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8949                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8950                         btrfs_release_path(path);
8951                         if (found_key.type == 0) {
8952                                 if (found_key.offset == 0)
8953                                         break;
8954                                 key.offset = found_key.offset - 1;
8955                                 key.type = found_key.type;
8956                         }
8957                         key.type = found_key.type - 1;
8958                         key.offset = (u64)-1;
8959                         continue;
8960                 }
8961
8962                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8963                         found_key.objectid, found_key.type, found_key.offset);
8964
8965                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8966                 if (ret)
8967                         break;
8968                 btrfs_release_path(path);
8969
8970                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8971                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8972                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8973                                 found_key.offset : root->fs_info->nodesize;
8974
8975                         ret = btrfs_update_block_group(trans, root, bytenr,
8976                                                        bytes, 0, 0);
8977                         if (ret)
8978                                 break;
8979                 }
8980         }
8981
8982         btrfs_release_path(path);
8983         return ret;
8984 }
8985
8986 /*
8987  * for a single backref, this will allocate a new extent
8988  * and add the backref to it.
8989  */
8990 static int record_extent(struct btrfs_trans_handle *trans,
8991                          struct btrfs_fs_info *info,
8992                          struct btrfs_path *path,
8993                          struct extent_record *rec,
8994                          struct extent_backref *back,
8995                          int allocated, u64 flags)
8996 {
8997         int ret = 0;
8998         struct btrfs_root *extent_root = info->extent_root;
8999         struct extent_buffer *leaf;
9000         struct btrfs_key ins_key;
9001         struct btrfs_extent_item *ei;
9002         struct data_backref *dback;
9003         struct btrfs_tree_block_info *bi;
9004
9005         if (!back->is_data)
9006                 rec->max_size = max_t(u64, rec->max_size,
9007                                     info->nodesize);
9008
9009         if (!allocated) {
9010                 u32 item_size = sizeof(*ei);
9011
9012                 if (!back->is_data)
9013                         item_size += sizeof(*bi);
9014
9015                 ins_key.objectid = rec->start;
9016                 ins_key.offset = rec->max_size;
9017                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9018
9019                 ret = btrfs_insert_empty_item(trans, extent_root, path,
9020                                         &ins_key, item_size);
9021                 if (ret)
9022                         goto fail;
9023
9024                 leaf = path->nodes[0];
9025                 ei = btrfs_item_ptr(leaf, path->slots[0],
9026                                     struct btrfs_extent_item);
9027
9028                 btrfs_set_extent_refs(leaf, ei, 0);
9029                 btrfs_set_extent_generation(leaf, ei, rec->generation);
9030
9031                 if (back->is_data) {
9032                         btrfs_set_extent_flags(leaf, ei,
9033                                                BTRFS_EXTENT_FLAG_DATA);
9034                 } else {
9035                         struct btrfs_disk_key copy_key;;
9036
9037                         bi = (struct btrfs_tree_block_info *)(ei + 1);
9038                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
9039                                              sizeof(*bi));
9040
9041                         btrfs_set_disk_key_objectid(&copy_key,
9042                                                     rec->info_objectid);
9043                         btrfs_set_disk_key_type(&copy_key, 0);
9044                         btrfs_set_disk_key_offset(&copy_key, 0);
9045
9046                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9047                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
9048
9049                         btrfs_set_extent_flags(leaf, ei,
9050                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9051                 }
9052
9053                 btrfs_mark_buffer_dirty(leaf);
9054                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
9055                                                rec->max_size, 1, 0);
9056                 if (ret)
9057                         goto fail;
9058                 btrfs_release_path(path);
9059         }
9060
9061         if (back->is_data) {
9062                 u64 parent;
9063                 int i;
9064
9065                 dback = to_data_backref(back);
9066                 if (back->full_backref)
9067                         parent = dback->parent;
9068                 else
9069                         parent = 0;
9070
9071                 for (i = 0; i < dback->found_ref; i++) {
9072                         /* if parent != 0, we're doing a full backref
9073                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9074                          * just makes the backref allocator create a data
9075                          * backref
9076                          */
9077                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
9078                                                    rec->start, rec->max_size,
9079                                                    parent,
9080                                                    dback->root,
9081                                                    parent ?
9082                                                    BTRFS_FIRST_FREE_OBJECTID :
9083                                                    dback->owner,
9084                                                    dback->offset);
9085                         if (ret)
9086                                 break;
9087                 }
9088                 fprintf(stderr, "adding new data backref"
9089                                 " on %llu %s %llu owner %llu"
9090                                 " offset %llu found %d\n",
9091                                 (unsigned long long)rec->start,
9092                                 back->full_backref ?
9093                                 "parent" : "root",
9094                                 back->full_backref ?
9095                                 (unsigned long long)parent :
9096                                 (unsigned long long)dback->root,
9097                                 (unsigned long long)dback->owner,
9098                                 (unsigned long long)dback->offset,
9099                                 dback->found_ref);
9100         } else {
9101                 u64 parent;
9102                 struct tree_backref *tback;
9103
9104                 tback = to_tree_backref(back);
9105                 if (back->full_backref)
9106                         parent = tback->parent;
9107                 else
9108                         parent = 0;
9109
9110                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9111                                            rec->start, rec->max_size,
9112                                            parent, tback->root, 0, 0);
9113                 fprintf(stderr, "adding new tree backref on "
9114                         "start %llu len %llu parent %llu root %llu\n",
9115                         rec->start, rec->max_size, parent, tback->root);
9116         }
9117 fail:
9118         btrfs_release_path(path);
9119         return ret;
9120 }
9121
9122 static struct extent_entry *find_entry(struct list_head *entries,
9123                                        u64 bytenr, u64 bytes)
9124 {
9125         struct extent_entry *entry = NULL;
9126
9127         list_for_each_entry(entry, entries, list) {
9128                 if (entry->bytenr == bytenr && entry->bytes == bytes)
9129                         return entry;
9130         }
9131
9132         return NULL;
9133 }
9134
9135 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9136 {
9137         struct extent_entry *entry, *best = NULL, *prev = NULL;
9138
9139         list_for_each_entry(entry, entries, list) {
9140                 /*
9141                  * If there are as many broken entries as entries then we know
9142                  * not to trust this particular entry.
9143                  */
9144                 if (entry->broken == entry->count)
9145                         continue;
9146
9147                 /*
9148                  * Special case, when there are only two entries and 'best' is
9149                  * the first one
9150                  */
9151                 if (!prev) {
9152                         best = entry;
9153                         prev = entry;
9154                         continue;
9155                 }
9156
9157                 /*
9158                  * If our current entry == best then we can't be sure our best
9159                  * is really the best, so we need to keep searching.
9160                  */
9161                 if (best && best->count == entry->count) {
9162                         prev = entry;
9163                         best = NULL;
9164                         continue;
9165                 }
9166
9167                 /* Prev == entry, not good enough, have to keep searching */
9168                 if (!prev->broken && prev->count == entry->count)
9169                         continue;
9170
9171                 if (!best)
9172                         best = (prev->count > entry->count) ? prev : entry;
9173                 else if (best->count < entry->count)
9174                         best = entry;
9175                 prev = entry;
9176         }
9177
9178         return best;
9179 }
9180
9181 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9182                       struct data_backref *dback, struct extent_entry *entry)
9183 {
9184         struct btrfs_trans_handle *trans;
9185         struct btrfs_root *root;
9186         struct btrfs_file_extent_item *fi;
9187         struct extent_buffer *leaf;
9188         struct btrfs_key key;
9189         u64 bytenr, bytes;
9190         int ret, err;
9191
9192         key.objectid = dback->root;
9193         key.type = BTRFS_ROOT_ITEM_KEY;
9194         key.offset = (u64)-1;
9195         root = btrfs_read_fs_root(info, &key);
9196         if (IS_ERR(root)) {
9197                 fprintf(stderr, "Couldn't find root for our ref\n");
9198                 return -EINVAL;
9199         }
9200
9201         /*
9202          * The backref points to the original offset of the extent if it was
9203          * split, so we need to search down to the offset we have and then walk
9204          * forward until we find the backref we're looking for.
9205          */
9206         key.objectid = dback->owner;
9207         key.type = BTRFS_EXTENT_DATA_KEY;
9208         key.offset = dback->offset;
9209         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9210         if (ret < 0) {
9211                 fprintf(stderr, "Error looking up ref %d\n", ret);
9212                 return ret;
9213         }
9214
9215         while (1) {
9216                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9217                         ret = btrfs_next_leaf(root, path);
9218                         if (ret) {
9219                                 fprintf(stderr, "Couldn't find our ref, next\n");
9220                                 return -EINVAL;
9221                         }
9222                 }
9223                 leaf = path->nodes[0];
9224                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9225                 if (key.objectid != dback->owner ||
9226                     key.type != BTRFS_EXTENT_DATA_KEY) {
9227                         fprintf(stderr, "Couldn't find our ref, search\n");
9228                         return -EINVAL;
9229                 }
9230                 fi = btrfs_item_ptr(leaf, path->slots[0],
9231                                     struct btrfs_file_extent_item);
9232                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9233                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9234
9235                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9236                         break;
9237                 path->slots[0]++;
9238         }
9239
9240         btrfs_release_path(path);
9241
9242         trans = btrfs_start_transaction(root, 1);
9243         if (IS_ERR(trans))
9244                 return PTR_ERR(trans);
9245
9246         /*
9247          * Ok we have the key of the file extent we want to fix, now we can cow
9248          * down to the thing and fix it.
9249          */
9250         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9251         if (ret < 0) {
9252                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9253                         key.objectid, key.type, key.offset, ret);
9254                 goto out;
9255         }
9256         if (ret > 0) {
9257                 fprintf(stderr, "Well that's odd, we just found this key "
9258                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9259                         key.offset);
9260                 ret = -EINVAL;
9261                 goto out;
9262         }
9263         leaf = path->nodes[0];
9264         fi = btrfs_item_ptr(leaf, path->slots[0],
9265                             struct btrfs_file_extent_item);
9266
9267         if (btrfs_file_extent_compression(leaf, fi) &&
9268             dback->disk_bytenr != entry->bytenr) {
9269                 fprintf(stderr, "Ref doesn't match the record start and is "
9270                         "compressed, please take a btrfs-image of this file "
9271                         "system and send it to a btrfs developer so they can "
9272                         "complete this functionality for bytenr %Lu\n",
9273                         dback->disk_bytenr);
9274                 ret = -EINVAL;
9275                 goto out;
9276         }
9277
9278         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9279                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9280         } else if (dback->disk_bytenr > entry->bytenr) {
9281                 u64 off_diff, offset;
9282
9283                 off_diff = dback->disk_bytenr - entry->bytenr;
9284                 offset = btrfs_file_extent_offset(leaf, fi);
9285                 if (dback->disk_bytenr + offset +
9286                     btrfs_file_extent_num_bytes(leaf, fi) >
9287                     entry->bytenr + entry->bytes) {
9288                         fprintf(stderr, "Ref is past the entry end, please "
9289                                 "take a btrfs-image of this file system and "
9290                                 "send it to a btrfs developer, ref %Lu\n",
9291                                 dback->disk_bytenr);
9292                         ret = -EINVAL;
9293                         goto out;
9294                 }
9295                 offset += off_diff;
9296                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9297                 btrfs_set_file_extent_offset(leaf, fi, offset);
9298         } else if (dback->disk_bytenr < entry->bytenr) {
9299                 u64 offset;
9300
9301                 offset = btrfs_file_extent_offset(leaf, fi);
9302                 if (dback->disk_bytenr + offset < entry->bytenr) {
9303                         fprintf(stderr, "Ref is before the entry start, please"
9304                                 " take a btrfs-image of this file system and "
9305                                 "send it to a btrfs developer, ref %Lu\n",
9306                                 dback->disk_bytenr);
9307                         ret = -EINVAL;
9308                         goto out;
9309                 }
9310
9311                 offset += dback->disk_bytenr;
9312                 offset -= entry->bytenr;
9313                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9314                 btrfs_set_file_extent_offset(leaf, fi, offset);
9315         }
9316
9317         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9318
9319         /*
9320          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9321          * only do this if we aren't using compression, otherwise it's a
9322          * trickier case.
9323          */
9324         if (!btrfs_file_extent_compression(leaf, fi))
9325                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9326         else
9327                 printf("ram bytes may be wrong?\n");
9328         btrfs_mark_buffer_dirty(leaf);
9329 out:
9330         err = btrfs_commit_transaction(trans, root);
9331         btrfs_release_path(path);
9332         return ret ? ret : err;
9333 }
9334
9335 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9336                            struct extent_record *rec)
9337 {
9338         struct extent_backref *back, *tmp;
9339         struct data_backref *dback;
9340         struct extent_entry *entry, *best = NULL;
9341         LIST_HEAD(entries);
9342         int nr_entries = 0;
9343         int broken_entries = 0;
9344         int ret = 0;
9345         short mismatch = 0;
9346
9347         /*
9348          * Metadata is easy and the backrefs should always agree on bytenr and
9349          * size, if not we've got bigger issues.
9350          */
9351         if (rec->metadata)
9352                 return 0;
9353
9354         rbtree_postorder_for_each_entry_safe(back, tmp,
9355                                              &rec->backref_tree, node) {
9356                 if (back->full_backref || !back->is_data)
9357                         continue;
9358
9359                 dback = to_data_backref(back);
9360
9361                 /*
9362                  * We only pay attention to backrefs that we found a real
9363                  * backref for.
9364                  */
9365                 if (dback->found_ref == 0)
9366                         continue;
9367
9368                 /*
9369                  * For now we only catch when the bytes don't match, not the
9370                  * bytenr.  We can easily do this at the same time, but I want
9371                  * to have a fs image to test on before we just add repair
9372                  * functionality willy-nilly so we know we won't screw up the
9373                  * repair.
9374                  */
9375
9376                 entry = find_entry(&entries, dback->disk_bytenr,
9377                                    dback->bytes);
9378                 if (!entry) {
9379                         entry = malloc(sizeof(struct extent_entry));
9380                         if (!entry) {
9381                                 ret = -ENOMEM;
9382                                 goto out;
9383                         }
9384                         memset(entry, 0, sizeof(*entry));
9385                         entry->bytenr = dback->disk_bytenr;
9386                         entry->bytes = dback->bytes;
9387                         list_add_tail(&entry->list, &entries);
9388                         nr_entries++;
9389                 }
9390
9391                 /*
9392                  * If we only have on entry we may think the entries agree when
9393                  * in reality they don't so we have to do some extra checking.
9394                  */
9395                 if (dback->disk_bytenr != rec->start ||
9396                     dback->bytes != rec->nr || back->broken)
9397                         mismatch = 1;
9398
9399                 if (back->broken) {
9400                         entry->broken++;
9401                         broken_entries++;
9402                 }
9403
9404                 entry->count++;
9405         }
9406
9407         /* Yay all the backrefs agree, carry on good sir */
9408         if (nr_entries <= 1 && !mismatch)
9409                 goto out;
9410
9411         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9412                 "%Lu\n", rec->start);
9413
9414         /*
9415          * First we want to see if the backrefs can agree amongst themselves who
9416          * is right, so figure out which one of the entries has the highest
9417          * count.
9418          */
9419         best = find_most_right_entry(&entries);
9420
9421         /*
9422          * Ok so we may have an even split between what the backrefs think, so
9423          * this is where we use the extent ref to see what it thinks.
9424          */
9425         if (!best) {
9426                 entry = find_entry(&entries, rec->start, rec->nr);
9427                 if (!entry && (!broken_entries || !rec->found_rec)) {
9428                         fprintf(stderr, "Backrefs don't agree with each other "
9429                                 "and extent record doesn't agree with anybody,"
9430                                 " so we can't fix bytenr %Lu bytes %Lu\n",
9431                                 rec->start, rec->nr);
9432                         ret = -EINVAL;
9433                         goto out;
9434                 } else if (!entry) {
9435                         /*
9436                          * Ok our backrefs were broken, we'll assume this is the
9437                          * correct value and add an entry for this range.
9438                          */
9439                         entry = malloc(sizeof(struct extent_entry));
9440                         if (!entry) {
9441                                 ret = -ENOMEM;
9442                                 goto out;
9443                         }
9444                         memset(entry, 0, sizeof(*entry));
9445                         entry->bytenr = rec->start;
9446                         entry->bytes = rec->nr;
9447                         list_add_tail(&entry->list, &entries);
9448                         nr_entries++;
9449                 }
9450                 entry->count++;
9451                 best = find_most_right_entry(&entries);
9452                 if (!best) {
9453                         fprintf(stderr, "Backrefs and extent record evenly "
9454                                 "split on who is right, this is going to "
9455                                 "require user input to fix bytenr %Lu bytes "
9456                                 "%Lu\n", rec->start, rec->nr);
9457                         ret = -EINVAL;
9458                         goto out;
9459                 }
9460         }
9461
9462         /*
9463          * I don't think this can happen currently as we'll abort() if we catch
9464          * this case higher up, but in case somebody removes that we still can't
9465          * deal with it properly here yet, so just bail out of that's the case.
9466          */
9467         if (best->bytenr != rec->start) {
9468                 fprintf(stderr, "Extent start and backref starts don't match, "
9469                         "please use btrfs-image on this file system and send "
9470                         "it to a btrfs developer so they can make fsck fix "
9471                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
9472                         rec->start, rec->nr);
9473                 ret = -EINVAL;
9474                 goto out;
9475         }
9476
9477         /*
9478          * Ok great we all agreed on an extent record, let's go find the real
9479          * references and fix up the ones that don't match.
9480          */
9481         rbtree_postorder_for_each_entry_safe(back, tmp,
9482                                              &rec->backref_tree, node) {
9483                 if (back->full_backref || !back->is_data)
9484                         continue;
9485
9486                 dback = to_data_backref(back);
9487
9488                 /*
9489                  * Still ignoring backrefs that don't have a real ref attached
9490                  * to them.
9491                  */
9492                 if (dback->found_ref == 0)
9493                         continue;
9494
9495                 if (dback->bytes == best->bytes &&
9496                     dback->disk_bytenr == best->bytenr)
9497                         continue;
9498
9499                 ret = repair_ref(info, path, dback, best);
9500                 if (ret)
9501                         goto out;
9502         }
9503
9504         /*
9505          * Ok we messed with the actual refs, which means we need to drop our
9506          * entire cache and go back and rescan.  I know this is a huge pain and
9507          * adds a lot of extra work, but it's the only way to be safe.  Once all
9508          * the backrefs agree we may not need to do anything to the extent
9509          * record itself.
9510          */
9511         ret = -EAGAIN;
9512 out:
9513         while (!list_empty(&entries)) {
9514                 entry = list_entry(entries.next, struct extent_entry, list);
9515                 list_del_init(&entry->list);
9516                 free(entry);
9517         }
9518         return ret;
9519 }
9520
9521 static int process_duplicates(struct cache_tree *extent_cache,
9522                               struct extent_record *rec)
9523 {
9524         struct extent_record *good, *tmp;
9525         struct cache_extent *cache;
9526         int ret;
9527
9528         /*
9529          * If we found a extent record for this extent then return, or if we
9530          * have more than one duplicate we are likely going to need to delete
9531          * something.
9532          */
9533         if (rec->found_rec || rec->num_duplicates > 1)
9534                 return 0;
9535
9536         /* Shouldn't happen but just in case */
9537         BUG_ON(!rec->num_duplicates);
9538
9539         /*
9540          * So this happens if we end up with a backref that doesn't match the
9541          * actual extent entry.  So either the backref is bad or the extent
9542          * entry is bad.  Either way we want to have the extent_record actually
9543          * reflect what we found in the extent_tree, so we need to take the
9544          * duplicate out and use that as the extent_record since the only way we
9545          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9546          */
9547         remove_cache_extent(extent_cache, &rec->cache);
9548
9549         good = to_extent_record(rec->dups.next);
9550         list_del_init(&good->list);
9551         INIT_LIST_HEAD(&good->backrefs);
9552         INIT_LIST_HEAD(&good->dups);
9553         good->cache.start = good->start;
9554         good->cache.size = good->nr;
9555         good->content_checked = 0;
9556         good->owner_ref_checked = 0;
9557         good->num_duplicates = 0;
9558         good->refs = rec->refs;
9559         list_splice_init(&rec->backrefs, &good->backrefs);
9560         while (1) {
9561                 cache = lookup_cache_extent(extent_cache, good->start,
9562                                             good->nr);
9563                 if (!cache)
9564                         break;
9565                 tmp = container_of(cache, struct extent_record, cache);
9566
9567                 /*
9568                  * If we find another overlapping extent and it's found_rec is
9569                  * set then it's a duplicate and we need to try and delete
9570                  * something.
9571                  */
9572                 if (tmp->found_rec || tmp->num_duplicates > 0) {
9573                         if (list_empty(&good->list))
9574                                 list_add_tail(&good->list,
9575                                               &duplicate_extents);
9576                         good->num_duplicates += tmp->num_duplicates + 1;
9577                         list_splice_init(&tmp->dups, &good->dups);
9578                         list_del_init(&tmp->list);
9579                         list_add_tail(&tmp->list, &good->dups);
9580                         remove_cache_extent(extent_cache, &tmp->cache);
9581                         continue;
9582                 }
9583
9584                 /*
9585                  * Ok we have another non extent item backed extent rec, so lets
9586                  * just add it to this extent and carry on like we did above.
9587                  */
9588                 good->refs += tmp->refs;
9589                 list_splice_init(&tmp->backrefs, &good->backrefs);
9590                 remove_cache_extent(extent_cache, &tmp->cache);
9591                 free(tmp);
9592         }
9593         ret = insert_cache_extent(extent_cache, &good->cache);
9594         BUG_ON(ret);
9595         free(rec);
9596         return good->num_duplicates ? 0 : 1;
9597 }
9598
9599 static int delete_duplicate_records(struct btrfs_root *root,
9600                                     struct extent_record *rec)
9601 {
9602         struct btrfs_trans_handle *trans;
9603         LIST_HEAD(delete_list);
9604         struct btrfs_path path;
9605         struct extent_record *tmp, *good, *n;
9606         int nr_del = 0;
9607         int ret = 0, err;
9608         struct btrfs_key key;
9609
9610         btrfs_init_path(&path);
9611
9612         good = rec;
9613         /* Find the record that covers all of the duplicates. */
9614         list_for_each_entry(tmp, &rec->dups, list) {
9615                 if (good->start < tmp->start)
9616                         continue;
9617                 if (good->nr > tmp->nr)
9618                         continue;
9619
9620                 if (tmp->start + tmp->nr < good->start + good->nr) {
9621                         fprintf(stderr, "Ok we have overlapping extents that "
9622                                 "aren't completely covered by each other, this "
9623                                 "is going to require more careful thought.  "
9624                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9625                                 tmp->start, tmp->nr, good->start, good->nr);
9626                         abort();
9627                 }
9628                 good = tmp;
9629         }
9630
9631         if (good != rec)
9632                 list_add_tail(&rec->list, &delete_list);
9633
9634         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9635                 if (tmp == good)
9636                         continue;
9637                 list_move_tail(&tmp->list, &delete_list);
9638         }
9639
9640         root = root->fs_info->extent_root;
9641         trans = btrfs_start_transaction(root, 1);
9642         if (IS_ERR(trans)) {
9643                 ret = PTR_ERR(trans);
9644                 goto out;
9645         }
9646
9647         list_for_each_entry(tmp, &delete_list, list) {
9648                 if (tmp->found_rec == 0)
9649                         continue;
9650                 key.objectid = tmp->start;
9651                 key.type = BTRFS_EXTENT_ITEM_KEY;
9652                 key.offset = tmp->nr;
9653
9654                 /* Shouldn't happen but just in case */
9655                 if (tmp->metadata) {
9656                         fprintf(stderr, "Well this shouldn't happen, extent "
9657                                 "record overlaps but is metadata? "
9658                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9659                         abort();
9660                 }
9661
9662                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9663                 if (ret) {
9664                         if (ret > 0)
9665                                 ret = -EINVAL;
9666                         break;
9667                 }
9668                 ret = btrfs_del_item(trans, root, &path);
9669                 if (ret)
9670                         break;
9671                 btrfs_release_path(&path);
9672                 nr_del++;
9673         }
9674         err = btrfs_commit_transaction(trans, root);
9675         if (err && !ret)
9676                 ret = err;
9677 out:
9678         while (!list_empty(&delete_list)) {
9679                 tmp = to_extent_record(delete_list.next);
9680                 list_del_init(&tmp->list);
9681                 if (tmp == rec)
9682                         continue;
9683                 free(tmp);
9684         }
9685
9686         while (!list_empty(&rec->dups)) {
9687                 tmp = to_extent_record(rec->dups.next);
9688                 list_del_init(&tmp->list);
9689                 free(tmp);
9690         }
9691
9692         btrfs_release_path(&path);
9693
9694         if (!ret && !nr_del)
9695                 rec->num_duplicates = 0;
9696
9697         return ret ? ret : nr_del;
9698 }
9699
9700 static int find_possible_backrefs(struct btrfs_fs_info *info,
9701                                   struct btrfs_path *path,
9702                                   struct cache_tree *extent_cache,
9703                                   struct extent_record *rec)
9704 {
9705         struct btrfs_root *root;
9706         struct extent_backref *back, *tmp;
9707         struct data_backref *dback;
9708         struct cache_extent *cache;
9709         struct btrfs_file_extent_item *fi;
9710         struct btrfs_key key;
9711         u64 bytenr, bytes;
9712         int ret;
9713
9714         rbtree_postorder_for_each_entry_safe(back, tmp,
9715                                              &rec->backref_tree, node) {
9716                 /* Don't care about full backrefs (poor unloved backrefs) */
9717                 if (back->full_backref || !back->is_data)
9718                         continue;
9719
9720                 dback = to_data_backref(back);
9721
9722                 /* We found this one, we don't need to do a lookup */
9723                 if (dback->found_ref)
9724                         continue;
9725
9726                 key.objectid = dback->root;
9727                 key.type = BTRFS_ROOT_ITEM_KEY;
9728                 key.offset = (u64)-1;
9729
9730                 root = btrfs_read_fs_root(info, &key);
9731
9732                 /* No root, definitely a bad ref, skip */
9733                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9734                         continue;
9735                 /* Other err, exit */
9736                 if (IS_ERR(root))
9737                         return PTR_ERR(root);
9738
9739                 key.objectid = dback->owner;
9740                 key.type = BTRFS_EXTENT_DATA_KEY;
9741                 key.offset = dback->offset;
9742                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9743                 if (ret) {
9744                         btrfs_release_path(path);
9745                         if (ret < 0)
9746                                 return ret;
9747                         /* Didn't find it, we can carry on */
9748                         ret = 0;
9749                         continue;
9750                 }
9751
9752                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9753                                     struct btrfs_file_extent_item);
9754                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9755                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9756                 btrfs_release_path(path);
9757                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9758                 if (cache) {
9759                         struct extent_record *tmp;
9760                         tmp = container_of(cache, struct extent_record, cache);
9761
9762                         /*
9763                          * If we found an extent record for the bytenr for this
9764                          * particular backref then we can't add it to our
9765                          * current extent record.  We only want to add backrefs
9766                          * that don't have a corresponding extent item in the
9767                          * extent tree since they likely belong to this record
9768                          * and we need to fix it if it doesn't match bytenrs.
9769                          */
9770                         if  (tmp->found_rec)
9771                                 continue;
9772                 }
9773
9774                 dback->found_ref += 1;
9775                 dback->disk_bytenr = bytenr;
9776                 dback->bytes = bytes;
9777
9778                 /*
9779                  * Set this so the verify backref code knows not to trust the
9780                  * values in this backref.
9781                  */
9782                 back->broken = 1;
9783         }
9784
9785         return 0;
9786 }
9787
9788 /*
9789  * Record orphan data ref into corresponding root.
9790  *
9791  * Return 0 if the extent item contains data ref and recorded.
9792  * Return 1 if the extent item contains no useful data ref
9793  *   On that case, it may contains only shared_dataref or metadata backref
9794  *   or the file extent exists(this should be handled by the extent bytenr
9795  *   recovery routine)
9796  * Return <0 if something goes wrong.
9797  */
9798 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9799                                       struct extent_record *rec)
9800 {
9801         struct btrfs_key key;
9802         struct btrfs_root *dest_root;
9803         struct extent_backref *back, *tmp;
9804         struct data_backref *dback;
9805         struct orphan_data_extent *orphan;
9806         struct btrfs_path path;
9807         int recorded_data_ref = 0;
9808         int ret = 0;
9809
9810         if (rec->metadata)
9811                 return 1;
9812         btrfs_init_path(&path);
9813         rbtree_postorder_for_each_entry_safe(back, tmp,
9814                                              &rec->backref_tree, node) {
9815                 if (back->full_backref || !back->is_data ||
9816                     !back->found_extent_tree)
9817                         continue;
9818                 dback = to_data_backref(back);
9819                 if (dback->found_ref)
9820                         continue;
9821                 key.objectid = dback->root;
9822                 key.type = BTRFS_ROOT_ITEM_KEY;
9823                 key.offset = (u64)-1;
9824
9825                 dest_root = btrfs_read_fs_root(fs_info, &key);
9826
9827                 /* For non-exist root we just skip it */
9828                 if (IS_ERR(dest_root) || !dest_root)
9829                         continue;
9830
9831                 key.objectid = dback->owner;
9832                 key.type = BTRFS_EXTENT_DATA_KEY;
9833                 key.offset = dback->offset;
9834
9835                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9836                 btrfs_release_path(&path);
9837                 /*
9838                  * For ret < 0, it's OK since the fs-tree may be corrupted,
9839                  * we need to record it for inode/file extent rebuild.
9840                  * For ret > 0, we record it only for file extent rebuild.
9841                  * For ret == 0, the file extent exists but only bytenr
9842                  * mismatch, let the original bytenr fix routine to handle,
9843                  * don't record it.
9844                  */
9845                 if (ret == 0)
9846                         continue;
9847                 ret = 0;
9848                 orphan = malloc(sizeof(*orphan));
9849                 if (!orphan) {
9850                         ret = -ENOMEM;
9851                         goto out;
9852                 }
9853                 INIT_LIST_HEAD(&orphan->list);
9854                 orphan->root = dback->root;
9855                 orphan->objectid = dback->owner;
9856                 orphan->offset = dback->offset;
9857                 orphan->disk_bytenr = rec->cache.start;
9858                 orphan->disk_len = rec->cache.size;
9859                 list_add(&dest_root->orphan_data_extents, &orphan->list);
9860                 recorded_data_ref = 1;
9861         }
9862 out:
9863         btrfs_release_path(&path);
9864         if (!ret)
9865                 return !recorded_data_ref;
9866         else
9867                 return ret;
9868 }
9869
9870 /*
9871  * when an incorrect extent item is found, this will delete
9872  * all of the existing entries for it and recreate them
9873  * based on what the tree scan found.
9874  */
9875 static int fixup_extent_refs(struct btrfs_fs_info *info,
9876                              struct cache_tree *extent_cache,
9877                              struct extent_record *rec)
9878 {
9879         struct btrfs_trans_handle *trans = NULL;
9880         int ret;
9881         struct btrfs_path path;
9882         struct cache_extent *cache;
9883         struct extent_backref *back, *tmp;
9884         int allocated = 0;
9885         u64 flags = 0;
9886
9887         if (rec->flag_block_full_backref)
9888                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9889
9890         btrfs_init_path(&path);
9891         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9892                 /*
9893                  * Sometimes the backrefs themselves are so broken they don't
9894                  * get attached to any meaningful rec, so first go back and
9895                  * check any of our backrefs that we couldn't find and throw
9896                  * them into the list if we find the backref so that
9897                  * verify_backrefs can figure out what to do.
9898                  */
9899                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9900                 if (ret < 0)
9901                         goto out;
9902         }
9903
9904         /* step one, make sure all of the backrefs agree */
9905         ret = verify_backrefs(info, &path, rec);
9906         if (ret < 0)
9907                 goto out;
9908
9909         trans = btrfs_start_transaction(info->extent_root, 1);
9910         if (IS_ERR(trans)) {
9911                 ret = PTR_ERR(trans);
9912                 goto out;
9913         }
9914
9915         /* step two, delete all the existing records */
9916         ret = delete_extent_records(trans, info->extent_root, &path,
9917                                     rec->start);
9918
9919         if (ret < 0)
9920                 goto out;
9921
9922         /* was this block corrupt?  If so, don't add references to it */
9923         cache = lookup_cache_extent(info->corrupt_blocks,
9924                                     rec->start, rec->max_size);
9925         if (cache) {
9926                 ret = 0;
9927                 goto out;
9928         }
9929
9930         /* step three, recreate all the refs we did find */
9931         rbtree_postorder_for_each_entry_safe(back, tmp,
9932                                              &rec->backref_tree, node) {
9933                 /*
9934                  * if we didn't find any references, don't create a
9935                  * new extent record
9936                  */
9937                 if (!back->found_ref)
9938                         continue;
9939
9940                 rec->bad_full_backref = 0;
9941                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9942                 allocated = 1;
9943
9944                 if (ret)
9945                         goto out;
9946         }
9947 out:
9948         if (trans) {
9949                 int err = btrfs_commit_transaction(trans, info->extent_root);
9950                 if (!ret)
9951                         ret = err;
9952         }
9953
9954         if (!ret)
9955                 fprintf(stderr, "Repaired extent references for %llu\n",
9956                                 (unsigned long long)rec->start);
9957
9958         btrfs_release_path(&path);
9959         return ret;
9960 }
9961
9962 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9963                               struct extent_record *rec)
9964 {
9965         struct btrfs_trans_handle *trans;
9966         struct btrfs_root *root = fs_info->extent_root;
9967         struct btrfs_path path;
9968         struct btrfs_extent_item *ei;
9969         struct btrfs_key key;
9970         u64 flags;
9971         int ret = 0;
9972
9973         key.objectid = rec->start;
9974         if (rec->metadata) {
9975                 key.type = BTRFS_METADATA_ITEM_KEY;
9976                 key.offset = rec->info_level;
9977         } else {
9978                 key.type = BTRFS_EXTENT_ITEM_KEY;
9979                 key.offset = rec->max_size;
9980         }
9981
9982         trans = btrfs_start_transaction(root, 0);
9983         if (IS_ERR(trans))
9984                 return PTR_ERR(trans);
9985
9986         btrfs_init_path(&path);
9987         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9988         if (ret < 0) {
9989                 btrfs_release_path(&path);
9990                 btrfs_commit_transaction(trans, root);
9991                 return ret;
9992         } else if (ret) {
9993                 fprintf(stderr, "Didn't find extent for %llu\n",
9994                         (unsigned long long)rec->start);
9995                 btrfs_release_path(&path);
9996                 btrfs_commit_transaction(trans, root);
9997                 return -ENOENT;
9998         }
9999
10000         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10001                             struct btrfs_extent_item);
10002         flags = btrfs_extent_flags(path.nodes[0], ei);
10003         if (rec->flag_block_full_backref) {
10004                 fprintf(stderr, "setting full backref on %llu\n",
10005                         (unsigned long long)key.objectid);
10006                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10007         } else {
10008                 fprintf(stderr, "clearing full backref on %llu\n",
10009                         (unsigned long long)key.objectid);
10010                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10011         }
10012         btrfs_set_extent_flags(path.nodes[0], ei, flags);
10013         btrfs_mark_buffer_dirty(path.nodes[0]);
10014         btrfs_release_path(&path);
10015         ret = btrfs_commit_transaction(trans, root);
10016         if (!ret)
10017                 fprintf(stderr, "Repaired extent flags for %llu\n",
10018                                 (unsigned long long)rec->start);
10019
10020         return ret;
10021 }
10022
10023 /* right now we only prune from the extent allocation tree */
10024 static int prune_one_block(struct btrfs_trans_handle *trans,
10025                            struct btrfs_fs_info *info,
10026                            struct btrfs_corrupt_block *corrupt)
10027 {
10028         int ret;
10029         struct btrfs_path path;
10030         struct extent_buffer *eb;
10031         u64 found;
10032         int slot;
10033         int nritems;
10034         int level = corrupt->level + 1;
10035
10036         btrfs_init_path(&path);
10037 again:
10038         /* we want to stop at the parent to our busted block */
10039         path.lowest_level = level;
10040
10041         ret = btrfs_search_slot(trans, info->extent_root,
10042                                 &corrupt->key, &path, -1, 1);
10043
10044         if (ret < 0)
10045                 goto out;
10046
10047         eb = path.nodes[level];
10048         if (!eb) {
10049                 ret = -ENOENT;
10050                 goto out;
10051         }
10052
10053         /*
10054          * hopefully the search gave us the block we want to prune,
10055          * lets try that first
10056          */
10057         slot = path.slots[level];
10058         found =  btrfs_node_blockptr(eb, slot);
10059         if (found == corrupt->cache.start)
10060                 goto del_ptr;
10061
10062         nritems = btrfs_header_nritems(eb);
10063
10064         /* the search failed, lets scan this node and hope we find it */
10065         for (slot = 0; slot < nritems; slot++) {
10066                 found =  btrfs_node_blockptr(eb, slot);
10067                 if (found == corrupt->cache.start)
10068                         goto del_ptr;
10069         }
10070         /*
10071          * we couldn't find the bad block.  TODO, search all the nodes for pointers
10072          * to this block
10073          */
10074         if (eb == info->extent_root->node) {
10075                 ret = -ENOENT;
10076                 goto out;
10077         } else {
10078                 level++;
10079                 btrfs_release_path(&path);
10080                 goto again;
10081         }
10082
10083 del_ptr:
10084         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10085         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10086
10087 out:
10088         btrfs_release_path(&path);
10089         return ret;
10090 }
10091
10092 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10093 {
10094         struct btrfs_trans_handle *trans = NULL;
10095         struct cache_extent *cache;
10096         struct btrfs_corrupt_block *corrupt;
10097
10098         while (1) {
10099                 cache = search_cache_extent(info->corrupt_blocks, 0);
10100                 if (!cache)
10101                         break;
10102                 if (!trans) {
10103                         trans = btrfs_start_transaction(info->extent_root, 1);
10104                         if (IS_ERR(trans))
10105                                 return PTR_ERR(trans);
10106                 }
10107                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10108                 prune_one_block(trans, info, corrupt);
10109                 remove_cache_extent(info->corrupt_blocks, cache);
10110         }
10111         if (trans)
10112                 return btrfs_commit_transaction(trans, info->extent_root);
10113         return 0;
10114 }
10115
10116 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10117 {
10118         struct btrfs_block_group_cache *cache;
10119         u64 start, end;
10120         int ret;
10121
10122         while (1) {
10123                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10124                                             &start, &end, EXTENT_DIRTY);
10125                 if (ret)
10126                         break;
10127                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10128         }
10129
10130         start = 0;
10131         while (1) {
10132                 cache = btrfs_lookup_first_block_group(fs_info, start);
10133                 if (!cache)
10134                         break;
10135                 if (cache->cached)
10136                         cache->cached = 0;
10137                 start = cache->key.objectid + cache->key.offset;
10138         }
10139 }
10140
10141 static int check_extent_refs(struct btrfs_root *root,
10142                              struct cache_tree *extent_cache)
10143 {
10144         struct extent_record *rec;
10145         struct cache_extent *cache;
10146         int ret = 0;
10147         int had_dups = 0;
10148
10149         if (repair) {
10150                 /*
10151                  * if we're doing a repair, we have to make sure
10152                  * we don't allocate from the problem extents.
10153                  * In the worst case, this will be all the
10154                  * extents in the FS
10155                  */
10156                 cache = search_cache_extent(extent_cache, 0);
10157                 while(cache) {
10158                         rec = container_of(cache, struct extent_record, cache);
10159                         set_extent_dirty(root->fs_info->excluded_extents,
10160                                          rec->start,
10161                                          rec->start + rec->max_size - 1);
10162                         cache = next_cache_extent(cache);
10163                 }
10164
10165                 /* pin down all the corrupted blocks too */
10166                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10167                 while(cache) {
10168                         set_extent_dirty(root->fs_info->excluded_extents,
10169                                          cache->start,
10170                                          cache->start + cache->size - 1);
10171                         cache = next_cache_extent(cache);
10172                 }
10173                 prune_corrupt_blocks(root->fs_info);
10174                 reset_cached_block_groups(root->fs_info);
10175         }
10176
10177         reset_cached_block_groups(root->fs_info);
10178
10179         /*
10180          * We need to delete any duplicate entries we find first otherwise we
10181          * could mess up the extent tree when we have backrefs that actually
10182          * belong to a different extent item and not the weird duplicate one.
10183          */
10184         while (repair && !list_empty(&duplicate_extents)) {
10185                 rec = to_extent_record(duplicate_extents.next);
10186                 list_del_init(&rec->list);
10187
10188                 /* Sometimes we can find a backref before we find an actual
10189                  * extent, so we need to process it a little bit to see if there
10190                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10191                  * if this is a backref screwup.  If we need to delete stuff
10192                  * process_duplicates() will return 0, otherwise it will return
10193                  * 1 and we
10194                  */
10195                 if (process_duplicates(extent_cache, rec))
10196                         continue;
10197                 ret = delete_duplicate_records(root, rec);
10198                 if (ret < 0)
10199                         return ret;
10200                 /*
10201                  * delete_duplicate_records will return the number of entries
10202                  * deleted, so if it's greater than 0 then we know we actually
10203                  * did something and we need to remove.
10204                  */
10205                 if (ret)
10206                         had_dups = 1;
10207         }
10208
10209         if (had_dups)
10210                 return -EAGAIN;
10211
10212         while(1) {
10213                 int cur_err = 0;
10214                 int fix = 0;
10215
10216                 cache = search_cache_extent(extent_cache, 0);
10217                 if (!cache)
10218                         break;
10219                 rec = container_of(cache, struct extent_record, cache);
10220                 if (rec->num_duplicates) {
10221                         fprintf(stderr, "extent item %llu has multiple extent "
10222                                 "items\n", (unsigned long long)rec->start);
10223                         cur_err = 1;
10224                 }
10225
10226                 if (rec->refs != rec->extent_item_refs) {
10227                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
10228                                 (unsigned long long)rec->start,
10229                                 (unsigned long long)rec->nr);
10230                         fprintf(stderr, "extent item %llu, found %llu\n",
10231                                 (unsigned long long)rec->extent_item_refs,
10232                                 (unsigned long long)rec->refs);
10233                         ret = record_orphan_data_extents(root->fs_info, rec);
10234                         if (ret < 0)
10235                                 goto repair_abort;
10236                         fix = ret;
10237                         cur_err = 1;
10238                 }
10239                 if (all_backpointers_checked(rec, 1)) {
10240                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10241                                 (unsigned long long)rec->start,
10242                                 (unsigned long long)rec->nr);
10243                         fix = 1;
10244                         cur_err = 1;
10245                 }
10246                 if (!rec->owner_ref_checked) {
10247                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10248                                 (unsigned long long)rec->start,
10249                                 (unsigned long long)rec->nr);
10250                         fix = 1;
10251                         cur_err = 1;
10252                 }
10253
10254                 if (repair && fix) {
10255                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10256                         if (ret)
10257                                 goto repair_abort;
10258                 }
10259
10260
10261                 if (rec->bad_full_backref) {
10262                         fprintf(stderr, "bad full backref, on [%llu]\n",
10263                                 (unsigned long long)rec->start);
10264                         if (repair) {
10265                                 ret = fixup_extent_flags(root->fs_info, rec);
10266                                 if (ret)
10267                                         goto repair_abort;
10268                                 fix = 1;
10269                         }
10270                         cur_err = 1;
10271                 }
10272                 /*
10273                  * Although it's not a extent ref's problem, we reuse this
10274                  * routine for error reporting.
10275                  * No repair function yet.
10276                  */
10277                 if (rec->crossing_stripes) {
10278                         fprintf(stderr,
10279                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10280                                 rec->start, rec->start + rec->max_size);
10281                         cur_err = 1;
10282                 }
10283
10284                 if (rec->wrong_chunk_type) {
10285                         fprintf(stderr,
10286                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
10287                                 rec->start, rec->start + rec->max_size);
10288                         cur_err = 1;
10289                 }
10290
10291                 remove_cache_extent(extent_cache, cache);
10292                 free_all_extent_backrefs(rec);
10293                 if (!init_extent_tree && repair && (!cur_err || fix))
10294                         clear_extent_dirty(root->fs_info->excluded_extents,
10295                                            rec->start,
10296                                            rec->start + rec->max_size - 1);
10297                 free(rec);
10298         }
10299 repair_abort:
10300         if (repair) {
10301                 if (ret && ret != -EAGAIN) {
10302                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10303                         exit(1);
10304                 } else if (!ret) {
10305                         struct btrfs_trans_handle *trans;
10306
10307                         root = root->fs_info->extent_root;
10308                         trans = btrfs_start_transaction(root, 1);
10309                         if (IS_ERR(trans)) {
10310                                 ret = PTR_ERR(trans);
10311                                 goto repair_abort;
10312                         }
10313
10314                         ret = btrfs_fix_block_accounting(trans, root);
10315                         if (ret)
10316                                 goto repair_abort;
10317                         ret = btrfs_commit_transaction(trans, root);
10318                         if (ret)
10319                                 goto repair_abort;
10320                 }
10321                 return ret;
10322         }
10323         return 0;
10324 }
10325
10326 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10327 {
10328         u64 stripe_size;
10329
10330         if (type & BTRFS_BLOCK_GROUP_RAID0) {
10331                 stripe_size = length;
10332                 stripe_size /= num_stripes;
10333         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10334                 stripe_size = length * 2;
10335                 stripe_size /= num_stripes;
10336         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10337                 stripe_size = length;
10338                 stripe_size /= (num_stripes - 1);
10339         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10340                 stripe_size = length;
10341                 stripe_size /= (num_stripes - 2);
10342         } else {
10343                 stripe_size = length;
10344         }
10345         return stripe_size;
10346 }
10347
10348 /*
10349  * Check the chunk with its block group/dev list ref:
10350  * Return 0 if all refs seems valid.
10351  * Return 1 if part of refs seems valid, need later check for rebuild ref
10352  * like missing block group and needs to search extent tree to rebuild them.
10353  * Return -1 if essential refs are missing and unable to rebuild.
10354  */
10355 static int check_chunk_refs(struct chunk_record *chunk_rec,
10356                             struct block_group_tree *block_group_cache,
10357                             struct device_extent_tree *dev_extent_cache,
10358                             int silent)
10359 {
10360         struct cache_extent *block_group_item;
10361         struct block_group_record *block_group_rec;
10362         struct cache_extent *dev_extent_item;
10363         struct device_extent_record *dev_extent_rec;
10364         u64 devid;
10365         u64 offset;
10366         u64 length;
10367         int metadump_v2 = 0;
10368         int i;
10369         int ret = 0;
10370
10371         block_group_item = lookup_cache_extent(&block_group_cache->tree,
10372                                                chunk_rec->offset,
10373                                                chunk_rec->length);
10374         if (block_group_item) {
10375                 block_group_rec = container_of(block_group_item,
10376                                                struct block_group_record,
10377                                                cache);
10378                 if (chunk_rec->length != block_group_rec->offset ||
10379                     chunk_rec->offset != block_group_rec->objectid ||
10380                     (!metadump_v2 &&
10381                      chunk_rec->type_flags != block_group_rec->flags)) {
10382                         if (!silent)
10383                                 fprintf(stderr,
10384                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10385                                         chunk_rec->objectid,
10386                                         chunk_rec->type,
10387                                         chunk_rec->offset,
10388                                         chunk_rec->length,
10389                                         chunk_rec->offset,
10390                                         chunk_rec->type_flags,
10391                                         block_group_rec->objectid,
10392                                         block_group_rec->type,
10393                                         block_group_rec->offset,
10394                                         block_group_rec->offset,
10395                                         block_group_rec->objectid,
10396                                         block_group_rec->flags);
10397                         ret = -1;
10398                 } else {
10399                         list_del_init(&block_group_rec->list);
10400                         chunk_rec->bg_rec = block_group_rec;
10401                 }
10402         } else {
10403                 if (!silent)
10404                         fprintf(stderr,
10405                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10406                                 chunk_rec->objectid,
10407                                 chunk_rec->type,
10408                                 chunk_rec->offset,
10409                                 chunk_rec->length,
10410                                 chunk_rec->offset,
10411                                 chunk_rec->type_flags);
10412                 ret = 1;
10413         }
10414
10415         if (metadump_v2)
10416                 return ret;
10417
10418         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10419                                     chunk_rec->num_stripes);
10420         for (i = 0; i < chunk_rec->num_stripes; ++i) {
10421                 devid = chunk_rec->stripes[i].devid;
10422                 offset = chunk_rec->stripes[i].offset;
10423                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10424                                                        devid, offset, length);
10425                 if (dev_extent_item) {
10426                         dev_extent_rec = container_of(dev_extent_item,
10427                                                 struct device_extent_record,
10428                                                 cache);
10429                         if (dev_extent_rec->objectid != devid ||
10430                             dev_extent_rec->offset != offset ||
10431                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
10432                             dev_extent_rec->length != length) {
10433                                 if (!silent)
10434                                         fprintf(stderr,
10435                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10436                                                 chunk_rec->objectid,
10437                                                 chunk_rec->type,
10438                                                 chunk_rec->offset,
10439                                                 chunk_rec->stripes[i].devid,
10440                                                 chunk_rec->stripes[i].offset,
10441                                                 dev_extent_rec->objectid,
10442                                                 dev_extent_rec->offset,
10443                                                 dev_extent_rec->length);
10444                                 ret = -1;
10445                         } else {
10446                                 list_move(&dev_extent_rec->chunk_list,
10447                                           &chunk_rec->dextents);
10448                         }
10449                 } else {
10450                         if (!silent)
10451                                 fprintf(stderr,
10452                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10453                                         chunk_rec->objectid,
10454                                         chunk_rec->type,
10455                                         chunk_rec->offset,
10456                                         chunk_rec->stripes[i].devid,
10457                                         chunk_rec->stripes[i].offset);
10458                         ret = -1;
10459                 }
10460         }
10461         return ret;
10462 }
10463
10464 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
10465 int check_chunks(struct cache_tree *chunk_cache,
10466                  struct block_group_tree *block_group_cache,
10467                  struct device_extent_tree *dev_extent_cache,
10468                  struct list_head *good, struct list_head *bad,
10469                  struct list_head *rebuild, int silent)
10470 {
10471         struct cache_extent *chunk_item;
10472         struct chunk_record *chunk_rec;
10473         struct block_group_record *bg_rec;
10474         struct device_extent_record *dext_rec;
10475         int err;
10476         int ret = 0;
10477
10478         chunk_item = first_cache_extent(chunk_cache);
10479         while (chunk_item) {
10480                 chunk_rec = container_of(chunk_item, struct chunk_record,
10481                                          cache);
10482                 err = check_chunk_refs(chunk_rec, block_group_cache,
10483                                        dev_extent_cache, silent);
10484                 if (err < 0)
10485                         ret = err;
10486                 if (err == 0 && good)
10487                         list_add_tail(&chunk_rec->list, good);
10488                 if (err > 0 && rebuild)
10489                         list_add_tail(&chunk_rec->list, rebuild);
10490                 if (err < 0 && bad)
10491                         list_add_tail(&chunk_rec->list, bad);
10492                 chunk_item = next_cache_extent(chunk_item);
10493         }
10494
10495         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10496                 if (!silent)
10497                         fprintf(stderr,
10498                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10499                                 bg_rec->objectid,
10500                                 bg_rec->offset,
10501                                 bg_rec->flags);
10502                 if (!ret)
10503                         ret = 1;
10504         }
10505
10506         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10507                             chunk_list) {
10508                 if (!silent)
10509                         fprintf(stderr,
10510                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10511                                 dext_rec->objectid,
10512                                 dext_rec->offset,
10513                                 dext_rec->length);
10514                 if (!ret)
10515                         ret = 1;
10516         }
10517         return ret;
10518 }
10519
10520
10521 static int check_device_used(struct device_record *dev_rec,
10522                              struct device_extent_tree *dext_cache)
10523 {
10524         struct cache_extent *cache;
10525         struct device_extent_record *dev_extent_rec;
10526         u64 total_byte = 0;
10527
10528         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10529         while (cache) {
10530                 dev_extent_rec = container_of(cache,
10531                                               struct device_extent_record,
10532                                               cache);
10533                 if (dev_extent_rec->objectid != dev_rec->devid)
10534                         break;
10535
10536                 list_del_init(&dev_extent_rec->device_list);
10537                 total_byte += dev_extent_rec->length;
10538                 cache = next_cache_extent(cache);
10539         }
10540
10541         if (total_byte != dev_rec->byte_used) {
10542                 fprintf(stderr,
10543                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10544                         total_byte, dev_rec->byte_used, dev_rec->objectid,
10545                         dev_rec->type, dev_rec->offset);
10546                 return -1;
10547         } else {
10548                 return 0;
10549         }
10550 }
10551
10552 /* check btrfs_dev_item -> btrfs_dev_extent */
10553 static int check_devices(struct rb_root *dev_cache,
10554                          struct device_extent_tree *dev_extent_cache)
10555 {
10556         struct rb_node *dev_node;
10557         struct device_record *dev_rec;
10558         struct device_extent_record *dext_rec;
10559         int err;
10560         int ret = 0;
10561
10562         dev_node = rb_first(dev_cache);
10563         while (dev_node) {
10564                 dev_rec = container_of(dev_node, struct device_record, node);
10565                 err = check_device_used(dev_rec, dev_extent_cache);
10566                 if (err)
10567                         ret = err;
10568
10569                 dev_node = rb_next(dev_node);
10570         }
10571         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10572                             device_list) {
10573                 fprintf(stderr,
10574                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10575                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
10576                 if (!ret)
10577                         ret = 1;
10578         }
10579         return ret;
10580 }
10581
10582 static int add_root_item_to_list(struct list_head *head,
10583                                   u64 objectid, u64 bytenr, u64 last_snapshot,
10584                                   u8 level, u8 drop_level,
10585                                   struct btrfs_key *drop_key)
10586 {
10587
10588         struct root_item_record *ri_rec;
10589         ri_rec = malloc(sizeof(*ri_rec));
10590         if (!ri_rec)
10591                 return -ENOMEM;
10592         ri_rec->bytenr = bytenr;
10593         ri_rec->objectid = objectid;
10594         ri_rec->level = level;
10595         ri_rec->drop_level = drop_level;
10596         ri_rec->last_snapshot = last_snapshot;
10597         if (drop_key)
10598                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10599         list_add_tail(&ri_rec->list, head);
10600
10601         return 0;
10602 }
10603
10604 static void free_root_item_list(struct list_head *list)
10605 {
10606         struct root_item_record *ri_rec;
10607
10608         while (!list_empty(list)) {
10609                 ri_rec = list_first_entry(list, struct root_item_record,
10610                                           list);
10611                 list_del_init(&ri_rec->list);
10612                 free(ri_rec);
10613         }
10614 }
10615
10616 static int deal_root_from_list(struct list_head *list,
10617                                struct btrfs_root *root,
10618                                struct block_info *bits,
10619                                int bits_nr,
10620                                struct cache_tree *pending,
10621                                struct cache_tree *seen,
10622                                struct cache_tree *reada,
10623                                struct cache_tree *nodes,
10624                                struct cache_tree *extent_cache,
10625                                struct cache_tree *chunk_cache,
10626                                struct rb_root *dev_cache,
10627                                struct block_group_tree *block_group_cache,
10628                                struct device_extent_tree *dev_extent_cache)
10629 {
10630         int ret = 0;
10631         u64 last;
10632
10633         while (!list_empty(list)) {
10634                 struct root_item_record *rec;
10635                 struct extent_buffer *buf;
10636                 rec = list_entry(list->next,
10637                                  struct root_item_record, list);
10638                 last = 0;
10639                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10640                 if (!extent_buffer_uptodate(buf)) {
10641                         free_extent_buffer(buf);
10642                         ret = -EIO;
10643                         break;
10644                 }
10645                 ret = add_root_to_pending(buf, extent_cache, pending,
10646                                     seen, nodes, rec->objectid);
10647                 if (ret < 0)
10648                         break;
10649                 /*
10650                  * To rebuild extent tree, we need deal with snapshot
10651                  * one by one, otherwise we deal with node firstly which
10652                  * can maximize readahead.
10653                  */
10654                 while (1) {
10655                         ret = run_next_block(root, bits, bits_nr, &last,
10656                                              pending, seen, reada, nodes,
10657                                              extent_cache, chunk_cache,
10658                                              dev_cache, block_group_cache,
10659                                              dev_extent_cache, rec);
10660                         if (ret != 0)
10661                                 break;
10662                 }
10663                 free_extent_buffer(buf);
10664                 list_del(&rec->list);
10665                 free(rec);
10666                 if (ret < 0)
10667                         break;
10668         }
10669         while (ret >= 0) {
10670                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10671                                      reada, nodes, extent_cache, chunk_cache,
10672                                      dev_cache, block_group_cache,
10673                                      dev_extent_cache, NULL);
10674                 if (ret != 0) {
10675                         if (ret > 0)
10676                                 ret = 0;
10677                         break;
10678                 }
10679         }
10680         return ret;
10681 }
10682
10683 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10684 {
10685         struct rb_root dev_cache;
10686         struct cache_tree chunk_cache;
10687         struct block_group_tree block_group_cache;
10688         struct device_extent_tree dev_extent_cache;
10689         struct cache_tree extent_cache;
10690         struct cache_tree seen;
10691         struct cache_tree pending;
10692         struct cache_tree reada;
10693         struct cache_tree nodes;
10694         struct extent_io_tree excluded_extents;
10695         struct cache_tree corrupt_blocks;
10696         struct btrfs_path path;
10697         struct btrfs_key key;
10698         struct btrfs_key found_key;
10699         int ret, err = 0;
10700         struct block_info *bits;
10701         int bits_nr;
10702         struct extent_buffer *leaf;
10703         int slot;
10704         struct btrfs_root_item ri;
10705         struct list_head dropping_trees;
10706         struct list_head normal_trees;
10707         struct btrfs_root *root1;
10708         struct btrfs_root *root;
10709         u64 objectid;
10710         u8 level;
10711
10712         root = fs_info->fs_root;
10713         dev_cache = RB_ROOT;
10714         cache_tree_init(&chunk_cache);
10715         block_group_tree_init(&block_group_cache);
10716         device_extent_tree_init(&dev_extent_cache);
10717
10718         cache_tree_init(&extent_cache);
10719         cache_tree_init(&seen);
10720         cache_tree_init(&pending);
10721         cache_tree_init(&nodes);
10722         cache_tree_init(&reada);
10723         cache_tree_init(&corrupt_blocks);
10724         extent_io_tree_init(&excluded_extents);
10725         INIT_LIST_HEAD(&dropping_trees);
10726         INIT_LIST_HEAD(&normal_trees);
10727
10728         if (repair) {
10729                 fs_info->excluded_extents = &excluded_extents;
10730                 fs_info->fsck_extent_cache = &extent_cache;
10731                 fs_info->free_extent_hook = free_extent_hook;
10732                 fs_info->corrupt_blocks = &corrupt_blocks;
10733         }
10734
10735         bits_nr = 1024;
10736         bits = malloc(bits_nr * sizeof(struct block_info));
10737         if (!bits) {
10738                 perror("malloc");
10739                 exit(1);
10740         }
10741
10742         if (ctx.progress_enabled) {
10743                 ctx.tp = TASK_EXTENTS;
10744                 task_start(ctx.info);
10745         }
10746
10747 again:
10748         root1 = fs_info->tree_root;
10749         level = btrfs_header_level(root1->node);
10750         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10751                                     root1->node->start, 0, level, 0, NULL);
10752         if (ret < 0)
10753                 goto out;
10754         root1 = fs_info->chunk_root;
10755         level = btrfs_header_level(root1->node);
10756         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10757                                     root1->node->start, 0, level, 0, NULL);
10758         if (ret < 0)
10759                 goto out;
10760         btrfs_init_path(&path);
10761         key.offset = 0;
10762         key.objectid = 0;
10763         key.type = BTRFS_ROOT_ITEM_KEY;
10764         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10765         if (ret < 0)
10766                 goto out;
10767         while(1) {
10768                 leaf = path.nodes[0];
10769                 slot = path.slots[0];
10770                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10771                         ret = btrfs_next_leaf(root, &path);
10772                         if (ret != 0)
10773                                 break;
10774                         leaf = path.nodes[0];
10775                         slot = path.slots[0];
10776                 }
10777                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10778                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10779                         unsigned long offset;
10780                         u64 last_snapshot;
10781
10782                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10783                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10784                         last_snapshot = btrfs_root_last_snapshot(&ri);
10785                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10786                                 level = btrfs_root_level(&ri);
10787                                 ret = add_root_item_to_list(&normal_trees,
10788                                                 found_key.objectid,
10789                                                 btrfs_root_bytenr(&ri),
10790                                                 last_snapshot, level,
10791                                                 0, NULL);
10792                                 if (ret < 0)
10793                                         goto out;
10794                         } else {
10795                                 level = btrfs_root_level(&ri);
10796                                 objectid = found_key.objectid;
10797                                 btrfs_disk_key_to_cpu(&found_key,
10798                                                       &ri.drop_progress);
10799                                 ret = add_root_item_to_list(&dropping_trees,
10800                                                 objectid,
10801                                                 btrfs_root_bytenr(&ri),
10802                                                 last_snapshot, level,
10803                                                 ri.drop_level, &found_key);
10804                                 if (ret < 0)
10805                                         goto out;
10806                         }
10807                 }
10808                 path.slots[0]++;
10809         }
10810         btrfs_release_path(&path);
10811
10812         /*
10813          * check_block can return -EAGAIN if it fixes something, please keep
10814          * this in mind when dealing with return values from these functions, if
10815          * we get -EAGAIN we want to fall through and restart the loop.
10816          */
10817         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
10818                                   &seen, &reada, &nodes, &extent_cache,
10819                                   &chunk_cache, &dev_cache, &block_group_cache,
10820                                   &dev_extent_cache);
10821         if (ret < 0) {
10822                 if (ret == -EAGAIN)
10823                         goto loop;
10824                 goto out;
10825         }
10826         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
10827                                   &pending, &seen, &reada, &nodes,
10828                                   &extent_cache, &chunk_cache, &dev_cache,
10829                                   &block_group_cache, &dev_extent_cache);
10830         if (ret < 0) {
10831                 if (ret == -EAGAIN)
10832                         goto loop;
10833                 goto out;
10834         }
10835
10836         ret = check_chunks(&chunk_cache, &block_group_cache,
10837                            &dev_extent_cache, NULL, NULL, NULL, 0);
10838         if (ret) {
10839                 if (ret == -EAGAIN)
10840                         goto loop;
10841                 err = ret;
10842         }
10843
10844         ret = check_extent_refs(root, &extent_cache);
10845         if (ret < 0) {
10846                 if (ret == -EAGAIN)
10847                         goto loop;
10848                 goto out;
10849         }
10850
10851         ret = check_devices(&dev_cache, &dev_extent_cache);
10852         if (ret && err)
10853                 ret = err;
10854
10855 out:
10856         task_stop(ctx.info);
10857         if (repair) {
10858                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10859                 extent_io_tree_cleanup(&excluded_extents);
10860                 fs_info->fsck_extent_cache = NULL;
10861                 fs_info->free_extent_hook = NULL;
10862                 fs_info->corrupt_blocks = NULL;
10863                 fs_info->excluded_extents = NULL;
10864         }
10865         free(bits);
10866         free_chunk_cache_tree(&chunk_cache);
10867         free_device_cache_tree(&dev_cache);
10868         free_block_group_tree(&block_group_cache);
10869         free_device_extent_tree(&dev_extent_cache);
10870         free_extent_cache_tree(&seen);
10871         free_extent_cache_tree(&pending);
10872         free_extent_cache_tree(&reada);
10873         free_extent_cache_tree(&nodes);
10874         free_root_item_list(&normal_trees);
10875         free_root_item_list(&dropping_trees);
10876         return ret;
10877 loop:
10878         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10879         free_extent_cache_tree(&seen);
10880         free_extent_cache_tree(&pending);
10881         free_extent_cache_tree(&reada);
10882         free_extent_cache_tree(&nodes);
10883         free_chunk_cache_tree(&chunk_cache);
10884         free_block_group_tree(&block_group_cache);
10885         free_device_cache_tree(&dev_cache);
10886         free_device_extent_tree(&dev_extent_cache);
10887         free_extent_record_cache(&extent_cache);
10888         free_root_item_list(&normal_trees);
10889         free_root_item_list(&dropping_trees);
10890         extent_io_tree_cleanup(&excluded_extents);
10891         goto again;
10892 }
10893
10894 /*
10895  * Check backrefs of a tree block given by @bytenr or @eb.
10896  *
10897  * @root:       the root containing the @bytenr or @eb
10898  * @eb:         tree block extent buffer, can be NULL
10899  * @bytenr:     bytenr of the tree block to search
10900  * @level:      tree level of the tree block
10901  * @owner:      owner of the tree block
10902  *
10903  * Return >0 for any error found and output error message
10904  * Return 0 for no error found
10905  */
10906 static int check_tree_block_ref(struct btrfs_root *root,
10907                                 struct extent_buffer *eb, u64 bytenr,
10908                                 int level, u64 owner)
10909 {
10910         struct btrfs_key key;
10911         struct btrfs_root *extent_root = root->fs_info->extent_root;
10912         struct btrfs_path path;
10913         struct btrfs_extent_item *ei;
10914         struct btrfs_extent_inline_ref *iref;
10915         struct extent_buffer *leaf;
10916         unsigned long end;
10917         unsigned long ptr;
10918         int slot;
10919         int skinny_level;
10920         int type;
10921         u32 nodesize = root->fs_info->nodesize;
10922         u32 item_size;
10923         u64 offset;
10924         int tree_reloc_root = 0;
10925         int found_ref = 0;
10926         int err = 0;
10927         int ret;
10928
10929         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10930             btrfs_header_bytenr(root->node) == bytenr)
10931                 tree_reloc_root = 1;
10932
10933         btrfs_init_path(&path);
10934         key.objectid = bytenr;
10935         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10936                 key.type = BTRFS_METADATA_ITEM_KEY;
10937         else
10938                 key.type = BTRFS_EXTENT_ITEM_KEY;
10939         key.offset = (u64)-1;
10940
10941         /* Search for the backref in extent tree */
10942         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10943         if (ret < 0) {
10944                 err |= BACKREF_MISSING;
10945                 goto out;
10946         }
10947         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10948         if (ret) {
10949                 err |= BACKREF_MISSING;
10950                 goto out;
10951         }
10952
10953         leaf = path.nodes[0];
10954         slot = path.slots[0];
10955         btrfs_item_key_to_cpu(leaf, &key, slot);
10956
10957         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10958
10959         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10960                 skinny_level = (int)key.offset;
10961                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10962         } else {
10963                 struct btrfs_tree_block_info *info;
10964
10965                 info = (struct btrfs_tree_block_info *)(ei + 1);
10966                 skinny_level = btrfs_tree_block_level(leaf, info);
10967                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10968         }
10969
10970         if (eb) {
10971                 u64 header_gen;
10972                 u64 extent_gen;
10973
10974                 if (!(btrfs_extent_flags(leaf, ei) &
10975                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10976                         error(
10977                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10978                                 key.objectid, nodesize,
10979                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10980                         err = BACKREF_MISMATCH;
10981                 }
10982                 header_gen = btrfs_header_generation(eb);
10983                 extent_gen = btrfs_extent_generation(leaf, ei);
10984                 if (header_gen != extent_gen) {
10985                         error(
10986         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10987                                 key.objectid, nodesize, header_gen,
10988                                 extent_gen);
10989                         err = BACKREF_MISMATCH;
10990                 }
10991                 if (level != skinny_level) {
10992                         error(
10993                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10994                                 key.objectid, nodesize, level, skinny_level);
10995                         err = BACKREF_MISMATCH;
10996                 }
10997                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10998                         error(
10999                         "extent[%llu %u] is referred by other roots than %llu",
11000                                 key.objectid, nodesize, root->objectid);
11001                         err = BACKREF_MISMATCH;
11002                 }
11003         }
11004
11005         /*
11006          * Iterate the extent/metadata item to find the exact backref
11007          */
11008         item_size = btrfs_item_size_nr(leaf, slot);
11009         ptr = (unsigned long)iref;
11010         end = (unsigned long)ei + item_size;
11011         while (ptr < end) {
11012                 iref = (struct btrfs_extent_inline_ref *)ptr;
11013                 type = btrfs_extent_inline_ref_type(leaf, iref);
11014                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11015
11016                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11017                         (offset == root->objectid || offset == owner)) {
11018                         found_ref = 1;
11019                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11020                         /*
11021                          * Backref of tree reloc root points to itself, no need
11022                          * to check backref any more.
11023                          */
11024                         if (tree_reloc_root)
11025                                 found_ref = 1;
11026                         else
11027                         /* Check if the backref points to valid referencer */
11028                                 found_ref = !check_tree_block_ref(root, NULL,
11029                                                 offset, level + 1, owner);
11030                 }
11031
11032                 if (found_ref)
11033                         break;
11034                 ptr += btrfs_extent_inline_ref_size(type);
11035         }
11036
11037         /*
11038          * Inlined extent item doesn't have what we need, check
11039          * TREE_BLOCK_REF_KEY
11040          */
11041         if (!found_ref) {
11042                 btrfs_release_path(&path);
11043                 key.objectid = bytenr;
11044                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11045                 key.offset = root->objectid;
11046
11047                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11048                 if (!ret)
11049                         found_ref = 1;
11050         }
11051         if (!found_ref)
11052                 err |= BACKREF_MISSING;
11053 out:
11054         btrfs_release_path(&path);
11055         if (eb && (err & BACKREF_MISSING))
11056                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
11057                         bytenr, nodesize, owner, level);
11058         return err;
11059 }
11060
11061 /*
11062  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11063  *
11064  * Return >0 any error found and output error message
11065  * Return 0 for no error found
11066  */
11067 static int check_extent_data_item(struct btrfs_root *root,
11068                                   struct extent_buffer *eb, int slot)
11069 {
11070         struct btrfs_file_extent_item *fi;
11071         struct btrfs_path path;
11072         struct btrfs_root *extent_root = root->fs_info->extent_root;
11073         struct btrfs_key fi_key;
11074         struct btrfs_key dbref_key;
11075         struct extent_buffer *leaf;
11076         struct btrfs_extent_item *ei;
11077         struct btrfs_extent_inline_ref *iref;
11078         struct btrfs_extent_data_ref *dref;
11079         u64 owner;
11080         u64 disk_bytenr;
11081         u64 disk_num_bytes;
11082         u64 extent_num_bytes;
11083         u64 extent_flags;
11084         u32 item_size;
11085         unsigned long end;
11086         unsigned long ptr;
11087         int type;
11088         u64 ref_root;
11089         int found_dbackref = 0;
11090         int err = 0;
11091         int ret;
11092
11093         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11094         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11095
11096         /* Nothing to check for hole and inline data extents */
11097         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11098             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11099                 return 0;
11100
11101         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11102         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11103         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11104
11105         /* Check unaligned disk_num_bytes and num_bytes */
11106         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11107                 error(
11108 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11109                         fi_key.objectid, fi_key.offset, disk_num_bytes,
11110                         root->fs_info->sectorsize);
11111                 err |= BYTES_UNALIGNED;
11112         } else {
11113                 data_bytes_allocated += disk_num_bytes;
11114         }
11115         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11116                 error(
11117 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11118                         fi_key.objectid, fi_key.offset, extent_num_bytes,
11119                         root->fs_info->sectorsize);
11120                 err |= BYTES_UNALIGNED;
11121         } else {
11122                 data_bytes_referenced += extent_num_bytes;
11123         }
11124         owner = btrfs_header_owner(eb);
11125
11126         /* Check the extent item of the file extent in extent tree */
11127         btrfs_init_path(&path);
11128         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11129         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11130         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11131
11132         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11133         if (ret)
11134                 goto out;
11135
11136         leaf = path.nodes[0];
11137         slot = path.slots[0];
11138         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11139
11140         extent_flags = btrfs_extent_flags(leaf, ei);
11141
11142         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11143                 error(
11144                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11145                     disk_bytenr, disk_num_bytes,
11146                     BTRFS_EXTENT_FLAG_DATA);
11147                 err |= BACKREF_MISMATCH;
11148         }
11149
11150         /* Check data backref inside that extent item */
11151         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11152         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11153         ptr = (unsigned long)iref;
11154         end = (unsigned long)ei + item_size;
11155         while (ptr < end) {
11156                 iref = (struct btrfs_extent_inline_ref *)ptr;
11157                 type = btrfs_extent_inline_ref_type(leaf, iref);
11158                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11159
11160                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11161                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
11162                         if (ref_root == owner || ref_root == root->objectid)
11163                                 found_dbackref = 1;
11164                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
11165                         found_dbackref = !check_tree_block_ref(root, NULL,
11166                                 btrfs_extent_inline_ref_offset(leaf, iref),
11167                                 0, owner);
11168                 }
11169
11170                 if (found_dbackref)
11171                         break;
11172                 ptr += btrfs_extent_inline_ref_size(type);
11173         }
11174
11175         if (!found_dbackref) {
11176                 btrfs_release_path(&path);
11177
11178                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11179                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11180                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
11181                 dbref_key.offset = hash_extent_data_ref(root->objectid,
11182                                 fi_key.objectid, fi_key.offset);
11183
11184                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11185                                         &dbref_key, &path, 0, 0);
11186                 if (!ret) {
11187                         found_dbackref = 1;
11188                         goto out;
11189                 }
11190
11191                 btrfs_release_path(&path);
11192
11193                 /*
11194                  * Neither inlined nor EXTENT_DATA_REF found, try
11195                  * SHARED_DATA_REF as last chance.
11196                  */
11197                 dbref_key.objectid = disk_bytenr;
11198                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11199                 dbref_key.offset = eb->start;
11200
11201                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11202                                         &dbref_key, &path, 0, 0);
11203                 if (!ret) {
11204                         found_dbackref = 1;
11205                         goto out;
11206                 }
11207         }
11208
11209 out:
11210         if (!found_dbackref)
11211                 err |= BACKREF_MISSING;
11212         btrfs_release_path(&path);
11213         if (err & BACKREF_MISSING) {
11214                 error("data extent[%llu %llu] backref lost",
11215                       disk_bytenr, disk_num_bytes);
11216         }
11217         return err;
11218 }
11219
11220 /*
11221  * Get real tree block level for the case like shared block
11222  * Return >= 0 as tree level
11223  * Return <0 for error
11224  */
11225 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11226 {
11227         struct extent_buffer *eb;
11228         struct btrfs_path path;
11229         struct btrfs_key key;
11230         struct btrfs_extent_item *ei;
11231         u64 flags;
11232         u64 transid;
11233         u8 backref_level;
11234         u8 header_level;
11235         int ret;
11236
11237         /* Search extent tree for extent generation and level */
11238         key.objectid = bytenr;
11239         key.type = BTRFS_METADATA_ITEM_KEY;
11240         key.offset = (u64)-1;
11241
11242         btrfs_init_path(&path);
11243         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11244         if (ret < 0)
11245                 goto release_out;
11246         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11247         if (ret < 0)
11248                 goto release_out;
11249         if (ret > 0) {
11250                 ret = -ENOENT;
11251                 goto release_out;
11252         }
11253
11254         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11255         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11256                             struct btrfs_extent_item);
11257         flags = btrfs_extent_flags(path.nodes[0], ei);
11258         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11259                 ret = -ENOENT;
11260                 goto release_out;
11261         }
11262
11263         /* Get transid for later read_tree_block() check */
11264         transid = btrfs_extent_generation(path.nodes[0], ei);
11265
11266         /* Get backref level as one source */
11267         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11268                 backref_level = key.offset;
11269         } else {
11270                 struct btrfs_tree_block_info *info;
11271
11272                 info = (struct btrfs_tree_block_info *)(ei + 1);
11273                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11274         }
11275         btrfs_release_path(&path);
11276
11277         /* Get level from tree block as an alternative source */
11278         eb = read_tree_block(fs_info, bytenr, transid);
11279         if (!extent_buffer_uptodate(eb)) {
11280                 free_extent_buffer(eb);
11281                 return -EIO;
11282         }
11283         header_level = btrfs_header_level(eb);
11284         free_extent_buffer(eb);
11285
11286         if (header_level != backref_level)
11287                 return -EIO;
11288         return header_level;
11289
11290 release_out:
11291         btrfs_release_path(&path);
11292         return ret;
11293 }
11294
11295 /*
11296  * Check if a tree block backref is valid (points to a valid tree block)
11297  * if level == -1, level will be resolved
11298  * Return >0 for any error found and print error message
11299  */
11300 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11301                                     u64 bytenr, int level)
11302 {
11303         struct btrfs_root *root;
11304         struct btrfs_key key;
11305         struct btrfs_path path;
11306         struct extent_buffer *eb;
11307         struct extent_buffer *node;
11308         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11309         int err = 0;
11310         int ret;
11311
11312         /* Query level for level == -1 special case */
11313         if (level == -1)
11314                 level = query_tree_block_level(fs_info, bytenr);
11315         if (level < 0) {
11316                 err |= REFERENCER_MISSING;
11317                 goto out;
11318         }
11319
11320         key.objectid = root_id;
11321         key.type = BTRFS_ROOT_ITEM_KEY;
11322         key.offset = (u64)-1;
11323
11324         root = btrfs_read_fs_root(fs_info, &key);
11325         if (IS_ERR(root)) {
11326                 err |= REFERENCER_MISSING;
11327                 goto out;
11328         }
11329
11330         /* Read out the tree block to get item/node key */
11331         eb = read_tree_block(fs_info, bytenr, 0);
11332         if (!extent_buffer_uptodate(eb)) {
11333                 err |= REFERENCER_MISSING;
11334                 free_extent_buffer(eb);
11335                 goto out;
11336         }
11337
11338         /* Empty tree, no need to check key */
11339         if (!btrfs_header_nritems(eb) && !level) {
11340                 free_extent_buffer(eb);
11341                 goto out;
11342         }
11343
11344         if (level)
11345                 btrfs_node_key_to_cpu(eb, &key, 0);
11346         else
11347                 btrfs_item_key_to_cpu(eb, &key, 0);
11348
11349         free_extent_buffer(eb);
11350
11351         btrfs_init_path(&path);
11352         path.lowest_level = level;
11353         /* Search with the first key, to ensure we can reach it */
11354         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11355         if (ret < 0) {
11356                 err |= REFERENCER_MISSING;
11357                 goto release_out;
11358         }
11359
11360         node = path.nodes[level];
11361         if (btrfs_header_bytenr(node) != bytenr) {
11362                 error(
11363         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11364                         bytenr, nodesize, bytenr,
11365                         btrfs_header_bytenr(node));
11366                 err |= REFERENCER_MISMATCH;
11367         }
11368         if (btrfs_header_level(node) != level) {
11369                 error(
11370         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11371                         bytenr, nodesize, level,
11372                         btrfs_header_level(node));
11373                 err |= REFERENCER_MISMATCH;
11374         }
11375
11376 release_out:
11377         btrfs_release_path(&path);
11378 out:
11379         if (err & REFERENCER_MISSING) {
11380                 if (level < 0)
11381                         error("extent [%llu %d] lost referencer (owner: %llu)",
11382                                 bytenr, nodesize, root_id);
11383                 else
11384                         error(
11385                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11386                                 bytenr, nodesize, root_id, level);
11387         }
11388
11389         return err;
11390 }
11391
11392 /*
11393  * Check if tree block @eb is tree reloc root.
11394  * Return 0 if it's not or any problem happens
11395  * Return 1 if it's a tree reloc root
11396  */
11397 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11398                                  struct extent_buffer *eb)
11399 {
11400         struct btrfs_root *tree_reloc_root;
11401         struct btrfs_key key;
11402         u64 bytenr = btrfs_header_bytenr(eb);
11403         u64 owner = btrfs_header_owner(eb);
11404         int ret = 0;
11405
11406         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11407         key.offset = owner;
11408         key.type = BTRFS_ROOT_ITEM_KEY;
11409
11410         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11411         if (IS_ERR(tree_reloc_root))
11412                 return 0;
11413
11414         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
11415                 ret = 1;
11416         btrfs_free_fs_root(tree_reloc_root);
11417         return ret;
11418 }
11419
11420 /*
11421  * Check referencer for shared block backref
11422  * If level == -1, this function will resolve the level.
11423  */
11424 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11425                                      u64 parent, u64 bytenr, int level)
11426 {
11427         struct extent_buffer *eb;
11428         u32 nr;
11429         int found_parent = 0;
11430         int i;
11431
11432         eb = read_tree_block(fs_info, parent, 0);
11433         if (!extent_buffer_uptodate(eb))
11434                 goto out;
11435
11436         if (level == -1)
11437                 level = query_tree_block_level(fs_info, bytenr);
11438         if (level < 0)
11439                 goto out;
11440
11441         /* It's possible it's a tree reloc root */
11442         if (parent == bytenr) {
11443                 if (is_tree_reloc_root(fs_info, eb))
11444                         found_parent = 1;
11445                 goto out;
11446         }
11447
11448         if (level + 1 != btrfs_header_level(eb))
11449                 goto out;
11450
11451         nr = btrfs_header_nritems(eb);
11452         for (i = 0; i < nr; i++) {
11453                 if (bytenr == btrfs_node_blockptr(eb, i)) {
11454                         found_parent = 1;
11455                         break;
11456                 }
11457         }
11458 out:
11459         free_extent_buffer(eb);
11460         if (!found_parent) {
11461                 error(
11462         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
11463                         bytenr, fs_info->nodesize, parent, level);
11464                 return REFERENCER_MISSING;
11465         }
11466         return 0;
11467 }
11468
11469 /*
11470  * Check referencer for normal (inlined) data ref
11471  * If len == 0, it will be resolved by searching in extent tree
11472  */
11473 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
11474                                      u64 root_id, u64 objectid, u64 offset,
11475                                      u64 bytenr, u64 len, u32 count)
11476 {
11477         struct btrfs_root *root;
11478         struct btrfs_root *extent_root = fs_info->extent_root;
11479         struct btrfs_key key;
11480         struct btrfs_path path;
11481         struct extent_buffer *leaf;
11482         struct btrfs_file_extent_item *fi;
11483         u32 found_count = 0;
11484         int slot;
11485         int ret = 0;
11486
11487         if (!len) {
11488                 key.objectid = bytenr;
11489                 key.type = BTRFS_EXTENT_ITEM_KEY;
11490                 key.offset = (u64)-1;
11491
11492                 btrfs_init_path(&path);
11493                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11494                 if (ret < 0)
11495                         goto out;
11496                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11497                 if (ret)
11498                         goto out;
11499                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11500                 if (key.objectid != bytenr ||
11501                     key.type != BTRFS_EXTENT_ITEM_KEY)
11502                         goto out;
11503                 len = key.offset;
11504                 btrfs_release_path(&path);
11505         }
11506         key.objectid = root_id;
11507         key.type = BTRFS_ROOT_ITEM_KEY;
11508         key.offset = (u64)-1;
11509         btrfs_init_path(&path);
11510
11511         root = btrfs_read_fs_root(fs_info, &key);
11512         if (IS_ERR(root))
11513                 goto out;
11514
11515         key.objectid = objectid;
11516         key.type = BTRFS_EXTENT_DATA_KEY;
11517         /*
11518          * It can be nasty as data backref offset is
11519          * file offset - file extent offset, which is smaller or
11520          * equal to original backref offset.  The only special case is
11521          * overflow.  So we need to special check and do further search.
11522          */
11523         key.offset = offset & (1ULL << 63) ? 0 : offset;
11524
11525         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11526         if (ret < 0)
11527                 goto out;
11528
11529         /*
11530          * Search afterwards to get correct one
11531          * NOTE: As we must do a comprehensive check on the data backref to
11532          * make sure the dref count also matches, we must iterate all file
11533          * extents for that inode.
11534          */
11535         while (1) {
11536                 leaf = path.nodes[0];
11537                 slot = path.slots[0];
11538
11539                 if (slot >= btrfs_header_nritems(leaf))
11540                         goto next;
11541                 btrfs_item_key_to_cpu(leaf, &key, slot);
11542                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11543                         break;
11544                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11545                 /*
11546                  * Except normal disk bytenr and disk num bytes, we still
11547                  * need to do extra check on dbackref offset as
11548                  * dbackref offset = file_offset - file_extent_offset
11549                  */
11550                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11551                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11552                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11553                     offset)
11554                         found_count++;
11555
11556 next:
11557                 ret = btrfs_next_item(root, &path);
11558                 if (ret)
11559                         break;
11560         }
11561 out:
11562         btrfs_release_path(&path);
11563         if (found_count != count) {
11564                 error(
11565 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11566                         bytenr, len, root_id, objectid, offset, count, found_count);
11567                 return REFERENCER_MISSING;
11568         }
11569         return 0;
11570 }
11571
11572 /*
11573  * Check if the referencer of a shared data backref exists
11574  */
11575 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11576                                      u64 parent, u64 bytenr)
11577 {
11578         struct extent_buffer *eb;
11579         struct btrfs_key key;
11580         struct btrfs_file_extent_item *fi;
11581         u32 nr;
11582         int found_parent = 0;
11583         int i;
11584
11585         eb = read_tree_block(fs_info, parent, 0);
11586         if (!extent_buffer_uptodate(eb))
11587                 goto out;
11588
11589         nr = btrfs_header_nritems(eb);
11590         for (i = 0; i < nr; i++) {
11591                 btrfs_item_key_to_cpu(eb, &key, i);
11592                 if (key.type != BTRFS_EXTENT_DATA_KEY)
11593                         continue;
11594
11595                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
11596                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11597                         continue;
11598
11599                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11600                         found_parent = 1;
11601                         break;
11602                 }
11603         }
11604
11605 out:
11606         free_extent_buffer(eb);
11607         if (!found_parent) {
11608                 error("shared extent %llu referencer lost (parent: %llu)",
11609                         bytenr, parent);
11610                 return REFERENCER_MISSING;
11611         }
11612         return 0;
11613 }
11614
11615 /*
11616  * This function will check a given extent item, including its backref and
11617  * itself (like crossing stripe boundary and type)
11618  *
11619  * Since we don't use extent_record anymore, introduce new error bit
11620  */
11621 static int check_extent_item(struct btrfs_fs_info *fs_info,
11622                              struct extent_buffer *eb, int slot)
11623 {
11624         struct btrfs_extent_item *ei;
11625         struct btrfs_extent_inline_ref *iref;
11626         struct btrfs_extent_data_ref *dref;
11627         unsigned long end;
11628         unsigned long ptr;
11629         int type;
11630         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11631         u32 item_size = btrfs_item_size_nr(eb, slot);
11632         u64 flags;
11633         u64 offset;
11634         int metadata = 0;
11635         int level;
11636         struct btrfs_key key;
11637         int ret;
11638         int err = 0;
11639
11640         btrfs_item_key_to_cpu(eb, &key, slot);
11641         if (key.type == BTRFS_EXTENT_ITEM_KEY)
11642                 bytes_used += key.offset;
11643         else
11644                 bytes_used += nodesize;
11645
11646         if (item_size < sizeof(*ei)) {
11647                 /*
11648                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
11649                  * old thing when on disk format is still un-determined.
11650                  * No need to care about it anymore
11651                  */
11652                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
11653                 return -ENOTTY;
11654         }
11655
11656         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
11657         flags = btrfs_extent_flags(eb, ei);
11658
11659         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
11660                 metadata = 1;
11661         if (metadata && check_crossing_stripes(global_info, key.objectid,
11662                                                eb->len)) {
11663                 error("bad metadata [%llu, %llu) crossing stripe boundary",
11664                       key.objectid, key.objectid + nodesize);
11665                 err |= CROSSING_STRIPE_BOUNDARY;
11666         }
11667
11668         ptr = (unsigned long)(ei + 1);
11669
11670         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11671                 /* Old EXTENT_ITEM metadata */
11672                 struct btrfs_tree_block_info *info;
11673
11674                 info = (struct btrfs_tree_block_info *)ptr;
11675                 level = btrfs_tree_block_level(eb, info);
11676                 ptr += sizeof(struct btrfs_tree_block_info);
11677         } else {
11678                 /* New METADATA_ITEM */
11679                 level = key.offset;
11680         }
11681         end = (unsigned long)ei + item_size;
11682
11683 next:
11684         /* Reached extent item end normally */
11685         if (ptr == end)
11686                 goto out;
11687
11688         /* Beyond extent item end, wrong item size */
11689         if (ptr > end) {
11690                 err |= ITEM_SIZE_MISMATCH;
11691                 error("extent item at bytenr %llu slot %d has wrong size",
11692                         eb->start, slot);
11693                 goto out;
11694         }
11695
11696         /* Now check every backref in this extent item */
11697         iref = (struct btrfs_extent_inline_ref *)ptr;
11698         type = btrfs_extent_inline_ref_type(eb, iref);
11699         offset = btrfs_extent_inline_ref_offset(eb, iref);
11700         switch (type) {
11701         case BTRFS_TREE_BLOCK_REF_KEY:
11702                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11703                                                level);
11704                 err |= ret;
11705                 break;
11706         case BTRFS_SHARED_BLOCK_REF_KEY:
11707                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11708                                                  level);
11709                 err |= ret;
11710                 break;
11711         case BTRFS_EXTENT_DATA_REF_KEY:
11712                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11713                 ret = check_extent_data_backref(fs_info,
11714                                 btrfs_extent_data_ref_root(eb, dref),
11715                                 btrfs_extent_data_ref_objectid(eb, dref),
11716                                 btrfs_extent_data_ref_offset(eb, dref),
11717                                 key.objectid, key.offset,
11718                                 btrfs_extent_data_ref_count(eb, dref));
11719                 err |= ret;
11720                 break;
11721         case BTRFS_SHARED_DATA_REF_KEY:
11722                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11723                 err |= ret;
11724                 break;
11725         default:
11726                 error("extent[%llu %d %llu] has unknown ref type: %d",
11727                         key.objectid, key.type, key.offset, type);
11728                 err |= UNKNOWN_TYPE;
11729                 goto out;
11730         }
11731
11732         ptr += btrfs_extent_inline_ref_size(type);
11733         goto next;
11734
11735 out:
11736         return err;
11737 }
11738
11739 /*
11740  * Check if a dev extent item is referred correctly by its chunk
11741  */
11742 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
11743                                  struct extent_buffer *eb, int slot)
11744 {
11745         struct btrfs_root *chunk_root = fs_info->chunk_root;
11746         struct btrfs_dev_extent *ptr;
11747         struct btrfs_path path;
11748         struct btrfs_key chunk_key;
11749         struct btrfs_key devext_key;
11750         struct btrfs_chunk *chunk;
11751         struct extent_buffer *l;
11752         int num_stripes;
11753         u64 length;
11754         int i;
11755         int found_chunk = 0;
11756         int ret;
11757
11758         btrfs_item_key_to_cpu(eb, &devext_key, slot);
11759         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
11760         length = btrfs_dev_extent_length(eb, ptr);
11761
11762         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
11763         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11764         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
11765
11766         btrfs_init_path(&path);
11767         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11768         if (ret)
11769                 goto out;
11770
11771         l = path.nodes[0];
11772         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
11773         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
11774                                       chunk_key.offset);
11775         if (ret < 0)
11776                 goto out;
11777
11778         if (btrfs_stripe_length(fs_info, l, chunk) != length)
11779                 goto out;
11780
11781         num_stripes = btrfs_chunk_num_stripes(l, chunk);
11782         for (i = 0; i < num_stripes; i++) {
11783                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
11784                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
11785
11786                 if (devid == devext_key.objectid &&
11787                     offset == devext_key.offset) {
11788                         found_chunk = 1;
11789                         break;
11790                 }
11791         }
11792 out:
11793         btrfs_release_path(&path);
11794         if (!found_chunk) {
11795                 error(
11796                 "device extent[%llu, %llu, %llu] did not find the related chunk",
11797                         devext_key.objectid, devext_key.offset, length);
11798                 return REFERENCER_MISSING;
11799         }
11800         return 0;
11801 }
11802
11803 /*
11804  * Check if the used space is correct with the dev item
11805  */
11806 static int check_dev_item(struct btrfs_fs_info *fs_info,
11807                           struct extent_buffer *eb, int slot)
11808 {
11809         struct btrfs_root *dev_root = fs_info->dev_root;
11810         struct btrfs_dev_item *dev_item;
11811         struct btrfs_path path;
11812         struct btrfs_key key;
11813         struct btrfs_dev_extent *ptr;
11814         u64 dev_id;
11815         u64 used;
11816         u64 total = 0;
11817         int ret;
11818
11819         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
11820         dev_id = btrfs_device_id(eb, dev_item);
11821         used = btrfs_device_bytes_used(eb, dev_item);
11822
11823         key.objectid = dev_id;
11824         key.type = BTRFS_DEV_EXTENT_KEY;
11825         key.offset = 0;
11826
11827         btrfs_init_path(&path);
11828         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
11829         if (ret < 0) {
11830                 btrfs_item_key_to_cpu(eb, &key, slot);
11831                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
11832                         key.objectid, key.type, key.offset);
11833                 btrfs_release_path(&path);
11834                 return REFERENCER_MISSING;
11835         }
11836
11837         /* Iterate dev_extents to calculate the used space of a device */
11838         while (1) {
11839                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11840                         goto next;
11841
11842                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11843                 if (key.objectid > dev_id)
11844                         break;
11845                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11846                         goto next;
11847
11848                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11849                                      struct btrfs_dev_extent);
11850                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11851 next:
11852                 ret = btrfs_next_item(dev_root, &path);
11853                 if (ret)
11854                         break;
11855         }
11856         btrfs_release_path(&path);
11857
11858         if (used != total) {
11859                 btrfs_item_key_to_cpu(eb, &key, slot);
11860                 error(
11861 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11862                         total, used, BTRFS_ROOT_TREE_OBJECTID,
11863                         BTRFS_DEV_EXTENT_KEY, dev_id);
11864                 return ACCOUNTING_MISMATCH;
11865         }
11866         return 0;
11867 }
11868
11869 /*
11870  * Check a block group item with its referener (chunk) and its used space
11871  * with extent/metadata item
11872  */
11873 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11874                                   struct extent_buffer *eb, int slot)
11875 {
11876         struct btrfs_root *extent_root = fs_info->extent_root;
11877         struct btrfs_root *chunk_root = fs_info->chunk_root;
11878         struct btrfs_block_group_item *bi;
11879         struct btrfs_block_group_item bg_item;
11880         struct btrfs_path path;
11881         struct btrfs_key bg_key;
11882         struct btrfs_key chunk_key;
11883         struct btrfs_key extent_key;
11884         struct btrfs_chunk *chunk;
11885         struct extent_buffer *leaf;
11886         struct btrfs_extent_item *ei;
11887         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11888         u64 flags;
11889         u64 bg_flags;
11890         u64 used;
11891         u64 total = 0;
11892         int ret;
11893         int err = 0;
11894
11895         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11896         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11897         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11898         used = btrfs_block_group_used(&bg_item);
11899         bg_flags = btrfs_block_group_flags(&bg_item);
11900
11901         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11902         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11903         chunk_key.offset = bg_key.objectid;
11904
11905         btrfs_init_path(&path);
11906         /* Search for the referencer chunk */
11907         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11908         if (ret) {
11909                 error(
11910                 "block group[%llu %llu] did not find the related chunk item",
11911                         bg_key.objectid, bg_key.offset);
11912                 err |= REFERENCER_MISSING;
11913         } else {
11914                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11915                                         struct btrfs_chunk);
11916                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11917                                                 bg_key.offset) {
11918                         error(
11919         "block group[%llu %llu] related chunk item length does not match",
11920                                 bg_key.objectid, bg_key.offset);
11921                         err |= REFERENCER_MISMATCH;
11922                 }
11923         }
11924         btrfs_release_path(&path);
11925
11926         /* Search from the block group bytenr */
11927         extent_key.objectid = bg_key.objectid;
11928         extent_key.type = 0;
11929         extent_key.offset = 0;
11930
11931         btrfs_init_path(&path);
11932         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11933         if (ret < 0)
11934                 goto out;
11935
11936         /* Iterate extent tree to account used space */
11937         while (1) {
11938                 leaf = path.nodes[0];
11939
11940                 /* Search slot can point to the last item beyond leaf nritems */
11941                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11942                         goto next;
11943
11944                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11945                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11946                         break;
11947
11948                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11949                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11950                         goto next;
11951                 if (extent_key.objectid < bg_key.objectid)
11952                         goto next;
11953
11954                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11955                         total += nodesize;
11956                 else
11957                         total += extent_key.offset;
11958
11959                 ei = btrfs_item_ptr(leaf, path.slots[0],
11960                                     struct btrfs_extent_item);
11961                 flags = btrfs_extent_flags(leaf, ei);
11962                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11963                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11964                                 error(
11965                         "bad extent[%llu, %llu) type mismatch with chunk",
11966                                         extent_key.objectid,
11967                                         extent_key.objectid + extent_key.offset);
11968                                 err |= CHUNK_TYPE_MISMATCH;
11969                         }
11970                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11971                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11972                                     BTRFS_BLOCK_GROUP_METADATA))) {
11973                                 error(
11974                         "bad extent[%llu, %llu) type mismatch with chunk",
11975                                         extent_key.objectid,
11976                                         extent_key.objectid + nodesize);
11977                                 err |= CHUNK_TYPE_MISMATCH;
11978                         }
11979                 }
11980 next:
11981                 ret = btrfs_next_item(extent_root, &path);
11982                 if (ret)
11983                         break;
11984         }
11985
11986 out:
11987         btrfs_release_path(&path);
11988
11989         if (total != used) {
11990                 error(
11991                 "block group[%llu %llu] used %llu but extent items used %llu",
11992                         bg_key.objectid, bg_key.offset, used, total);
11993                 err |= ACCOUNTING_MISMATCH;
11994         }
11995         return err;
11996 }
11997
11998 /*
11999  * Check a chunk item.
12000  * Including checking all referred dev_extents and block group
12001  */
12002 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12003                             struct extent_buffer *eb, int slot)
12004 {
12005         struct btrfs_root *extent_root = fs_info->extent_root;
12006         struct btrfs_root *dev_root = fs_info->dev_root;
12007         struct btrfs_path path;
12008         struct btrfs_key chunk_key;
12009         struct btrfs_key bg_key;
12010         struct btrfs_key devext_key;
12011         struct btrfs_chunk *chunk;
12012         struct extent_buffer *leaf;
12013         struct btrfs_block_group_item *bi;
12014         struct btrfs_block_group_item bg_item;
12015         struct btrfs_dev_extent *ptr;
12016         u64 length;
12017         u64 chunk_end;
12018         u64 stripe_len;
12019         u64 type;
12020         int num_stripes;
12021         u64 offset;
12022         u64 objectid;
12023         int i;
12024         int ret;
12025         int err = 0;
12026
12027         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12028         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12029         length = btrfs_chunk_length(eb, chunk);
12030         chunk_end = chunk_key.offset + length;
12031         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12032                                       chunk_key.offset);
12033         if (ret < 0) {
12034                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12035                         chunk_end);
12036                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12037                 goto out;
12038         }
12039         type = btrfs_chunk_type(eb, chunk);
12040
12041         bg_key.objectid = chunk_key.offset;
12042         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12043         bg_key.offset = length;
12044
12045         btrfs_init_path(&path);
12046         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12047         if (ret) {
12048                 error(
12049                 "chunk[%llu %llu) did not find the related block group item",
12050                         chunk_key.offset, chunk_end);
12051                 err |= REFERENCER_MISSING;
12052         } else{
12053                 leaf = path.nodes[0];
12054                 bi = btrfs_item_ptr(leaf, path.slots[0],
12055                                     struct btrfs_block_group_item);
12056                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
12057                                    sizeof(bg_item));
12058                 if (btrfs_block_group_flags(&bg_item) != type) {
12059                         error(
12060 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12061                                 chunk_key.offset, chunk_end, type,
12062                                 btrfs_block_group_flags(&bg_item));
12063                         err |= REFERENCER_MISSING;
12064                 }
12065         }
12066
12067         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12068         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
12069         for (i = 0; i < num_stripes; i++) {
12070                 btrfs_release_path(&path);
12071                 btrfs_init_path(&path);
12072                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12073                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12074                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12075
12076                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12077                                         0, 0);
12078                 if (ret)
12079                         goto not_match_dev;
12080
12081                 leaf = path.nodes[0];
12082                 ptr = btrfs_item_ptr(leaf, path.slots[0],
12083                                      struct btrfs_dev_extent);
12084                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12085                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
12086                 if (objectid != chunk_key.objectid ||
12087                     offset != chunk_key.offset ||
12088                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12089                         goto not_match_dev;
12090                 continue;
12091 not_match_dev:
12092                 err |= BACKREF_MISSING;
12093                 error(
12094                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12095                         chunk_key.objectid, chunk_end, i);
12096                 continue;
12097         }
12098         btrfs_release_path(&path);
12099 out:
12100         return err;
12101 }
12102
12103 /*
12104  * Main entry function to check known items and update related accounting info
12105  */
12106 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
12107 {
12108         struct btrfs_fs_info *fs_info = root->fs_info;
12109         struct btrfs_key key;
12110         int slot = 0;
12111         int type;
12112         struct btrfs_extent_data_ref *dref;
12113         int ret;
12114         int err = 0;
12115
12116 next:
12117         btrfs_item_key_to_cpu(eb, &key, slot);
12118         type = key.type;
12119
12120         switch (type) {
12121         case BTRFS_EXTENT_DATA_KEY:
12122                 ret = check_extent_data_item(root, eb, slot);
12123                 err |= ret;
12124                 break;
12125         case BTRFS_BLOCK_GROUP_ITEM_KEY:
12126                 ret = check_block_group_item(fs_info, eb, slot);
12127                 err |= ret;
12128                 break;
12129         case BTRFS_DEV_ITEM_KEY:
12130                 ret = check_dev_item(fs_info, eb, slot);
12131                 err |= ret;
12132                 break;
12133         case BTRFS_CHUNK_ITEM_KEY:
12134                 ret = check_chunk_item(fs_info, eb, slot);
12135                 err |= ret;
12136                 break;
12137         case BTRFS_DEV_EXTENT_KEY:
12138                 ret = check_dev_extent_item(fs_info, eb, slot);
12139                 err |= ret;
12140                 break;
12141         case BTRFS_EXTENT_ITEM_KEY:
12142         case BTRFS_METADATA_ITEM_KEY:
12143                 ret = check_extent_item(fs_info, eb, slot);
12144                 err |= ret;
12145                 break;
12146         case BTRFS_EXTENT_CSUM_KEY:
12147                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
12148                 break;
12149         case BTRFS_TREE_BLOCK_REF_KEY:
12150                 ret = check_tree_block_backref(fs_info, key.offset,
12151                                                key.objectid, -1);
12152                 err |= ret;
12153                 break;
12154         case BTRFS_EXTENT_DATA_REF_KEY:
12155                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
12156                 ret = check_extent_data_backref(fs_info,
12157                                 btrfs_extent_data_ref_root(eb, dref),
12158                                 btrfs_extent_data_ref_objectid(eb, dref),
12159                                 btrfs_extent_data_ref_offset(eb, dref),
12160                                 key.objectid, 0,
12161                                 btrfs_extent_data_ref_count(eb, dref));
12162                 err |= ret;
12163                 break;
12164         case BTRFS_SHARED_BLOCK_REF_KEY:
12165                 ret = check_shared_block_backref(fs_info, key.offset,
12166                                                  key.objectid, -1);
12167                 err |= ret;
12168                 break;
12169         case BTRFS_SHARED_DATA_REF_KEY:
12170                 ret = check_shared_data_backref(fs_info, key.offset,
12171                                                 key.objectid);
12172                 err |= ret;
12173                 break;
12174         default:
12175                 break;
12176         }
12177
12178         if (++slot < btrfs_header_nritems(eb))
12179                 goto next;
12180
12181         return err;
12182 }
12183
12184 /*
12185  * Helper function for later fs/subvol tree check.  To determine if a tree
12186  * block should be checked.
12187  * This function will ensure only the direct referencer with lowest rootid to
12188  * check a fs/subvolume tree block.
12189  *
12190  * Backref check at extent tree would detect errors like missing subvolume
12191  * tree, so we can do aggressive check to reduce duplicated checks.
12192  */
12193 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
12194 {
12195         struct btrfs_root *extent_root = root->fs_info->extent_root;
12196         struct btrfs_key key;
12197         struct btrfs_path path;
12198         struct extent_buffer *leaf;
12199         int slot;
12200         struct btrfs_extent_item *ei;
12201         unsigned long ptr;
12202         unsigned long end;
12203         int type;
12204         u32 item_size;
12205         u64 offset;
12206         struct btrfs_extent_inline_ref *iref;
12207         int ret;
12208
12209         btrfs_init_path(&path);
12210         key.objectid = btrfs_header_bytenr(eb);
12211         key.type = BTRFS_METADATA_ITEM_KEY;
12212         key.offset = (u64)-1;
12213
12214         /*
12215          * Any failure in backref resolving means we can't determine
12216          * whom the tree block belongs to.
12217          * So in that case, we need to check that tree block
12218          */
12219         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12220         if (ret < 0)
12221                 goto need_check;
12222
12223         ret = btrfs_previous_extent_item(extent_root, &path,
12224                                          btrfs_header_bytenr(eb));
12225         if (ret)
12226                 goto need_check;
12227
12228         leaf = path.nodes[0];
12229         slot = path.slots[0];
12230         btrfs_item_key_to_cpu(leaf, &key, slot);
12231         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12232
12233         if (key.type == BTRFS_METADATA_ITEM_KEY) {
12234                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12235         } else {
12236                 struct btrfs_tree_block_info *info;
12237
12238                 info = (struct btrfs_tree_block_info *)(ei + 1);
12239                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
12240         }
12241
12242         item_size = btrfs_item_size_nr(leaf, slot);
12243         ptr = (unsigned long)iref;
12244         end = (unsigned long)ei + item_size;
12245         while (ptr < end) {
12246                 iref = (struct btrfs_extent_inline_ref *)ptr;
12247                 type = btrfs_extent_inline_ref_type(leaf, iref);
12248                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
12249
12250                 /*
12251                  * We only check the tree block if current root is
12252                  * the lowest referencer of it.
12253                  */
12254                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
12255                     offset < root->objectid) {
12256                         btrfs_release_path(&path);
12257                         return 0;
12258                 }
12259
12260                 ptr += btrfs_extent_inline_ref_size(type);
12261         }
12262         /*
12263          * Normally we should also check keyed tree block ref, but that may be
12264          * very time consuming.  Inlined ref should already make us skip a lot
12265          * of refs now.  So skip search keyed tree block ref.
12266          */
12267
12268 need_check:
12269         btrfs_release_path(&path);
12270         return 1;
12271 }
12272
12273 /*
12274  * Traversal function for tree block. We will do:
12275  * 1) Skip shared fs/subvolume tree blocks
12276  * 2) Update related bytes accounting
12277  * 3) Pre-order traversal
12278  */
12279 static int traverse_tree_block(struct btrfs_root *root,
12280                                 struct extent_buffer *node)
12281 {
12282         struct extent_buffer *eb;
12283         struct btrfs_key key;
12284         struct btrfs_key drop_key;
12285         int level;
12286         u64 nr;
12287         int i;
12288         int err = 0;
12289         int ret;
12290
12291         /*
12292          * Skip shared fs/subvolume tree block, in that case they will
12293          * be checked by referencer with lowest rootid
12294          */
12295         if (is_fstree(root->objectid) && !should_check(root, node))
12296                 return 0;
12297
12298         /* Update bytes accounting */
12299         total_btree_bytes += node->len;
12300         if (fs_root_objectid(btrfs_header_owner(node)))
12301                 total_fs_tree_bytes += node->len;
12302         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
12303                 total_extent_tree_bytes += node->len;
12304
12305         /* pre-order tranversal, check itself first */
12306         level = btrfs_header_level(node);
12307         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
12308                                    btrfs_header_level(node),
12309                                    btrfs_header_owner(node));
12310         err |= ret;
12311         if (err)
12312                 error(
12313         "check %s failed root %llu bytenr %llu level %d, force continue check",
12314                         level ? "node":"leaf", root->objectid,
12315                         btrfs_header_bytenr(node), btrfs_header_level(node));
12316
12317         if (!level) {
12318                 btree_space_waste += btrfs_leaf_free_space(root, node);
12319                 ret = check_leaf_items(root, node);
12320                 err |= ret;
12321                 return err;
12322         }
12323
12324         nr = btrfs_header_nritems(node);
12325         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
12326         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
12327                 sizeof(struct btrfs_key_ptr);
12328
12329         /* Then check all its children */
12330         for (i = 0; i < nr; i++) {
12331                 u64 blocknr = btrfs_node_blockptr(node, i);
12332
12333                 btrfs_node_key_to_cpu(node, &key, i);
12334                 if (level == root->root_item.drop_level &&
12335                     is_dropped_key(&key, &drop_key))
12336                         continue;
12337
12338                 /*
12339                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
12340                  * to call the function itself.
12341                  */
12342                 eb = read_tree_block(root->fs_info, blocknr, 0);
12343                 if (extent_buffer_uptodate(eb)) {
12344                         ret = traverse_tree_block(root, eb);
12345                         err |= ret;
12346                 }
12347                 free_extent_buffer(eb);
12348         }
12349
12350         return err;
12351 }
12352
12353 /*
12354  * Low memory usage version check_chunks_and_extents.
12355  */
12356 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12357 {
12358         struct btrfs_path path;
12359         struct btrfs_key key;
12360         struct btrfs_root *root1;
12361         struct btrfs_root *root;
12362         struct btrfs_root *cur_root;
12363         int err = 0;
12364         int ret;
12365
12366         root = fs_info->fs_root;
12367
12368         root1 = root->fs_info->chunk_root;
12369         ret = traverse_tree_block(root1, root1->node);
12370         err |= ret;
12371
12372         root1 = root->fs_info->tree_root;
12373         ret = traverse_tree_block(root1, root1->node);
12374         err |= ret;
12375
12376         btrfs_init_path(&path);
12377         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12378         key.offset = 0;
12379         key.type = BTRFS_ROOT_ITEM_KEY;
12380
12381         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
12382         if (ret) {
12383                 error("cannot find extent treet in tree_root");
12384                 goto out;
12385         }
12386
12387         while (1) {
12388                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12389                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12390                         goto next;
12391                 key.offset = (u64)-1;
12392
12393                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12394                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
12395                                         &key);
12396                 else
12397                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
12398                 if (IS_ERR(cur_root) || !cur_root) {
12399                         error("failed to read tree: %lld", key.objectid);
12400                         goto next;
12401                 }
12402
12403                 ret = traverse_tree_block(cur_root, cur_root->node);
12404                 err |= ret;
12405
12406                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12407                         btrfs_free_fs_root(cur_root);
12408 next:
12409                 ret = btrfs_next_item(root1, &path);
12410                 if (ret)
12411                         goto out;
12412         }
12413
12414 out:
12415         btrfs_release_path(&path);
12416         return err;
12417 }
12418
12419 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
12420 {
12421         int ret;
12422
12423         if (!ctx.progress_enabled)
12424                 fprintf(stderr, "checking extents\n");
12425         if (check_mode == CHECK_MODE_LOWMEM)
12426                 ret = check_chunks_and_extents_v2(fs_info);
12427         else
12428                 ret = check_chunks_and_extents(fs_info);
12429
12430         return ret;
12431 }
12432
12433 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
12434                            struct btrfs_root *root, int overwrite)
12435 {
12436         struct extent_buffer *c;
12437         struct extent_buffer *old = root->node;
12438         int level;
12439         int ret;
12440         struct btrfs_disk_key disk_key = {0,0,0};
12441
12442         level = 0;
12443
12444         if (overwrite) {
12445                 c = old;
12446                 extent_buffer_get(c);
12447                 goto init;
12448         }
12449         c = btrfs_alloc_free_block(trans, root,
12450                                    root->fs_info->nodesize,
12451                                    root->root_key.objectid,
12452                                    &disk_key, level, 0, 0);
12453         if (IS_ERR(c)) {
12454                 c = old;
12455                 extent_buffer_get(c);
12456                 overwrite = 1;
12457         }
12458 init:
12459         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
12460         btrfs_set_header_level(c, level);
12461         btrfs_set_header_bytenr(c, c->start);
12462         btrfs_set_header_generation(c, trans->transid);
12463         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
12464         btrfs_set_header_owner(c, root->root_key.objectid);
12465
12466         write_extent_buffer(c, root->fs_info->fsid,
12467                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
12468
12469         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
12470                             btrfs_header_chunk_tree_uuid(c),
12471                             BTRFS_UUID_SIZE);
12472
12473         btrfs_mark_buffer_dirty(c);
12474         /*
12475          * this case can happen in the following case:
12476          *
12477          * 1.overwrite previous root.
12478          *
12479          * 2.reinit reloc data root, this is because we skip pin
12480          * down reloc data tree before which means we can allocate
12481          * same block bytenr here.
12482          */
12483         if (old->start == c->start) {
12484                 btrfs_set_root_generation(&root->root_item,
12485                                           trans->transid);
12486                 root->root_item.level = btrfs_header_level(root->node);
12487                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12488                                         &root->root_key, &root->root_item);
12489                 if (ret) {
12490                         free_extent_buffer(c);
12491                         return ret;
12492                 }
12493         }
12494         free_extent_buffer(old);
12495         root->node = c;
12496         add_root_to_dirty_list(root);
12497         return 0;
12498 }
12499
12500 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12501                                 struct extent_buffer *eb, int tree_root)
12502 {
12503         struct extent_buffer *tmp;
12504         struct btrfs_root_item *ri;
12505         struct btrfs_key key;
12506         u64 bytenr;
12507         int level = btrfs_header_level(eb);
12508         int nritems;
12509         int ret;
12510         int i;
12511
12512         /*
12513          * If we have pinned this block before, don't pin it again.
12514          * This can not only avoid forever loop with broken filesystem
12515          * but also give us some speedups.
12516          */
12517         if (test_range_bit(&fs_info->pinned_extents, eb->start,
12518                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12519                 return 0;
12520
12521         btrfs_pin_extent(fs_info, eb->start, eb->len);
12522
12523         nritems = btrfs_header_nritems(eb);
12524         for (i = 0; i < nritems; i++) {
12525                 if (level == 0) {
12526                         btrfs_item_key_to_cpu(eb, &key, i);
12527                         if (key.type != BTRFS_ROOT_ITEM_KEY)
12528                                 continue;
12529                         /* Skip the extent root and reloc roots */
12530                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
12531                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12532                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12533                                 continue;
12534                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12535                         bytenr = btrfs_disk_root_bytenr(eb, ri);
12536
12537                         /*
12538                          * If at any point we start needing the real root we
12539                          * will have to build a stump root for the root we are
12540                          * in, but for now this doesn't actually use the root so
12541                          * just pass in extent_root.
12542                          */
12543                         tmp = read_tree_block(fs_info, bytenr, 0);
12544                         if (!extent_buffer_uptodate(tmp)) {
12545                                 fprintf(stderr, "Error reading root block\n");
12546                                 return -EIO;
12547                         }
12548                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
12549                         free_extent_buffer(tmp);
12550                         if (ret)
12551                                 return ret;
12552                 } else {
12553                         bytenr = btrfs_node_blockptr(eb, i);
12554
12555                         /* If we aren't the tree root don't read the block */
12556                         if (level == 1 && !tree_root) {
12557                                 btrfs_pin_extent(fs_info, bytenr,
12558                                                 fs_info->nodesize);
12559                                 continue;
12560                         }
12561
12562                         tmp = read_tree_block(fs_info, bytenr, 0);
12563                         if (!extent_buffer_uptodate(tmp)) {
12564                                 fprintf(stderr, "Error reading tree block\n");
12565                                 return -EIO;
12566                         }
12567                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
12568                         free_extent_buffer(tmp);
12569                         if (ret)
12570                                 return ret;
12571                 }
12572         }
12573
12574         return 0;
12575 }
12576
12577 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
12578 {
12579         int ret;
12580
12581         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
12582         if (ret)
12583                 return ret;
12584
12585         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
12586 }
12587
12588 static int reset_block_groups(struct btrfs_fs_info *fs_info)
12589 {
12590         struct btrfs_block_group_cache *cache;
12591         struct btrfs_path path;
12592         struct extent_buffer *leaf;
12593         struct btrfs_chunk *chunk;
12594         struct btrfs_key key;
12595         int ret;
12596         u64 start;
12597
12598         btrfs_init_path(&path);
12599         key.objectid = 0;
12600         key.type = BTRFS_CHUNK_ITEM_KEY;
12601         key.offset = 0;
12602         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
12603         if (ret < 0) {
12604                 btrfs_release_path(&path);
12605                 return ret;
12606         }
12607
12608         /*
12609          * We do this in case the block groups were screwed up and had alloc
12610          * bits that aren't actually set on the chunks.  This happens with
12611          * restored images every time and could happen in real life I guess.
12612          */
12613         fs_info->avail_data_alloc_bits = 0;
12614         fs_info->avail_metadata_alloc_bits = 0;
12615         fs_info->avail_system_alloc_bits = 0;
12616
12617         /* First we need to create the in-memory block groups */
12618         while (1) {
12619                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12620                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
12621                         if (ret < 0) {
12622                                 btrfs_release_path(&path);
12623                                 return ret;
12624                         }
12625                         if (ret) {
12626                                 ret = 0;
12627                                 break;
12628                         }
12629                 }
12630                 leaf = path.nodes[0];
12631                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12632                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
12633                         path.slots[0]++;
12634                         continue;
12635                 }
12636
12637                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
12638                 btrfs_add_block_group(fs_info, 0,
12639                                       btrfs_chunk_type(leaf, chunk),
12640                                       key.objectid, key.offset,
12641                                       btrfs_chunk_length(leaf, chunk));
12642                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
12643                                  key.offset + btrfs_chunk_length(leaf, chunk));
12644                 path.slots[0]++;
12645         }
12646         start = 0;
12647         while (1) {
12648                 cache = btrfs_lookup_first_block_group(fs_info, start);
12649                 if (!cache)
12650                         break;
12651                 cache->cached = 1;
12652                 start = cache->key.objectid + cache->key.offset;
12653         }
12654
12655         btrfs_release_path(&path);
12656         return 0;
12657 }
12658
12659 static int reset_balance(struct btrfs_trans_handle *trans,
12660                          struct btrfs_fs_info *fs_info)
12661 {
12662         struct btrfs_root *root = fs_info->tree_root;
12663         struct btrfs_path path;
12664         struct extent_buffer *leaf;
12665         struct btrfs_key key;
12666         int del_slot, del_nr = 0;
12667         int ret;
12668         int found = 0;
12669
12670         btrfs_init_path(&path);
12671         key.objectid = BTRFS_BALANCE_OBJECTID;
12672         key.type = BTRFS_BALANCE_ITEM_KEY;
12673         key.offset = 0;
12674         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12675         if (ret) {
12676                 if (ret > 0)
12677                         ret = 0;
12678                 if (!ret)
12679                         goto reinit_data_reloc;
12680                 else
12681                         goto out;
12682         }
12683
12684         ret = btrfs_del_item(trans, root, &path);
12685         if (ret)
12686                 goto out;
12687         btrfs_release_path(&path);
12688
12689         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12690         key.type = BTRFS_ROOT_ITEM_KEY;
12691         key.offset = 0;
12692         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12693         if (ret < 0)
12694                 goto out;
12695         while (1) {
12696                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12697                         if (!found)
12698                                 break;
12699
12700                         if (del_nr) {
12701                                 ret = btrfs_del_items(trans, root, &path,
12702                                                       del_slot, del_nr);
12703                                 del_nr = 0;
12704                                 if (ret)
12705                                         goto out;
12706                         }
12707                         key.offset++;
12708                         btrfs_release_path(&path);
12709
12710                         found = 0;
12711                         ret = btrfs_search_slot(trans, root, &key, &path,
12712                                                 -1, 1);
12713                         if (ret < 0)
12714                                 goto out;
12715                         continue;
12716                 }
12717                 found = 1;
12718                 leaf = path.nodes[0];
12719                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12720                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12721                         break;
12722                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12723                         path.slots[0]++;
12724                         continue;
12725                 }
12726                 if (!del_nr) {
12727                         del_slot = path.slots[0];
12728                         del_nr = 1;
12729                 } else {
12730                         del_nr++;
12731                 }
12732                 path.slots[0]++;
12733         }
12734
12735         if (del_nr) {
12736                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
12737                 if (ret)
12738                         goto out;
12739         }
12740         btrfs_release_path(&path);
12741
12742 reinit_data_reloc:
12743         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
12744         key.type = BTRFS_ROOT_ITEM_KEY;
12745         key.offset = (u64)-1;
12746         root = btrfs_read_fs_root(fs_info, &key);
12747         if (IS_ERR(root)) {
12748                 fprintf(stderr, "Error reading data reloc tree\n");
12749                 ret = PTR_ERR(root);
12750                 goto out;
12751         }
12752         record_root_in_trans(trans, root);
12753         ret = btrfs_fsck_reinit_root(trans, root, 0);
12754         if (ret)
12755                 goto out;
12756         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
12757 out:
12758         btrfs_release_path(&path);
12759         return ret;
12760 }
12761
12762 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
12763                               struct btrfs_fs_info *fs_info)
12764 {
12765         u64 start = 0;
12766         int ret;
12767
12768         /*
12769          * The only reason we don't do this is because right now we're just
12770          * walking the trees we find and pinning down their bytes, we don't look
12771          * at any of the leaves.  In order to do mixed groups we'd have to check
12772          * the leaves of any fs roots and pin down the bytes for any file
12773          * extents we find.  Not hard but why do it if we don't have to?
12774          */
12775         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
12776                 fprintf(stderr, "We don't support re-initing the extent tree "
12777                         "for mixed block groups yet, please notify a btrfs "
12778                         "developer you want to do this so they can add this "
12779                         "functionality.\n");
12780                 return -EINVAL;
12781         }
12782
12783         /*
12784          * first we need to walk all of the trees except the extent tree and pin
12785          * down the bytes that are in use so we don't overwrite any existing
12786          * metadata.
12787          */
12788         ret = pin_metadata_blocks(fs_info);
12789         if (ret) {
12790                 fprintf(stderr, "error pinning down used bytes\n");
12791                 return ret;
12792         }
12793
12794         /*
12795          * Need to drop all the block groups since we're going to recreate all
12796          * of them again.
12797          */
12798         btrfs_free_block_groups(fs_info);
12799         ret = reset_block_groups(fs_info);
12800         if (ret) {
12801                 fprintf(stderr, "error resetting the block groups\n");
12802                 return ret;
12803         }
12804
12805         /* Ok we can allocate now, reinit the extent root */
12806         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
12807         if (ret) {
12808                 fprintf(stderr, "extent root initialization failed\n");
12809                 /*
12810                  * When the transaction code is updated we should end the
12811                  * transaction, but for now progs only knows about commit so
12812                  * just return an error.
12813                  */
12814                 return ret;
12815         }
12816
12817         /*
12818          * Now we have all the in-memory block groups setup so we can make
12819          * allocations properly, and the metadata we care about is safe since we
12820          * pinned all of it above.
12821          */
12822         while (1) {
12823                 struct btrfs_block_group_cache *cache;
12824
12825                 cache = btrfs_lookup_first_block_group(fs_info, start);
12826                 if (!cache)
12827                         break;
12828                 start = cache->key.objectid + cache->key.offset;
12829                 ret = btrfs_insert_item(trans, fs_info->extent_root,
12830                                         &cache->key, &cache->item,
12831                                         sizeof(cache->item));
12832                 if (ret) {
12833                         fprintf(stderr, "Error adding block group\n");
12834                         return ret;
12835                 }
12836                 btrfs_extent_post_op(trans, fs_info->extent_root);
12837         }
12838
12839         ret = reset_balance(trans, fs_info);
12840         if (ret)
12841                 fprintf(stderr, "error resetting the pending balance\n");
12842
12843         return ret;
12844 }
12845
12846 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12847 {
12848         struct btrfs_path path;
12849         struct btrfs_trans_handle *trans;
12850         struct btrfs_key key;
12851         int ret;
12852
12853         printf("Recowing metadata block %llu\n", eb->start);
12854         key.objectid = btrfs_header_owner(eb);
12855         key.type = BTRFS_ROOT_ITEM_KEY;
12856         key.offset = (u64)-1;
12857
12858         root = btrfs_read_fs_root(root->fs_info, &key);
12859         if (IS_ERR(root)) {
12860                 fprintf(stderr, "Couldn't find owner root %llu\n",
12861                         key.objectid);
12862                 return PTR_ERR(root);
12863         }
12864
12865         trans = btrfs_start_transaction(root, 1);
12866         if (IS_ERR(trans))
12867                 return PTR_ERR(trans);
12868
12869         btrfs_init_path(&path);
12870         path.lowest_level = btrfs_header_level(eb);
12871         if (path.lowest_level)
12872                 btrfs_node_key_to_cpu(eb, &key, 0);
12873         else
12874                 btrfs_item_key_to_cpu(eb, &key, 0);
12875
12876         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12877         btrfs_commit_transaction(trans, root);
12878         btrfs_release_path(&path);
12879         return ret;
12880 }
12881
12882 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12883 {
12884         struct btrfs_path path;
12885         struct btrfs_trans_handle *trans;
12886         struct btrfs_key key;
12887         int ret;
12888
12889         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12890                bad->key.type, bad->key.offset);
12891         key.objectid = bad->root_id;
12892         key.type = BTRFS_ROOT_ITEM_KEY;
12893         key.offset = (u64)-1;
12894
12895         root = btrfs_read_fs_root(root->fs_info, &key);
12896         if (IS_ERR(root)) {
12897                 fprintf(stderr, "Couldn't find owner root %llu\n",
12898                         key.objectid);
12899                 return PTR_ERR(root);
12900         }
12901
12902         trans = btrfs_start_transaction(root, 1);
12903         if (IS_ERR(trans))
12904                 return PTR_ERR(trans);
12905
12906         btrfs_init_path(&path);
12907         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12908         if (ret) {
12909                 if (ret > 0)
12910                         ret = 0;
12911                 goto out;
12912         }
12913         ret = btrfs_del_item(trans, root, &path);
12914 out:
12915         btrfs_commit_transaction(trans, root);
12916         btrfs_release_path(&path);
12917         return ret;
12918 }
12919
12920 static int zero_log_tree(struct btrfs_root *root)
12921 {
12922         struct btrfs_trans_handle *trans;
12923         int ret;
12924
12925         trans = btrfs_start_transaction(root, 1);
12926         if (IS_ERR(trans)) {
12927                 ret = PTR_ERR(trans);
12928                 return ret;
12929         }
12930         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12931         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12932         ret = btrfs_commit_transaction(trans, root);
12933         return ret;
12934 }
12935
12936 static int populate_csum(struct btrfs_trans_handle *trans,
12937                          struct btrfs_root *csum_root, char *buf, u64 start,
12938                          u64 len)
12939 {
12940         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12941         u64 offset = 0;
12942         u64 sectorsize;
12943         int ret = 0;
12944
12945         while (offset < len) {
12946                 sectorsize = fs_info->sectorsize;
12947                 ret = read_extent_data(fs_info, buf, start + offset,
12948                                        &sectorsize, 0);
12949                 if (ret)
12950                         break;
12951                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12952                                             start + offset, buf, sectorsize);
12953                 if (ret)
12954                         break;
12955                 offset += sectorsize;
12956         }
12957         return ret;
12958 }
12959
12960 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12961                                       struct btrfs_root *csum_root,
12962                                       struct btrfs_root *cur_root)
12963 {
12964         struct btrfs_path path;
12965         struct btrfs_key key;
12966         struct extent_buffer *node;
12967         struct btrfs_file_extent_item *fi;
12968         char *buf = NULL;
12969         u64 start = 0;
12970         u64 len = 0;
12971         int slot = 0;
12972         int ret = 0;
12973
12974         buf = malloc(cur_root->fs_info->sectorsize);
12975         if (!buf)
12976                 return -ENOMEM;
12977
12978         btrfs_init_path(&path);
12979         key.objectid = 0;
12980         key.offset = 0;
12981         key.type = 0;
12982         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12983         if (ret < 0)
12984                 goto out;
12985         /* Iterate all regular file extents and fill its csum */
12986         while (1) {
12987                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12988
12989                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12990                         goto next;
12991                 node = path.nodes[0];
12992                 slot = path.slots[0];
12993                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12994                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12995                         goto next;
12996                 start = btrfs_file_extent_disk_bytenr(node, fi);
12997                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12998
12999                 ret = populate_csum(trans, csum_root, buf, start, len);
13000                 if (ret == -EEXIST)
13001                         ret = 0;
13002                 if (ret < 0)
13003                         goto out;
13004 next:
13005                 /*
13006                  * TODO: if next leaf is corrupted, jump to nearest next valid
13007                  * leaf.
13008                  */
13009                 ret = btrfs_next_item(cur_root, &path);
13010                 if (ret < 0)
13011                         goto out;
13012                 if (ret > 0) {
13013                         ret = 0;
13014                         goto out;
13015                 }
13016         }
13017
13018 out:
13019         btrfs_release_path(&path);
13020         free(buf);
13021         return ret;
13022 }
13023
13024 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13025                                   struct btrfs_root *csum_root)
13026 {
13027         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13028         struct btrfs_path path;
13029         struct btrfs_root *tree_root = fs_info->tree_root;
13030         struct btrfs_root *cur_root;
13031         struct extent_buffer *node;
13032         struct btrfs_key key;
13033         int slot = 0;
13034         int ret = 0;
13035
13036         btrfs_init_path(&path);
13037         key.objectid = BTRFS_FS_TREE_OBJECTID;
13038         key.offset = 0;
13039         key.type = BTRFS_ROOT_ITEM_KEY;
13040         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13041         if (ret < 0)
13042                 goto out;
13043         if (ret > 0) {
13044                 ret = -ENOENT;
13045                 goto out;
13046         }
13047
13048         while (1) {
13049                 node = path.nodes[0];
13050                 slot = path.slots[0];
13051                 btrfs_item_key_to_cpu(node, &key, slot);
13052                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13053                         goto out;
13054                 if (key.type != BTRFS_ROOT_ITEM_KEY)
13055                         goto next;
13056                 if (!is_fstree(key.objectid))
13057                         goto next;
13058                 key.offset = (u64)-1;
13059
13060                 cur_root = btrfs_read_fs_root(fs_info, &key);
13061                 if (IS_ERR(cur_root) || !cur_root) {
13062                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13063                                 key.objectid);
13064                         goto out;
13065                 }
13066                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13067                                 cur_root);
13068                 if (ret < 0)
13069                         goto out;
13070 next:
13071                 ret = btrfs_next_item(tree_root, &path);
13072                 if (ret > 0) {
13073                         ret = 0;
13074                         goto out;
13075                 }
13076                 if (ret < 0)
13077                         goto out;
13078         }
13079
13080 out:
13081         btrfs_release_path(&path);
13082         return ret;
13083 }
13084
13085 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13086                                       struct btrfs_root *csum_root)
13087 {
13088         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13089         struct btrfs_path path;
13090         struct btrfs_extent_item *ei;
13091         struct extent_buffer *leaf;
13092         char *buf;
13093         struct btrfs_key key;
13094         int ret;
13095
13096         btrfs_init_path(&path);
13097         key.objectid = 0;
13098         key.type = BTRFS_EXTENT_ITEM_KEY;
13099         key.offset = 0;
13100         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13101         if (ret < 0) {
13102                 btrfs_release_path(&path);
13103                 return ret;
13104         }
13105
13106         buf = malloc(csum_root->fs_info->sectorsize);
13107         if (!buf) {
13108                 btrfs_release_path(&path);
13109                 return -ENOMEM;
13110         }
13111
13112         while (1) {
13113                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13114                         ret = btrfs_next_leaf(extent_root, &path);
13115                         if (ret < 0)
13116                                 break;
13117                         if (ret) {
13118                                 ret = 0;
13119                                 break;
13120                         }
13121                 }
13122                 leaf = path.nodes[0];
13123
13124                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13125                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13126                         path.slots[0]++;
13127                         continue;
13128                 }
13129
13130                 ei = btrfs_item_ptr(leaf, path.slots[0],
13131                                     struct btrfs_extent_item);
13132                 if (!(btrfs_extent_flags(leaf, ei) &
13133                       BTRFS_EXTENT_FLAG_DATA)) {
13134                         path.slots[0]++;
13135                         continue;
13136                 }
13137
13138                 ret = populate_csum(trans, csum_root, buf, key.objectid,
13139                                     key.offset);
13140                 if (ret)
13141                         break;
13142                 path.slots[0]++;
13143         }
13144
13145         btrfs_release_path(&path);
13146         free(buf);
13147         return ret;
13148 }
13149
/*
 * Recompute data checksums and store them in the csum tree.
 *
 * An extent tree init wipes out all extent info, so in that case the extent
 * tree cannot be used as the source.  A non-zero @search_fs_tree selects the
 * fs/subvolume trees as the source; zero selects the extent tree.
 *
 * Returns whatever the selected helper returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ?
                fill_csum_tree_from_fs(trans, csum_root) :
                fill_csum_tree_from_extent(trans, csum_root);
}
13166
13167 static void free_roots_info_cache(void)
13168 {
13169         if (!roots_info_cache)
13170                 return;
13171
13172         while (!cache_tree_empty(roots_info_cache)) {
13173                 struct cache_extent *entry;
13174                 struct root_item_info *rii;
13175
13176                 entry = first_cache_extent(roots_info_cache);
13177                 if (!entry)
13178                         break;
13179                 remove_cache_extent(roots_info_cache, entry);
13180                 rii = container_of(entry, struct root_item_info, cache_extent);
13181                 free(rii);
13182         }
13183
13184         free(roots_info_cache);
13185         roots_info_cache = NULL;
13186 }
13187
13188 static int build_roots_info_cache(struct btrfs_fs_info *info)
13189 {
13190         int ret = 0;
13191         struct btrfs_key key;
13192         struct extent_buffer *leaf;
13193         struct btrfs_path path;
13194
13195         if (!roots_info_cache) {
13196                 roots_info_cache = malloc(sizeof(*roots_info_cache));
13197                 if (!roots_info_cache)
13198                         return -ENOMEM;
13199                 cache_tree_init(roots_info_cache);
13200         }
13201
13202         btrfs_init_path(&path);
13203         key.objectid = 0;
13204         key.type = BTRFS_EXTENT_ITEM_KEY;
13205         key.offset = 0;
13206         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13207         if (ret < 0)
13208                 goto out;
13209         leaf = path.nodes[0];
13210
13211         while (1) {
13212                 struct btrfs_key found_key;
13213                 struct btrfs_extent_item *ei;
13214                 struct btrfs_extent_inline_ref *iref;
13215                 int slot = path.slots[0];
13216                 int type;
13217                 u64 flags;
13218                 u64 root_id;
13219                 u8 level;
13220                 struct cache_extent *entry;
13221                 struct root_item_info *rii;
13222
13223                 if (slot >= btrfs_header_nritems(leaf)) {
13224                         ret = btrfs_next_leaf(info->extent_root, &path);
13225                         if (ret < 0) {
13226                                 break;
13227                         } else if (ret) {
13228                                 ret = 0;
13229                                 break;
13230                         }
13231                         leaf = path.nodes[0];
13232                         slot = path.slots[0];
13233                 }
13234
13235                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13236
13237                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13238                     found_key.type != BTRFS_METADATA_ITEM_KEY)
13239                         goto next;
13240
13241                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13242                 flags = btrfs_extent_flags(leaf, ei);
13243
13244                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13245                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
13246                         goto next;
13247
13248                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13249                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13250                         level = found_key.offset;
13251                 } else {
13252                         struct btrfs_tree_block_info *binfo;
13253
13254                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
13255                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13256                         level = btrfs_tree_block_level(leaf, binfo);
13257                 }
13258
13259                 /*
13260                  * For a root extent, it must be of the following type and the
13261                  * first (and only one) iref in the item.
13262                  */
13263                 type = btrfs_extent_inline_ref_type(leaf, iref);
13264                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
13265                         goto next;
13266
13267                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13268                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13269                 if (!entry) {
13270                         rii = malloc(sizeof(struct root_item_info));
13271                         if (!rii) {
13272                                 ret = -ENOMEM;
13273                                 goto out;
13274                         }
13275                         rii->cache_extent.start = root_id;
13276                         rii->cache_extent.size = 1;
13277                         rii->level = (u8)-1;
13278                         entry = &rii->cache_extent;
13279                         ret = insert_cache_extent(roots_info_cache, entry);
13280                         ASSERT(ret == 0);
13281                 } else {
13282                         rii = container_of(entry, struct root_item_info,
13283                                            cache_extent);
13284                 }
13285
13286                 ASSERT(rii->cache_extent.start == root_id);
13287                 ASSERT(rii->cache_extent.size == 1);
13288
13289                 if (level > rii->level || rii->level == (u8)-1) {
13290                         rii->level = level;
13291                         rii->bytenr = found_key.objectid;
13292                         rii->gen = btrfs_extent_generation(leaf, ei);
13293                         rii->node_count = 1;
13294                 } else if (level == rii->level) {
13295                         rii->node_count++;
13296                 }
13297 next:
13298                 path.slots[0]++;
13299         }
13300
13301 out:
13302         btrfs_release_path(&path);
13303
13304         return ret;
13305 }
13306
13307 static int maybe_repair_root_item(struct btrfs_path *path,
13308                                   const struct btrfs_key *root_key,
13309                                   const int read_only_mode)
13310 {
13311         const u64 root_id = root_key->objectid;
13312         struct cache_extent *entry;
13313         struct root_item_info *rii;
13314         struct btrfs_root_item ri;
13315         unsigned long offset;
13316
13317         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13318         if (!entry) {
13319                 fprintf(stderr,
13320                         "Error: could not find extent items for root %llu\n",
13321                         root_key->objectid);
13322                 return -ENOENT;
13323         }
13324
13325         rii = container_of(entry, struct root_item_info, cache_extent);
13326         ASSERT(rii->cache_extent.start == root_id);
13327         ASSERT(rii->cache_extent.size == 1);
13328
13329         if (rii->node_count != 1) {
13330                 fprintf(stderr,
13331                         "Error: could not find btree root extent for root %llu\n",
13332                         root_id);
13333                 return -ENOENT;
13334         }
13335
13336         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13337         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
13338
13339         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13340             btrfs_root_level(&ri) != rii->level ||
13341             btrfs_root_generation(&ri) != rii->gen) {
13342
13343                 /*
13344                  * If we're in repair mode but our caller told us to not update
13345                  * the root item, i.e. just check if it needs to be updated, don't
13346                  * print this message, since the caller will call us again shortly
13347                  * for the same root item without read only mode (the caller will
13348                  * open a transaction first).
13349                  */
13350                 if (!(read_only_mode && repair))
13351                         fprintf(stderr,
13352                                 "%sroot item for root %llu,"
13353                                 " current bytenr %llu, current gen %llu, current level %u,"
13354                                 " new bytenr %llu, new gen %llu, new level %u\n",
13355                                 (read_only_mode ? "" : "fixing "),
13356                                 root_id,
13357                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13358                                 btrfs_root_level(&ri),
13359                                 rii->bytenr, rii->gen, rii->level);
13360
13361                 if (btrfs_root_generation(&ri) > rii->gen) {
13362                         fprintf(stderr,
13363                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13364                                 root_id, btrfs_root_generation(&ri), rii->gen);
13365                         return -EINVAL;
13366                 }
13367
13368                 if (!read_only_mode) {
13369                         btrfs_set_root_bytenr(&ri, rii->bytenr);
13370                         btrfs_set_root_level(&ri, rii->level);
13371                         btrfs_set_root_generation(&ri, rii->gen);
13372                         write_extent_buffer(path->nodes[0], &ri,
13373                                             offset, sizeof(ri));
13374                 }
13375
13376                 return 1;
13377         }
13378
13379         return 0;
13380 }
13381
13382 /*
13383  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
13384  * caused read-only snapshots to be corrupted if they were created at a moment
13385  * when the source subvolume/snapshot had orphan items. The issue was that the
13386  * on-disk root items became incorrect, referring to the pre orphan cleanup root
13387  * node instead of the post orphan cleanup root node.
13388  * So this function, and its callees, just detects and fixes those cases. Even
13389  * though the regression was for read-only snapshots, this function applies to
13390  * any snapshot/subvolume root.
13391  * This must be run before any other repair code - not doing it so, makes other
13392  * repair code delete or modify backrefs in the extent tree for example, which
13393  * will result in an inconsistent fs after repairing the root items.
13394  */
13395 static int repair_root_items(struct btrfs_fs_info *info)
13396 {
13397         struct btrfs_path path;
13398         struct btrfs_key key;
13399         struct extent_buffer *leaf;
13400         struct btrfs_trans_handle *trans = NULL;
13401         int ret = 0;
13402         int bad_roots = 0;
13403         int need_trans = 0;
13404
13405         btrfs_init_path(&path);
13406
13407         ret = build_roots_info_cache(info);
13408         if (ret)
13409                 goto out;
13410
13411         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
13412         key.type = BTRFS_ROOT_ITEM_KEY;
13413         key.offset = 0;
13414
13415 again:
13416         /*
13417          * Avoid opening and committing transactions if a leaf doesn't have
13418          * any root items that need to be fixed, so that we avoid rotating
13419          * backup roots unnecessarily.
13420          */
13421         if (need_trans) {
13422                 trans = btrfs_start_transaction(info->tree_root, 1);
13423                 if (IS_ERR(trans)) {
13424                         ret = PTR_ERR(trans);
13425                         goto out;
13426                 }
13427         }
13428
13429         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
13430                                 0, trans ? 1 : 0);
13431         if (ret < 0)
13432                 goto out;
13433         leaf = path.nodes[0];
13434
13435         while (1) {
13436                 struct btrfs_key found_key;
13437
13438                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
13439                         int no_more_keys = find_next_key(&path, &key);
13440
13441                         btrfs_release_path(&path);
13442                         if (trans) {
13443                                 ret = btrfs_commit_transaction(trans,
13444                                                                info->tree_root);
13445                                 trans = NULL;
13446                                 if (ret < 0)
13447                                         goto out;
13448                         }
13449                         need_trans = 0;
13450                         if (no_more_keys)
13451                                 break;
13452                         goto again;
13453                 }
13454
13455                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13456
13457                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
13458                         goto next;
13459                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13460                         goto next;
13461
13462                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
13463                 if (ret < 0)
13464                         goto out;
13465                 if (ret) {
13466                         if (!trans && repair) {
13467                                 need_trans = 1;
13468                                 key = found_key;
13469                                 btrfs_release_path(&path);
13470                                 goto again;
13471                         }
13472                         bad_roots++;
13473                 }
13474 next:
13475                 path.slots[0]++;
13476         }
13477         ret = 0;
13478 out:
13479         free_roots_info_cache();
13480         btrfs_release_path(&path);
13481         if (trans)
13482                 btrfs_commit_transaction(trans, info->tree_root);
13483         if (ret < 0)
13484                 return ret;
13485
13486         return bad_roots;
13487 }
13488
13489 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13490 {
13491         struct btrfs_trans_handle *trans;
13492         struct btrfs_block_group_cache *bg_cache;
13493         u64 current = 0;
13494         int ret = 0;
13495
13496         /* Clear all free space cache inodes and its extent data */
13497         while (1) {
13498                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13499                 if (!bg_cache)
13500                         break;
13501                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
13502                 if (ret < 0)
13503                         return ret;
13504                 current = bg_cache->key.objectid + bg_cache->key.offset;
13505         }
13506
13507         /* Don't forget to set cache_generation to -1 */
13508         trans = btrfs_start_transaction(fs_info->tree_root, 0);
13509         if (IS_ERR(trans)) {
13510                 error("failed to update super block cache generation");
13511                 return PTR_ERR(trans);
13512         }
13513         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
13514         btrfs_commit_transaction(trans, fs_info->tree_root);
13515
13516         return ret;
13517 }
13518
13519 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13520                 int clear_version)
13521 {
13522         int ret = 0;
13523
13524         if (clear_version == 1) {
13525                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13526                         error(
13527                 "free space cache v2 detected, use --clear-space-cache v2");
13528                         ret = 1;
13529                         goto close_out;
13530                 }
13531                 printf("Clearing free space cache\n");
13532                 ret = clear_free_space_cache(fs_info);
13533                 if (ret) {
13534                         error("failed to clear free space cache");
13535                         ret = 1;
13536                 } else {
13537                         printf("Free space cache cleared\n");
13538                 }
13539         } else if (clear_version == 2) {
13540                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13541                         printf("no free space cache v2 to clear\n");
13542                         ret = 0;
13543                         goto close_out;
13544                 }
13545                 printf("Clear free space cache v2\n");
13546                 ret = btrfs_clear_free_space_tree(fs_info);
13547                 if (ret) {
13548                         error("failed to clear free space cache v2: %d", ret);
13549                         ret = 1;
13550                 } else {
13551                         printf("free space cache v2 cleared\n");
13552                 }
13553         }
13554 close_out:
13555         return ret;
13556 }
13557
/*
 * Help text for "btrfs check", one string per output line, terminated by a
 * NULL entry.  The strings are emitted verbatim, so their column alignment
 * is part of the content.
 */
const char * const cmd_check_usage[] = {
        /* Synopsis and short description. */
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",

        /* Long description. */
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",

        /* Options. */
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--force                     skip mount checks, repair is not possible",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",

        /* Terminator. */
        NULL
};
13589
13590 int cmd_check(int argc, char **argv)
13591 {
13592         struct cache_tree root_cache;
13593         struct btrfs_root *root;
13594         struct btrfs_fs_info *info;
13595         u64 bytenr = 0;
13596         u64 subvolid = 0;
13597         u64 tree_root_bytenr = 0;
13598         u64 chunk_root_bytenr = 0;
13599         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
13600         int ret = 0;
13601         int err = 0;
13602         u64 num;
13603         int init_csum_tree = 0;
13604         int readonly = 0;
13605         int clear_space_cache = 0;
13606         int qgroup_report = 0;
13607         int qgroups_repaired = 0;
13608         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
13609         int force = 0;
13610
13611         while(1) {
13612                 int c;
13613                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
13614                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
13615                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
13616                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
13617                         GETOPT_VAL_FORCE };
13618                 static const struct option long_options[] = {
13619                         { "super", required_argument, NULL, 's' },
13620                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
13621                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
13622                         { "init-csum-tree", no_argument, NULL,
13623                                 GETOPT_VAL_INIT_CSUM },
13624                         { "init-extent-tree", no_argument, NULL,
13625                                 GETOPT_VAL_INIT_EXTENT },
13626                         { "check-data-csum", no_argument, NULL,
13627                                 GETOPT_VAL_CHECK_CSUM },
13628                         { "backup", no_argument, NULL, 'b' },
13629                         { "subvol-extents", required_argument, NULL, 'E' },
13630                         { "qgroup-report", no_argument, NULL, 'Q' },
13631                         { "tree-root", required_argument, NULL, 'r' },
13632                         { "chunk-root", required_argument, NULL,
13633                                 GETOPT_VAL_CHUNK_TREE },
13634                         { "progress", no_argument, NULL, 'p' },
13635                         { "mode", required_argument, NULL,
13636                                 GETOPT_VAL_MODE },
13637                         { "clear-space-cache", required_argument, NULL,
13638                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
13639                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
13640                         { NULL, 0, NULL, 0}
13641                 };
13642
13643                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
13644                 if (c < 0)
13645                         break;
13646                 switch(c) {
13647                         case 'a': /* ignored */ break;
13648                         case 'b':
13649                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
13650                                 break;
13651                         case 's':
13652                                 num = arg_strtou64(optarg);
13653                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
13654                                         error(
13655                                         "super mirror should be less than %d",
13656                                                 BTRFS_SUPER_MIRROR_MAX);
13657                                         exit(1);
13658                                 }
13659                                 bytenr = btrfs_sb_offset(((int)num));
13660                                 printf("using SB copy %llu, bytenr %llu\n", num,
13661                                        (unsigned long long)bytenr);
13662                                 break;
13663                         case 'Q':
13664                                 qgroup_report = 1;
13665                                 break;
13666                         case 'E':
13667                                 subvolid = arg_strtou64(optarg);
13668                                 break;
13669                         case 'r':
13670                                 tree_root_bytenr = arg_strtou64(optarg);
13671                                 break;
13672                         case GETOPT_VAL_CHUNK_TREE:
13673                                 chunk_root_bytenr = arg_strtou64(optarg);
13674                                 break;
13675                         case 'p':
13676                                 ctx.progress_enabled = true;
13677                                 break;
13678                         case '?':
13679                         case 'h':
13680                                 usage(cmd_check_usage);
13681                         case GETOPT_VAL_REPAIR:
13682                                 printf("enabling repair mode\n");
13683                                 repair = 1;
13684                                 ctree_flags |= OPEN_CTREE_WRITES;
13685                                 break;
13686                         case GETOPT_VAL_READONLY:
13687                                 readonly = 1;
13688                                 break;
13689                         case GETOPT_VAL_INIT_CSUM:
13690                                 printf("Creating a new CRC tree\n");
13691                                 init_csum_tree = 1;
13692                                 repair = 1;
13693                                 ctree_flags |= OPEN_CTREE_WRITES;
13694                                 break;
13695                         case GETOPT_VAL_INIT_EXTENT:
13696                                 init_extent_tree = 1;
13697                                 ctree_flags |= (OPEN_CTREE_WRITES |
13698                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
13699                                 repair = 1;
13700                                 break;
13701                         case GETOPT_VAL_CHECK_CSUM:
13702                                 check_data_csum = 1;
13703                                 break;
13704                         case GETOPT_VAL_MODE:
13705                                 check_mode = parse_check_mode(optarg);
13706                                 if (check_mode == CHECK_MODE_UNKNOWN) {
13707                                         error("unknown mode: %s", optarg);
13708                                         exit(1);
13709                                 }
13710                                 break;
13711                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
13712                                 if (strcmp(optarg, "v1") == 0) {
13713                                         clear_space_cache = 1;
13714                                 } else if (strcmp(optarg, "v2") == 0) {
13715                                         clear_space_cache = 2;
13716                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13717                                 } else {
13718                                         error(
13719                 "invalid argument to --clear-space-cache, must be v1 or v2");
13720                                         exit(1);
13721                                 }
13722                                 ctree_flags |= OPEN_CTREE_WRITES;
13723                                 break;
13724                         case GETOPT_VAL_FORCE:
13725                                 force = 1;
13726                                 break;
13727                 }
13728         }
13729
13730         if (check_argc_exact(argc - optind, 1))
13731                 usage(cmd_check_usage);
13732
13733         if (ctx.progress_enabled) {
13734                 ctx.tp = TASK_NOTHING;
13735                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13736         }
13737
13738         /* This check is the only reason for --readonly to exist */
13739         if (readonly && repair) {
13740                 error("repair options are not compatible with --readonly");
13741                 exit(1);
13742         }
13743
13744         /*
13745          * experimental and dangerous
13746          */
13747         if (repair && check_mode == CHECK_MODE_LOWMEM)
13748                 warning("low-memory mode repair support is only partial");
13749
13750         radix_tree_init();
13751         cache_tree_init(&root_cache);
13752
13753         ret = check_mounted(argv[optind]);
13754         if (!force) {
13755                 if (ret < 0) {
13756                         error("could not check mount status: %s",
13757                                         strerror(-ret));
13758                         err |= !!ret;
13759                         goto err_out;
13760                 } else if (ret) {
13761                         error(
13762 "%s is currently mounted, use --force if you really intend to check the filesystem",
13763                                 argv[optind]);
13764                         ret = -EBUSY;
13765                         err |= !!ret;
13766                         goto err_out;
13767                 }
13768         } else {
13769                 if (repair) {
13770                         error("repair and --force is not yet supported");
13771                         ret = 1;
13772                         err |= !!ret;
13773                         goto err_out;
13774                 }
13775                 if (ret < 0) {
13776                         warning(
13777 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
13778                                 argv[optind]);
13779                 } else if (ret) {
13780                         warning(
13781                         "filesystem mounted, continuing because of --force");
13782                 }
13783                 /* A block device is mounted in exclusive mode by kernel */
13784                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
13785         }
13786
13787         /* only allow partial opening under repair mode */
13788         if (repair)
13789                 ctree_flags |= OPEN_CTREE_PARTIAL;
13790
13791         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
13792                                   chunk_root_bytenr, ctree_flags);
13793         if (!info) {
13794                 error("cannot open file system");
13795                 ret = -EIO;
13796                 err |= !!ret;
13797                 goto err_out;
13798         }
13799
13800         global_info = info;
13801         root = info->fs_root;
13802         uuid_unparse(info->super_copy->fsid, uuidbuf);
13803
13804         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
13805
13806         /*
13807          * Check the bare minimum before starting anything else that could rely
13808          * on it, namely the tree roots, any local consistency checks
13809          */
13810         if (!extent_buffer_uptodate(info->tree_root->node) ||
13811             !extent_buffer_uptodate(info->dev_root->node) ||
13812             !extent_buffer_uptodate(info->chunk_root->node)) {
13813                 error("critical roots corrupted, unable to check the filesystem");
13814                 err |= !!ret;
13815                 ret = -EIO;
13816                 goto close_out;
13817         }
13818
13819         if (clear_space_cache) {
13820                 ret = do_clear_free_space_cache(info, clear_space_cache);
13821                 err |= !!ret;
13822                 goto close_out;
13823         }
13824
13825         /*
13826          * repair mode will force us to commit transaction which
13827          * will make us fail to load log tree when mounting.
13828          */
13829         if (repair && btrfs_super_log_root(info->super_copy)) {
13830                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
13831                 if (!ret) {
13832                         ret = 1;
13833                         err |= !!ret;
13834                         goto close_out;
13835                 }
13836                 ret = zero_log_tree(root);
13837                 err |= !!ret;
13838                 if (ret) {
13839                         error("failed to zero log tree: %d", ret);
13840                         goto close_out;
13841                 }
13842         }
13843
13844         if (qgroup_report) {
13845                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
13846                        uuidbuf);
13847                 ret = qgroup_verify_all(info);
13848                 err |= !!ret;
13849                 if (ret == 0)
13850                         report_qgroups(1);
13851                 goto close_out;
13852         }
13853         if (subvolid) {
13854                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
13855                        subvolid, argv[optind], uuidbuf);
13856                 ret = print_extent_state(info, subvolid);
13857                 err |= !!ret;
13858                 goto close_out;
13859         }
13860
13861         if (init_extent_tree || init_csum_tree) {
13862                 struct btrfs_trans_handle *trans;
13863
13864                 trans = btrfs_start_transaction(info->extent_root, 0);
13865                 if (IS_ERR(trans)) {
13866                         error("error starting transaction");
13867                         ret = PTR_ERR(trans);
13868                         err |= !!ret;
13869                         goto close_out;
13870                 }
13871
13872                 if (init_extent_tree) {
13873                         printf("Creating a new extent tree\n");
13874                         ret = reinit_extent_tree(trans, info);
13875                         err |= !!ret;
13876                         if (ret)
13877                                 goto close_out;
13878                 }
13879
13880                 if (init_csum_tree) {
13881                         printf("Reinitialize checksum tree\n");
13882                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13883                         if (ret) {
13884                                 error("checksum tree initialization failed: %d",
13885                                                 ret);
13886                                 ret = -EIO;
13887                                 err |= !!ret;
13888                                 goto close_out;
13889                         }
13890
13891                         ret = fill_csum_tree(trans, info->csum_root,
13892                                              init_extent_tree);
13893                         err |= !!ret;
13894                         if (ret) {
13895                                 error("checksum tree refilling failed: %d", ret);
13896                                 return -EIO;
13897                         }
13898                 }
13899                 /*
13900                  * Ok now we commit and run the normal fsck, which will add
13901                  * extent entries for all of the items it finds.
13902                  */
13903                 ret = btrfs_commit_transaction(trans, info->extent_root);
13904                 err |= !!ret;
13905                 if (ret)
13906                         goto close_out;
13907         }
13908         if (!extent_buffer_uptodate(info->extent_root->node)) {
13909                 error("critical: extent_root, unable to check the filesystem");
13910                 ret = -EIO;
13911                 err |= !!ret;
13912                 goto close_out;
13913         }
13914         if (!extent_buffer_uptodate(info->csum_root->node)) {
13915                 error("critical: csum_root, unable to check the filesystem");
13916                 ret = -EIO;
13917                 err |= !!ret;
13918                 goto close_out;
13919         }
13920
13921         ret = do_check_chunks_and_extents(info);
13922         err |= !!ret;
13923         if (ret)
13924                 error(
13925                 "errors found in extent allocation tree or chunk allocation");
13926
13927         ret = repair_root_items(info);
13928         err |= !!ret;
13929         if (ret < 0) {
13930                 error("failed to repair root items: %s", strerror(-ret));
13931                 goto close_out;
13932         }
13933         if (repair) {
13934                 fprintf(stderr, "Fixed %d roots.\n", ret);
13935                 ret = 0;
13936         } else if (ret > 0) {
13937                 fprintf(stderr,
13938                        "Found %d roots with an outdated root item.\n",
13939                        ret);
13940                 fprintf(stderr,
13941                         "Please run a filesystem check with the option --repair to fix them.\n");
13942                 ret = 1;
13943                 err |= !!ret;
13944                 goto close_out;
13945         }
13946
13947         if (!ctx.progress_enabled) {
13948                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13949                         fprintf(stderr, "checking free space tree\n");
13950                 else
13951                         fprintf(stderr, "checking free space cache\n");
13952         }
13953         ret = check_space_cache(root);
13954         err |= !!ret;
13955         if (ret) {
13956                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13957                         error("errors found in free space tree");
13958                 else
13959                         error("errors found in free space cache");
13960                 goto out;
13961         }
13962
13963         /*
13964          * We used to have to have these hole extents in between our real
13965          * extents so if we don't have this flag set we need to make sure there
13966          * are no gaps in the file extents for inodes, otherwise we can just
13967          * ignore it when this happens.
13968          */
13969         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13970         ret = do_check_fs_roots(info, &root_cache);
13971         err |= !!ret;
13972         if (ret) {
13973                 error("errors found in fs roots");
13974                 goto out;
13975         }
13976
13977         fprintf(stderr, "checking csums\n");
13978         ret = check_csums(root);
13979         err |= !!ret;
13980         if (ret) {
13981                 error("errors found in csum tree");
13982                 goto out;
13983         }
13984
13985         fprintf(stderr, "checking root refs\n");
13986         /* For low memory mode, check_fs_roots_v2 handles root refs */
13987         if (check_mode != CHECK_MODE_LOWMEM) {
13988                 ret = check_root_refs(root, &root_cache);
13989                 err |= !!ret;
13990                 if (ret) {
13991                         error("errors found in root refs");
13992                         goto out;
13993                 }
13994         }
13995
13996         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13997                 struct extent_buffer *eb;
13998
13999                 eb = list_first_entry(&root->fs_info->recow_ebs,
14000                                       struct extent_buffer, recow);
14001                 list_del_init(&eb->recow);
14002                 ret = recow_extent_buffer(root, eb);
14003                 err |= !!ret;
14004                 if (ret) {
14005                         error("fails to fix transid errors");
14006                         break;
14007                 }
14008         }
14009
14010         while (!list_empty(&delete_items)) {
14011                 struct bad_item *bad;
14012
14013                 bad = list_first_entry(&delete_items, struct bad_item, list);
14014                 list_del_init(&bad->list);
14015                 if (repair) {
14016                         ret = delete_bad_item(root, bad);
14017                         err |= !!ret;
14018                 }
14019                 free(bad);
14020         }
14021
14022         if (info->quota_enabled) {
14023                 fprintf(stderr, "checking quota groups\n");
14024                 ret = qgroup_verify_all(info);
14025                 err |= !!ret;
14026                 if (ret) {
14027                         error("failed to check quota groups");
14028                         goto out;
14029                 }
14030                 report_qgroups(0);
14031                 ret = repair_qgroups(info, &qgroups_repaired);
14032                 err |= !!ret;
14033                 if (err) {
14034                         error("failed to repair quota groups");
14035                         goto out;
14036                 }
14037                 ret = 0;
14038         }
14039
14040         if (!list_empty(&root->fs_info->recow_ebs)) {
14041                 error("transid errors in file system");
14042                 ret = 1;
14043                 err |= !!ret;
14044         }
14045 out:
14046         printf("found %llu bytes used, ",
14047                (unsigned long long)bytes_used);
14048         if (err)
14049                 printf("error(s) found\n");
14050         else
14051                 printf("no error found\n");
14052         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14053         printf("total tree bytes: %llu\n",
14054                (unsigned long long)total_btree_bytes);
14055         printf("total fs tree bytes: %llu\n",
14056                (unsigned long long)total_fs_tree_bytes);
14057         printf("total extent tree bytes: %llu\n",
14058                (unsigned long long)total_extent_tree_bytes);
14059         printf("btree space waste bytes: %llu\n",
14060                (unsigned long long)btree_space_waste);
14061         printf("file data blocks allocated: %llu\n referenced %llu\n",
14062                 (unsigned long long)data_bytes_allocated,
14063                 (unsigned long long)data_bytes_referenced);
14064
14065         free_qgroup_counts();
14066         free_root_recs_tree(&root_cache);
14067 close_out:
14068         close_ctree(root);
14069 err_out:
14070         if (ctx.progress_enabled)
14071                 task_deinit(ctx.info);
14072
14073         return err;
14074 }