btrfs-progs: return errors from get_inode_rec, fail in callers
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "btrfsck.h"
39 #include "qgroup-verify.h"
40 #include "rbtree-utils.h"
41 #include "backref.h"
42 #include "ulist.h"
43
44 enum task_position {
45         TASK_EXTENTS,
46         TASK_FREE_SPACE,
47         TASK_FS_ROOTS,
48         TASK_NOTHING, /* have to be the last element */
49 };
50
51 struct task_ctx {
52         int progress_enabled;
53         enum task_position tp;
54
55         struct task_info *info;
56 };
57
58 static u64 bytes_used = 0;
59 static u64 total_csum_bytes = 0;
60 static u64 total_btree_bytes = 0;
61 static u64 total_fs_tree_bytes = 0;
62 static u64 total_extent_tree_bytes = 0;
63 static u64 btree_space_waste = 0;
64 static u64 data_bytes_allocated = 0;
65 static u64 data_bytes_referenced = 0;
66 static int found_old_backref = 0;
67 static LIST_HEAD(duplicate_extents);
68 static LIST_HEAD(delete_items);
69 static int repair = 0;
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75
76 static void *print_status_check(void *p)
77 {
78         struct task_ctx *priv = p;
79         const char work_indicator[] = { '.', 'o', 'O', 'o' };
80         uint32_t count = 0;
81         static char *task_position_string[] = {
82                 "checking extents",
83                 "checking free space cache",
84                 "checking fs roots",
85         };
86
87         task_period_start(priv->info, 1000 /* 1s */);
88
89         if (priv->tp == TASK_NOTHING)
90                 return NULL;
91
92         while (1) {
93                 printf("%s [%c]\r", task_position_string[priv->tp],
94                                 work_indicator[count % 4]);
95                 count++;
96                 fflush(stdout);
97                 task_period_wait(priv->info);
98         }
99         return NULL;
100 }
101
102 static int print_status_return(void *p)
103 {
104         printf("\n");
105         fflush(stdout);
106
107         return 0;
108 }
109
110 struct extent_backref {
111         struct list_head list;
112         unsigned int is_data:1;
113         unsigned int found_extent_tree:1;
114         unsigned int full_backref:1;
115         unsigned int found_ref:1;
116         unsigned int broken:1;
117 };
118
119 struct data_backref {
120         struct extent_backref node;
121         union {
122                 u64 parent;
123                 u64 root;
124         };
125         u64 owner;
126         u64 offset;
127         u64 disk_bytenr;
128         u64 bytes;
129         u64 ram_bytes;
130         u32 num_refs;
131         u32 found_ref;
132 };
133
134 /*
135  * Much like data_backref, just removed the undetermined members
136  * and change it to use list_head.
137  * During extent scan, it is stored in root->orphan_data_extent.
138  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
139  */
140 struct orphan_data_extent {
141         struct list_head list;
142         u64 root;
143         u64 objectid;
144         u64 offset;
145         u64 disk_bytenr;
146         u64 disk_len;
147 };
148
149 struct tree_backref {
150         struct extent_backref node;
151         union {
152                 u64 parent;
153                 u64 root;
154         };
155 };
156
157 struct extent_record {
158         struct list_head backrefs;
159         struct list_head dups;
160         struct list_head list;
161         struct cache_extent cache;
162         struct btrfs_disk_key parent_key;
163         u64 start;
164         u64 max_size;
165         u64 nr;
166         u64 refs;
167         u64 extent_item_refs;
168         u64 generation;
169         u64 parent_generation;
170         u64 info_objectid;
171         u32 num_duplicates;
172         u8 info_level;
173         int flag_block_full_backref;
174         unsigned int found_rec:1;
175         unsigned int content_checked:1;
176         unsigned int owner_ref_checked:1;
177         unsigned int is_root:1;
178         unsigned int metadata:1;
179         unsigned int bad_full_backref:1;
180         unsigned int crossing_stripes:1;
181         unsigned int wrong_chunk_type:1;
182 };
183
184 struct inode_backref {
185         struct list_head list;
186         unsigned int found_dir_item:1;
187         unsigned int found_dir_index:1;
188         unsigned int found_inode_ref:1;
189         unsigned int filetype:8;
190         int errors;
191         unsigned int ref_type;
192         u64 dir;
193         u64 index;
194         u16 namelen;
195         char name[0];
196 };
197
198 struct root_item_record {
199         struct list_head list;
200         u64 objectid;
201         u64 bytenr;
202         u64 last_snapshot;
203         u8 level;
204         u8 drop_level;
205         int level_size;
206         struct btrfs_key drop_key;
207 };
208
209 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
210 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
211 #define REF_ERR_NO_INODE_REF            (1 << 2)
212 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
213 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
214 #define REF_ERR_DUP_INODE_REF           (1 << 5)
215 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
216 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
217 #define REF_ERR_NAME_TOO_LONG           (1 << 8) // 100
218 #define REF_ERR_NO_ROOT_REF             (1 << 9)
219 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
220 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
221 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
222
223 struct file_extent_hole {
224         struct rb_node node;
225         u64 start;
226         u64 len;
227 };
228
229 /* Compatible function to allow reuse of old codes */
230 static u64 first_extent_gap(struct rb_root *holes)
231 {
232         struct file_extent_hole *hole;
233
234         if (RB_EMPTY_ROOT(holes))
235                 return (u64)-1;
236
237         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
238         return hole->start;
239 }
240
241 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
242 {
243         struct file_extent_hole *hole1;
244         struct file_extent_hole *hole2;
245
246         hole1 = rb_entry(node1, struct file_extent_hole, node);
247         hole2 = rb_entry(node2, struct file_extent_hole, node);
248
249         if (hole1->start > hole2->start)
250                 return -1;
251         if (hole1->start < hole2->start)
252                 return 1;
253         /* Now hole1->start == hole2->start */
254         if (hole1->len >= hole2->len)
255                 /*
256                  * Hole 1 will be merge center
257                  * Same hole will be merged later
258                  */
259                 return -1;
260         /* Hole 2 will be merge center */
261         return 1;
262 }
263
264 /*
265  * Add a hole to the record
266  *
267  * This will do hole merge for copy_file_extent_holes(),
268  * which will ensure there won't be continuous holes.
269  */
270 static int add_file_extent_hole(struct rb_root *holes,
271                                 u64 start, u64 len)
272 {
273         struct file_extent_hole *hole;
274         struct file_extent_hole *prev = NULL;
275         struct file_extent_hole *next = NULL;
276
277         hole = malloc(sizeof(*hole));
278         if (!hole)
279                 return -ENOMEM;
280         hole->start = start;
281         hole->len = len;
282         /* Since compare will not return 0, no -EEXIST will happen */
283         rb_insert(holes, &hole->node, compare_hole);
284
285         /* simple merge with previous hole */
286         if (rb_prev(&hole->node))
287                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
288                                 node);
289         if (prev && prev->start + prev->len >= hole->start) {
290                 hole->len = hole->start + hole->len - prev->start;
291                 hole->start = prev->start;
292                 rb_erase(&prev->node, holes);
293                 free(prev);
294                 prev = NULL;
295         }
296
297         /* iterate merge with next holes */
298         while (1) {
299                 if (!rb_next(&hole->node))
300                         break;
301                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
302                                         node);
303                 if (hole->start + hole->len >= next->start) {
304                         if (hole->start + hole->len <= next->start + next->len)
305                                 hole->len = next->start + next->len -
306                                             hole->start;
307                         rb_erase(&next->node, holes);
308                         free(next);
309                         next = NULL;
310                 } else
311                         break;
312         }
313         return 0;
314 }
315
316 static int compare_hole_range(struct rb_node *node, void *data)
317 {
318         struct file_extent_hole *hole;
319         u64 start;
320
321         hole = (struct file_extent_hole *)data;
322         start = hole->start;
323
324         hole = rb_entry(node, struct file_extent_hole, node);
325         if (start < hole->start)
326                 return -1;
327         if (start >= hole->start && start < hole->start + hole->len)
328                 return 0;
329         return 1;
330 }
331
332 /*
333  * Delete a hole in the record
334  *
335  * This will do the hole split and is much restrict than add.
336  */
337 static int del_file_extent_hole(struct rb_root *holes,
338                                 u64 start, u64 len)
339 {
340         struct file_extent_hole *hole;
341         struct file_extent_hole tmp;
342         u64 prev_start = 0;
343         u64 prev_len = 0;
344         u64 next_start = 0;
345         u64 next_len = 0;
346         struct rb_node *node;
347         int have_prev = 0;
348         int have_next = 0;
349         int ret = 0;
350
351         tmp.start = start;
352         tmp.len = len;
353         node = rb_search(holes, &tmp, compare_hole_range, NULL);
354         if (!node)
355                 return -EEXIST;
356         hole = rb_entry(node, struct file_extent_hole, node);
357         if (start + len > hole->start + hole->len)
358                 return -EEXIST;
359
360         /*
361          * Now there will be no overflap, delete the hole and re-add the
362          * split(s) if they exists.
363          */
364         if (start > hole->start) {
365                 prev_start = hole->start;
366                 prev_len = start - hole->start;
367                 have_prev = 1;
368         }
369         if (hole->start + hole->len > start + len) {
370                 next_start = start + len;
371                 next_len = hole->start + hole->len - start - len;
372                 have_next = 1;
373         }
374         rb_erase(node, holes);
375         free(hole);
376         if (have_prev) {
377                 ret = add_file_extent_hole(holes, prev_start, prev_len);
378                 if (ret < 0)
379                         return ret;
380         }
381         if (have_next) {
382                 ret = add_file_extent_hole(holes, next_start, next_len);
383                 if (ret < 0)
384                         return ret;
385         }
386         return 0;
387 }
388
389 static int copy_file_extent_holes(struct rb_root *dst,
390                                   struct rb_root *src)
391 {
392         struct file_extent_hole *hole;
393         struct rb_node *node;
394         int ret = 0;
395
396         node = rb_first(src);
397         while (node) {
398                 hole = rb_entry(node, struct file_extent_hole, node);
399                 ret = add_file_extent_hole(dst, hole->start, hole->len);
400                 if (ret)
401                         break;
402                 node = rb_next(node);
403         }
404         return ret;
405 }
406
407 static void free_file_extent_holes(struct rb_root *holes)
408 {
409         struct rb_node *node;
410         struct file_extent_hole *hole;
411
412         node = rb_first(holes);
413         while (node) {
414                 hole = rb_entry(node, struct file_extent_hole, node);
415                 rb_erase(node, holes);
416                 free(hole);
417                 node = rb_first(holes);
418         }
419 }
420
421 struct inode_record {
422         struct list_head backrefs;
423         unsigned int checked:1;
424         unsigned int merging:1;
425         unsigned int found_inode_item:1;
426         unsigned int found_dir_item:1;
427         unsigned int found_file_extent:1;
428         unsigned int found_csum_item:1;
429         unsigned int some_csum_missing:1;
430         unsigned int nodatasum:1;
431         int errors;
432
433         u64 ino;
434         u32 nlink;
435         u32 imode;
436         u64 isize;
437         u64 nbytes;
438
439         u32 found_link;
440         u64 found_size;
441         u64 extent_start;
442         u64 extent_end;
443         struct rb_root holes;
444         struct list_head orphan_extents;
445
446         u32 refs;
447 };
448
449 #define I_ERR_NO_INODE_ITEM             (1 << 0)
450 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
451 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
452 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
453 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
454 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
455 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
456 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
457 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) // 100
458 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
459 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) // 400
460 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
461 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
462 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
463 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
464
465 struct root_backref {
466         struct list_head list;
467         unsigned int found_dir_item:1;
468         unsigned int found_dir_index:1;
469         unsigned int found_back_ref:1;
470         unsigned int found_forward_ref:1;
471         unsigned int reachable:1;
472         int errors;
473         u64 ref_root;
474         u64 dir;
475         u64 index;
476         u16 namelen;
477         char name[0];
478 };
479
480 struct root_record {
481         struct list_head backrefs;
482         struct cache_extent cache;
483         unsigned int found_root_item:1;
484         u64 objectid;
485         u32 found_ref;
486 };
487
488 struct ptr_node {
489         struct cache_extent cache;
490         void *data;
491 };
492
493 struct shared_node {
494         struct cache_extent cache;
495         struct cache_tree root_cache;
496         struct cache_tree inode_cache;
497         struct inode_record *current;
498         u32 refs;
499 };
500
501 struct block_info {
502         u64 start;
503         u32 size;
504 };
505
506 struct walk_control {
507         struct cache_tree shared;
508         struct shared_node *nodes[BTRFS_MAX_LEVEL];
509         int active_node;
510         int root_level;
511 };
512
513 struct bad_item {
514         struct btrfs_key key;
515         u64 root_id;
516         struct list_head list;
517 };
518
519 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
520
521 static void record_root_in_trans(struct btrfs_trans_handle *trans,
522                                  struct btrfs_root *root)
523 {
524         if (root->last_trans != trans->transid) {
525                 root->track_dirty = 1;
526                 root->last_trans = trans->transid;
527                 root->commit_root = root->node;
528                 extent_buffer_get(root->node);
529         }
530 }
531
532 static u8 imode_to_type(u32 imode)
533 {
534 #define S_SHIFT 12
535         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
536                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
537                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
538                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
539                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
540                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
541                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
542                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
543         };
544
545         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
546 #undef S_SHIFT
547 }
548
549 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
550 {
551         struct device_record *rec1;
552         struct device_record *rec2;
553
554         rec1 = rb_entry(node1, struct device_record, node);
555         rec2 = rb_entry(node2, struct device_record, node);
556         if (rec1->devid > rec2->devid)
557                 return -1;
558         else if (rec1->devid < rec2->devid)
559                 return 1;
560         else
561                 return 0;
562 }
563
564 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
565 {
566         struct inode_record *rec;
567         struct inode_backref *backref;
568         struct inode_backref *orig;
569         struct orphan_data_extent *src_orphan;
570         struct orphan_data_extent *dst_orphan;
571         size_t size;
572         int ret;
573
574         rec = malloc(sizeof(*rec));
575         memcpy(rec, orig_rec, sizeof(*rec));
576         rec->refs = 1;
577         INIT_LIST_HEAD(&rec->backrefs);
578         INIT_LIST_HEAD(&rec->orphan_extents);
579         rec->holes = RB_ROOT;
580
581         list_for_each_entry(orig, &orig_rec->backrefs, list) {
582                 size = sizeof(*orig) + orig->namelen + 1;
583                 backref = malloc(size);
584                 memcpy(backref, orig, size);
585                 list_add_tail(&backref->list, &rec->backrefs);
586         }
587         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
588                 dst_orphan = malloc(sizeof(*dst_orphan));
589                 /* TODO: Fix all the HELL of un-catched -ENOMEM case */
590                 BUG_ON(!dst_orphan);
591                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
592                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
593         }
594         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
595         BUG_ON(ret < 0);
596
597         return rec;
598 }
599
600 static void print_orphan_data_extents(struct list_head *orphan_extents,
601                                       u64 objectid)
602 {
603         struct orphan_data_extent *orphan;
604
605         if (list_empty(orphan_extents))
606                 return;
607         printf("The following data extent is lost in tree %llu:\n",
608                objectid);
609         list_for_each_entry(orphan, orphan_extents, list) {
610                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
611                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
612                        orphan->disk_len);
613         }
614 }
615
616 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
617 {
618         u64 root_objectid = root->root_key.objectid;
619         int errors = rec->errors;
620
621         if (!errors)
622                 return;
623         /* reloc root errors, we print its corresponding fs root objectid*/
624         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
625                 root_objectid = root->root_key.offset;
626                 fprintf(stderr, "reloc");
627         }
628         fprintf(stderr, "root %llu inode %llu errors %x",
629                 (unsigned long long) root_objectid,
630                 (unsigned long long) rec->ino, rec->errors);
631
632         if (errors & I_ERR_NO_INODE_ITEM)
633                 fprintf(stderr, ", no inode item");
634         if (errors & I_ERR_NO_ORPHAN_ITEM)
635                 fprintf(stderr, ", no orphan item");
636         if (errors & I_ERR_DUP_INODE_ITEM)
637                 fprintf(stderr, ", dup inode item");
638         if (errors & I_ERR_DUP_DIR_INDEX)
639                 fprintf(stderr, ", dup dir index");
640         if (errors & I_ERR_ODD_DIR_ITEM)
641                 fprintf(stderr, ", odd dir item");
642         if (errors & I_ERR_ODD_FILE_EXTENT)
643                 fprintf(stderr, ", odd file extent");
644         if (errors & I_ERR_BAD_FILE_EXTENT)
645                 fprintf(stderr, ", bad file extent");
646         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
647                 fprintf(stderr, ", file extent overlap");
648         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
649                 fprintf(stderr, ", file extent discount");
650         if (errors & I_ERR_DIR_ISIZE_WRONG)
651                 fprintf(stderr, ", dir isize wrong");
652         if (errors & I_ERR_FILE_NBYTES_WRONG)
653                 fprintf(stderr, ", nbytes wrong");
654         if (errors & I_ERR_ODD_CSUM_ITEM)
655                 fprintf(stderr, ", odd csum item");
656         if (errors & I_ERR_SOME_CSUM_MISSING)
657                 fprintf(stderr, ", some csum missing");
658         if (errors & I_ERR_LINK_COUNT_WRONG)
659                 fprintf(stderr, ", link count wrong");
660         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
661                 fprintf(stderr, ", orphan file extent");
662         fprintf(stderr, "\n");
663         /* Print the orphan extents if needed */
664         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
665                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
666
667         /* Print the holes if needed */
668         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
669                 struct file_extent_hole *hole;
670                 struct rb_node *node;
671                 int found = 0;
672
673                 node = rb_first(&rec->holes);
674                 fprintf(stderr, "Found file extent holes:\n");
675                 while (node) {
676                         found = 1;
677                         hole = rb_entry(node, struct file_extent_hole, node);
678                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
679                                 hole->start, hole->len);
680                         node = rb_next(node);
681                 }
682                 if (!found)
683                         fprintf(stderr, "\tstart: 0, len: %llu\n",
684                                 round_up(rec->isize, root->sectorsize));
685         }
686 }
687
688 static void print_ref_error(int errors)
689 {
690         if (errors & REF_ERR_NO_DIR_ITEM)
691                 fprintf(stderr, ", no dir item");
692         if (errors & REF_ERR_NO_DIR_INDEX)
693                 fprintf(stderr, ", no dir index");
694         if (errors & REF_ERR_NO_INODE_REF)
695                 fprintf(stderr, ", no inode ref");
696         if (errors & REF_ERR_DUP_DIR_ITEM)
697                 fprintf(stderr, ", dup dir item");
698         if (errors & REF_ERR_DUP_DIR_INDEX)
699                 fprintf(stderr, ", dup dir index");
700         if (errors & REF_ERR_DUP_INODE_REF)
701                 fprintf(stderr, ", dup inode ref");
702         if (errors & REF_ERR_INDEX_UNMATCH)
703                 fprintf(stderr, ", index unmatch");
704         if (errors & REF_ERR_FILETYPE_UNMATCH)
705                 fprintf(stderr, ", filetype unmatch");
706         if (errors & REF_ERR_NAME_TOO_LONG)
707                 fprintf(stderr, ", name too long");
708         if (errors & REF_ERR_NO_ROOT_REF)
709                 fprintf(stderr, ", no root ref");
710         if (errors & REF_ERR_NO_ROOT_BACKREF)
711                 fprintf(stderr, ", no root backref");
712         if (errors & REF_ERR_DUP_ROOT_REF)
713                 fprintf(stderr, ", dup root ref");
714         if (errors & REF_ERR_DUP_ROOT_BACKREF)
715                 fprintf(stderr, ", dup root backref");
716         fprintf(stderr, "\n");
717 }
718
719 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
720                                           u64 ino, int mod)
721 {
722         struct ptr_node *node;
723         struct cache_extent *cache;
724         struct inode_record *rec = NULL;
725         int ret;
726
727         cache = lookup_cache_extent(inode_cache, ino, 1);
728         if (cache) {
729                 node = container_of(cache, struct ptr_node, cache);
730                 rec = node->data;
731                 if (mod && rec->refs > 1) {
732                         node->data = clone_inode_rec(rec);
733                         rec->refs--;
734                         rec = node->data;
735                 }
736         } else if (mod) {
737                 rec = calloc(1, sizeof(*rec));
738                 if (!rec)
739                         return ERR_PTR(-ENOMEM);
740                 rec->ino = ino;
741                 rec->extent_start = (u64)-1;
742                 rec->refs = 1;
743                 INIT_LIST_HEAD(&rec->backrefs);
744                 INIT_LIST_HEAD(&rec->orphan_extents);
745                 rec->holes = RB_ROOT;
746
747                 node = malloc(sizeof(*node));
748                 if (!node) {
749                         free(rec);
750                         return ERR_PTR(-ENOMEM);
751                 }
752                 node->cache.start = ino;
753                 node->cache.size = 1;
754                 node->data = rec;
755
756                 if (ino == BTRFS_FREE_INO_OBJECTID)
757                         rec->found_link = 1;
758
759                 ret = insert_cache_extent(inode_cache, &node->cache);
760                 if (ret)
761                         return ERR_PTR(-EEXIST);
762         }
763         return rec;
764 }
765
766 static void free_orphan_data_extents(struct list_head *orphan_extents)
767 {
768         struct orphan_data_extent *orphan;
769
770         while (!list_empty(orphan_extents)) {
771                 orphan = list_entry(orphan_extents->next,
772                                     struct orphan_data_extent, list);
773                 list_del(&orphan->list);
774                 free(orphan);
775         }
776 }
777
778 static void free_inode_rec(struct inode_record *rec)
779 {
780         struct inode_backref *backref;
781
782         if (--rec->refs > 0)
783                 return;
784
785         while (!list_empty(&rec->backrefs)) {
786                 backref = list_entry(rec->backrefs.next,
787                                      struct inode_backref, list);
788                 list_del(&backref->list);
789                 free(backref);
790         }
791         free_orphan_data_extents(&rec->orphan_extents);
792         free_file_extent_holes(&rec->holes);
793         free(rec);
794 }
795
796 static int can_free_inode_rec(struct inode_record *rec)
797 {
798         if (!rec->errors && rec->checked && rec->found_inode_item &&
799             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
800                 return 1;
801         return 0;
802 }
803
804 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
805                                  struct inode_record *rec)
806 {
807         struct cache_extent *cache;
808         struct inode_backref *tmp, *backref;
809         struct ptr_node *node;
810         unsigned char filetype;
811
812         if (!rec->found_inode_item)
813                 return;
814
815         filetype = imode_to_type(rec->imode);
816         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
817                 if (backref->found_dir_item && backref->found_dir_index) {
818                         if (backref->filetype != filetype)
819                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
820                         if (!backref->errors && backref->found_inode_ref &&
821                             rec->nlink == rec->found_link) {
822                                 list_del(&backref->list);
823                                 free(backref);
824                         }
825                 }
826         }
827
828         if (!rec->checked || rec->merging)
829                 return;
830
831         if (S_ISDIR(rec->imode)) {
832                 if (rec->found_size != rec->isize)
833                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
834                 if (rec->found_file_extent)
835                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
836         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
837                 if (rec->found_dir_item)
838                         rec->errors |= I_ERR_ODD_DIR_ITEM;
839                 if (rec->found_size != rec->nbytes)
840                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
841                 if (rec->nlink > 0 && !no_holes &&
842                     (rec->extent_end < rec->isize ||
843                      first_extent_gap(&rec->holes) < rec->isize))
844                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
845         }
846
847         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
848                 if (rec->found_csum_item && rec->nodatasum)
849                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
850                 if (rec->some_csum_missing && !rec->nodatasum)
851                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
852         }
853
854         BUG_ON(rec->refs != 1);
855         if (can_free_inode_rec(rec)) {
856                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
857                 node = container_of(cache, struct ptr_node, cache);
858                 BUG_ON(node->data != rec);
859                 remove_cache_extent(inode_cache, &node->cache);
860                 free(node);
861                 free_inode_rec(rec);
862         }
863 }
864
865 static int check_orphan_item(struct btrfs_root *root, u64 ino)
866 {
867         struct btrfs_path path;
868         struct btrfs_key key;
869         int ret;
870
871         key.objectid = BTRFS_ORPHAN_OBJECTID;
872         key.type = BTRFS_ORPHAN_ITEM_KEY;
873         key.offset = ino;
874
875         btrfs_init_path(&path);
876         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
877         btrfs_release_path(&path);
878         if (ret > 0)
879                 ret = -ENOENT;
880         return ret;
881 }
882
883 static int process_inode_item(struct extent_buffer *eb,
884                               int slot, struct btrfs_key *key,
885                               struct shared_node *active_node)
886 {
887         struct inode_record *rec;
888         struct btrfs_inode_item *item;
889
890         rec = active_node->current;
891         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
892         if (rec->found_inode_item) {
893                 rec->errors |= I_ERR_DUP_INODE_ITEM;
894                 return 1;
895         }
896         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
897         rec->nlink = btrfs_inode_nlink(eb, item);
898         rec->isize = btrfs_inode_size(eb, item);
899         rec->nbytes = btrfs_inode_nbytes(eb, item);
900         rec->imode = btrfs_inode_mode(eb, item);
901         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
902                 rec->nodatasum = 1;
903         rec->found_inode_item = 1;
904         if (rec->nlink == 0)
905                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
906         maybe_free_inode_rec(&active_node->inode_cache, rec);
907         return 0;
908 }
909
910 static struct inode_backref *get_inode_backref(struct inode_record *rec,
911                                                 const char *name,
912                                                 int namelen, u64 dir)
913 {
914         struct inode_backref *backref;
915
916         list_for_each_entry(backref, &rec->backrefs, list) {
917                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
918                         break;
919                 if (backref->dir != dir || backref->namelen != namelen)
920                         continue;
921                 if (memcmp(name, backref->name, namelen))
922                         continue;
923                 return backref;
924         }
925
926         backref = malloc(sizeof(*backref) + namelen + 1);
927         memset(backref, 0, sizeof(*backref));
928         backref->dir = dir;
929         backref->namelen = namelen;
930         memcpy(backref->name, name, namelen);
931         backref->name[namelen] = '\0';
932         list_add_tail(&backref->list, &rec->backrefs);
933         return backref;
934 }
935
936 static int add_inode_backref(struct cache_tree *inode_cache,
937                              u64 ino, u64 dir, u64 index,
938                              const char *name, int namelen,
939                              int filetype, int itemtype, int errors)
940 {
941         struct inode_record *rec;
942         struct inode_backref *backref;
943
944         rec = get_inode_rec(inode_cache, ino, 1);
945         BUG_ON(IS_ERR(rec));
946         backref = get_inode_backref(rec, name, namelen, dir);
947         if (errors)
948                 backref->errors |= errors;
949         if (itemtype == BTRFS_DIR_INDEX_KEY) {
950                 if (backref->found_dir_index)
951                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
952                 if (backref->found_inode_ref && backref->index != index)
953                         backref->errors |= REF_ERR_INDEX_UNMATCH;
954                 if (backref->found_dir_item && backref->filetype != filetype)
955                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
956
957                 backref->index = index;
958                 backref->filetype = filetype;
959                 backref->found_dir_index = 1;
960         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
961                 rec->found_link++;
962                 if (backref->found_dir_item)
963                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
964                 if (backref->found_dir_index && backref->filetype != filetype)
965                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
966
967                 backref->filetype = filetype;
968                 backref->found_dir_item = 1;
969         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
970                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
971                 if (backref->found_inode_ref)
972                         backref->errors |= REF_ERR_DUP_INODE_REF;
973                 if (backref->found_dir_index && backref->index != index)
974                         backref->errors |= REF_ERR_INDEX_UNMATCH;
975                 else
976                         backref->index = index;
977
978                 backref->ref_type = itemtype;
979                 backref->found_inode_ref = 1;
980         } else {
981                 BUG_ON(1);
982         }
983
984         maybe_free_inode_rec(inode_cache, rec);
985         return 0;
986 }
987
988 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
989                             struct cache_tree *dst_cache)
990 {
991         struct inode_backref *backref;
992         u32 dir_count = 0;
993         int ret = 0;
994
995         dst->merging = 1;
996         list_for_each_entry(backref, &src->backrefs, list) {
997                 if (backref->found_dir_index) {
998                         add_inode_backref(dst_cache, dst->ino, backref->dir,
999                                         backref->index, backref->name,
1000                                         backref->namelen, backref->filetype,
1001                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1002                 }
1003                 if (backref->found_dir_item) {
1004                         dir_count++;
1005                         add_inode_backref(dst_cache, dst->ino,
1006                                         backref->dir, 0, backref->name,
1007                                         backref->namelen, backref->filetype,
1008                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1009                 }
1010                 if (backref->found_inode_ref) {
1011                         add_inode_backref(dst_cache, dst->ino,
1012                                         backref->dir, backref->index,
1013                                         backref->name, backref->namelen, 0,
1014                                         backref->ref_type, backref->errors);
1015                 }
1016         }
1017
1018         if (src->found_dir_item)
1019                 dst->found_dir_item = 1;
1020         if (src->found_file_extent)
1021                 dst->found_file_extent = 1;
1022         if (src->found_csum_item)
1023                 dst->found_csum_item = 1;
1024         if (src->some_csum_missing)
1025                 dst->some_csum_missing = 1;
1026         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1027                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1028                 if (ret < 0)
1029                         return ret;
1030         }
1031
1032         BUG_ON(src->found_link < dir_count);
1033         dst->found_link += src->found_link - dir_count;
1034         dst->found_size += src->found_size;
1035         if (src->extent_start != (u64)-1) {
1036                 if (dst->extent_start == (u64)-1) {
1037                         dst->extent_start = src->extent_start;
1038                         dst->extent_end = src->extent_end;
1039                 } else {
1040                         if (dst->extent_end > src->extent_start)
1041                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1042                         else if (dst->extent_end < src->extent_start) {
1043                                 ret = add_file_extent_hole(&dst->holes,
1044                                         dst->extent_end,
1045                                         src->extent_start - dst->extent_end);
1046                         }
1047                         if (dst->extent_end < src->extent_end)
1048                                 dst->extent_end = src->extent_end;
1049                 }
1050         }
1051
1052         dst->errors |= src->errors;
1053         if (src->found_inode_item) {
1054                 if (!dst->found_inode_item) {
1055                         dst->nlink = src->nlink;
1056                         dst->isize = src->isize;
1057                         dst->nbytes = src->nbytes;
1058                         dst->imode = src->imode;
1059                         dst->nodatasum = src->nodatasum;
1060                         dst->found_inode_item = 1;
1061                 } else {
1062                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1063                 }
1064         }
1065         dst->merging = 0;
1066
1067         return 0;
1068 }
1069
1070 static int splice_shared_node(struct shared_node *src_node,
1071                               struct shared_node *dst_node)
1072 {
1073         struct cache_extent *cache;
1074         struct ptr_node *node, *ins;
1075         struct cache_tree *src, *dst;
1076         struct inode_record *rec, *conflict;
1077         u64 current_ino = 0;
1078         int splice = 0;
1079         int ret;
1080
1081         if (--src_node->refs == 0)
1082                 splice = 1;
1083         if (src_node->current)
1084                 current_ino = src_node->current->ino;
1085
1086         src = &src_node->root_cache;
1087         dst = &dst_node->root_cache;
1088 again:
1089         cache = search_cache_extent(src, 0);
1090         while (cache) {
1091                 node = container_of(cache, struct ptr_node, cache);
1092                 rec = node->data;
1093                 cache = next_cache_extent(cache);
1094
1095                 if (splice) {
1096                         remove_cache_extent(src, &node->cache);
1097                         ins = node;
1098                 } else {
1099                         ins = malloc(sizeof(*ins));
1100                         ins->cache.start = node->cache.start;
1101                         ins->cache.size = node->cache.size;
1102                         ins->data = rec;
1103                         rec->refs++;
1104                 }
1105                 ret = insert_cache_extent(dst, &ins->cache);
1106                 if (ret == -EEXIST) {
1107                         conflict = get_inode_rec(dst, rec->ino, 1);
1108                         BUG_ON(IS_ERR(conflict));
1109                         merge_inode_recs(rec, conflict, dst);
1110                         if (rec->checked) {
1111                                 conflict->checked = 1;
1112                                 if (dst_node->current == conflict)
1113                                         dst_node->current = NULL;
1114                         }
1115                         maybe_free_inode_rec(dst, conflict);
1116                         free_inode_rec(rec);
1117                         free(ins);
1118                 } else {
1119                         BUG_ON(ret);
1120                 }
1121         }
1122
1123         if (src == &src_node->root_cache) {
1124                 src = &src_node->inode_cache;
1125                 dst = &dst_node->inode_cache;
1126                 goto again;
1127         }
1128
1129         if (current_ino > 0 && (!dst_node->current ||
1130             current_ino > dst_node->current->ino)) {
1131                 if (dst_node->current) {
1132                         dst_node->current->checked = 1;
1133                         maybe_free_inode_rec(dst, dst_node->current);
1134                 }
1135                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1136                 BUG_ON(IS_ERR(dst_node->current));
1137         }
1138         return 0;
1139 }
1140
1141 static void free_inode_ptr(struct cache_extent *cache)
1142 {
1143         struct ptr_node *node;
1144         struct inode_record *rec;
1145
1146         node = container_of(cache, struct ptr_node, cache);
1147         rec = node->data;
1148         free_inode_rec(rec);
1149         free(node);
1150 }
1151
1152 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1153
1154 static struct shared_node *find_shared_node(struct cache_tree *shared,
1155                                             u64 bytenr)
1156 {
1157         struct cache_extent *cache;
1158         struct shared_node *node;
1159
1160         cache = lookup_cache_extent(shared, bytenr, 1);
1161         if (cache) {
1162                 node = container_of(cache, struct shared_node, cache);
1163                 return node;
1164         }
1165         return NULL;
1166 }
1167
1168 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1169 {
1170         int ret;
1171         struct shared_node *node;
1172
1173         node = calloc(1, sizeof(*node));
1174         node->cache.start = bytenr;
1175         node->cache.size = 1;
1176         cache_tree_init(&node->root_cache);
1177         cache_tree_init(&node->inode_cache);
1178         node->refs = refs;
1179
1180         ret = insert_cache_extent(shared, &node->cache);
1181         BUG_ON(ret);
1182         return 0;
1183 }
1184
1185 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1186                              struct walk_control *wc, int level)
1187 {
1188         struct shared_node *node;
1189         struct shared_node *dest;
1190
1191         if (level == wc->active_node)
1192                 return 0;
1193
1194         BUG_ON(wc->active_node <= level);
1195         node = find_shared_node(&wc->shared, bytenr);
1196         if (!node) {
1197                 add_shared_node(&wc->shared, bytenr, refs);
1198                 node = find_shared_node(&wc->shared, bytenr);
1199                 wc->nodes[level] = node;
1200                 wc->active_node = level;
1201                 return 0;
1202         }
1203
1204         if (wc->root_level == wc->active_node &&
1205             btrfs_root_refs(&root->root_item) == 0) {
1206                 if (--node->refs == 0) {
1207                         free_inode_recs_tree(&node->root_cache);
1208                         free_inode_recs_tree(&node->inode_cache);
1209                         remove_cache_extent(&wc->shared, &node->cache);
1210                         free(node);
1211                 }
1212                 return 1;
1213         }
1214
1215         dest = wc->nodes[wc->active_node];
1216         splice_shared_node(node, dest);
1217         if (node->refs == 0) {
1218                 remove_cache_extent(&wc->shared, &node->cache);
1219                 free(node);
1220         }
1221         return 1;
1222 }
1223
1224 static int leave_shared_node(struct btrfs_root *root,
1225                              struct walk_control *wc, int level)
1226 {
1227         struct shared_node *node;
1228         struct shared_node *dest;
1229         int i;
1230
1231         if (level == wc->root_level)
1232                 return 0;
1233
1234         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1235                 if (wc->nodes[i])
1236                         break;
1237         }
1238         BUG_ON(i >= BTRFS_MAX_LEVEL);
1239
1240         node = wc->nodes[wc->active_node];
1241         wc->nodes[wc->active_node] = NULL;
1242         wc->active_node = i;
1243
1244         dest = wc->nodes[wc->active_node];
1245         if (wc->active_node < wc->root_level ||
1246             btrfs_root_refs(&root->root_item) > 0) {
1247                 BUG_ON(node->refs <= 1);
1248                 splice_shared_node(node, dest);
1249         } else {
1250                 BUG_ON(node->refs < 2);
1251                 node->refs--;
1252         }
1253         return 0;
1254 }
1255
1256 /*
1257  * Returns:
1258  * < 0 - on error
1259  * 1   - if the root with id child_root_id is a child of root parent_root_id
1260  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1261  *       has other root(s) as parent(s)
1262  * 2   - if the root child_root_id doesn't have any parent roots
1263  */
1264 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1265                          u64 child_root_id)
1266 {
1267         struct btrfs_path path;
1268         struct btrfs_key key;
1269         struct extent_buffer *leaf;
1270         int has_parent = 0;
1271         int ret;
1272
1273         btrfs_init_path(&path);
1274
1275         key.objectid = parent_root_id;
1276         key.type = BTRFS_ROOT_REF_KEY;
1277         key.offset = child_root_id;
1278         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1279                                 0, 0);
1280         if (ret < 0)
1281                 return ret;
1282         btrfs_release_path(&path);
1283         if (!ret)
1284                 return 1;
1285
1286         key.objectid = child_root_id;
1287         key.type = BTRFS_ROOT_BACKREF_KEY;
1288         key.offset = 0;
1289         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1290                                 0, 0);
1291         if (ret < 0)
1292                 goto out;
1293
1294         while (1) {
1295                 leaf = path.nodes[0];
1296                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1297                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1298                         if (ret)
1299                                 break;
1300                         leaf = path.nodes[0];
1301                 }
1302
1303                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1304                 if (key.objectid != child_root_id ||
1305                     key.type != BTRFS_ROOT_BACKREF_KEY)
1306                         break;
1307
1308                 has_parent = 1;
1309
1310                 if (key.offset == parent_root_id) {
1311                         btrfs_release_path(&path);
1312                         return 1;
1313                 }
1314
1315                 path.slots[0]++;
1316         }
1317 out:
1318         btrfs_release_path(&path);
1319         if (ret < 0)
1320                 return ret;
1321         return has_parent ? 0 : 2;
1322 }
1323
1324 static int process_dir_item(struct btrfs_root *root,
1325                             struct extent_buffer *eb,
1326                             int slot, struct btrfs_key *key,
1327                             struct shared_node *active_node)
1328 {
1329         u32 total;
1330         u32 cur = 0;
1331         u32 len;
1332         u32 name_len;
1333         u32 data_len;
1334         int error;
1335         int nritems = 0;
1336         int filetype;
1337         struct btrfs_dir_item *di;
1338         struct inode_record *rec;
1339         struct cache_tree *root_cache;
1340         struct cache_tree *inode_cache;
1341         struct btrfs_key location;
1342         char namebuf[BTRFS_NAME_LEN];
1343
1344         root_cache = &active_node->root_cache;
1345         inode_cache = &active_node->inode_cache;
1346         rec = active_node->current;
1347         rec->found_dir_item = 1;
1348
1349         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1350         total = btrfs_item_size_nr(eb, slot);
1351         while (cur < total) {
1352                 nritems++;
1353                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1354                 name_len = btrfs_dir_name_len(eb, di);
1355                 data_len = btrfs_dir_data_len(eb, di);
1356                 filetype = btrfs_dir_type(eb, di);
1357
1358                 rec->found_size += name_len;
1359                 if (name_len <= BTRFS_NAME_LEN) {
1360                         len = name_len;
1361                         error = 0;
1362                 } else {
1363                         len = BTRFS_NAME_LEN;
1364                         error = REF_ERR_NAME_TOO_LONG;
1365                 }
1366                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1367
1368                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1369                         add_inode_backref(inode_cache, location.objectid,
1370                                           key->objectid, key->offset, namebuf,
1371                                           len, filetype, key->type, error);
1372                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1373                         add_inode_backref(root_cache, location.objectid,
1374                                           key->objectid, key->offset,
1375                                           namebuf, len, filetype,
1376                                           key->type, error);
1377                 } else {
1378                         fprintf(stderr, "invalid location in dir item %u\n",
1379                                 location.type);
1380                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1381                                           key->objectid, key->offset, namebuf,
1382                                           len, filetype, key->type, error);
1383                 }
1384
1385                 len = sizeof(*di) + name_len + data_len;
1386                 di = (struct btrfs_dir_item *)((char *)di + len);
1387                 cur += len;
1388         }
1389         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1390                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1391
1392         return 0;
1393 }
1394
1395 static int process_inode_ref(struct extent_buffer *eb,
1396                              int slot, struct btrfs_key *key,
1397                              struct shared_node *active_node)
1398 {
1399         u32 total;
1400         u32 cur = 0;
1401         u32 len;
1402         u32 name_len;
1403         u64 index;
1404         int error;
1405         struct cache_tree *inode_cache;
1406         struct btrfs_inode_ref *ref;
1407         char namebuf[BTRFS_NAME_LEN];
1408
1409         inode_cache = &active_node->inode_cache;
1410
1411         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1412         total = btrfs_item_size_nr(eb, slot);
1413         while (cur < total) {
1414                 name_len = btrfs_inode_ref_name_len(eb, ref);
1415                 index = btrfs_inode_ref_index(eb, ref);
1416                 if (name_len <= BTRFS_NAME_LEN) {
1417                         len = name_len;
1418                         error = 0;
1419                 } else {
1420                         len = BTRFS_NAME_LEN;
1421                         error = REF_ERR_NAME_TOO_LONG;
1422                 }
1423                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1424                 add_inode_backref(inode_cache, key->objectid, key->offset,
1425                                   index, namebuf, len, 0, key->type, error);
1426
1427                 len = sizeof(*ref) + name_len;
1428                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1429                 cur += len;
1430         }
1431         return 0;
1432 }
1433
1434 static int process_inode_extref(struct extent_buffer *eb,
1435                                 int slot, struct btrfs_key *key,
1436                                 struct shared_node *active_node)
1437 {
1438         u32 total;
1439         u32 cur = 0;
1440         u32 len;
1441         u32 name_len;
1442         u64 index;
1443         u64 parent;
1444         int error;
1445         struct cache_tree *inode_cache;
1446         struct btrfs_inode_extref *extref;
1447         char namebuf[BTRFS_NAME_LEN];
1448
1449         inode_cache = &active_node->inode_cache;
1450
1451         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1452         total = btrfs_item_size_nr(eb, slot);
1453         while (cur < total) {
1454                 name_len = btrfs_inode_extref_name_len(eb, extref);
1455                 index = btrfs_inode_extref_index(eb, extref);
1456                 parent = btrfs_inode_extref_parent(eb, extref);
1457                 if (name_len <= BTRFS_NAME_LEN) {
1458                         len = name_len;
1459                         error = 0;
1460                 } else {
1461                         len = BTRFS_NAME_LEN;
1462                         error = REF_ERR_NAME_TOO_LONG;
1463                 }
1464                 read_extent_buffer(eb, namebuf,
1465                                    (unsigned long)(extref + 1), len);
1466                 add_inode_backref(inode_cache, key->objectid, parent,
1467                                   index, namebuf, len, 0, key->type, error);
1468
1469                 len = sizeof(*extref) + name_len;
1470                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1471                 cur += len;
1472         }
1473         return 0;
1474
1475 }
1476
1477 static int count_csum_range(struct btrfs_root *root, u64 start,
1478                             u64 len, u64 *found)
1479 {
1480         struct btrfs_key key;
1481         struct btrfs_path path;
1482         struct extent_buffer *leaf;
1483         int ret;
1484         size_t size;
1485         *found = 0;
1486         u64 csum_end;
1487         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1488
1489         btrfs_init_path(&path);
1490
1491         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1492         key.offset = start;
1493         key.type = BTRFS_EXTENT_CSUM_KEY;
1494
1495         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1496                                 &key, &path, 0, 0);
1497         if (ret < 0)
1498                 goto out;
1499         if (ret > 0 && path.slots[0] > 0) {
1500                 leaf = path.nodes[0];
1501                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1502                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1503                     key.type == BTRFS_EXTENT_CSUM_KEY)
1504                         path.slots[0]--;
1505         }
1506
1507         while (len > 0) {
1508                 leaf = path.nodes[0];
1509                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1510                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1511                         if (ret > 0)
1512                                 break;
1513                         else if (ret < 0)
1514                                 goto out;
1515                         leaf = path.nodes[0];
1516                 }
1517
1518                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1519                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1520                     key.type != BTRFS_EXTENT_CSUM_KEY)
1521                         break;
1522
1523                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1524                 if (key.offset >= start + len)
1525                         break;
1526
1527                 if (key.offset > start)
1528                         start = key.offset;
1529
1530                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1531                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1532                 if (csum_end > start) {
1533                         size = min(csum_end - start, len);
1534                         len -= size;
1535                         start += size;
1536                         *found += size;
1537                 }
1538
1539                 path.slots[0]++;
1540         }
1541 out:
1542         btrfs_release_path(&path);
1543         if (ret < 0)
1544                 return ret;
1545         return 0;
1546 }
1547
1548 static int process_file_extent(struct btrfs_root *root,
1549                                 struct extent_buffer *eb,
1550                                 int slot, struct btrfs_key *key,
1551                                 struct shared_node *active_node)
1552 {
1553         struct inode_record *rec;
1554         struct btrfs_file_extent_item *fi;
1555         u64 num_bytes = 0;
1556         u64 disk_bytenr = 0;
1557         u64 extent_offset = 0;
1558         u64 mask = root->sectorsize - 1;
1559         int extent_type;
1560         int ret;
1561
1562         rec = active_node->current;
1563         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1564         rec->found_file_extent = 1;
1565
1566         if (rec->extent_start == (u64)-1) {
1567                 rec->extent_start = key->offset;
1568                 rec->extent_end = key->offset;
1569         }
1570
1571         if (rec->extent_end > key->offset)
1572                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1573         else if (rec->extent_end < key->offset) {
1574                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1575                                            key->offset - rec->extent_end);
1576                 if (ret < 0)
1577                         return ret;
1578         }
1579
1580         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1581         extent_type = btrfs_file_extent_type(eb, fi);
1582
1583         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1584                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1585                 if (num_bytes == 0)
1586                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1587                 rec->found_size += num_bytes;
1588                 num_bytes = (num_bytes + mask) & ~mask;
1589         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1590                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1591                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1592                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1593                 extent_offset = btrfs_file_extent_offset(eb, fi);
1594                 if (num_bytes == 0 || (num_bytes & mask))
1595                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1596                 if (num_bytes + extent_offset >
1597                     btrfs_file_extent_ram_bytes(eb, fi))
1598                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1599                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1600                     (btrfs_file_extent_compression(eb, fi) ||
1601                      btrfs_file_extent_encryption(eb, fi) ||
1602                      btrfs_file_extent_other_encoding(eb, fi)))
1603                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1604                 if (disk_bytenr > 0)
1605                         rec->found_size += num_bytes;
1606         } else {
1607                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1608         }
1609         rec->extent_end = key->offset + num_bytes;
1610
1611         /*
1612          * The data reloc tree will copy full extents into its inode and then
1613          * copy the corresponding csums.  Because the extent it copied could be
1614          * a preallocated extent that hasn't been written to yet there may be no
1615          * csums to copy, ergo we won't have csums for our file extent.  This is
1616          * ok so just don't bother checking csums if the inode belongs to the
1617          * data reloc tree.
1618          */
1619         if (disk_bytenr > 0 &&
1620             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1621                 u64 found;
1622                 if (btrfs_file_extent_compression(eb, fi))
1623                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1624                 else
1625                         disk_bytenr += extent_offset;
1626
1627                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1628                 if (ret < 0)
1629                         return ret;
1630                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1631                         if (found > 0)
1632                                 rec->found_csum_item = 1;
1633                         if (found < num_bytes)
1634                                 rec->some_csum_missing = 1;
1635                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1636                         if (found > 0)
1637                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1638                 }
1639         }
1640         return 0;
1641 }
1642
1643 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1644                             struct walk_control *wc)
1645 {
1646         struct btrfs_key key;
1647         u32 nritems;
1648         int i;
1649         int ret = 0;
1650         struct cache_tree *inode_cache;
1651         struct shared_node *active_node;
1652
1653         if (wc->root_level == wc->active_node &&
1654             btrfs_root_refs(&root->root_item) == 0)
1655                 return 0;
1656
1657         active_node = wc->nodes[wc->active_node];
1658         inode_cache = &active_node->inode_cache;
1659         nritems = btrfs_header_nritems(eb);
1660         for (i = 0; i < nritems; i++) {
1661                 btrfs_item_key_to_cpu(eb, &key, i);
1662
1663                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1664                         continue;
1665                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1666                         continue;
1667
1668                 if (active_node->current == NULL ||
1669                     active_node->current->ino < key.objectid) {
1670                         if (active_node->current) {
1671                                 active_node->current->checked = 1;
1672                                 maybe_free_inode_rec(inode_cache,
1673                                                      active_node->current);
1674                         }
1675                         active_node->current = get_inode_rec(inode_cache,
1676                                                              key.objectid, 1);
1677                         BUG_ON(IS_ERR(active_node->current));
1678                 }
1679                 switch (key.type) {
1680                 case BTRFS_DIR_ITEM_KEY:
1681                 case BTRFS_DIR_INDEX_KEY:
1682                         ret = process_dir_item(root, eb, i, &key, active_node);
1683                         break;
1684                 case BTRFS_INODE_REF_KEY:
1685                         ret = process_inode_ref(eb, i, &key, active_node);
1686                         break;
1687                 case BTRFS_INODE_EXTREF_KEY:
1688                         ret = process_inode_extref(eb, i, &key, active_node);
1689                         break;
1690                 case BTRFS_INODE_ITEM_KEY:
1691                         ret = process_inode_item(eb, i, &key, active_node);
1692                         break;
1693                 case BTRFS_EXTENT_DATA_KEY:
1694                         ret = process_file_extent(root, eb, i, &key,
1695                                                   active_node);
1696                         break;
1697                 default:
1698                         break;
1699                 };
1700         }
1701         return ret;
1702 }
1703
1704 static void reada_walk_down(struct btrfs_root *root,
1705                             struct extent_buffer *node, int slot)
1706 {
1707         u64 bytenr;
1708         u64 ptr_gen;
1709         u32 nritems;
1710         u32 blocksize;
1711         int i;
1712         int level;
1713
1714         level = btrfs_header_level(node);
1715         if (level != 1)
1716                 return;
1717
1718         nritems = btrfs_header_nritems(node);
1719         blocksize = btrfs_level_size(root, level - 1);
1720         for (i = slot; i < nritems; i++) {
1721                 bytenr = btrfs_node_blockptr(node, i);
1722                 ptr_gen = btrfs_node_ptr_generation(node, i);
1723                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1724         }
1725 }
1726
1727 /*
1728  * Check the child node/leaf by the following condition:
1729  * 1. the first item key of the node/leaf should be the same with the one
1730  *    in parent.
1731  * 2. block in parent node should match the child node/leaf.
1732  * 3. generation of parent node and child's header should be consistent.
1733  *
1734  * Or the child node/leaf pointed by the key in parent is not valid.
1735  *
1736  * We hope to check leaf owner too, but since subvol may share leaves,
1737  * which makes leaf owner check not so strong, key check should be
1738  * sufficient enough for that case.
1739  */
1740 static int check_child_node(struct btrfs_root *root,
1741                             struct extent_buffer *parent, int slot,
1742                             struct extent_buffer *child)
1743 {
1744         struct btrfs_key parent_key;
1745         struct btrfs_key child_key;
1746         int ret = 0;
1747
1748         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1749         if (btrfs_header_level(child) == 0)
1750                 btrfs_item_key_to_cpu(child, &child_key, 0);
1751         else
1752                 btrfs_node_key_to_cpu(child, &child_key, 0);
1753
1754         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1755                 ret = -EINVAL;
1756                 fprintf(stderr,
1757                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1758                         parent_key.objectid, parent_key.type, parent_key.offset,
1759                         child_key.objectid, child_key.type, child_key.offset);
1760         }
1761         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1762                 ret = -EINVAL;
1763                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1764                         btrfs_node_blockptr(parent, slot),
1765                         btrfs_header_bytenr(child));
1766         }
1767         if (btrfs_node_ptr_generation(parent, slot) !=
1768             btrfs_header_generation(child)) {
1769                 ret = -EINVAL;
1770                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1771                         btrfs_header_generation(child),
1772                         btrfs_node_ptr_generation(parent, slot));
1773         }
1774         return ret;
1775 }
1776
1777 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1778                           struct walk_control *wc, int *level)
1779 {
1780         enum btrfs_tree_block_status status;
1781         u64 bytenr;
1782         u64 ptr_gen;
1783         struct extent_buffer *next;
1784         struct extent_buffer *cur;
1785         u32 blocksize;
1786         int ret, err = 0;
1787         u64 refs;
1788
1789         WARN_ON(*level < 0);
1790         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1791         ret = btrfs_lookup_extent_info(NULL, root,
1792                                        path->nodes[*level]->start,
1793                                        *level, 1, &refs, NULL);
1794         if (ret < 0) {
1795                 err = ret;
1796                 goto out;
1797         }
1798
1799         if (refs > 1) {
1800                 ret = enter_shared_node(root, path->nodes[*level]->start,
1801                                         refs, wc, *level);
1802                 if (ret > 0) {
1803                         err = ret;
1804                         goto out;
1805                 }
1806         }
1807
1808         while (*level >= 0) {
1809                 WARN_ON(*level < 0);
1810                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1811                 cur = path->nodes[*level];
1812
1813                 if (btrfs_header_level(cur) != *level)
1814                         WARN_ON(1);
1815
1816                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1817                         break;
1818                 if (*level == 0) {
1819                         ret = process_one_leaf(root, cur, wc);
1820                         if (ret < 0)
1821                                 err = ret;
1822                         break;
1823                 }
1824                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1825                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1826                 blocksize = btrfs_level_size(root, *level - 1);
1827                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1828                                                1, &refs, NULL);
1829                 if (ret < 0)
1830                         refs = 0;
1831
1832                 if (refs > 1) {
1833                         ret = enter_shared_node(root, bytenr, refs,
1834                                                 wc, *level - 1);
1835                         if (ret > 0) {
1836                                 path->slots[*level]++;
1837                                 continue;
1838                         }
1839                 }
1840
1841                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1842                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1843                         free_extent_buffer(next);
1844                         reada_walk_down(root, cur, path->slots[*level]);
1845                         next = read_tree_block(root, bytenr, blocksize,
1846                                                ptr_gen);
1847                         if (!extent_buffer_uptodate(next)) {
1848                                 struct btrfs_key node_key;
1849
1850                                 btrfs_node_key_to_cpu(path->nodes[*level],
1851                                                       &node_key,
1852                                                       path->slots[*level]);
1853                                 btrfs_add_corrupt_extent_record(root->fs_info,
1854                                                 &node_key,
1855                                                 path->nodes[*level]->start,
1856                                                 root->leafsize, *level);
1857                                 err = -EIO;
1858                                 goto out;
1859                         }
1860                 }
1861
1862                 ret = check_child_node(root, cur, path->slots[*level], next);
1863                 if (ret) {
1864                         err = ret;
1865                         goto out;
1866                 }
1867
1868                 if (btrfs_is_leaf(next))
1869                         status = btrfs_check_leaf(root, NULL, next);
1870                 else
1871                         status = btrfs_check_node(root, NULL, next);
1872                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1873                         free_extent_buffer(next);
1874                         err = -EIO;
1875                         goto out;
1876                 }
1877
1878                 *level = *level - 1;
1879                 free_extent_buffer(path->nodes[*level]);
1880                 path->nodes[*level] = next;
1881                 path->slots[*level] = 0;
1882         }
1883 out:
1884         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1885         return err;
1886 }
1887
1888 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1889                         struct walk_control *wc, int *level)
1890 {
1891         int i;
1892         struct extent_buffer *leaf;
1893
1894         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1895                 leaf = path->nodes[i];
1896                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1897                         path->slots[i]++;
1898                         *level = i;
1899                         return 0;
1900                 } else {
1901                         free_extent_buffer(path->nodes[*level]);
1902                         path->nodes[*level] = NULL;
1903                         BUG_ON(*level > wc->active_node);
1904                         if (*level == wc->active_node)
1905                                 leave_shared_node(root, wc, *level);
1906                         *level = i + 1;
1907                 }
1908         }
1909         return 1;
1910 }
1911
1912 static int check_root_dir(struct inode_record *rec)
1913 {
1914         struct inode_backref *backref;
1915         int ret = -1;
1916
1917         if (!rec->found_inode_item || rec->errors)
1918                 goto out;
1919         if (rec->nlink != 1 || rec->found_link != 0)
1920                 goto out;
1921         if (list_empty(&rec->backrefs))
1922                 goto out;
1923         backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1924         if (!backref->found_inode_ref)
1925                 goto out;
1926         if (backref->index != 0 || backref->namelen != 2 ||
1927             memcmp(backref->name, "..", 2))
1928                 goto out;
1929         if (backref->found_dir_index || backref->found_dir_item)
1930                 goto out;
1931         ret = 0;
1932 out:
1933         return ret;
1934 }
1935
1936 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1937                               struct btrfs_root *root, struct btrfs_path *path,
1938                               struct inode_record *rec)
1939 {
1940         struct btrfs_inode_item *ei;
1941         struct btrfs_key key;
1942         int ret;
1943
1944         key.objectid = rec->ino;
1945         key.type = BTRFS_INODE_ITEM_KEY;
1946         key.offset = (u64)-1;
1947
1948         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1949         if (ret < 0)
1950                 goto out;
1951         if (ret) {
1952                 if (!path->slots[0]) {
1953                         ret = -ENOENT;
1954                         goto out;
1955                 }
1956                 path->slots[0]--;
1957                 ret = 0;
1958         }
1959         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1960         if (key.objectid != rec->ino) {
1961                 ret = -ENOENT;
1962                 goto out;
1963         }
1964
1965         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1966                             struct btrfs_inode_item);
1967         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1968         btrfs_mark_buffer_dirty(path->nodes[0]);
1969         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1970         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
1971                root->root_key.objectid);
1972 out:
1973         btrfs_release_path(path);
1974         return ret;
1975 }
1976
1977 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1978                                     struct btrfs_root *root,
1979                                     struct btrfs_path *path,
1980                                     struct inode_record *rec)
1981 {
1982         int ret;
1983
1984         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
1985         btrfs_release_path(path);
1986         if (!ret)
1987                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1988         return ret;
1989 }
1990
1991 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
1992                                struct btrfs_root *root,
1993                                struct btrfs_path *path,
1994                                struct inode_record *rec)
1995 {
1996         struct btrfs_inode_item *ei;
1997         struct btrfs_key key;
1998         int ret = 0;
1999
2000         key.objectid = rec->ino;
2001         key.type = BTRFS_INODE_ITEM_KEY;
2002         key.offset = 0;
2003
2004         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2005         if (ret) {
2006                 if (ret > 0)
2007                         ret = -ENOENT;
2008                 goto out;
2009         }
2010
2011         /* Since ret == 0, no need to check anything */
2012         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2013                             struct btrfs_inode_item);
2014         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2015         btrfs_mark_buffer_dirty(path->nodes[0]);
2016         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2017         printf("reset nbytes for ino %llu root %llu\n",
2018                rec->ino, root->root_key.objectid);
2019 out:
2020         btrfs_release_path(path);
2021         return ret;
2022 }
2023
2024 static int add_missing_dir_index(struct btrfs_root *root,
2025                                  struct cache_tree *inode_cache,
2026                                  struct inode_record *rec,
2027                                  struct inode_backref *backref)
2028 {
2029         struct btrfs_path *path;
2030         struct btrfs_trans_handle *trans;
2031         struct btrfs_dir_item *dir_item;
2032         struct extent_buffer *leaf;
2033         struct btrfs_key key;
2034         struct btrfs_disk_key disk_key;
2035         struct inode_record *dir_rec;
2036         unsigned long name_ptr;
2037         u32 data_size = sizeof(*dir_item) + backref->namelen;
2038         int ret;
2039
2040         path = btrfs_alloc_path();
2041         if (!path)
2042                 return -ENOMEM;
2043
2044         trans = btrfs_start_transaction(root, 1);
2045         if (IS_ERR(trans)) {
2046                 btrfs_free_path(path);
2047                 return PTR_ERR(trans);
2048         }
2049
2050         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2051                 (unsigned long long)rec->ino);
2052         key.objectid = backref->dir;
2053         key.type = BTRFS_DIR_INDEX_KEY;
2054         key.offset = backref->index;
2055
2056         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2057         BUG_ON(ret);
2058
2059         leaf = path->nodes[0];
2060         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2061
2062         disk_key.objectid = cpu_to_le64(rec->ino);
2063         disk_key.type = BTRFS_INODE_ITEM_KEY;
2064         disk_key.offset = 0;
2065
2066         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2067         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2068         btrfs_set_dir_data_len(leaf, dir_item, 0);
2069         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2070         name_ptr = (unsigned long)(dir_item + 1);
2071         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2072         btrfs_mark_buffer_dirty(leaf);
2073         btrfs_free_path(path);
2074         btrfs_commit_transaction(trans, root);
2075
2076         backref->found_dir_index = 1;
2077         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2078         BUG_ON(IS_ERR(dir_rec));
2079         if (!dir_rec)
2080                 return 0;
2081         dir_rec->found_size += backref->namelen;
2082         if (dir_rec->found_size == dir_rec->isize &&
2083             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2084                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2085         if (dir_rec->found_size != dir_rec->isize)
2086                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2087
2088         return 0;
2089 }
2090
2091 static int delete_dir_index(struct btrfs_root *root,
2092                             struct cache_tree *inode_cache,
2093                             struct inode_record *rec,
2094                             struct inode_backref *backref)
2095 {
2096         struct btrfs_trans_handle *trans;
2097         struct btrfs_dir_item *di;
2098         struct btrfs_path *path;
2099         int ret = 0;
2100
2101         path = btrfs_alloc_path();
2102         if (!path)
2103                 return -ENOMEM;
2104
2105         trans = btrfs_start_transaction(root, 1);
2106         if (IS_ERR(trans)) {
2107                 btrfs_free_path(path);
2108                 return PTR_ERR(trans);
2109         }
2110
2111
2112         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2113                 (unsigned long long)backref->dir,
2114                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2115                 (unsigned long long)root->objectid);
2116
2117         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2118                                     backref->name, backref->namelen,
2119                                     backref->index, -1);
2120         if (IS_ERR(di)) {
2121                 ret = PTR_ERR(di);
2122                 btrfs_free_path(path);
2123                 btrfs_commit_transaction(trans, root);
2124                 if (ret == -ENOENT)
2125                         return 0;
2126                 return ret;
2127         }
2128
2129         if (!di)
2130                 ret = btrfs_del_item(trans, root, path);
2131         else
2132                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2133         BUG_ON(ret);
2134         btrfs_free_path(path);
2135         btrfs_commit_transaction(trans, root);
2136         return ret;
2137 }
2138
2139 static int create_inode_item(struct btrfs_root *root,
2140                              struct inode_record *rec,
2141                              struct inode_backref *backref, int root_dir)
2142 {
2143         struct btrfs_trans_handle *trans;
2144         struct btrfs_inode_item inode_item;
2145         time_t now = time(NULL);
2146         int ret;
2147
2148         trans = btrfs_start_transaction(root, 1);
2149         if (IS_ERR(trans)) {
2150                 ret = PTR_ERR(trans);
2151                 return ret;
2152         }
2153
2154         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2155                 "be incomplete, please check permissions and content after "
2156                 "the fsck completes.\n", (unsigned long long)root->objectid,
2157                 (unsigned long long)rec->ino);
2158
2159         memset(&inode_item, 0, sizeof(inode_item));
2160         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2161         if (root_dir)
2162                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2163         else
2164                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2165         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2166         if (rec->found_dir_item) {
2167                 if (rec->found_file_extent)
2168                         fprintf(stderr, "root %llu inode %llu has both a dir "
2169                                 "item and extents, unsure if it is a dir or a "
2170                                 "regular file so setting it as a directory\n",
2171                                 (unsigned long long)root->objectid,
2172                                 (unsigned long long)rec->ino);
2173                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2174                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2175         } else if (!rec->found_dir_item) {
2176                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2177                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2178         }
2179         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2180         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2181         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2182         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2183         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2184         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2185         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2186         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2187
2188         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2189         BUG_ON(ret);
2190         btrfs_commit_transaction(trans, root);
2191         return 0;
2192 }
2193
2194 static int repair_inode_backrefs(struct btrfs_root *root,
2195                                  struct inode_record *rec,
2196                                  struct cache_tree *inode_cache,
2197                                  int delete)
2198 {
2199         struct inode_backref *tmp, *backref;
2200         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2201         int ret = 0;
2202         int repaired = 0;
2203
2204         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2205                 if (!delete && rec->ino == root_dirid) {
2206                         if (!rec->found_inode_item) {
2207                                 ret = create_inode_item(root, rec, backref, 1);
2208                                 if (ret)
2209                                         break;
2210                                 repaired++;
2211                         }
2212                 }
2213
2214                 /* Index 0 for root dir's are special, don't mess with it */
2215                 if (rec->ino == root_dirid && backref->index == 0)
2216                         continue;
2217
2218                 if (delete &&
2219                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2220                      (backref->found_dir_index && backref->found_inode_ref &&
2221                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2222                         ret = delete_dir_index(root, inode_cache, rec, backref);
2223                         if (ret)
2224                                 break;
2225                         repaired++;
2226                         list_del(&backref->list);
2227                         free(backref);
2228                 }
2229
2230                 if (!delete && !backref->found_dir_index &&
2231                     backref->found_dir_item && backref->found_inode_ref) {
2232                         ret = add_missing_dir_index(root, inode_cache, rec,
2233                                                     backref);
2234                         if (ret)
2235                                 break;
2236                         repaired++;
2237                         if (backref->found_dir_item &&
2238                             backref->found_dir_index &&
2239                             backref->found_dir_index) {
2240                                 if (!backref->errors &&
2241                                     backref->found_inode_ref) {
2242                                         list_del(&backref->list);
2243                                         free(backref);
2244                                 }
2245                         }
2246                 }
2247
2248                 if (!delete && (!backref->found_dir_index &&
2249                                 !backref->found_dir_item &&
2250                                 backref->found_inode_ref)) {
2251                         struct btrfs_trans_handle *trans;
2252                         struct btrfs_key location;
2253
2254                         ret = check_dir_conflict(root, backref->name,
2255                                                  backref->namelen,
2256                                                  backref->dir,
2257                                                  backref->index);
2258                         if (ret) {
2259                                 /*
2260                                  * let nlink fixing routine to handle it,
2261                                  * which can do it better.
2262                                  */
2263                                 ret = 0;
2264                                 break;
2265                         }
2266                         location.objectid = rec->ino;
2267                         location.type = BTRFS_INODE_ITEM_KEY;
2268                         location.offset = 0;
2269
2270                         trans = btrfs_start_transaction(root, 1);
2271                         if (IS_ERR(trans)) {
2272                                 ret = PTR_ERR(trans);
2273                                 break;
2274                         }
2275                         fprintf(stderr, "adding missing dir index/item pair "
2276                                 "for inode %llu\n",
2277                                 (unsigned long long)rec->ino);
2278                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2279                                                     backref->namelen,
2280                                                     backref->dir, &location,
2281                                                     imode_to_type(rec->imode),
2282                                                     backref->index);
2283                         BUG_ON(ret);
2284                         btrfs_commit_transaction(trans, root);
2285                         repaired++;
2286                 }
2287
2288                 if (!delete && (backref->found_inode_ref &&
2289                                 backref->found_dir_index &&
2290                                 backref->found_dir_item &&
2291                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2292                                 !rec->found_inode_item)) {
2293                         ret = create_inode_item(root, rec, backref, 0);
2294                         if (ret)
2295                                 break;
2296                         repaired++;
2297                 }
2298
2299         }
2300         return ret ? ret : repaired;
2301 }
2302
2303 /*
2304  * To determine the file type for nlink/inode_item repair
2305  *
2306  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2307  * Return -ENOENT if file type is not found.
2308  */
2309 static int find_file_type(struct inode_record *rec, u8 *type)
2310 {
2311         struct inode_backref *backref;
2312
2313         /* For inode item recovered case */
2314         if (rec->found_inode_item) {
2315                 *type = imode_to_type(rec->imode);
2316                 return 0;
2317         }
2318
2319         list_for_each_entry(backref, &rec->backrefs, list) {
2320                 if (backref->found_dir_index || backref->found_dir_item) {
2321                         *type = backref->filetype;
2322                         return 0;
2323                 }
2324         }
2325         return -ENOENT;
2326 }
2327
2328 /*
2329  * To determine the file name for nlink repair
2330  *
2331  * Return 0 if file name is found, set name and namelen.
2332  * Return -ENOENT if file name is not found.
2333  */
2334 static int find_file_name(struct inode_record *rec,
2335                           char *name, int *namelen)
2336 {
2337         struct inode_backref *backref;
2338
2339         list_for_each_entry(backref, &rec->backrefs, list) {
2340                 if (backref->found_dir_index || backref->found_dir_item ||
2341                     backref->found_inode_ref) {
2342                         memcpy(name, backref->name, backref->namelen);
2343                         *namelen = backref->namelen;
2344                         return 0;
2345                 }
2346         }
2347         return -ENOENT;
2348 }
2349
2350 /* Reset the nlink of the inode to the correct one */
2351 static int reset_nlink(struct btrfs_trans_handle *trans,
2352                        struct btrfs_root *root,
2353                        struct btrfs_path *path,
2354                        struct inode_record *rec)
2355 {
2356         struct inode_backref *backref;
2357         struct inode_backref *tmp;
2358         struct btrfs_key key;
2359         struct btrfs_inode_item *inode_item;
2360         int ret = 0;
2361
2362         /* We don't believe this either, reset it and iterate backref */
2363         rec->found_link = 0;
2364
2365         /* Remove all backref including the valid ones */
2366         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2367                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2368                                    backref->index, backref->name,
2369                                    backref->namelen, 0);
2370                 if (ret < 0)
2371                         goto out;
2372
2373                 /* remove invalid backref, so it won't be added back */
2374                 if (!(backref->found_dir_index &&
2375                       backref->found_dir_item &&
2376                       backref->found_inode_ref)) {
2377                         list_del(&backref->list);
2378                         free(backref);
2379                 } else {
2380                         rec->found_link++;
2381                 }
2382         }
2383
2384         /* Set nlink to 0 */
2385         key.objectid = rec->ino;
2386         key.type = BTRFS_INODE_ITEM_KEY;
2387         key.offset = 0;
2388         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2389         if (ret < 0)
2390                 goto out;
2391         if (ret > 0) {
2392                 ret = -ENOENT;
2393                 goto out;
2394         }
2395         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2396                                     struct btrfs_inode_item);
2397         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2398         btrfs_mark_buffer_dirty(path->nodes[0]);
2399         btrfs_release_path(path);
2400
2401         /*
2402          * Add back valid inode_ref/dir_item/dir_index,
2403          * add_link() will handle the nlink inc, so new nlink must be correct
2404          */
2405         list_for_each_entry(backref, &rec->backrefs, list) {
2406                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2407                                      backref->name, backref->namelen,
2408                                      backref->filetype, &backref->index, 1);
2409                 if (ret < 0)
2410                         goto out;
2411         }
2412 out:
2413         btrfs_release_path(path);
2414         return ret;
2415 }
2416
2417 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2418                                struct btrfs_root *root,
2419                                struct btrfs_path *path,
2420                                struct inode_record *rec)
2421 {
2422         char *dir_name = "lost+found";
2423         char namebuf[BTRFS_NAME_LEN] = {0};
2424         u64 lost_found_ino;
2425         u32 mode = 0700;
2426         u8 type = 0;
2427         int namelen = 0;
2428         int name_recovered = 0;
2429         int type_recovered = 0;
2430         int ret = 0;
2431
2432         /*
2433          * Get file name and type first before these invalid inode ref
2434          * are deleted by remove_all_invalid_backref()
2435          */
2436         name_recovered = !find_file_name(rec, namebuf, &namelen);
2437         type_recovered = !find_file_type(rec, &type);
2438
2439         if (!name_recovered) {
2440                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2441                        rec->ino, rec->ino);
2442                 namelen = count_digits(rec->ino);
2443                 sprintf(namebuf, "%llu", rec->ino);
2444                 name_recovered = 1;
2445         }
2446         if (!type_recovered) {
2447                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2448                        rec->ino);
2449                 type = BTRFS_FT_REG_FILE;
2450                 type_recovered = 1;
2451         }
2452
2453         ret = reset_nlink(trans, root, path, rec);
2454         if (ret < 0) {
2455                 fprintf(stderr,
2456                         "Failed to reset nlink for inode %llu: %s\n",
2457                         rec->ino, strerror(-ret));
2458                 goto out;
2459         }
2460
2461         if (rec->found_link == 0) {
2462                 lost_found_ino = root->highest_inode;
2463                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2464                         ret = -EOVERFLOW;
2465                         goto out;
2466                 }
2467                 lost_found_ino++;
2468                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2469                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2470                                   mode);
2471                 if (ret < 0) {
2472                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2473                                 dir_name, strerror(-ret));
2474                         goto out;
2475                 }
2476                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2477                                      namebuf, namelen, type, NULL, 1);
2478                 /*
2479                  * Add ".INO" suffix several times to handle case where
2480                  * "FILENAME.INO" is already taken by another file.
2481                  */
2482                 while (ret == -EEXIST) {
2483                         /*
2484                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2485                          */
2486                         if (namelen + count_digits(rec->ino) + 1 >
2487                             BTRFS_NAME_LEN) {
2488                                 ret = -EFBIG;
2489                                 goto out;
2490                         }
2491                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2492                                  ".%llu", rec->ino);
2493                         namelen += count_digits(rec->ino) + 1;
2494                         ret = btrfs_add_link(trans, root, rec->ino,
2495                                              lost_found_ino, namebuf,
2496                                              namelen, type, NULL, 1);
2497                 }
2498                 if (ret < 0) {
2499                         fprintf(stderr,
2500                                 "Failed to link the inode %llu to %s dir: %s\n",
2501                                 rec->ino, dir_name, strerror(-ret));
2502                         goto out;
2503                 }
2504                 /*
2505                  * Just increase the found_link, don't actually add the
2506                  * backref. This will make things easier and this inode
2507                  * record will be freed after the repair is done.
2508                  * So fsck will not report problem about this inode.
2509                  */
2510                 rec->found_link++;
2511                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2512                        namelen, namebuf, dir_name);
2513         }
2514         printf("Fixed the nlink of inode %llu\n", rec->ino);
2515 out:
2516         /*
2517          * Clear the flag anyway, or we will loop forever for the same inode
2518          * as it will not be removed from the bad inode list and the dead loop
2519          * happens.
2520          */
2521         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2522         btrfs_release_path(path);
2523         return ret;
2524 }
2525
2526 /*
2527  * Check if there is any normal(reg or prealloc) file extent for given
2528  * ino.
2529  * This is used to determine the file type when neither its dir_index/item or
2530  * inode_item exists.
2531  *
2532  * This will *NOT* report error, if any error happens, just consider it does
2533  * not have any normal file extent.
2534  */
2535 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2536 {
2537         struct btrfs_path *path;
2538         struct btrfs_key key;
2539         struct btrfs_key found_key;
2540         struct btrfs_file_extent_item *fi;
2541         u8 type;
2542         int ret = 0;
2543
2544         path = btrfs_alloc_path();
2545         if (!path)
2546                 goto out;
2547         key.objectid = ino;
2548         key.type = BTRFS_EXTENT_DATA_KEY;
2549         key.offset = 0;
2550
2551         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2552         if (ret < 0) {
2553                 ret = 0;
2554                 goto out;
2555         }
2556         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2557                 ret = btrfs_next_leaf(root, path);
2558                 if (ret) {
2559                         ret = 0;
2560                         goto out;
2561                 }
2562         }
2563         while (1) {
2564                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2565                                       path->slots[0]);
2566                 if (found_key.objectid != ino ||
2567                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2568                         break;
2569                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2570                                     struct btrfs_file_extent_item);
2571                 type = btrfs_file_extent_type(path->nodes[0], fi);
2572                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2573                         ret = 1;
2574                         goto out;
2575                 }
2576         }
2577 out:
2578         btrfs_free_path(path);
2579         return ret;
2580 }
2581
2582 static u32 btrfs_type_to_imode(u8 type)
2583 {
2584         static u32 imode_by_btrfs_type[] = {
2585                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2586                 [BTRFS_FT_DIR]          = S_IFDIR,
2587                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2588                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2589                 [BTRFS_FT_FIFO]         = S_IFIFO,
2590                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2591                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2592         };
2593
2594         return imode_by_btrfs_type[(type)];
2595 }
2596
2597 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2598                                 struct btrfs_root *root,
2599                                 struct btrfs_path *path,
2600                                 struct inode_record *rec)
2601 {
2602         u8 filetype;
2603         u32 mode = 0700;
2604         int type_recovered = 0;
2605         int ret = 0;
2606
2607         printf("Trying to rebuild inode:%llu\n", rec->ino);
2608
2609         type_recovered = !find_file_type(rec, &filetype);
2610
2611         /*
2612          * Try to determine inode type if type not found.
2613          *
2614          * For found regular file extent, it must be FILE.
2615          * For found dir_item/index, it must be DIR.
2616          *
2617          * For undetermined one, use FILE as fallback.
2618          *
2619          * TODO:
2620          * 1. If found backref(inode_index/item is already handled) to it,
2621          *    it must be DIR.
2622          *    Need new inode-inode ref structure to allow search for that.
2623          */
2624         if (!type_recovered) {
2625                 if (rec->found_file_extent &&
2626                     find_normal_file_extent(root, rec->ino)) {
2627                         type_recovered = 1;
2628                         filetype = BTRFS_FT_REG_FILE;
2629                 } else if (rec->found_dir_item) {
2630                         type_recovered = 1;
2631                         filetype = BTRFS_FT_DIR;
2632                 } else if (!list_empty(&rec->orphan_extents)) {
2633                         type_recovered = 1;
2634                         filetype = BTRFS_FT_REG_FILE;
2635                 } else{
2636                         printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2637                                rec->ino);
2638                         type_recovered = 1;
2639                         filetype = BTRFS_FT_REG_FILE;
2640                 }
2641         }
2642
2643         ret = btrfs_new_inode(trans, root, rec->ino,
2644                               mode | btrfs_type_to_imode(filetype));
2645         if (ret < 0)
2646                 goto out;
2647
2648         /*
2649          * Here inode rebuild is done, we only rebuild the inode item,
2650          * don't repair the nlink(like move to lost+found).
2651          * That is the job of nlink repair.
2652          *
2653          * We just fill the record and return
2654          */
2655         rec->found_dir_item = 1;
2656         rec->imode = mode | btrfs_type_to_imode(filetype);
2657         rec->nlink = 0;
2658         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2659         /* Ensure the inode_nlinks repair function will be called */
2660         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2661 out:
2662         return ret;
2663 }
2664
2665 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2666                                       struct btrfs_root *root,
2667                                       struct btrfs_path *path,
2668                                       struct inode_record *rec)
2669 {
2670         struct orphan_data_extent *orphan;
2671         struct orphan_data_extent *tmp;
2672         int ret = 0;
2673
2674         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2675                 /*
2676                  * Check for conflicting file extents
2677                  *
2678                  * Here we don't know whether the extents is compressed or not,
2679                  * so we can only assume it not compressed nor data offset,
2680                  * and use its disk_len as extent length.
2681                  */
2682                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2683                                        orphan->offset, orphan->disk_len, 0);
2684                 btrfs_release_path(path);
2685                 if (ret < 0)
2686                         goto out;
2687                 if (!ret) {
2688                         fprintf(stderr,
2689                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2690                                 orphan->disk_bytenr, orphan->disk_len);
2691                         ret = btrfs_free_extent(trans,
2692                                         root->fs_info->extent_root,
2693                                         orphan->disk_bytenr, orphan->disk_len,
2694                                         0, root->objectid, orphan->objectid,
2695                                         orphan->offset);
2696                         if (ret < 0)
2697                                 goto out;
2698                 }
2699                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2700                                 orphan->offset, orphan->disk_bytenr,
2701                                 orphan->disk_len, orphan->disk_len);
2702                 if (ret < 0)
2703                         goto out;
2704
2705                 /* Update file size info */
2706                 rec->found_size += orphan->disk_len;
2707                 if (rec->found_size == rec->nbytes)
2708                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2709
2710                 /* Update the file extent hole info too */
2711                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2712                                            orphan->disk_len);
2713                 if (ret < 0)
2714                         goto out;
2715                 if (RB_EMPTY_ROOT(&rec->holes))
2716                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2717
2718                 list_del(&orphan->list);
2719                 free(orphan);
2720         }
2721         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2722 out:
2723         return ret;
2724 }
2725
2726 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2727                                         struct btrfs_root *root,
2728                                         struct btrfs_path *path,
2729                                         struct inode_record *rec)
2730 {
2731         struct rb_node *node;
2732         struct file_extent_hole *hole;
2733         int found = 0;
2734         int ret = 0;
2735
2736         node = rb_first(&rec->holes);
2737
2738         while (node) {
2739                 found = 1;
2740                 hole = rb_entry(node, struct file_extent_hole, node);
2741                 ret = btrfs_punch_hole(trans, root, rec->ino,
2742                                        hole->start, hole->len);
2743                 if (ret < 0)
2744                         goto out;
2745                 ret = del_file_extent_hole(&rec->holes, hole->start,
2746                                            hole->len);
2747                 if (ret < 0)
2748                         goto out;
2749                 if (RB_EMPTY_ROOT(&rec->holes))
2750                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2751                 node = rb_first(&rec->holes);
2752         }
2753         /* special case for a file losing all its file extent */
2754         if (!found) {
2755                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2756                                        round_up(rec->isize, root->sectorsize));
2757                 if (ret < 0)
2758                         goto out;
2759         }
2760         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2761                rec->ino, root->objectid);
2762 out:
2763         return ret;
2764 }
2765
2766 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2767 {
2768         struct btrfs_trans_handle *trans;
2769         struct btrfs_path *path;
2770         int ret = 0;
2771
2772         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2773                              I_ERR_NO_ORPHAN_ITEM |
2774                              I_ERR_LINK_COUNT_WRONG |
2775                              I_ERR_NO_INODE_ITEM |
2776                              I_ERR_FILE_EXTENT_ORPHAN |
2777                              I_ERR_FILE_EXTENT_DISCOUNT|
2778                              I_ERR_FILE_NBYTES_WRONG)))
2779                 return rec->errors;
2780
2781         path = btrfs_alloc_path();
2782         if (!path)
2783                 return -ENOMEM;
2784
2785         /*
2786          * For nlink repair, it may create a dir and add link, so
2787          * 2 for parent(256)'s dir_index and dir_item
2788          * 2 for lost+found dir's inode_item and inode_ref
2789          * 1 for the new inode_ref of the file
2790          * 2 for lost+found dir's dir_index and dir_item for the file
2791          */
2792         trans = btrfs_start_transaction(root, 7);
2793         if (IS_ERR(trans)) {
2794                 btrfs_free_path(path);
2795                 return PTR_ERR(trans);
2796         }
2797
2798         if (rec->errors & I_ERR_NO_INODE_ITEM)
2799                 ret = repair_inode_no_item(trans, root, path, rec);
2800         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2801                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2802         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2803                 ret = repair_inode_discount_extent(trans, root, path, rec);
2804         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2805                 ret = repair_inode_isize(trans, root, path, rec);
2806         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2807                 ret = repair_inode_orphan_item(trans, root, path, rec);
2808         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2809                 ret = repair_inode_nlinks(trans, root, path, rec);
2810         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2811                 ret = repair_inode_nbytes(trans, root, path, rec);
2812         btrfs_commit_transaction(trans, root);
2813         btrfs_free_path(path);
2814         return ret;
2815 }
2816
2817 static int check_inode_recs(struct btrfs_root *root,
2818                             struct cache_tree *inode_cache)
2819 {
2820         struct cache_extent *cache;
2821         struct ptr_node *node;
2822         struct inode_record *rec;
2823         struct inode_backref *backref;
2824         int stage = 0;
2825         int ret = 0;
2826         int err = 0;
2827         u64 error = 0;
2828         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2829
2830         if (btrfs_root_refs(&root->root_item) == 0) {
2831                 if (!cache_tree_empty(inode_cache))
2832                         fprintf(stderr, "warning line %d\n", __LINE__);
2833                 return 0;
2834         }
2835
2836         /*
2837          * We need to record the highest inode number for later 'lost+found'
2838          * dir creation.
2839          * We must select a ino not used/refered by any existing inode, or
2840          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2841          * this may cause 'lost+found' dir has wrong nlinks.
2842          */
2843         cache = last_cache_extent(inode_cache);
2844         if (cache) {
2845                 node = container_of(cache, struct ptr_node, cache);
2846                 rec = node->data;
2847                 if (rec->ino > root->highest_inode)
2848                         root->highest_inode = rec->ino;
2849         }
2850
2851         /*
2852          * We need to repair backrefs first because we could change some of the
2853          * errors in the inode recs.
2854          *
2855          * We also need to go through and delete invalid backrefs first and then
2856          * add the correct ones second.  We do this because we may get EEXIST
2857          * when adding back the correct index because we hadn't yet deleted the
2858          * invalid index.
2859          *
2860          * For example, if we were missing a dir index then the directories
2861          * isize would be wrong, so if we fixed the isize to what we thought it
2862          * would be and then fixed the backref we'd still have a invalid fs, so
2863          * we need to add back the dir index and then check to see if the isize
2864          * is still wrong.
2865          */
2866         while (stage < 3) {
2867                 stage++;
2868                 if (stage == 3 && !err)
2869                         break;
2870
2871                 cache = search_cache_extent(inode_cache, 0);
2872                 while (repair && cache) {
2873                         node = container_of(cache, struct ptr_node, cache);
2874                         rec = node->data;
2875                         cache = next_cache_extent(cache);
2876
2877                         /* Need to free everything up and rescan */
2878                         if (stage == 3) {
2879                                 remove_cache_extent(inode_cache, &node->cache);
2880                                 free(node);
2881                                 free_inode_rec(rec);
2882                                 continue;
2883                         }
2884
2885                         if (list_empty(&rec->backrefs))
2886                                 continue;
2887
2888                         ret = repair_inode_backrefs(root, rec, inode_cache,
2889                                                     stage == 1);
2890                         if (ret < 0) {
2891                                 err = ret;
2892                                 stage = 2;
2893                                 break;
2894                         } if (ret > 0) {
2895                                 err = -EAGAIN;
2896                         }
2897                 }
2898         }
2899         if (err)
2900                 return err;
2901
2902         rec = get_inode_rec(inode_cache, root_dirid, 0);
2903         BUG_ON(IS_ERR(rec));
2904         if (rec) {
2905                 ret = check_root_dir(rec);
2906                 if (ret) {
2907                         fprintf(stderr, "root %llu root dir %llu error\n",
2908                                 (unsigned long long)root->root_key.objectid,
2909                                 (unsigned long long)root_dirid);
2910                         print_inode_error(root, rec);
2911                         error++;
2912                 }
2913         } else {
2914                 if (repair) {
2915                         struct btrfs_trans_handle *trans;
2916
2917                         trans = btrfs_start_transaction(root, 1);
2918                         if (IS_ERR(trans)) {
2919                                 err = PTR_ERR(trans);
2920                                 return err;
2921                         }
2922
2923                         fprintf(stderr,
2924                                 "root %llu missing its root dir, recreating\n",
2925                                 (unsigned long long)root->objectid);
2926
2927                         ret = btrfs_make_root_dir(trans, root, root_dirid);
2928                         BUG_ON(ret);
2929
2930                         btrfs_commit_transaction(trans, root);
2931                         return -EAGAIN;
2932                 }
2933
2934                 fprintf(stderr, "root %llu root dir %llu not found\n",
2935                         (unsigned long long)root->root_key.objectid,
2936                         (unsigned long long)root_dirid);
2937         }
2938
2939         while (1) {
2940                 cache = search_cache_extent(inode_cache, 0);
2941                 if (!cache)
2942                         break;
2943                 node = container_of(cache, struct ptr_node, cache);
2944                 rec = node->data;
2945                 remove_cache_extent(inode_cache, &node->cache);
2946                 free(node);
2947                 if (rec->ino == root_dirid ||
2948                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
2949                         free_inode_rec(rec);
2950                         continue;
2951                 }
2952
2953                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2954                         ret = check_orphan_item(root, rec->ino);
2955                         if (ret == 0)
2956                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2957                         if (can_free_inode_rec(rec)) {
2958                                 free_inode_rec(rec);
2959                                 continue;
2960                         }
2961                 }
2962
2963                 if (!rec->found_inode_item)
2964                         rec->errors |= I_ERR_NO_INODE_ITEM;
2965                 if (rec->found_link != rec->nlink)
2966                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2967                 if (repair) {
2968                         ret = try_repair_inode(root, rec);
2969                         if (ret == 0 && can_free_inode_rec(rec)) {
2970                                 free_inode_rec(rec);
2971                                 continue;
2972                         }
2973                         ret = 0;
2974                 }
2975
2976                 if (!(repair && ret == 0))
2977                         error++;
2978                 print_inode_error(root, rec);
2979                 list_for_each_entry(backref, &rec->backrefs, list) {
2980                         if (!backref->found_dir_item)
2981                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
2982                         if (!backref->found_dir_index)
2983                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
2984                         if (!backref->found_inode_ref)
2985                                 backref->errors |= REF_ERR_NO_INODE_REF;
2986                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
2987                                 " namelen %u name %s filetype %d errors %x",
2988                                 (unsigned long long)backref->dir,
2989                                 (unsigned long long)backref->index,
2990                                 backref->namelen, backref->name,
2991                                 backref->filetype, backref->errors);
2992                         print_ref_error(backref->errors);
2993                 }
2994                 free_inode_rec(rec);
2995         }
2996         return (error > 0) ? -1 : 0;
2997 }
2998
2999 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3000                                         u64 objectid)
3001 {
3002         struct cache_extent *cache;
3003         struct root_record *rec = NULL;
3004         int ret;
3005
3006         cache = lookup_cache_extent(root_cache, objectid, 1);
3007         if (cache) {
3008                 rec = container_of(cache, struct root_record, cache);
3009         } else {
3010                 rec = calloc(1, sizeof(*rec));
3011                 rec->objectid = objectid;
3012                 INIT_LIST_HEAD(&rec->backrefs);
3013                 rec->cache.start = objectid;
3014                 rec->cache.size = 1;
3015
3016                 ret = insert_cache_extent(root_cache, &rec->cache);
3017                 BUG_ON(ret);
3018         }
3019         return rec;
3020 }
3021
3022 static struct root_backref *get_root_backref(struct root_record *rec,
3023                                              u64 ref_root, u64 dir, u64 index,
3024                                              const char *name, int namelen)
3025 {
3026         struct root_backref *backref;
3027
3028         list_for_each_entry(backref, &rec->backrefs, list) {
3029                 if (backref->ref_root != ref_root || backref->dir != dir ||
3030                     backref->namelen != namelen)
3031                         continue;
3032                 if (memcmp(name, backref->name, namelen))
3033                         continue;
3034                 return backref;
3035         }
3036
3037         backref = calloc(1, sizeof(*backref) + namelen + 1);
3038         backref->ref_root = ref_root;
3039         backref->dir = dir;
3040         backref->index = index;
3041         backref->namelen = namelen;
3042         memcpy(backref->name, name, namelen);
3043         backref->name[namelen] = '\0';
3044         list_add_tail(&backref->list, &rec->backrefs);
3045         return backref;
3046 }
3047
3048 static void free_root_record(struct cache_extent *cache)
3049 {
3050         struct root_record *rec;
3051         struct root_backref *backref;
3052
3053         rec = container_of(cache, struct root_record, cache);
3054         while (!list_empty(&rec->backrefs)) {
3055                 backref = list_entry(rec->backrefs.next,
3056                                      struct root_backref, list);
3057                 list_del(&backref->list);
3058                 free(backref);
3059         }
3060
3061         kfree(rec);
3062 }
3063
3064 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3065
3066 static int add_root_backref(struct cache_tree *root_cache,
3067                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3068                             const char *name, int namelen,
3069                             int item_type, int errors)
3070 {
3071         struct root_record *rec;
3072         struct root_backref *backref;
3073
3074         rec = get_root_rec(root_cache, root_id);
3075         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3076
3077         backref->errors |= errors;
3078
3079         if (item_type != BTRFS_DIR_ITEM_KEY) {
3080                 if (backref->found_dir_index || backref->found_back_ref ||
3081                     backref->found_forward_ref) {
3082                         if (backref->index != index)
3083                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3084                 } else {
3085                         backref->index = index;
3086                 }
3087         }
3088
3089         if (item_type == BTRFS_DIR_ITEM_KEY) {
3090                 if (backref->found_forward_ref)
3091                         rec->found_ref++;
3092                 backref->found_dir_item = 1;
3093         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3094                 backref->found_dir_index = 1;
3095         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3096                 if (backref->found_forward_ref)
3097                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3098                 else if (backref->found_dir_item)
3099                         rec->found_ref++;
3100                 backref->found_forward_ref = 1;
3101         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3102                 if (backref->found_back_ref)
3103                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3104                 backref->found_back_ref = 1;
3105         } else {
3106                 BUG_ON(1);
3107         }
3108
3109         if (backref->found_forward_ref && backref->found_dir_item)
3110                 backref->reachable = 1;
3111         return 0;
3112 }
3113
3114 static int merge_root_recs(struct btrfs_root *root,
3115                            struct cache_tree *src_cache,
3116                            struct cache_tree *dst_cache)
3117 {
3118         struct cache_extent *cache;
3119         struct ptr_node *node;
3120         struct inode_record *rec;
3121         struct inode_backref *backref;
3122         int ret = 0;
3123
3124         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3125                 free_inode_recs_tree(src_cache);
3126                 return 0;
3127         }
3128
3129         while (1) {
3130                 cache = search_cache_extent(src_cache, 0);
3131                 if (!cache)
3132                         break;
3133                 node = container_of(cache, struct ptr_node, cache);
3134                 rec = node->data;
3135                 remove_cache_extent(src_cache, &node->cache);
3136                 free(node);
3137
3138                 ret = is_child_root(root, root->objectid, rec->ino);
3139                 if (ret < 0)
3140                         break;
3141                 else if (ret == 0)
3142                         goto skip;
3143
3144                 list_for_each_entry(backref, &rec->backrefs, list) {
3145                         BUG_ON(backref->found_inode_ref);
3146                         if (backref->found_dir_item)
3147                                 add_root_backref(dst_cache, rec->ino,
3148                                         root->root_key.objectid, backref->dir,
3149                                         backref->index, backref->name,
3150                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3151                                         backref->errors);
3152                         if (backref->found_dir_index)
3153                                 add_root_backref(dst_cache, rec->ino,
3154                                         root->root_key.objectid, backref->dir,
3155                                         backref->index, backref->name,
3156                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3157                                         backref->errors);
3158                 }
3159 skip:
3160                 free_inode_rec(rec);
3161         }
3162         if (ret < 0)
3163                 return ret;
3164         return 0;
3165 }
3166
3167 static int check_root_refs(struct btrfs_root *root,
3168                            struct cache_tree *root_cache)
3169 {
3170         struct root_record *rec;
3171         struct root_record *ref_root;
3172         struct root_backref *backref;
3173         struct cache_extent *cache;
3174         int loop = 1;
3175         int ret;
3176         int error;
3177         int errors = 0;
3178
3179         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3180         rec->found_ref = 1;
3181
3182         /* fixme: this can not detect circular references */
3183         while (loop) {
3184                 loop = 0;
3185                 cache = search_cache_extent(root_cache, 0);
3186                 while (1) {
3187                         if (!cache)
3188                                 break;
3189                         rec = container_of(cache, struct root_record, cache);
3190                         cache = next_cache_extent(cache);
3191
3192                         if (rec->found_ref == 0)
3193                                 continue;
3194
3195                         list_for_each_entry(backref, &rec->backrefs, list) {
3196                                 if (!backref->reachable)
3197                                         continue;
3198
3199                                 ref_root = get_root_rec(root_cache,
3200                                                         backref->ref_root);
3201                                 if (ref_root->found_ref > 0)
3202                                         continue;
3203
3204                                 backref->reachable = 0;
3205                                 rec->found_ref--;
3206                                 if (rec->found_ref == 0)
3207                                         loop = 1;
3208                         }
3209                 }
3210         }
3211
3212         cache = search_cache_extent(root_cache, 0);
3213         while (1) {
3214                 if (!cache)
3215                         break;
3216                 rec = container_of(cache, struct root_record, cache);
3217                 cache = next_cache_extent(cache);
3218
3219                 if (rec->found_ref == 0 &&
3220                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3221                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3222                         ret = check_orphan_item(root->fs_info->tree_root,
3223                                                 rec->objectid);
3224                         if (ret == 0)
3225                                 continue;
3226
3227                         /*
3228                          * If we don't have a root item then we likely just have
3229                          * a dir item in a snapshot for this root but no actual
3230                          * ref key or anything so it's meaningless.
3231                          */
3232                         if (!rec->found_root_item)
3233                                 continue;
3234                         errors++;
3235                         fprintf(stderr, "fs tree %llu not referenced\n",
3236                                 (unsigned long long)rec->objectid);
3237                 }
3238
3239                 error = 0;
3240                 if (rec->found_ref > 0 && !rec->found_root_item)
3241                         error = 1;
3242                 list_for_each_entry(backref, &rec->backrefs, list) {
3243                         if (!backref->found_dir_item)
3244                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3245                         if (!backref->found_dir_index)
3246                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3247                         if (!backref->found_back_ref)
3248                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3249                         if (!backref->found_forward_ref)
3250                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3251                         if (backref->reachable && backref->errors)
3252                                 error = 1;
3253                 }
3254                 if (!error)
3255                         continue;
3256
3257                 errors++;
3258                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3259                         (unsigned long long)rec->objectid, rec->found_ref,
3260                          rec->found_root_item ? "" : "not found");
3261
3262                 list_for_each_entry(backref, &rec->backrefs, list) {
3263                         if (!backref->reachable)
3264                                 continue;
3265                         if (!backref->errors && rec->found_root_item)
3266                                 continue;
3267                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3268                                 " index %llu namelen %u name %s errors %x\n",
3269                                 (unsigned long long)backref->ref_root,
3270                                 (unsigned long long)backref->dir,
3271                                 (unsigned long long)backref->index,
3272                                 backref->namelen, backref->name,
3273                                 backref->errors);
3274                         print_ref_error(backref->errors);
3275                 }
3276         }
3277         return errors > 0 ? 1 : 0;
3278 }
3279
3280 static int process_root_ref(struct extent_buffer *eb, int slot,
3281                             struct btrfs_key *key,
3282                             struct cache_tree *root_cache)
3283 {
3284         u64 dirid;
3285         u64 index;
3286         u32 len;
3287         u32 name_len;
3288         struct btrfs_root_ref *ref;
3289         char namebuf[BTRFS_NAME_LEN];
3290         int error;
3291
3292         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3293
3294         dirid = btrfs_root_ref_dirid(eb, ref);
3295         index = btrfs_root_ref_sequence(eb, ref);
3296         name_len = btrfs_root_ref_name_len(eb, ref);
3297
3298         if (name_len <= BTRFS_NAME_LEN) {
3299                 len = name_len;
3300                 error = 0;
3301         } else {
3302                 len = BTRFS_NAME_LEN;
3303                 error = REF_ERR_NAME_TOO_LONG;
3304         }
3305         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3306
3307         if (key->type == BTRFS_ROOT_REF_KEY) {
3308                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3309                                  index, namebuf, len, key->type, error);
3310         } else {
3311                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3312                                  index, namebuf, len, key->type, error);
3313         }
3314         return 0;
3315 }
3316
3317 static void free_corrupt_block(struct cache_extent *cache)
3318 {
3319         struct btrfs_corrupt_block *corrupt;
3320
3321         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3322         free(corrupt);
3323 }
3324
3325 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3326
3327 /*
3328  * Repair the btree of the given root.
3329  *
3330  * The fix is to remove the node key in corrupt_blocks cache_tree.
3331  * and rebalance the tree.
3332  * After the fix, the btree should be writeable.
3333  */
3334 static int repair_btree(struct btrfs_root *root,
3335                         struct cache_tree *corrupt_blocks)
3336 {
3337         struct btrfs_trans_handle *trans;
3338         struct btrfs_path *path;
3339         struct btrfs_corrupt_block *corrupt;
3340         struct cache_extent *cache;
3341         struct btrfs_key key;
3342         u64 offset;
3343         int level;
3344         int ret = 0;
3345
3346         if (cache_tree_empty(corrupt_blocks))
3347                 return 0;
3348
3349         path = btrfs_alloc_path();
3350         if (!path)
3351                 return -ENOMEM;
3352
3353         trans = btrfs_start_transaction(root, 1);
3354         if (IS_ERR(trans)) {
3355                 ret = PTR_ERR(trans);
3356                 fprintf(stderr, "Error starting transaction: %s\n",
3357                         strerror(-ret));
3358                 goto out_free_path;
3359         }
3360         cache = first_cache_extent(corrupt_blocks);
3361         while (cache) {
3362                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3363                                        cache);
3364                 level = corrupt->level;
3365                 path->lowest_level = level;
3366                 key.objectid = corrupt->key.objectid;
3367                 key.type = corrupt->key.type;
3368                 key.offset = corrupt->key.offset;
3369
3370                 /*
3371                  * Here we don't want to do any tree balance, since it may
3372                  * cause a balance with corrupted brother leaf/node,
3373                  * so ins_len set to 0 here.
3374                  * Balance will be done after all corrupt node/leaf is deleted.
3375                  */
3376                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3377                 if (ret < 0)
3378                         goto out;
3379                 offset = btrfs_node_blockptr(path->nodes[level],
3380                                              path->slots[level]);
3381
3382                 /* Remove the ptr */
3383                 ret = btrfs_del_ptr(trans, root, path, level,
3384                                     path->slots[level]);
3385                 if (ret < 0)
3386                         goto out;
3387                 /*
3388                  * Remove the corresponding extent
3389                  * return value is not concerned.
3390                  */
3391                 btrfs_release_path(path);
3392                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3393                                         0, root->root_key.objectid,
3394                                         level - 1, 0);
3395                 cache = next_cache_extent(cache);
3396         }
3397
3398         /* Balance the btree using btrfs_search_slot() */
3399         cache = first_cache_extent(corrupt_blocks);
3400         while (cache) {
3401                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3402                                        cache);
3403                 memcpy(&key, &corrupt->key, sizeof(key));
3404                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3405                 if (ret < 0)
3406                         goto out;
3407                 /* return will always >0 since it won't find the item */
3408                 ret = 0;
3409                 btrfs_release_path(path);
3410                 cache = next_cache_extent(cache);
3411         }
3412 out:
3413         btrfs_commit_transaction(trans, root);
3414 out_free_path:
3415         btrfs_free_path(path);
3416         return ret;
3417 }
3418
3419 static int check_fs_root(struct btrfs_root *root,
3420                          struct cache_tree *root_cache,
3421                          struct walk_control *wc)
3422 {
3423         int ret = 0;
3424         int err = 0;
3425         int wret;
3426         int level;
3427         struct btrfs_path path;
3428         struct shared_node root_node;
3429         struct root_record *rec;
3430         struct btrfs_root_item *root_item = &root->root_item;
3431         struct cache_tree corrupt_blocks;
3432         struct orphan_data_extent *orphan;
3433         struct orphan_data_extent *tmp;
3434         enum btrfs_tree_block_status status;
3435
3436         /*
3437          * Reuse the corrupt_block cache tree to record corrupted tree block
3438          *
3439          * Unlike the usage in extent tree check, here we do it in a per
3440          * fs/subvol tree base.
3441          */
3442         cache_tree_init(&corrupt_blocks);
3443         root->fs_info->corrupt_blocks = &corrupt_blocks;
3444
3445         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3446                 rec = get_root_rec(root_cache, root->root_key.objectid);
3447                 if (btrfs_root_refs(root_item) > 0)
3448                         rec->found_root_item = 1;
3449         }
3450
3451         btrfs_init_path(&path);
3452         memset(&root_node, 0, sizeof(root_node));
3453         cache_tree_init(&root_node.root_cache);
3454         cache_tree_init(&root_node.inode_cache);
3455
3456         /* Move the orphan extent record to corresponding inode_record */
3457         list_for_each_entry_safe(orphan, tmp,
3458                                  &root->orphan_data_extents, list) {
3459                 struct inode_record *inode;
3460
3461                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3462                                       1);
3463                 BUG_ON(IS_ERR(inode));
3464                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3465                 list_move(&orphan->list, &inode->orphan_extents);
3466         }
3467
3468         level = btrfs_header_level(root->node);
3469         memset(wc->nodes, 0, sizeof(wc->nodes));
3470         wc->nodes[level] = &root_node;
3471         wc->active_node = level;
3472         wc->root_level = level;
3473
3474         /* We may not have checked the root block, lets do that now */
3475         if (btrfs_is_leaf(root->node))
3476                 status = btrfs_check_leaf(root, NULL, root->node);
3477         else
3478                 status = btrfs_check_node(root, NULL, root->node);
3479         if (status != BTRFS_TREE_BLOCK_CLEAN)
3480                 return -EIO;
3481
3482         if (btrfs_root_refs(root_item) > 0 ||
3483             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3484                 path.nodes[level] = root->node;
3485                 extent_buffer_get(root->node);
3486                 path.slots[level] = 0;
3487         } else {
3488                 struct btrfs_key key;
3489                 struct btrfs_disk_key found_key;
3490
3491                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3492                 level = root_item->drop_level;
3493                 path.lowest_level = level;
3494                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3495                 if (wret < 0)
3496                         goto skip_walking;
3497                 btrfs_node_key(path.nodes[level], &found_key,
3498                                 path.slots[level]);
3499                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3500                                         sizeof(found_key)));
3501         }
3502
3503         while (1) {
3504                 wret = walk_down_tree(root, &path, wc, &level);
3505                 if (wret < 0)
3506                         ret = wret;
3507                 if (wret != 0)
3508                         break;
3509
3510                 wret = walk_up_tree(root, &path, wc, &level);
3511                 if (wret < 0)
3512                         ret = wret;
3513                 if (wret != 0)
3514                         break;
3515         }
3516 skip_walking:
3517         btrfs_release_path(&path);
3518
3519         if (!cache_tree_empty(&corrupt_blocks)) {
3520                 struct cache_extent *cache;
3521                 struct btrfs_corrupt_block *corrupt;
3522
3523                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3524                        root->root_key.objectid);
3525                 cache = first_cache_extent(&corrupt_blocks);
3526                 while (cache) {
3527                         corrupt = container_of(cache,
3528                                                struct btrfs_corrupt_block,
3529                                                cache);
3530                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3531                                cache->start, corrupt->level,
3532                                corrupt->key.objectid, corrupt->key.type,
3533                                corrupt->key.offset);
3534                         cache = next_cache_extent(cache);
3535                 }
3536                 if (repair) {
3537                         printf("Try to repair the btree for root %llu\n",
3538                                root->root_key.objectid);
3539                         ret = repair_btree(root, &corrupt_blocks);
3540                         if (ret < 0)
3541                                 fprintf(stderr, "Failed to repair btree: %s\n",
3542                                         strerror(-ret));
3543                         if (!ret)
3544                                 printf("Btree for root %llu is fixed\n",
3545                                        root->root_key.objectid);
3546                 }
3547         }
3548
3549         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3550         if (err < 0)
3551                 ret = err;
3552
3553         if (root_node.current) {
3554                 root_node.current->checked = 1;
3555                 maybe_free_inode_rec(&root_node.inode_cache,
3556                                 root_node.current);
3557         }
3558
3559         err = check_inode_recs(root, &root_node.inode_cache);
3560         if (!ret)
3561                 ret = err;
3562
3563         free_corrupt_blocks_tree(&corrupt_blocks);
3564         root->fs_info->corrupt_blocks = NULL;
3565         free_orphan_data_extents(&root->orphan_data_extents);
3566         return ret;
3567 }
3568
3569 static int fs_root_objectid(u64 objectid)
3570 {
3571         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3572             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3573                 return 1;
3574         return is_fstree(objectid);
3575 }
3576
3577 static int check_fs_roots(struct btrfs_root *root,
3578                           struct cache_tree *root_cache)
3579 {
3580         struct btrfs_path path;
3581         struct btrfs_key key;
3582         struct walk_control wc;
3583         struct extent_buffer *leaf, *tree_node;
3584         struct btrfs_root *tmp_root;
3585         struct btrfs_root *tree_root = root->fs_info->tree_root;
3586         int ret;
3587         int err = 0;
3588
3589         if (ctx.progress_enabled) {
3590                 ctx.tp = TASK_FS_ROOTS;
3591                 task_start(ctx.info);
3592         }
3593
3594         /*
3595          * Just in case we made any changes to the extent tree that weren't
3596          * reflected into the free space cache yet.
3597          */
3598         if (repair)
3599                 reset_cached_block_groups(root->fs_info);
3600         memset(&wc, 0, sizeof(wc));
3601         cache_tree_init(&wc.shared);
3602         btrfs_init_path(&path);
3603
3604 again:
3605         key.offset = 0;
3606         key.objectid = 0;
3607         key.type = BTRFS_ROOT_ITEM_KEY;
3608         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3609         if (ret < 0) {
3610                 err = 1;
3611                 goto out;
3612         }
3613         tree_node = tree_root->node;
3614         while (1) {
3615                 if (tree_node != tree_root->node) {
3616                         free_root_recs_tree(root_cache);
3617                         btrfs_release_path(&path);
3618                         goto again;
3619                 }
3620                 leaf = path.nodes[0];
3621                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3622                         ret = btrfs_next_leaf(tree_root, &path);
3623                         if (ret) {
3624                                 if (ret < 0)
3625                                         err = 1;
3626                                 break;
3627                         }
3628                         leaf = path.nodes[0];
3629                 }
3630                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3631                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3632                     fs_root_objectid(key.objectid)) {
3633                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3634                                 tmp_root = btrfs_read_fs_root_no_cache(
3635                                                 root->fs_info, &key);
3636                         } else {
3637                                 key.offset = (u64)-1;
3638                                 tmp_root = btrfs_read_fs_root(
3639                                                 root->fs_info, &key);
3640                         }
3641                         if (IS_ERR(tmp_root)) {
3642                                 err = 1;
3643                                 goto next;
3644                         }
3645                         ret = check_fs_root(tmp_root, root_cache, &wc);
3646                         if (ret == -EAGAIN) {
3647                                 free_root_recs_tree(root_cache);
3648                                 btrfs_release_path(&path);
3649                                 goto again;
3650                         }
3651                         if (ret)
3652                                 err = 1;
3653                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3654                                 btrfs_free_fs_root(tmp_root);
3655                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3656                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3657                         process_root_ref(leaf, path.slots[0], &key,
3658                                          root_cache);
3659                 }
3660 next:
3661                 path.slots[0]++;
3662         }
3663 out:
3664         btrfs_release_path(&path);
3665         if (err)
3666                 free_extent_cache_tree(&wc.shared);
3667         if (!cache_tree_empty(&wc.shared))
3668                 fprintf(stderr, "warning line %d\n", __LINE__);
3669
3670         task_stop(ctx.info);
3671
3672         return err;
3673 }
3674
3675 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3676 {
3677         struct list_head *cur = rec->backrefs.next;
3678         struct extent_backref *back;
3679         struct tree_backref *tback;
3680         struct data_backref *dback;
3681         u64 found = 0;
3682         int err = 0;
3683
3684         while(cur != &rec->backrefs) {
3685                 back = list_entry(cur, struct extent_backref, list);
3686                 cur = cur->next;
3687                 if (!back->found_extent_tree) {
3688                         err = 1;
3689                         if (!print_errs)
3690                                 goto out;
3691                         if (back->is_data) {
3692                                 dback = (struct data_backref *)back;
3693                                 fprintf(stderr, "Backref %llu %s %llu"
3694                                         " owner %llu offset %llu num_refs %lu"
3695                                         " not found in extent tree\n",
3696                                         (unsigned long long)rec->start,
3697                                         back->full_backref ?
3698                                         "parent" : "root",
3699                                         back->full_backref ?
3700                                         (unsigned long long)dback->parent:
3701                                         (unsigned long long)dback->root,
3702                                         (unsigned long long)dback->owner,
3703                                         (unsigned long long)dback->offset,
3704                                         (unsigned long)dback->num_refs);
3705                         } else {
3706                                 tback = (struct tree_backref *)back;
3707                                 fprintf(stderr, "Backref %llu parent %llu"
3708                                         " root %llu not found in extent tree\n",
3709                                         (unsigned long long)rec->start,
3710                                         (unsigned long long)tback->parent,
3711                                         (unsigned long long)tback->root);
3712                         }
3713                 }
3714                 if (!back->is_data && !back->found_ref) {
3715                         err = 1;
3716                         if (!print_errs)
3717                                 goto out;
3718                         tback = (struct tree_backref *)back;
3719                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3720                                 (unsigned long long)rec->start,
3721                                 back->full_backref ? "parent" : "root",
3722                                 back->full_backref ?
3723                                 (unsigned long long)tback->parent :
3724                                 (unsigned long long)tback->root, back);
3725                 }
3726                 if (back->is_data) {
3727                         dback = (struct data_backref *)back;
3728                         if (dback->found_ref != dback->num_refs) {
3729                                 err = 1;
3730                                 if (!print_errs)
3731                                         goto out;
3732                                 fprintf(stderr, "Incorrect local backref count"
3733                                         " on %llu %s %llu owner %llu"
3734                                         " offset %llu found %u wanted %u back %p\n",
3735                                         (unsigned long long)rec->start,
3736                                         back->full_backref ?
3737                                         "parent" : "root",
3738                                         back->full_backref ?
3739                                         (unsigned long long)dback->parent:
3740                                         (unsigned long long)dback->root,
3741                                         (unsigned long long)dback->owner,
3742                                         (unsigned long long)dback->offset,
3743                                         dback->found_ref, dback->num_refs, back);
3744                         }
3745                         if (dback->disk_bytenr != rec->start) {
3746                                 err = 1;
3747                                 if (!print_errs)
3748                                         goto out;
3749                                 fprintf(stderr, "Backref disk bytenr does not"
3750                                         " match extent record, bytenr=%llu, "
3751                                         "ref bytenr=%llu\n",
3752                                         (unsigned long long)rec->start,
3753                                         (unsigned long long)dback->disk_bytenr);
3754                         }
3755
3756                         if (dback->bytes != rec->nr) {
3757                                 err = 1;
3758                                 if (!print_errs)
3759                                         goto out;
3760                                 fprintf(stderr, "Backref bytes do not match "
3761                                         "extent backref, bytenr=%llu, ref "
3762                                         "bytes=%llu, backref bytes=%llu\n",
3763                                         (unsigned long long)rec->start,
3764                                         (unsigned long long)rec->nr,
3765                                         (unsigned long long)dback->bytes);
3766                         }
3767                 }
3768                 if (!back->is_data) {
3769                         found += 1;
3770                 } else {
3771                         dback = (struct data_backref *)back;
3772                         found += dback->found_ref;
3773                 }
3774         }
3775         if (found != rec->refs) {
3776                 err = 1;
3777                 if (!print_errs)
3778                         goto out;
3779                 fprintf(stderr, "Incorrect global backref count "
3780                         "on %llu found %llu wanted %llu\n",
3781                         (unsigned long long)rec->start,
3782                         (unsigned long long)found,
3783                         (unsigned long long)rec->refs);
3784         }
3785 out:
3786         return err;
3787 }
3788
3789 static int free_all_extent_backrefs(struct extent_record *rec)
3790 {
3791         struct extent_backref *back;
3792         struct list_head *cur;
3793         while (!list_empty(&rec->backrefs)) {
3794                 cur = rec->backrefs.next;
3795                 back = list_entry(cur, struct extent_backref, list);
3796                 list_del(cur);
3797                 free(back);
3798         }
3799         return 0;
3800 }
3801
3802 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3803                                      struct cache_tree *extent_cache)
3804 {
3805         struct cache_extent *cache;
3806         struct extent_record *rec;
3807
3808         while (1) {
3809                 cache = first_cache_extent(extent_cache);
3810                 if (!cache)
3811                         break;
3812                 rec = container_of(cache, struct extent_record, cache);
3813                 remove_cache_extent(extent_cache, cache);
3814                 free_all_extent_backrefs(rec);
3815                 free(rec);
3816         }
3817 }
3818
3819 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3820                                  struct extent_record *rec)
3821 {
3822         if (rec->content_checked && rec->owner_ref_checked &&
3823             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3824             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3825             !rec->bad_full_backref && !rec->crossing_stripes &&
3826             !rec->wrong_chunk_type) {
3827                 remove_cache_extent(extent_cache, &rec->cache);
3828                 free_all_extent_backrefs(rec);
3829                 list_del_init(&rec->list);
3830                 free(rec);
3831         }
3832         return 0;
3833 }
3834
3835 static int check_owner_ref(struct btrfs_root *root,
3836                             struct extent_record *rec,
3837                             struct extent_buffer *buf)
3838 {
3839         struct extent_backref *node;
3840         struct tree_backref *back;
3841         struct btrfs_root *ref_root;
3842         struct btrfs_key key;
3843         struct btrfs_path path;
3844         struct extent_buffer *parent;
3845         int level;
3846         int found = 0;
3847         int ret;
3848
3849         list_for_each_entry(node, &rec->backrefs, list) {
3850                 if (node->is_data)
3851                         continue;
3852                 if (!node->found_ref)
3853                         continue;
3854                 if (node->full_backref)
3855                         continue;
3856                 back = (struct tree_backref *)node;
3857                 if (btrfs_header_owner(buf) == back->root)
3858                         return 0;
3859         }
3860         BUG_ON(rec->is_root);
3861
3862         /* try to find the block by search corresponding fs tree */
3863         key.objectid = btrfs_header_owner(buf);
3864         key.type = BTRFS_ROOT_ITEM_KEY;
3865         key.offset = (u64)-1;
3866
3867         ref_root = btrfs_read_fs_root(root->fs_info, &key);
3868         if (IS_ERR(ref_root))
3869                 return 1;
3870
3871         level = btrfs_header_level(buf);
3872         if (level == 0)
3873                 btrfs_item_key_to_cpu(buf, &key, 0);
3874         else
3875                 btrfs_node_key_to_cpu(buf, &key, 0);
3876
3877         btrfs_init_path(&path);
3878         path.lowest_level = level + 1;
3879         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3880         if (ret < 0)
3881                 return 0;
3882
3883         parent = path.nodes[level + 1];
3884         if (parent && buf->start == btrfs_node_blockptr(parent,
3885                                                         path.slots[level + 1]))
3886                 found = 1;
3887
3888         btrfs_release_path(&path);
3889         return found ? 0 : 1;
3890 }
3891
3892 static int is_extent_tree_record(struct extent_record *rec)
3893 {
3894         struct list_head *cur = rec->backrefs.next;
3895         struct extent_backref *node;
3896         struct tree_backref *back;
3897         int is_extent = 0;
3898
3899         while(cur != &rec->backrefs) {
3900                 node = list_entry(cur, struct extent_backref, list);
3901                 cur = cur->next;
3902                 if (node->is_data)
3903                         return 0;
3904                 back = (struct tree_backref *)node;
3905                 if (node->full_backref)
3906                         return 0;
3907                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3908                         is_extent = 1;
3909         }
3910         return is_extent;
3911 }
3912
3913
3914 static int record_bad_block_io(struct btrfs_fs_info *info,
3915                                struct cache_tree *extent_cache,
3916                                u64 start, u64 len)
3917 {
3918         struct extent_record *rec;
3919         struct cache_extent *cache;
3920         struct btrfs_key key;
3921
3922         cache = lookup_cache_extent(extent_cache, start, len);
3923         if (!cache)
3924                 return 0;
3925
3926         rec = container_of(cache, struct extent_record, cache);
3927         if (!is_extent_tree_record(rec))
3928                 return 0;
3929
3930         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3931         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
3932 }
3933
3934 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3935                        struct extent_buffer *buf, int slot)
3936 {
3937         if (btrfs_header_level(buf)) {
3938                 struct btrfs_key_ptr ptr1, ptr2;
3939
3940                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3941                                    sizeof(struct btrfs_key_ptr));
3942                 read_extent_buffer(buf, &ptr2,
3943                                    btrfs_node_key_ptr_offset(slot + 1),
3944                                    sizeof(struct btrfs_key_ptr));
3945                 write_extent_buffer(buf, &ptr1,
3946                                     btrfs_node_key_ptr_offset(slot + 1),
3947                                     sizeof(struct btrfs_key_ptr));
3948                 write_extent_buffer(buf, &ptr2,
3949                                     btrfs_node_key_ptr_offset(slot),
3950                                     sizeof(struct btrfs_key_ptr));
3951                 if (slot == 0) {
3952                         struct btrfs_disk_key key;
3953                         btrfs_node_key(buf, &key, 0);
3954                         btrfs_fixup_low_keys(root, path, &key,
3955                                              btrfs_header_level(buf) + 1);
3956                 }
3957         } else {
3958                 struct btrfs_item *item1, *item2;
3959                 struct btrfs_key k1, k2;
3960                 char *item1_data, *item2_data;
3961                 u32 item1_offset, item2_offset, item1_size, item2_size;
3962
3963                 item1 = btrfs_item_nr(slot);
3964                 item2 = btrfs_item_nr(slot + 1);
3965                 btrfs_item_key_to_cpu(buf, &k1, slot);
3966                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
3967                 item1_offset = btrfs_item_offset(buf, item1);
3968                 item2_offset = btrfs_item_offset(buf, item2);
3969                 item1_size = btrfs_item_size(buf, item1);
3970                 item2_size = btrfs_item_size(buf, item2);
3971
3972                 item1_data = malloc(item1_size);
3973                 if (!item1_data)
3974                         return -ENOMEM;
3975                 item2_data = malloc(item2_size);
3976                 if (!item2_data) {
3977                         free(item1_data);
3978                         return -ENOMEM;
3979                 }
3980
3981                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
3982                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
3983
3984                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
3985                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
3986                 free(item1_data);
3987                 free(item2_data);
3988
3989                 btrfs_set_item_offset(buf, item1, item2_offset);
3990                 btrfs_set_item_offset(buf, item2, item1_offset);
3991                 btrfs_set_item_size(buf, item1, item2_size);
3992                 btrfs_set_item_size(buf, item2, item1_size);
3993
3994                 path->slots[0] = slot;
3995                 btrfs_set_item_key_unsafe(root, path, &k2);
3996                 path->slots[0] = slot + 1;
3997                 btrfs_set_item_key_unsafe(root, path, &k1);
3998         }
3999         return 0;
4000 }
4001
4002 static int fix_key_order(struct btrfs_trans_handle *trans,
4003                          struct btrfs_root *root,
4004                          struct btrfs_path *path)
4005 {
4006         struct extent_buffer *buf;
4007         struct btrfs_key k1, k2;
4008         int i;
4009         int level = path->lowest_level;
4010         int ret = -EIO;
4011
4012         buf = path->nodes[level];
4013         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4014                 if (level) {
4015                         btrfs_node_key_to_cpu(buf, &k1, i);
4016                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4017                 } else {
4018                         btrfs_item_key_to_cpu(buf, &k1, i);
4019                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4020                 }
4021                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4022                         continue;
4023                 ret = swap_values(root, path, buf, i);
4024                 if (ret)
4025                         break;
4026                 btrfs_mark_buffer_dirty(buf);
4027                 i = 0;
4028         }
4029         return ret;
4030 }
4031
4032 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4033                              struct btrfs_root *root,
4034                              struct btrfs_path *path,
4035                              struct extent_buffer *buf, int slot)
4036 {
4037         struct btrfs_key key;
4038         int nritems = btrfs_header_nritems(buf);
4039
4040         btrfs_item_key_to_cpu(buf, &key, slot);
4041
4042         /* These are all the keys we can deal with missing. */
4043         if (key.type != BTRFS_DIR_INDEX_KEY &&
4044             key.type != BTRFS_EXTENT_ITEM_KEY &&
4045             key.type != BTRFS_METADATA_ITEM_KEY &&
4046             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4047             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4048                 return -1;
4049
4050         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4051                (unsigned long long)key.objectid, key.type,
4052                (unsigned long long)key.offset, slot, buf->start);
4053         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4054                               btrfs_item_nr_offset(slot + 1),
4055                               sizeof(struct btrfs_item) *
4056                               (nritems - slot - 1));
4057         btrfs_set_header_nritems(buf, nritems - 1);
4058         if (slot == 0) {
4059                 struct btrfs_disk_key disk_key;
4060
4061                 btrfs_item_key(buf, &disk_key, 0);
4062                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4063         }
4064         btrfs_mark_buffer_dirty(buf);
4065         return 0;
4066 }
4067
4068 static int fix_item_offset(struct btrfs_trans_handle *trans,
4069                            struct btrfs_root *root,
4070                            struct btrfs_path *path)
4071 {
4072         struct extent_buffer *buf;
4073         int i;
4074         int ret = 0;
4075
4076         /* We should only get this for leaves */
4077         BUG_ON(path->lowest_level);
4078         buf = path->nodes[0];
4079 again:
4080         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4081                 unsigned int shift = 0, offset;
4082
4083                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4084                     BTRFS_LEAF_DATA_SIZE(root)) {
4085                         if (btrfs_item_end_nr(buf, i) >
4086                             BTRFS_LEAF_DATA_SIZE(root)) {
4087                                 ret = delete_bogus_item(trans, root, path,
4088                                                         buf, i);
4089                                 if (!ret)
4090                                         goto again;
4091                                 fprintf(stderr, "item is off the end of the "
4092                                         "leaf, can't fix\n");
4093                                 ret = -EIO;
4094                                 break;
4095                         }
4096                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4097                                 btrfs_item_end_nr(buf, i);
4098                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4099                            btrfs_item_offset_nr(buf, i - 1)) {
4100                         if (btrfs_item_end_nr(buf, i) >
4101                             btrfs_item_offset_nr(buf, i - 1)) {
4102                                 ret = delete_bogus_item(trans, root, path,
4103                                                         buf, i);
4104                                 if (!ret)
4105                                         goto again;
4106                                 fprintf(stderr, "items overlap, can't fix\n");
4107                                 ret = -EIO;
4108                                 break;
4109                         }
4110                         shift = btrfs_item_offset_nr(buf, i - 1) -
4111                                 btrfs_item_end_nr(buf, i);
4112                 }
4113                 if (!shift)
4114                         continue;
4115
4116                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4117                        i, shift, (unsigned long long)buf->start);
4118                 offset = btrfs_item_offset_nr(buf, i);
4119                 memmove_extent_buffer(buf,
4120                                       btrfs_leaf_data(buf) + offset + shift,
4121                                       btrfs_leaf_data(buf) + offset,
4122                                       btrfs_item_size_nr(buf, i));
4123                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4124                                       offset + shift);
4125                 btrfs_mark_buffer_dirty(buf);
4126         }
4127
4128         /*
4129          * We may have moved things, in which case we want to exit so we don't
4130          * write those changes out.  Once we have proper abort functionality in
4131          * progs this can be changed to something nicer.
4132          */
4133         BUG_ON(ret);
4134         return ret;
4135 }
4136
4137 /*
4138  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4139  * then just return -EIO.
4140  */
4141 static int try_to_fix_bad_block(struct btrfs_root *root,
4142                                 struct extent_buffer *buf,
4143                                 enum btrfs_tree_block_status status)
4144 {
4145         struct btrfs_trans_handle *trans;
4146         struct ulist *roots;
4147         struct ulist_node *node;
4148         struct btrfs_root *search_root;
4149         struct btrfs_path *path;
4150         struct ulist_iterator iter;
4151         struct btrfs_key root_key, key;
4152         int ret;
4153
4154         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4155             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4156                 return -EIO;
4157
4158         path = btrfs_alloc_path();
4159         if (!path)
4160                 return -EIO;
4161
4162         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4163                                    0, &roots);
4164         if (ret) {
4165                 btrfs_free_path(path);
4166                 return -EIO;
4167         }
4168
4169         ULIST_ITER_INIT(&iter);
4170         while ((node = ulist_next(roots, &iter))) {
4171                 root_key.objectid = node->val;
4172                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4173                 root_key.offset = (u64)-1;
4174
4175                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4176                 if (IS_ERR(root)) {
4177                         ret = -EIO;
4178                         break;
4179                 }
4180
4181
4182                 trans = btrfs_start_transaction(search_root, 0);
4183                 if (IS_ERR(trans)) {
4184                         ret = PTR_ERR(trans);
4185                         break;
4186                 }
4187
4188                 path->lowest_level = btrfs_header_level(buf);
4189                 path->skip_check_block = 1;
4190                 if (path->lowest_level)
4191                         btrfs_node_key_to_cpu(buf, &key, 0);
4192                 else
4193                         btrfs_item_key_to_cpu(buf, &key, 0);
4194                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4195                 if (ret) {
4196                         ret = -EIO;
4197                         btrfs_commit_transaction(trans, search_root);
4198                         break;
4199                 }
4200                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4201                         ret = fix_key_order(trans, search_root, path);
4202                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4203                         ret = fix_item_offset(trans, search_root, path);
4204                 if (ret) {
4205                         btrfs_commit_transaction(trans, search_root);
4206                         break;
4207                 }
4208                 btrfs_release_path(path);
4209                 btrfs_commit_transaction(trans, search_root);
4210         }
4211         ulist_free(roots);
4212         btrfs_free_path(path);
4213         return ret;
4214 }
4215
4216 static int check_block(struct btrfs_root *root,
4217                        struct cache_tree *extent_cache,
4218                        struct extent_buffer *buf, u64 flags)
4219 {
4220         struct extent_record *rec;
4221         struct cache_extent *cache;
4222         struct btrfs_key key;
4223         enum btrfs_tree_block_status status;
4224         int ret = 0;
4225         int level;
4226
4227         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4228         if (!cache)
4229                 return 1;
4230         rec = container_of(cache, struct extent_record, cache);
4231         rec->generation = btrfs_header_generation(buf);
4232
4233         level = btrfs_header_level(buf);
4234         if (btrfs_header_nritems(buf) > 0) {
4235
4236                 if (level == 0)
4237                         btrfs_item_key_to_cpu(buf, &key, 0);
4238                 else
4239                         btrfs_node_key_to_cpu(buf, &key, 0);
4240
4241                 rec->info_objectid = key.objectid;
4242         }
4243         rec->info_level = level;
4244
4245         if (btrfs_is_leaf(buf))
4246                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4247         else
4248                 status = btrfs_check_node(root, &rec->parent_key, buf);
4249
4250         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4251                 if (repair)
4252                         status = try_to_fix_bad_block(root, buf, status);
4253                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4254                         ret = -EIO;
4255                         fprintf(stderr, "bad block %llu\n",
4256                                 (unsigned long long)buf->start);
4257                 } else {
4258                         /*
4259                          * Signal to callers we need to start the scan over
4260                          * again since we'll have cow'ed blocks.
4261                          */
4262                         ret = -EAGAIN;
4263                 }
4264         } else {
4265                 rec->content_checked = 1;
4266                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4267                         rec->owner_ref_checked = 1;
4268                 else {
4269                         ret = check_owner_ref(root, rec, buf);
4270                         if (!ret)
4271                                 rec->owner_ref_checked = 1;
4272                 }
4273         }
4274         if (!ret)
4275                 maybe_free_extent_rec(extent_cache, rec);
4276         return ret;
4277 }
4278
4279 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4280                                                 u64 parent, u64 root)
4281 {
4282         struct list_head *cur = rec->backrefs.next;
4283         struct extent_backref *node;
4284         struct tree_backref *back;
4285
4286         while(cur != &rec->backrefs) {
4287                 node = list_entry(cur, struct extent_backref, list);
4288                 cur = cur->next;
4289                 if (node->is_data)
4290                         continue;
4291                 back = (struct tree_backref *)node;
4292                 if (parent > 0) {
4293                         if (!node->full_backref)
4294                                 continue;
4295                         if (parent == back->parent)
4296                                 return back;
4297                 } else {
4298                         if (node->full_backref)
4299                                 continue;
4300                         if (back->root == root)
4301                                 return back;
4302                 }
4303         }
4304         return NULL;
4305 }
4306
4307 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4308                                                 u64 parent, u64 root)
4309 {
4310         struct tree_backref *ref = malloc(sizeof(*ref));
4311         memset(&ref->node, 0, sizeof(ref->node));
4312         if (parent > 0) {
4313                 ref->parent = parent;
4314                 ref->node.full_backref = 1;
4315         } else {
4316                 ref->root = root;
4317                 ref->node.full_backref = 0;
4318         }
4319         list_add_tail(&ref->node.list, &rec->backrefs);
4320
4321         return ref;
4322 }
4323
4324 static struct data_backref *find_data_backref(struct extent_record *rec,
4325                                                 u64 parent, u64 root,
4326                                                 u64 owner, u64 offset,
4327                                                 int found_ref,
4328                                                 u64 disk_bytenr, u64 bytes)
4329 {
4330         struct list_head *cur = rec->backrefs.next;
4331         struct extent_backref *node;
4332         struct data_backref *back;
4333
4334         while(cur != &rec->backrefs) {
4335                 node = list_entry(cur, struct extent_backref, list);
4336                 cur = cur->next;
4337                 if (!node->is_data)
4338                         continue;
4339                 back = (struct data_backref *)node;
4340                 if (parent > 0) {
4341                         if (!node->full_backref)
4342                                 continue;
4343                         if (parent == back->parent)
4344                                 return back;
4345                 } else {
4346                         if (node->full_backref)
4347                                 continue;
4348                         if (back->root == root && back->owner == owner &&
4349                             back->offset == offset) {
4350                                 if (found_ref && node->found_ref &&
4351                                     (back->bytes != bytes ||
4352                                     back->disk_bytenr != disk_bytenr))
4353                                         continue;
4354                                 return back;
4355                         }
4356                 }
4357         }
4358         return NULL;
4359 }
4360
4361 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4362                                                 u64 parent, u64 root,
4363                                                 u64 owner, u64 offset,
4364                                                 u64 max_size)
4365 {
4366         struct data_backref *ref = malloc(sizeof(*ref));
4367         memset(&ref->node, 0, sizeof(ref->node));
4368         ref->node.is_data = 1;
4369
4370         if (parent > 0) {
4371                 ref->parent = parent;
4372                 ref->owner = 0;
4373                 ref->offset = 0;
4374                 ref->node.full_backref = 1;
4375         } else {
4376                 ref->root = root;
4377                 ref->owner = owner;
4378                 ref->offset = offset;
4379                 ref->node.full_backref = 0;
4380         }
4381         ref->bytes = max_size;
4382         ref->found_ref = 0;
4383         ref->num_refs = 0;
4384         list_add_tail(&ref->node.list, &rec->backrefs);
4385         if (max_size > rec->max_size)
4386                 rec->max_size = max_size;
4387         return ref;
4388 }
4389
4390 /* Check if the type of extent matches with its chunk */
4391 static void check_extent_type(struct extent_record *rec)
4392 {
4393         struct btrfs_block_group_cache *bg_cache;
4394
4395         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4396         if (!bg_cache)
4397                 return;
4398
4399         /* data extent, check chunk directly*/
4400         if (!rec->metadata) {
4401                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4402                         rec->wrong_chunk_type = 1;
4403                 return;
4404         }
4405
4406         /* metadata extent, check the obvious case first */
4407         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4408                                  BTRFS_BLOCK_GROUP_METADATA))) {
4409                 rec->wrong_chunk_type = 1;
4410                 return;
4411         }
4412
4413         /*
4414          * Check SYSTEM extent, as it's also marked as metadata, we can only
4415          * make sure it's a SYSTEM extent by its backref
4416          */
4417         if (!list_empty(&rec->backrefs)) {
4418                 struct extent_backref *node;
4419                 struct tree_backref *tback;
4420                 u64 bg_type;
4421
4422                 node = list_entry(rec->backrefs.next, struct extent_backref,
4423                                   list);
4424                 if (node->is_data) {
4425                         /* tree block shouldn't have data backref */
4426                         rec->wrong_chunk_type = 1;
4427                         return;
4428                 }
4429                 tback = container_of(node, struct tree_backref, node);
4430
4431                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4432                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4433                 else
4434                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4435                 if (!(bg_cache->flags & bg_type))
4436                         rec->wrong_chunk_type = 1;
4437         }
4438 }
4439
4440 static int add_extent_rec(struct cache_tree *extent_cache,
4441                           struct btrfs_key *parent_key, u64 parent_gen,
4442                           u64 start, u64 nr, u64 extent_item_refs,
4443                           int is_root, int inc_ref, int set_checked,
4444                           int metadata, int extent_rec, u64 max_size)
4445 {
4446         struct extent_record *rec;
4447         struct cache_extent *cache;
4448         int ret = 0;
4449         int dup = 0;
4450
4451         cache = lookup_cache_extent(extent_cache, start, nr);
4452         if (cache) {
4453                 rec = container_of(cache, struct extent_record, cache);
4454                 if (inc_ref)
4455                         rec->refs++;
4456                 if (rec->nr == 1)
4457                         rec->nr = max(nr, max_size);
4458
4459                 /*
4460                  * We need to make sure to reset nr to whatever the extent
4461                  * record says was the real size, this way we can compare it to
4462                  * the backrefs.
4463                  */
4464                 if (extent_rec) {
4465                         if (start != rec->start || rec->found_rec) {
4466                                 struct extent_record *tmp;
4467
4468                                 dup = 1;
4469                                 if (list_empty(&rec->list))
4470                                         list_add_tail(&rec->list,
4471                                                       &duplicate_extents);
4472
4473                                 /*
4474                                  * We have to do this song and dance in case we
4475                                  * find an extent record that falls inside of
4476                                  * our current extent record but does not have
4477                                  * the same objectid.
4478                                  */
4479                                 tmp = malloc(sizeof(*tmp));
4480                                 if (!tmp)
4481                                         return -ENOMEM;
4482                                 tmp->start = start;
4483                                 tmp->max_size = max_size;
4484                                 tmp->nr = nr;
4485                                 tmp->found_rec = 1;
4486                                 tmp->metadata = metadata;
4487                                 tmp->extent_item_refs = extent_item_refs;
4488                                 INIT_LIST_HEAD(&tmp->list);
4489                                 list_add_tail(&tmp->list, &rec->dups);
4490                                 rec->num_duplicates++;
4491                         } else {
4492                                 rec->nr = nr;
4493                                 rec->found_rec = 1;
4494                         }
4495                 }
4496
4497                 if (extent_item_refs && !dup) {
4498                         if (rec->extent_item_refs) {
4499                                 fprintf(stderr, "block %llu rec "
4500                                         "extent_item_refs %llu, passed %llu\n",
4501                                         (unsigned long long)start,
4502                                         (unsigned long long)
4503                                                         rec->extent_item_refs,
4504                                         (unsigned long long)extent_item_refs);
4505                         }
4506                         rec->extent_item_refs = extent_item_refs;
4507                 }
4508                 if (is_root)
4509                         rec->is_root = 1;
4510                 if (set_checked) {
4511                         rec->content_checked = 1;
4512                         rec->owner_ref_checked = 1;
4513                 }
4514
4515                 if (parent_key)
4516                         btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4517                 if (parent_gen)
4518                         rec->parent_generation = parent_gen;
4519
4520                 if (rec->max_size < max_size)
4521                         rec->max_size = max_size;
4522
4523                 /*
4524                  * A metadata extent can't cross stripe_len boundary, otherwise
4525                  * kernel scrub won't be able to handle it.
4526                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4527                  * it.
4528                  */
4529                 if (metadata && check_crossing_stripes(rec->start,
4530                                                        rec->max_size))
4531                                 rec->crossing_stripes = 1;
4532                 check_extent_type(rec);
4533                 maybe_free_extent_rec(extent_cache, rec);
4534                 return ret;
4535         }
4536         rec = malloc(sizeof(*rec));
4537         rec->start = start;
4538         rec->max_size = max_size;
4539         rec->nr = max(nr, max_size);
4540         rec->found_rec = !!extent_rec;
4541         rec->content_checked = 0;
4542         rec->owner_ref_checked = 0;
4543         rec->num_duplicates = 0;
4544         rec->metadata = metadata;
4545         rec->flag_block_full_backref = -1;
4546         rec->bad_full_backref = 0;
4547         rec->crossing_stripes = 0;
4548         rec->wrong_chunk_type = 0;
4549         INIT_LIST_HEAD(&rec->backrefs);
4550         INIT_LIST_HEAD(&rec->dups);
4551         INIT_LIST_HEAD(&rec->list);
4552
4553         if (is_root)
4554                 rec->is_root = 1;
4555         else
4556                 rec->is_root = 0;
4557
4558         if (inc_ref)
4559                 rec->refs = 1;
4560         else
4561                 rec->refs = 0;
4562
4563         if (extent_item_refs)
4564                 rec->extent_item_refs = extent_item_refs;
4565         else
4566                 rec->extent_item_refs = 0;
4567
4568         if (parent_key)
4569                 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4570         else
4571                 memset(&rec->parent_key, 0, sizeof(*parent_key));
4572
4573         if (parent_gen)
4574                 rec->parent_generation = parent_gen;
4575         else
4576                 rec->parent_generation = 0;
4577
4578         rec->cache.start = start;
4579         rec->cache.size = nr;
4580         ret = insert_cache_extent(extent_cache, &rec->cache);
4581         BUG_ON(ret);
4582         bytes_used += nr;
4583         if (set_checked) {
4584                 rec->content_checked = 1;
4585                 rec->owner_ref_checked = 1;
4586         }
4587
4588         if (metadata)
4589                 if (check_crossing_stripes(rec->start, rec->max_size))
4590                         rec->crossing_stripes = 1;
4591         check_extent_type(rec);
4592         return ret;
4593 }
4594
4595 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4596                             u64 parent, u64 root, int found_ref)
4597 {
4598         struct extent_record *rec;
4599         struct tree_backref *back;
4600         struct cache_extent *cache;
4601
4602         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4603         if (!cache) {
4604                 add_extent_rec(extent_cache, NULL, 0, bytenr,
4605                                1, 0, 0, 0, 0, 1, 0, 0);
4606                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4607                 if (!cache)
4608                         abort();
4609         }
4610
4611         rec = container_of(cache, struct extent_record, cache);
4612         if (rec->start != bytenr) {
4613                 abort();
4614         }
4615
4616         back = find_tree_backref(rec, parent, root);
4617         if (!back)
4618                 back = alloc_tree_backref(rec, parent, root);
4619
4620         if (found_ref) {
4621                 if (back->node.found_ref) {
4622                         fprintf(stderr, "Extent back ref already exists "
4623                                 "for %llu parent %llu root %llu \n",
4624                                 (unsigned long long)bytenr,
4625                                 (unsigned long long)parent,
4626                                 (unsigned long long)root);
4627                 }
4628                 back->node.found_ref = 1;
4629         } else {
4630                 if (back->node.found_extent_tree) {
4631                         fprintf(stderr, "Extent back ref already exists "
4632                                 "for %llu parent %llu root %llu \n",
4633                                 (unsigned long long)bytenr,
4634                                 (unsigned long long)parent,
4635                                 (unsigned long long)root);
4636                 }
4637                 back->node.found_extent_tree = 1;
4638         }
4639         check_extent_type(rec);
4640         maybe_free_extent_rec(extent_cache, rec);
4641         return 0;
4642 }
4643
4644 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4645                             u64 parent, u64 root, u64 owner, u64 offset,
4646                             u32 num_refs, int found_ref, u64 max_size)
4647 {
4648         struct extent_record *rec;
4649         struct data_backref *back;
4650         struct cache_extent *cache;
4651
4652         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4653         if (!cache) {
4654                 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
4655                                0, 0, max_size);
4656                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4657                 if (!cache)
4658                         abort();
4659         }
4660
4661         rec = container_of(cache, struct extent_record, cache);
4662         if (rec->max_size < max_size)
4663                 rec->max_size = max_size;
4664
4665         /*
4666          * If found_ref is set then max_size is the real size and must match the
4667          * existing refs.  So if we have already found a ref then we need to
4668          * make sure that this ref matches the existing one, otherwise we need
4669          * to add a new backref so we can notice that the backrefs don't match
4670          * and we need to figure out who is telling the truth.  This is to
4671          * account for that awful fsync bug I introduced where we'd end up with
4672          * a btrfs_file_extent_item that would have its length include multiple
4673          * prealloc extents or point inside of a prealloc extent.
4674          */
4675         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4676                                  bytenr, max_size);
4677         if (!back)
4678                 back = alloc_data_backref(rec, parent, root, owner, offset,
4679                                           max_size);
4680
4681         if (found_ref) {
4682                 BUG_ON(num_refs != 1);
4683                 if (back->node.found_ref)
4684                         BUG_ON(back->bytes != max_size);
4685                 back->node.found_ref = 1;
4686                 back->found_ref += 1;
4687                 back->bytes = max_size;
4688                 back->disk_bytenr = bytenr;
4689                 rec->refs += 1;
4690                 rec->content_checked = 1;
4691                 rec->owner_ref_checked = 1;
4692         } else {
4693                 if (back->node.found_extent_tree) {
4694                         fprintf(stderr, "Extent back ref already exists "
4695                                 "for %llu parent %llu root %llu "
4696                                 "owner %llu offset %llu num_refs %lu\n",
4697                                 (unsigned long long)bytenr,
4698                                 (unsigned long long)parent,
4699                                 (unsigned long long)root,
4700                                 (unsigned long long)owner,
4701                                 (unsigned long long)offset,
4702                                 (unsigned long)num_refs);
4703                 }
4704                 back->num_refs = num_refs;
4705                 back->node.found_extent_tree = 1;
4706         }
4707         maybe_free_extent_rec(extent_cache, rec);
4708         return 0;
4709 }
4710
4711 static int add_pending(struct cache_tree *pending,
4712                        struct cache_tree *seen, u64 bytenr, u32 size)
4713 {
4714         int ret;
4715         ret = add_cache_extent(seen, bytenr, size);
4716         if (ret)
4717                 return ret;
4718         add_cache_extent(pending, bytenr, size);
4719         return 0;
4720 }
4721
4722 static int pick_next_pending(struct cache_tree *pending,
4723                         struct cache_tree *reada,
4724                         struct cache_tree *nodes,
4725                         u64 last, struct block_info *bits, int bits_nr,
4726                         int *reada_bits)
4727 {
4728         unsigned long node_start = last;
4729         struct cache_extent *cache;
4730         int ret;
4731
4732         cache = search_cache_extent(reada, 0);
4733         if (cache) {
4734                 bits[0].start = cache->start;
4735                 bits[0].size = cache->size;
4736                 *reada_bits = 1;
4737                 return 1;
4738         }
4739         *reada_bits = 0;
4740         if (node_start > 32768)
4741                 node_start -= 32768;
4742
4743         cache = search_cache_extent(nodes, node_start);
4744         if (!cache)
4745                 cache = search_cache_extent(nodes, 0);
4746
4747         if (!cache) {
4748                  cache = search_cache_extent(pending, 0);
4749                  if (!cache)
4750                          return 0;
4751                  ret = 0;
4752                  do {
4753                          bits[ret].start = cache->start;
4754                          bits[ret].size = cache->size;
4755                          cache = next_cache_extent(cache);
4756                          ret++;
4757                  } while (cache && ret < bits_nr);
4758                  return ret;
4759         }
4760
4761         ret = 0;
4762         do {
4763                 bits[ret].start = cache->start;
4764                 bits[ret].size = cache->size;
4765                 cache = next_cache_extent(cache);
4766                 ret++;
4767         } while (cache && ret < bits_nr);
4768
4769         if (bits_nr - ret > 8) {
4770                 u64 lookup = bits[0].start + bits[0].size;
4771                 struct cache_extent *next;
4772                 next = search_cache_extent(pending, lookup);
4773                 while(next) {
4774                         if (next->start - lookup > 32768)
4775                                 break;
4776                         bits[ret].start = next->start;
4777                         bits[ret].size = next->size;
4778                         lookup = next->start + next->size;
4779                         ret++;
4780                         if (ret == bits_nr)
4781                                 break;
4782                         next = next_cache_extent(next);
4783                         if (!next)
4784                                 break;
4785                 }
4786         }
4787         return ret;
4788 }
4789
4790 static void free_chunk_record(struct cache_extent *cache)
4791 {
4792         struct chunk_record *rec;
4793
4794         rec = container_of(cache, struct chunk_record, cache);
4795         list_del_init(&rec->list);
4796         list_del_init(&rec->dextents);
4797         free(rec);
4798 }
4799
4800 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4801 {
4802         cache_tree_free_extents(chunk_cache, free_chunk_record);
4803 }
4804
4805 static void free_device_record(struct rb_node *node)
4806 {
4807         struct device_record *rec;
4808
4809         rec = container_of(node, struct device_record, node);
4810         free(rec);
4811 }
4812
4813 FREE_RB_BASED_TREE(device_cache, free_device_record);
4814
4815 int insert_block_group_record(struct block_group_tree *tree,
4816                               struct block_group_record *bg_rec)
4817 {
4818         int ret;
4819
4820         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4821         if (ret)
4822                 return ret;
4823
4824         list_add_tail(&bg_rec->list, &tree->block_groups);
4825         return 0;
4826 }
4827
4828 static void free_block_group_record(struct cache_extent *cache)
4829 {
4830         struct block_group_record *rec;
4831
4832         rec = container_of(cache, struct block_group_record, cache);
4833         list_del_init(&rec->list);
4834         free(rec);
4835 }
4836
4837 void free_block_group_tree(struct block_group_tree *tree)
4838 {
4839         cache_tree_free_extents(&tree->tree, free_block_group_record);
4840 }
4841
4842 int insert_device_extent_record(struct device_extent_tree *tree,
4843                                 struct device_extent_record *de_rec)
4844 {
4845         int ret;
4846
4847         /*
4848          * Device extent is a bit different from the other extents, because
4849          * the extents which belong to the different devices may have the
4850          * same start and size, so we need use the special extent cache
4851          * search/insert functions.
4852          */
4853         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4854         if (ret)
4855                 return ret;
4856
4857         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4858         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4859         return 0;
4860 }
4861
4862 static void free_device_extent_record(struct cache_extent *cache)
4863 {
4864         struct device_extent_record *rec;
4865
4866         rec = container_of(cache, struct device_extent_record, cache);
4867         if (!list_empty(&rec->chunk_list))
4868                 list_del_init(&rec->chunk_list);
4869         if (!list_empty(&rec->device_list))
4870                 list_del_init(&rec->device_list);
4871         free(rec);
4872 }
4873
4874 void free_device_extent_tree(struct device_extent_tree *tree)
4875 {
4876         cache_tree_free_extents(&tree->tree, free_device_extent_record);
4877 }
4878
4879 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4880 static int process_extent_ref_v0(struct cache_tree *extent_cache,
4881                                  struct extent_buffer *leaf, int slot)
4882 {
4883         struct btrfs_extent_ref_v0 *ref0;
4884         struct btrfs_key key;
4885
4886         btrfs_item_key_to_cpu(leaf, &key, slot);
4887         ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
4888         if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
4889                 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
4890         } else {
4891                 add_data_backref(extent_cache, key.objectid, key.offset, 0,
4892                                  0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
4893         }
4894         return 0;
4895 }
4896 #endif
4897
4898 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4899                                             struct btrfs_key *key,
4900                                             int slot)
4901 {
4902         struct btrfs_chunk *ptr;
4903         struct chunk_record *rec;
4904         int num_stripes, i;
4905
4906         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4907         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4908
4909         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4910         if (!rec) {
4911                 fprintf(stderr, "memory allocation failed\n");
4912                 exit(-1);
4913         }
4914
4915         INIT_LIST_HEAD(&rec->list);
4916         INIT_LIST_HEAD(&rec->dextents);
4917         rec->bg_rec = NULL;
4918
4919         rec->cache.start = key->offset;
4920         rec->cache.size = btrfs_chunk_length(leaf, ptr);
4921
4922         rec->generation = btrfs_header_generation(leaf);
4923
4924         rec->objectid = key->objectid;
4925         rec->type = key->type;
4926         rec->offset = key->offset;
4927
4928         rec->length = rec->cache.size;
4929         rec->owner = btrfs_chunk_owner(leaf, ptr);
4930         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4931         rec->type_flags = btrfs_chunk_type(leaf, ptr);
4932         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4933         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4934         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4935         rec->num_stripes = num_stripes;
4936         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4937
4938         for (i = 0; i < rec->num_stripes; ++i) {
4939                 rec->stripes[i].devid =
4940                         btrfs_stripe_devid_nr(leaf, ptr, i);
4941                 rec->stripes[i].offset =
4942                         btrfs_stripe_offset_nr(leaf, ptr, i);
4943                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4944                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
4945                                 BTRFS_UUID_SIZE);
4946         }
4947
4948         return rec;
4949 }
4950
4951 static int process_chunk_item(struct cache_tree *chunk_cache,
4952                               struct btrfs_key *key, struct extent_buffer *eb,
4953                               int slot)
4954 {
4955         struct chunk_record *rec;
4956         int ret = 0;
4957
4958         rec = btrfs_new_chunk_record(eb, key, slot);
4959         ret = insert_cache_extent(chunk_cache, &rec->cache);
4960         if (ret) {
4961                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
4962                         rec->offset, rec->length);
4963                 free(rec);
4964         }
4965
4966         return ret;
4967 }
4968
4969 static int process_device_item(struct rb_root *dev_cache,
4970                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
4971 {
4972         struct btrfs_dev_item *ptr;
4973         struct device_record *rec;
4974         int ret = 0;
4975
4976         ptr = btrfs_item_ptr(eb,
4977                 slot, struct btrfs_dev_item);
4978
4979         rec = malloc(sizeof(*rec));
4980         if (!rec) {
4981                 fprintf(stderr, "memory allocation failed\n");
4982                 return -ENOMEM;
4983         }
4984
4985         rec->devid = key->offset;
4986         rec->generation = btrfs_header_generation(eb);
4987
4988         rec->objectid = key->objectid;
4989         rec->type = key->type;
4990         rec->offset = key->offset;
4991
4992         rec->devid = btrfs_device_id(eb, ptr);
4993         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
4994         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
4995
4996         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
4997         if (ret) {
4998                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
4999                 free(rec);
5000         }
5001
5002         return ret;
5003 }
5004
5005 struct block_group_record *
5006 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5007                              int slot)
5008 {
5009         struct btrfs_block_group_item *ptr;
5010         struct block_group_record *rec;
5011
5012         rec = calloc(1, sizeof(*rec));
5013         if (!rec) {
5014                 fprintf(stderr, "memory allocation failed\n");
5015                 exit(-1);
5016         }
5017
5018         rec->cache.start = key->objectid;
5019         rec->cache.size = key->offset;
5020
5021         rec->generation = btrfs_header_generation(leaf);
5022
5023         rec->objectid = key->objectid;
5024         rec->type = key->type;
5025         rec->offset = key->offset;
5026
5027         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5028         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5029
5030         INIT_LIST_HEAD(&rec->list);
5031
5032         return rec;
5033 }
5034
5035 static int process_block_group_item(struct block_group_tree *block_group_cache,
5036                                     struct btrfs_key *key,
5037                                     struct extent_buffer *eb, int slot)
5038 {
5039         struct block_group_record *rec;
5040         int ret = 0;
5041
5042         rec = btrfs_new_block_group_record(eb, key, slot);
5043         ret = insert_block_group_record(block_group_cache, rec);
5044         if (ret) {
5045                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5046                         rec->objectid, rec->offset);
5047                 free(rec);
5048         }
5049
5050         return ret;
5051 }
5052
5053 struct device_extent_record *
5054 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5055                                struct btrfs_key *key, int slot)
5056 {
5057         struct device_extent_record *rec;
5058         struct btrfs_dev_extent *ptr;
5059
5060         rec = calloc(1, sizeof(*rec));
5061         if (!rec) {
5062                 fprintf(stderr, "memory allocation failed\n");
5063                 exit(-1);
5064         }
5065
5066         rec->cache.objectid = key->objectid;
5067         rec->cache.start = key->offset;
5068
5069         rec->generation = btrfs_header_generation(leaf);
5070
5071         rec->objectid = key->objectid;
5072         rec->type = key->type;
5073         rec->offset = key->offset;
5074
5075         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5076         rec->chunk_objecteid =
5077                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5078         rec->chunk_offset =
5079                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5080         rec->length = btrfs_dev_extent_length(leaf, ptr);
5081         rec->cache.size = rec->length;
5082
5083         INIT_LIST_HEAD(&rec->chunk_list);
5084         INIT_LIST_HEAD(&rec->device_list);
5085
5086         return rec;
5087 }
5088
5089 static int
5090 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5091                            struct btrfs_key *key, struct extent_buffer *eb,
5092                            int slot)
5093 {
5094         struct device_extent_record *rec;
5095         int ret;
5096
5097         rec = btrfs_new_device_extent_record(eb, key, slot);
5098         ret = insert_device_extent_record(dev_extent_cache, rec);
5099         if (ret) {
5100                 fprintf(stderr,
5101                         "Device extent[%llu, %llu, %llu] existed.\n",
5102                         rec->objectid, rec->offset, rec->length);
5103                 free(rec);
5104         }
5105
5106         return ret;
5107 }
5108
5109 static int process_extent_item(struct btrfs_root *root,
5110                                struct cache_tree *extent_cache,
5111                                struct extent_buffer *eb, int slot)
5112 {
5113         struct btrfs_extent_item *ei;
5114         struct btrfs_extent_inline_ref *iref;
5115         struct btrfs_extent_data_ref *dref;
5116         struct btrfs_shared_data_ref *sref;
5117         struct btrfs_key key;
5118         unsigned long end;
5119         unsigned long ptr;
5120         int type;
5121         u32 item_size = btrfs_item_size_nr(eb, slot);
5122         u64 refs = 0;
5123         u64 offset;
5124         u64 num_bytes;
5125         int metadata = 0;
5126
5127         btrfs_item_key_to_cpu(eb, &key, slot);
5128
5129         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5130                 metadata = 1;
5131                 num_bytes = root->leafsize;
5132         } else {
5133                 num_bytes = key.offset;
5134         }
5135
5136         if (item_size < sizeof(*ei)) {
5137 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5138                 struct btrfs_extent_item_v0 *ei0;
5139                 BUG_ON(item_size != sizeof(*ei0));
5140                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5141                 refs = btrfs_extent_refs_v0(eb, ei0);
5142 #else
5143                 BUG();
5144 #endif
5145                 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5146                                       num_bytes, refs, 0, 0, 0, metadata, 1,
5147                                       num_bytes);
5148         }
5149
5150         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5151         refs = btrfs_extent_refs(eb, ei);
5152         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5153                 metadata = 1;
5154         else
5155                 metadata = 0;
5156
5157         add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5158                        refs, 0, 0, 0, metadata, 1, num_bytes);
5159
5160         ptr = (unsigned long)(ei + 1);
5161         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5162             key.type == BTRFS_EXTENT_ITEM_KEY)
5163                 ptr += sizeof(struct btrfs_tree_block_info);
5164
5165         end = (unsigned long)ei + item_size;
5166         while (ptr < end) {
5167                 iref = (struct btrfs_extent_inline_ref *)ptr;
5168                 type = btrfs_extent_inline_ref_type(eb, iref);
5169                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5170                 switch (type) {
5171                 case BTRFS_TREE_BLOCK_REF_KEY:
5172                         add_tree_backref(extent_cache, key.objectid,
5173                                          0, offset, 0);
5174                         break;
5175                 case BTRFS_SHARED_BLOCK_REF_KEY:
5176                         add_tree_backref(extent_cache, key.objectid,
5177                                          offset, 0, 0);
5178                         break;
5179                 case BTRFS_EXTENT_DATA_REF_KEY:
5180                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5181                         add_data_backref(extent_cache, key.objectid, 0,
5182                                         btrfs_extent_data_ref_root(eb, dref),
5183                                         btrfs_extent_data_ref_objectid(eb,
5184                                                                        dref),
5185                                         btrfs_extent_data_ref_offset(eb, dref),
5186                                         btrfs_extent_data_ref_count(eb, dref),
5187                                         0, num_bytes);
5188                         break;
5189                 case BTRFS_SHARED_DATA_REF_KEY:
5190                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5191                         add_data_backref(extent_cache, key.objectid, offset,
5192                                         0, 0, 0,
5193                                         btrfs_shared_data_ref_count(eb, sref),
5194                                         0, num_bytes);
5195                         break;
5196                 default:
5197                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5198                                 key.objectid, key.type, num_bytes);
5199                         goto out;
5200                 }
5201                 ptr += btrfs_extent_inline_ref_size(type);
5202         }
5203         WARN_ON(ptr > end);
5204 out:
5205         return 0;
5206 }
5207
5208 static int check_cache_range(struct btrfs_root *root,
5209                              struct btrfs_block_group_cache *cache,
5210                              u64 offset, u64 bytes)
5211 {
5212         struct btrfs_free_space *entry;
5213         u64 *logical;
5214         u64 bytenr;
5215         int stripe_len;
5216         int i, nr, ret;
5217
5218         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5219                 bytenr = btrfs_sb_offset(i);
5220                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5221                                        cache->key.objectid, bytenr, 0,
5222                                        &logical, &nr, &stripe_len);
5223                 if (ret)
5224                         return ret;
5225
5226                 while (nr--) {
5227                         if (logical[nr] + stripe_len <= offset)
5228                                 continue;
5229                         if (offset + bytes <= logical[nr])
5230                                 continue;
5231                         if (logical[nr] == offset) {
5232                                 if (stripe_len >= bytes) {
5233                                         kfree(logical);
5234                                         return 0;
5235                                 }
5236                                 bytes -= stripe_len;
5237                                 offset += stripe_len;
5238                         } else if (logical[nr] < offset) {
5239                                 if (logical[nr] + stripe_len >=
5240                                     offset + bytes) {
5241                                         kfree(logical);
5242                                         return 0;
5243                                 }
5244                                 bytes = (offset + bytes) -
5245                                         (logical[nr] + stripe_len);
5246                                 offset = logical[nr] + stripe_len;
5247                         } else {
5248                                 /*
5249                                  * Could be tricky, the super may land in the
5250                                  * middle of the area we're checking.  First
5251                                  * check the easiest case, it's at the end.
5252                                  */
5253                                 if (logical[nr] + stripe_len >=
5254                                     bytes + offset) {
5255                                         bytes = logical[nr] - offset;
5256                                         continue;
5257                                 }
5258
5259                                 /* Check the left side */
5260                                 ret = check_cache_range(root, cache,
5261                                                         offset,
5262                                                         logical[nr] - offset);
5263                                 if (ret) {
5264                                         kfree(logical);
5265                                         return ret;
5266                                 }
5267
5268                                 /* Now we continue with the right side */
5269                                 bytes = (offset + bytes) -
5270                                         (logical[nr] + stripe_len);
5271                                 offset = logical[nr] + stripe_len;
5272                         }
5273                 }
5274
5275                 kfree(logical);
5276         }
5277
5278         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5279         if (!entry) {
5280                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5281                         offset, offset+bytes);
5282                 return -EINVAL;
5283         }
5284
5285         if (entry->offset != offset) {
5286                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5287                         entry->offset);
5288                 return -EINVAL;
5289         }
5290
5291         if (entry->bytes != bytes) {
5292                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5293                         bytes, entry->bytes, offset);
5294                 return -EINVAL;
5295         }
5296
5297         unlink_free_space(cache->free_space_ctl, entry);
5298         free(entry);
5299         return 0;
5300 }
5301
5302 static int verify_space_cache(struct btrfs_root *root,
5303                               struct btrfs_block_group_cache *cache)
5304 {
5305         struct btrfs_path *path;
5306         struct extent_buffer *leaf;
5307         struct btrfs_key key;
5308         u64 last;
5309         int ret = 0;
5310
5311         path = btrfs_alloc_path();
5312         if (!path)
5313                 return -ENOMEM;
5314
5315         root = root->fs_info->extent_root;
5316
5317         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5318
5319         key.objectid = last;
5320         key.offset = 0;
5321         key.type = BTRFS_EXTENT_ITEM_KEY;
5322
5323         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5324         if (ret < 0)
5325                 goto out;
5326         ret = 0;
5327         while (1) {
5328                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5329                         ret = btrfs_next_leaf(root, path);
5330                         if (ret < 0)
5331                                 goto out;
5332                         if (ret > 0) {
5333                                 ret = 0;
5334                                 break;
5335                         }
5336                 }
5337                 leaf = path->nodes[0];
5338                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5339                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5340                         break;
5341                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5342                     key.type != BTRFS_METADATA_ITEM_KEY) {
5343                         path->slots[0]++;
5344                         continue;
5345                 }
5346
5347                 if (last == key.objectid) {
5348                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5349                                 last = key.objectid + key.offset;
5350                         else
5351                                 last = key.objectid + root->leafsize;
5352                         path->slots[0]++;
5353                         continue;
5354                 }
5355
5356                 ret = check_cache_range(root, cache, last,
5357                                         key.objectid - last);
5358                 if (ret)
5359                         break;
5360                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5361                         last = key.objectid + key.offset;
5362                 else
5363                         last = key.objectid + root->leafsize;
5364                 path->slots[0]++;
5365         }
5366
5367         if (last < cache->key.objectid + cache->key.offset)
5368                 ret = check_cache_range(root, cache, last,
5369                                         cache->key.objectid +
5370                                         cache->key.offset - last);
5371
5372 out:
5373         btrfs_free_path(path);
5374
5375         if (!ret &&
5376             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5377                 fprintf(stderr, "There are still entries left in the space "
5378                         "cache\n");
5379                 ret = -EINVAL;
5380         }
5381
5382         return ret;
5383 }
5384
5385 static int check_space_cache(struct btrfs_root *root)
5386 {
5387         struct btrfs_block_group_cache *cache;
5388         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5389         int ret;
5390         int error = 0;
5391
5392         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5393             btrfs_super_generation(root->fs_info->super_copy) !=
5394             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5395                 printf("cache and super generation don't match, space cache "
5396                        "will be invalidated\n");
5397                 return 0;
5398         }
5399
5400         if (ctx.progress_enabled) {
5401                 ctx.tp = TASK_FREE_SPACE;
5402                 task_start(ctx.info);
5403         }
5404
5405         while (1) {
5406                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5407                 if (!cache)
5408                         break;
5409
5410                 start = cache->key.objectid + cache->key.offset;
5411                 if (!cache->free_space_ctl) {
5412                         if (btrfs_init_free_space_ctl(cache,
5413                                                       root->sectorsize)) {
5414                                 ret = -ENOMEM;
5415                                 break;
5416                         }
5417                 } else {
5418                         btrfs_remove_free_space_cache(cache);
5419                 }
5420
5421                 ret = load_free_space_cache(root->fs_info, cache);
5422                 if (!ret)
5423                         continue;
5424
5425                 ret = verify_space_cache(root, cache);
5426                 if (ret) {
5427                         fprintf(stderr, "cache appears valid but isnt %Lu\n",
5428                                 cache->key.objectid);
5429                         error++;
5430                 }
5431         }
5432
5433         task_stop(ctx.info);
5434
5435         return error ? -EINVAL : 0;
5436 }
5437
5438 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5439                         u64 num_bytes, unsigned long leaf_offset,
5440                         struct extent_buffer *eb) {
5441
5442         u64 offset = 0;
5443         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5444         char *data;
5445         unsigned long csum_offset;
5446         u32 csum;
5447         u32 csum_expected;
5448         u64 read_len;
5449         u64 data_checked = 0;
5450         u64 tmp;
5451         int ret = 0;
5452         int mirror;
5453         int num_copies;
5454
5455         if (num_bytes % root->sectorsize)
5456                 return -EINVAL;
5457
5458         data = malloc(num_bytes);
5459         if (!data)
5460                 return -ENOMEM;
5461
5462         while (offset < num_bytes) {
5463                 mirror = 0;
5464 again:
5465                 read_len = num_bytes - offset;
5466                 /* read as much space once a time */
5467                 ret = read_extent_data(root, data + offset,
5468                                 bytenr + offset, &read_len, mirror);
5469                 if (ret)
5470                         goto out;
5471                 data_checked = 0;
5472                 /* verify every 4k data's checksum */
5473                 while (data_checked < read_len) {
5474                         csum = ~(u32)0;
5475                         tmp = offset + data_checked;
5476
5477                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5478                                                csum, root->sectorsize);
5479                         btrfs_csum_final(csum, (char *)&csum);
5480
5481                         csum_offset = leaf_offset +
5482                                  tmp / root->sectorsize * csum_size;
5483                         read_extent_buffer(eb, (char *)&csum_expected,
5484                                            csum_offset, csum_size);
5485                         /* try another mirror */
5486                         if (csum != csum_expected) {
5487                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5488                                                 mirror, bytenr + tmp,
5489                                                 csum, csum_expected);
5490                                 num_copies = btrfs_num_copies(
5491                                                 &root->fs_info->mapping_tree,
5492                                                 bytenr, num_bytes);
5493                                 if (mirror < num_copies - 1) {
5494                                         mirror += 1;
5495                                         goto again;
5496                                 }
5497                         }
5498                         data_checked += root->sectorsize;
5499                 }
5500                 offset += read_len;
5501         }
5502 out:
5503         free(data);
5504         return ret;
5505 }
5506
5507 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5508                                u64 num_bytes)
5509 {
5510         struct btrfs_path *path;
5511         struct extent_buffer *leaf;
5512         struct btrfs_key key;
5513         int ret;
5514
5515         path = btrfs_alloc_path();
5516         if (!path) {
5517                 fprintf(stderr, "Error allocing path\n");
5518                 return -ENOMEM;
5519         }
5520
5521         key.objectid = bytenr;
5522         key.type = BTRFS_EXTENT_ITEM_KEY;
5523         key.offset = (u64)-1;
5524
5525 again:
5526         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5527                                 0, 0);
5528         if (ret < 0) {
5529                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5530                 btrfs_free_path(path);
5531                 return ret;
5532         } else if (ret) {
5533                 if (path->slots[0] > 0) {
5534                         path->slots[0]--;
5535                 } else {
5536                         ret = btrfs_prev_leaf(root, path);
5537                         if (ret < 0) {
5538                                 goto out;
5539                         } else if (ret > 0) {
5540                                 ret = 0;
5541                                 goto out;
5542                         }
5543                 }
5544         }
5545
5546         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5547
5548         /*
5549          * Block group items come before extent items if they have the same
5550          * bytenr, so walk back one more just in case.  Dear future traveler,
5551          * first congrats on mastering time travel.  Now if it's not too much
5552          * trouble could you go back to 2006 and tell Chris to make the
5553          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5554          * EXTENT_ITEM_KEY please?
5555          */
5556         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5557                 if (path->slots[0] > 0) {
5558                         path->slots[0]--;
5559                 } else {
5560                         ret = btrfs_prev_leaf(root, path);
5561                         if (ret < 0) {
5562                                 goto out;
5563                         } else if (ret > 0) {
5564                                 ret = 0;
5565                                 goto out;
5566                         }
5567                 }
5568                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5569         }
5570
5571         while (num_bytes) {
5572                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5573                         ret = btrfs_next_leaf(root, path);
5574                         if (ret < 0) {
5575                                 fprintf(stderr, "Error going to next leaf "
5576                                         "%d\n", ret);
5577                                 btrfs_free_path(path);
5578                                 return ret;
5579                         } else if (ret) {
5580                                 break;
5581                         }
5582                 }
5583                 leaf = path->nodes[0];
5584                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5585                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5586                         path->slots[0]++;
5587                         continue;
5588                 }
5589                 if (key.objectid + key.offset < bytenr) {
5590                         path->slots[0]++;
5591                         continue;
5592                 }
5593                 if (key.objectid > bytenr + num_bytes)
5594                         break;
5595
5596                 if (key.objectid == bytenr) {
5597                         if (key.offset >= num_bytes) {
5598                                 num_bytes = 0;
5599                                 break;
5600                         }
5601                         num_bytes -= key.offset;
5602                         bytenr += key.offset;
5603                 } else if (key.objectid < bytenr) {
5604                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5605                                 num_bytes = 0;
5606                                 break;
5607                         }
5608                         num_bytes = (bytenr + num_bytes) -
5609                                 (key.objectid + key.offset);
5610                         bytenr = key.objectid + key.offset;
5611                 } else {
5612                         if (key.objectid + key.offset < bytenr + num_bytes) {
5613                                 u64 new_start = key.objectid + key.offset;
5614                                 u64 new_bytes = bytenr + num_bytes - new_start;
5615
5616                                 /*
5617                                  * Weird case, the extent is in the middle of
5618                                  * our range, we'll have to search one side
5619                                  * and then the other.  Not sure if this happens
5620                                  * in real life, but no harm in coding it up
5621                                  * anyway just in case.
5622                                  */
5623                                 btrfs_release_path(path);
5624                                 ret = check_extent_exists(root, new_start,
5625                                                           new_bytes);
5626                                 if (ret) {
5627                                         fprintf(stderr, "Right section didn't "
5628                                                 "have a record\n");
5629                                         break;
5630                                 }
5631                                 num_bytes = key.objectid - bytenr;
5632                                 goto again;
5633                         }
5634                         num_bytes = key.objectid - bytenr;
5635                 }
5636                 path->slots[0]++;
5637         }
5638         ret = 0;
5639
5640 out:
5641         if (num_bytes && !ret) {
5642                 fprintf(stderr, "There are no extents for csum range "
5643                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5644                 ret = 1;
5645         }
5646
5647         btrfs_free_path(path);
5648         return ret;
5649 }
5650
5651 static int check_csums(struct btrfs_root *root)
5652 {
5653         struct btrfs_path *path;
5654         struct extent_buffer *leaf;
5655         struct btrfs_key key;
5656         u64 offset = 0, num_bytes = 0;
5657         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5658         int errors = 0;
5659         int ret;
5660         u64 data_len;
5661         unsigned long leaf_offset;
5662
5663         root = root->fs_info->csum_root;
5664         if (!extent_buffer_uptodate(root->node)) {
5665                 fprintf(stderr, "No valid csum tree found\n");
5666                 return -ENOENT;
5667         }
5668
5669         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5670         key.type = BTRFS_EXTENT_CSUM_KEY;
5671         key.offset = 0;
5672
5673         path = btrfs_alloc_path();
5674         if (!path)
5675                 return -ENOMEM;
5676
5677         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5678         if (ret < 0) {
5679                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5680                 btrfs_free_path(path);
5681                 return ret;
5682         }
5683
5684         if (ret > 0 && path->slots[0])
5685                 path->slots[0]--;
5686         ret = 0;
5687
5688         while (1) {
5689                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5690                         ret = btrfs_next_leaf(root, path);
5691                         if (ret < 0) {
5692                                 fprintf(stderr, "Error going to next leaf "
5693                                         "%d\n", ret);
5694                                 break;
5695                         }
5696                         if (ret)
5697                                 break;
5698                 }
5699                 leaf = path->nodes[0];
5700
5701                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5702                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5703                         path->slots[0]++;
5704                         continue;
5705                 }
5706
5707                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5708                               csum_size) * root->sectorsize;
5709                 if (!check_data_csum)
5710                         goto skip_csum_check;
5711                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5712                 ret = check_extent_csums(root, key.offset, data_len,
5713                                          leaf_offset, leaf);
5714                 if (ret)
5715                         break;
5716 skip_csum_check:
5717                 if (!num_bytes) {
5718                         offset = key.offset;
5719                 } else if (key.offset != offset + num_bytes) {
5720                         ret = check_extent_exists(root, offset, num_bytes);
5721                         if (ret) {
5722                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5723                                         "there is no extent record\n",
5724                                         offset, offset+num_bytes);
5725                                 errors++;
5726                         }
5727                         offset = key.offset;
5728                         num_bytes = 0;
5729                 }
5730                 num_bytes += data_len;
5731                 path->slots[0]++;
5732         }
5733
5734         btrfs_free_path(path);
5735         return errors;
5736 }
5737
5738 static int is_dropped_key(struct btrfs_key *key,
5739                           struct btrfs_key *drop_key) {
5740         if (key->objectid < drop_key->objectid)
5741                 return 1;
5742         else if (key->objectid == drop_key->objectid) {
5743                 if (key->type < drop_key->type)
5744                         return 1;
5745                 else if (key->type == drop_key->type) {
5746                         if (key->offset < drop_key->offset)
5747                                 return 1;
5748                 }
5749         }
5750         return 0;
5751 }
5752
5753 /*
5754  * Here are the rules for FULL_BACKREF.
5755  *
5756  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5757  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5758  *      FULL_BACKREF set.
5759  * 3) We cow'ed the block walking down a reloc tree.  This is impossible to tell
5760  *    if it happened after the relocation occurred since we'll have dropped the
5761  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5762  *    have no real way to know for sure.
5763  *
5764  * We process the blocks one root at a time, and we start from the lowest root
5765  * objectid and go to the highest.  So we can just lookup the owner backref for
5766  * the record and if we don't find it then we know it doesn't exist and we have
5767  * a FULL BACKREF.
5768  *
5769  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5770  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5771  * be set or not and then we can check later once we've gathered all the refs.
5772  */
5773 static int calc_extent_flag(struct btrfs_root *root,
5774                            struct cache_tree *extent_cache,
5775                            struct extent_buffer *buf,
5776                            struct root_item_record *ri,
5777                            u64 *flags)
5778 {
5779         struct extent_record *rec;
5780         struct cache_extent *cache;
5781         struct tree_backref *tback;
5782         u64 owner = 0;
5783
5784         cache = lookup_cache_extent(extent_cache, buf->start, 1);
5785         /* we have added this extent before */
5786         BUG_ON(!cache);
5787         rec = container_of(cache, struct extent_record, cache);
5788
5789         /*
5790          * Except file/reloc tree, we can not have
5791          * FULL BACKREF MODE
5792          */
5793         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5794                 goto normal;
5795         /*
5796          * root node
5797          */
5798         if (buf->start == ri->bytenr)
5799                 goto normal;
5800
5801         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5802                 goto full_backref;
5803
5804         owner = btrfs_header_owner(buf);
5805         if (owner == ri->objectid)
5806                 goto normal;
5807
5808         tback = find_tree_backref(rec, 0, owner);
5809         if (!tback)
5810                 goto full_backref;
5811 normal:
5812         *flags = 0;
5813         if (rec->flag_block_full_backref != -1 &&
5814             rec->flag_block_full_backref != 0)
5815                 rec->bad_full_backref = 1;
5816         return 0;
5817 full_backref:
5818         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5819         if (rec->flag_block_full_backref != -1 &&
5820             rec->flag_block_full_backref != 1)
5821                 rec->bad_full_backref = 1;
5822         return 0;
5823 }
5824
5825 static int run_next_block(struct btrfs_root *root,
5826                           struct block_info *bits,
5827                           int bits_nr,
5828                           u64 *last,
5829                           struct cache_tree *pending,
5830                           struct cache_tree *seen,
5831                           struct cache_tree *reada,
5832                           struct cache_tree *nodes,
5833                           struct cache_tree *extent_cache,
5834                           struct cache_tree *chunk_cache,
5835                           struct rb_root *dev_cache,
5836                           struct block_group_tree *block_group_cache,
5837                           struct device_extent_tree *dev_extent_cache,
5838                           struct root_item_record *ri)
5839 {
5840         struct extent_buffer *buf;
5841         struct extent_record *rec = NULL;
5842         u64 bytenr;
5843         u32 size;
5844         u64 parent;
5845         u64 owner;
5846         u64 flags;
5847         u64 ptr;
5848         u64 gen = 0;
5849         int ret = 0;
5850         int i;
5851         int nritems;
5852         struct btrfs_key key;
5853         struct cache_extent *cache;
5854         int reada_bits;
5855
5856         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5857                                     bits_nr, &reada_bits);
5858         if (nritems == 0)
5859                 return 1;
5860
5861         if (!reada_bits) {
5862                 for(i = 0; i < nritems; i++) {
5863                         ret = add_cache_extent(reada, bits[i].start,
5864                                                bits[i].size);
5865                         if (ret == -EEXIST)
5866                                 continue;
5867
5868                         /* fixme, get the parent transid */
5869                         readahead_tree_block(root, bits[i].start,
5870                                              bits[i].size, 0);
5871                 }
5872         }
5873         *last = bits[0].start;
5874         bytenr = bits[0].start;
5875         size = bits[0].size;
5876
5877         cache = lookup_cache_extent(pending, bytenr, size);
5878         if (cache) {
5879                 remove_cache_extent(pending, cache);
5880                 free(cache);
5881         }
5882         cache = lookup_cache_extent(reada, bytenr, size);
5883         if (cache) {
5884                 remove_cache_extent(reada, cache);
5885                 free(cache);
5886         }
5887         cache = lookup_cache_extent(nodes, bytenr, size);
5888         if (cache) {
5889                 remove_cache_extent(nodes, cache);
5890                 free(cache);
5891         }
5892         cache = lookup_cache_extent(extent_cache, bytenr, size);
5893         if (cache) {
5894                 rec = container_of(cache, struct extent_record, cache);
5895                 gen = rec->parent_generation;
5896         }
5897
5898         /* fixme, get the real parent transid */
5899         buf = read_tree_block(root, bytenr, size, gen);
5900         if (!extent_buffer_uptodate(buf)) {
5901                 record_bad_block_io(root->fs_info,
5902                                     extent_cache, bytenr, size);
5903                 goto out;
5904         }
5905
5906         nritems = btrfs_header_nritems(buf);
5907
5908         flags = 0;
5909         if (!init_extent_tree) {
5910                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5911                                        btrfs_header_level(buf), 1, NULL,
5912                                        &flags);
5913                 if (ret < 0) {
5914                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5915                         if (ret < 0) {
5916                                 fprintf(stderr, "Couldn't calc extent flags\n");
5917                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5918                         }
5919                 }
5920         } else {
5921                 flags = 0;
5922                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5923                 if (ret < 0) {
5924                         fprintf(stderr, "Couldn't calc extent flags\n");
5925                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5926                 }
5927         }
5928
5929         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5930                 if (ri != NULL &&
5931                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5932                     ri->objectid == btrfs_header_owner(buf)) {
5933                         /*
5934                          * Ok we got to this block from it's original owner and
5935                          * we have FULL_BACKREF set.  Relocation can leave
5936                          * converted blocks over so this is altogether possible,
5937                          * however it's not possible if the generation > the
5938                          * last snapshot, so check for this case.
5939                          */
5940                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5941                             btrfs_header_generation(buf) > ri->last_snapshot) {
5942                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5943                                 rec->bad_full_backref = 1;
5944                         }
5945                 }
5946         } else {
5947                 if (ri != NULL &&
5948                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
5949                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
5950                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5951                         rec->bad_full_backref = 1;
5952                 }
5953         }
5954
5955         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5956                 rec->flag_block_full_backref = 1;
5957                 parent = bytenr;
5958                 owner = 0;
5959         } else {
5960                 rec->flag_block_full_backref = 0;
5961                 parent = 0;
5962                 owner = btrfs_header_owner(buf);
5963         }
5964
5965         ret = check_block(root, extent_cache, buf, flags);
5966         if (ret)
5967                 goto out;
5968
5969         if (btrfs_is_leaf(buf)) {
5970                 btree_space_waste += btrfs_leaf_free_space(root, buf);
5971                 for (i = 0; i < nritems; i++) {
5972                         struct btrfs_file_extent_item *fi;
5973                         btrfs_item_key_to_cpu(buf, &key, i);
5974                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
5975                                 process_extent_item(root, extent_cache, buf,
5976                                                     i);
5977                                 continue;
5978                         }
5979                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5980                                 process_extent_item(root, extent_cache, buf,
5981                                                     i);
5982                                 continue;
5983                         }
5984                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
5985                                 total_csum_bytes +=
5986                                         btrfs_item_size_nr(buf, i);
5987                                 continue;
5988                         }
5989                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
5990                                 process_chunk_item(chunk_cache, &key, buf, i);
5991                                 continue;
5992                         }
5993                         if (key.type == BTRFS_DEV_ITEM_KEY) {
5994                                 process_device_item(dev_cache, &key, buf, i);
5995                                 continue;
5996                         }
5997                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
5998                                 process_block_group_item(block_group_cache,
5999                                         &key, buf, i);
6000                                 continue;
6001                         }
6002                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6003                                 process_device_extent_item(dev_extent_cache,
6004                                         &key, buf, i);
6005                                 continue;
6006
6007                         }
6008                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6009 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6010                                 process_extent_ref_v0(extent_cache, buf, i);
6011 #else
6012                                 BUG();
6013 #endif
6014                                 continue;
6015                         }
6016
6017                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6018                                 add_tree_backref(extent_cache, key.objectid, 0,
6019                                                  key.offset, 0);
6020                                 continue;
6021                         }
6022                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6023                                 add_tree_backref(extent_cache, key.objectid,
6024                                                  key.offset, 0, 0);
6025                                 continue;
6026                         }
6027                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6028                                 struct btrfs_extent_data_ref *ref;
6029                                 ref = btrfs_item_ptr(buf, i,
6030                                                 struct btrfs_extent_data_ref);
6031                                 add_data_backref(extent_cache,
6032                                         key.objectid, 0,
6033                                         btrfs_extent_data_ref_root(buf, ref),
6034                                         btrfs_extent_data_ref_objectid(buf,
6035                                                                        ref),
6036                                         btrfs_extent_data_ref_offset(buf, ref),
6037                                         btrfs_extent_data_ref_count(buf, ref),
6038                                         0, root->sectorsize);
6039                                 continue;
6040                         }
6041                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6042                                 struct btrfs_shared_data_ref *ref;
6043                                 ref = btrfs_item_ptr(buf, i,
6044                                                 struct btrfs_shared_data_ref);
6045                                 add_data_backref(extent_cache,
6046                                         key.objectid, key.offset, 0, 0, 0,
6047                                         btrfs_shared_data_ref_count(buf, ref),
6048                                         0, root->sectorsize);
6049                                 continue;
6050                         }
6051                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6052                                 struct bad_item *bad;
6053
6054                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6055                                         continue;
6056                                 if (!owner)
6057                                         continue;
6058                                 bad = malloc(sizeof(struct bad_item));
6059                                 if (!bad)
6060                                         continue;
6061                                 INIT_LIST_HEAD(&bad->list);
6062                                 memcpy(&bad->key, &key,
6063                                        sizeof(struct btrfs_key));
6064                                 bad->root_id = owner;
6065                                 list_add_tail(&bad->list, &delete_items);
6066                                 continue;
6067                         }
6068                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6069                                 continue;
6070                         fi = btrfs_item_ptr(buf, i,
6071                                             struct btrfs_file_extent_item);
6072                         if (btrfs_file_extent_type(buf, fi) ==
6073                             BTRFS_FILE_EXTENT_INLINE)
6074                                 continue;
6075                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6076                                 continue;
6077
6078                         data_bytes_allocated +=
6079                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6080                         if (data_bytes_allocated < root->sectorsize) {
6081                                 abort();
6082                         }
6083                         data_bytes_referenced +=
6084                                 btrfs_file_extent_num_bytes(buf, fi);
6085                         add_data_backref(extent_cache,
6086                                 btrfs_file_extent_disk_bytenr(buf, fi),
6087                                 parent, owner, key.objectid, key.offset -
6088                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6089                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6090                 }
6091         } else {
6092                 int level;
6093                 struct btrfs_key first_key;
6094
6095                 first_key.objectid = 0;
6096
6097                 if (nritems > 0)
6098                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6099                 level = btrfs_header_level(buf);
6100                 for (i = 0; i < nritems; i++) {
6101                         ptr = btrfs_node_blockptr(buf, i);
6102                         size = btrfs_level_size(root, level - 1);
6103                         btrfs_node_key_to_cpu(buf, &key, i);
6104                         if (ri != NULL) {
6105                                 if ((level == ri->drop_level)
6106                                     && is_dropped_key(&key, &ri->drop_key)) {
6107                                         continue;
6108                                 }
6109                         }
6110                         ret = add_extent_rec(extent_cache, &key,
6111                                              btrfs_node_ptr_generation(buf, i),
6112                                              ptr, size, 0, 0, 1, 0, 1, 0,
6113                                              size);
6114                         BUG_ON(ret);
6115
6116                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6117
6118                         if (level > 1) {
6119                                 add_pending(nodes, seen, ptr, size);
6120                         } else {
6121                                 add_pending(pending, seen, ptr, size);
6122                         }
6123                 }
6124                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6125                                       nritems) * sizeof(struct btrfs_key_ptr);
6126         }
6127         total_btree_bytes += buf->len;
6128         if (fs_root_objectid(btrfs_header_owner(buf)))
6129                 total_fs_tree_bytes += buf->len;
6130         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6131                 total_extent_tree_bytes += buf->len;
6132         if (!found_old_backref &&
6133             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6134             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6135             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6136                 found_old_backref = 1;
6137 out:
6138         free_extent_buffer(buf);
6139         return ret;
6140 }
6141
6142 static int add_root_to_pending(struct extent_buffer *buf,
6143                                struct cache_tree *extent_cache,
6144                                struct cache_tree *pending,
6145                                struct cache_tree *seen,
6146                                struct cache_tree *nodes,
6147                                u64 objectid)
6148 {
6149         if (btrfs_header_level(buf) > 0)
6150                 add_pending(nodes, seen, buf->start, buf->len);
6151         else
6152                 add_pending(pending, seen, buf->start, buf->len);
6153         add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6154                        0, 1, 1, 0, 1, 0, buf->len);
6155
6156         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6157             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6158                 add_tree_backref(extent_cache, buf->start, buf->start,
6159                                  0, 1);
6160         else
6161                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6162         return 0;
6163 }
6164
6165 /* as we fix the tree, we might be deleting blocks that
6166  * we're tracking for repair.  This hook makes sure we
6167  * remove any backrefs for blocks as we are fixing them.
6168  */
6169 static int free_extent_hook(struct btrfs_trans_handle *trans,
6170                             struct btrfs_root *root,
6171                             u64 bytenr, u64 num_bytes, u64 parent,
6172                             u64 root_objectid, u64 owner, u64 offset,
6173                             int refs_to_drop)
6174 {
6175         struct extent_record *rec;
6176         struct cache_extent *cache;
6177         int is_data;
6178         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6179
6180         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6181         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6182         if (!cache)
6183                 return 0;
6184
6185         rec = container_of(cache, struct extent_record, cache);
6186         if (is_data) {
6187                 struct data_backref *back;
6188                 back = find_data_backref(rec, parent, root_objectid, owner,
6189                                          offset, 1, bytenr, num_bytes);
6190                 if (!back)
6191                         goto out;
6192                 if (back->node.found_ref) {
6193                         back->found_ref -= refs_to_drop;
6194                         if (rec->refs)
6195                                 rec->refs -= refs_to_drop;
6196                 }
6197                 if (back->node.found_extent_tree) {
6198                         back->num_refs -= refs_to_drop;
6199                         if (rec->extent_item_refs)
6200                                 rec->extent_item_refs -= refs_to_drop;
6201                 }
6202                 if (back->found_ref == 0)
6203                         back->node.found_ref = 0;
6204                 if (back->num_refs == 0)
6205                         back->node.found_extent_tree = 0;
6206
6207                 if (!back->node.found_extent_tree && back->node.found_ref) {
6208                         list_del(&back->node.list);
6209                         free(back);
6210                 }
6211         } else {
6212                 struct tree_backref *back;
6213                 back = find_tree_backref(rec, parent, root_objectid);
6214                 if (!back)
6215                         goto out;
6216                 if (back->node.found_ref) {
6217                         if (rec->refs)
6218                                 rec->refs--;
6219                         back->node.found_ref = 0;
6220                 }
6221                 if (back->node.found_extent_tree) {
6222                         if (rec->extent_item_refs)
6223                                 rec->extent_item_refs--;
6224                         back->node.found_extent_tree = 0;
6225                 }
6226                 if (!back->node.found_extent_tree && back->node.found_ref) {
6227                         list_del(&back->node.list);
6228                         free(back);
6229                 }
6230         }
6231         maybe_free_extent_rec(extent_cache, rec);
6232 out:
6233         return 0;
6234 }
6235
6236 static int delete_extent_records(struct btrfs_trans_handle *trans,
6237                                  struct btrfs_root *root,
6238                                  struct btrfs_path *path,
6239                                  u64 bytenr, u64 new_len)
6240 {
6241         struct btrfs_key key;
6242         struct btrfs_key found_key;
6243         struct extent_buffer *leaf;
6244         int ret;
6245         int slot;
6246
6247
6248         key.objectid = bytenr;
6249         key.type = (u8)-1;
6250         key.offset = (u64)-1;
6251
6252         while(1) {
6253                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6254                                         &key, path, 0, 1);
6255                 if (ret < 0)
6256                         break;
6257
6258                 if (ret > 0) {
6259                         ret = 0;
6260                         if (path->slots[0] == 0)
6261                                 break;
6262                         path->slots[0]--;
6263                 }
6264                 ret = 0;
6265
6266                 leaf = path->nodes[0];
6267                 slot = path->slots[0];
6268
6269                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6270                 if (found_key.objectid != bytenr)
6271                         break;
6272
6273                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6274                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6275                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6276                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6277                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6278                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6279                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6280                         btrfs_release_path(path);
6281                         if (found_key.type == 0) {
6282                                 if (found_key.offset == 0)
6283                                         break;
6284                                 key.offset = found_key.offset - 1;
6285                                 key.type = found_key.type;
6286                         }
6287                         key.type = found_key.type - 1;
6288                         key.offset = (u64)-1;
6289                         continue;
6290                 }
6291
6292                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6293                         found_key.objectid, found_key.type, found_key.offset);
6294
6295                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6296                 if (ret)
6297                         break;
6298                 btrfs_release_path(path);
6299
6300                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6301                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6302                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6303                                 found_key.offset : root->leafsize;
6304
6305                         ret = btrfs_update_block_group(trans, root, bytenr,
6306                                                        bytes, 0, 0);
6307                         if (ret)
6308                                 break;
6309                 }
6310         }
6311
6312         btrfs_release_path(path);
6313         return ret;
6314 }
6315
6316 /*
6317  * for a single backref, this will allocate a new extent
6318  * and add the backref to it.
6319  */
6320 static int record_extent(struct btrfs_trans_handle *trans,
6321                          struct btrfs_fs_info *info,
6322                          struct btrfs_path *path,
6323                          struct extent_record *rec,
6324                          struct extent_backref *back,
6325                          int allocated, u64 flags)
6326 {
6327         int ret;
6328         struct btrfs_root *extent_root = info->extent_root;
6329         struct extent_buffer *leaf;
6330         struct btrfs_key ins_key;
6331         struct btrfs_extent_item *ei;
6332         struct tree_backref *tback;
6333         struct data_backref *dback;
6334         struct btrfs_tree_block_info *bi;
6335
6336         if (!back->is_data)
6337                 rec->max_size = max_t(u64, rec->max_size,
6338                                     info->extent_root->leafsize);
6339
6340         if (!allocated) {
6341                 u32 item_size = sizeof(*ei);
6342
6343                 if (!back->is_data)
6344                         item_size += sizeof(*bi);
6345
6346                 ins_key.objectid = rec->start;
6347                 ins_key.offset = rec->max_size;
6348                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6349
6350                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6351                                         &ins_key, item_size);
6352                 if (ret)
6353                         goto fail;
6354
6355                 leaf = path->nodes[0];
6356                 ei = btrfs_item_ptr(leaf, path->slots[0],
6357                                     struct btrfs_extent_item);
6358
6359                 btrfs_set_extent_refs(leaf, ei, 0);
6360                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6361
6362                 if (back->is_data) {
6363                         btrfs_set_extent_flags(leaf, ei,
6364                                                BTRFS_EXTENT_FLAG_DATA);
6365                 } else {
6366                         struct btrfs_disk_key copy_key;;
6367
6368                         tback = (struct tree_backref *)back;
6369                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6370                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6371                                              sizeof(*bi));
6372
6373                         btrfs_set_disk_key_objectid(&copy_key,
6374                                                     rec->info_objectid);
6375                         btrfs_set_disk_key_type(&copy_key, 0);
6376                         btrfs_set_disk_key_offset(&copy_key, 0);
6377
6378                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6379                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6380
6381                         btrfs_set_extent_flags(leaf, ei,
6382                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6383                 }
6384
6385                 btrfs_mark_buffer_dirty(leaf);
6386                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6387                                                rec->max_size, 1, 0);
6388                 if (ret)
6389                         goto fail;
6390                 btrfs_release_path(path);
6391         }
6392
6393         if (back->is_data) {
6394                 u64 parent;
6395                 int i;
6396
6397                 dback = (struct data_backref *)back;
6398                 if (back->full_backref)
6399                         parent = dback->parent;
6400                 else
6401                         parent = 0;
6402
6403                 for (i = 0; i < dback->found_ref; i++) {
6404                         /* if parent != 0, we're doing a full backref
6405                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6406                          * just makes the backref allocator create a data
6407                          * backref
6408                          */
6409                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6410                                                    rec->start, rec->max_size,
6411                                                    parent,
6412                                                    dback->root,
6413                                                    parent ?
6414                                                    BTRFS_FIRST_FREE_OBJECTID :
6415                                                    dback->owner,
6416                                                    dback->offset);
6417                         if (ret)
6418                                 break;
6419                 }
6420                 fprintf(stderr, "adding new data backref"
6421                                 " on %llu %s %llu owner %llu"
6422                                 " offset %llu found %d\n",
6423                                 (unsigned long long)rec->start,
6424                                 back->full_backref ?
6425                                 "parent" : "root",
6426                                 back->full_backref ?
6427                                 (unsigned long long)parent :
6428                                 (unsigned long long)dback->root,
6429                                 (unsigned long long)dback->owner,
6430                                 (unsigned long long)dback->offset,
6431                                 dback->found_ref);
6432         } else {
6433                 u64 parent;
6434
6435                 tback = (struct tree_backref *)back;
6436                 if (back->full_backref)
6437                         parent = tback->parent;
6438                 else
6439                         parent = 0;
6440
6441                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6442                                            rec->start, rec->max_size,
6443                                            parent, tback->root, 0, 0);
6444                 fprintf(stderr, "adding new tree backref on "
6445                         "start %llu len %llu parent %llu root %llu\n",
6446                         rec->start, rec->max_size, parent, tback->root);
6447         }
6448 fail:
6449         btrfs_release_path(path);
6450         return ret;
6451 }
6452
6453 struct extent_entry {
6454         u64 bytenr;
6455         u64 bytes;
6456         int count;
6457         int broken;
6458         struct list_head list;
6459 };
6460
6461 static struct extent_entry *find_entry(struct list_head *entries,
6462                                        u64 bytenr, u64 bytes)
6463 {
6464         struct extent_entry *entry = NULL;
6465
6466         list_for_each_entry(entry, entries, list) {
6467                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6468                         return entry;
6469         }
6470
6471         return NULL;
6472 }
6473
6474 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6475 {
6476         struct extent_entry *entry, *best = NULL, *prev = NULL;
6477
6478         list_for_each_entry(entry, entries, list) {
6479                 if (!prev) {
6480                         prev = entry;
6481                         continue;
6482                 }
6483
6484                 /*
6485                  * If there are as many broken entries as entries then we know
6486                  * not to trust this particular entry.
6487                  */
6488                 if (entry->broken == entry->count)
6489                         continue;
6490
6491                 /*
6492                  * If our current entry == best then we can't be sure our best
6493                  * is really the best, so we need to keep searching.
6494                  */
6495                 if (best && best->count == entry->count) {
6496                         prev = entry;
6497                         best = NULL;
6498                         continue;
6499                 }
6500
6501                 /* Prev == entry, not good enough, have to keep searching */
6502                 if (!prev->broken && prev->count == entry->count)
6503                         continue;
6504
6505                 if (!best)
6506                         best = (prev->count > entry->count) ? prev : entry;
6507                 else if (best->count < entry->count)
6508                         best = entry;
6509                 prev = entry;
6510         }
6511
6512         return best;
6513 }
6514
6515 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6516                       struct data_backref *dback, struct extent_entry *entry)
6517 {
6518         struct btrfs_trans_handle *trans;
6519         struct btrfs_root *root;
6520         struct btrfs_file_extent_item *fi;
6521         struct extent_buffer *leaf;
6522         struct btrfs_key key;
6523         u64 bytenr, bytes;
6524         int ret, err;
6525
6526         key.objectid = dback->root;
6527         key.type = BTRFS_ROOT_ITEM_KEY;
6528         key.offset = (u64)-1;
6529         root = btrfs_read_fs_root(info, &key);
6530         if (IS_ERR(root)) {
6531                 fprintf(stderr, "Couldn't find root for our ref\n");
6532                 return -EINVAL;
6533         }
6534
6535         /*
6536          * The backref points to the original offset of the extent if it was
6537          * split, so we need to search down to the offset we have and then walk
6538          * forward until we find the backref we're looking for.
6539          */
6540         key.objectid = dback->owner;
6541         key.type = BTRFS_EXTENT_DATA_KEY;
6542         key.offset = dback->offset;
6543         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6544         if (ret < 0) {
6545                 fprintf(stderr, "Error looking up ref %d\n", ret);
6546                 return ret;
6547         }
6548
6549         while (1) {
6550                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6551                         ret = btrfs_next_leaf(root, path);
6552                         if (ret) {
6553                                 fprintf(stderr, "Couldn't find our ref, next\n");
6554                                 return -EINVAL;
6555                         }
6556                 }
6557                 leaf = path->nodes[0];
6558                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6559                 if (key.objectid != dback->owner ||
6560                     key.type != BTRFS_EXTENT_DATA_KEY) {
6561                         fprintf(stderr, "Couldn't find our ref, search\n");
6562                         return -EINVAL;
6563                 }
6564                 fi = btrfs_item_ptr(leaf, path->slots[0],
6565                                     struct btrfs_file_extent_item);
6566                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6567                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6568
6569                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6570                         break;
6571                 path->slots[0]++;
6572         }
6573
6574         btrfs_release_path(path);
6575
6576         trans = btrfs_start_transaction(root, 1);
6577         if (IS_ERR(trans))
6578                 return PTR_ERR(trans);
6579
6580         /*
6581          * Ok we have the key of the file extent we want to fix, now we can cow
6582          * down to the thing and fix it.
6583          */
6584         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6585         if (ret < 0) {
6586                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6587                         key.objectid, key.type, key.offset, ret);
6588                 goto out;
6589         }
6590         if (ret > 0) {
6591                 fprintf(stderr, "Well that's odd, we just found this key "
6592                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6593                         key.offset);
6594                 ret = -EINVAL;
6595                 goto out;
6596         }
6597         leaf = path->nodes[0];
6598         fi = btrfs_item_ptr(leaf, path->slots[0],
6599                             struct btrfs_file_extent_item);
6600
6601         if (btrfs_file_extent_compression(leaf, fi) &&
6602             dback->disk_bytenr != entry->bytenr) {
6603                 fprintf(stderr, "Ref doesn't match the record start and is "
6604                         "compressed, please take a btrfs-image of this file "
6605                         "system and send it to a btrfs developer so they can "
6606                         "complete this functionality for bytenr %Lu\n",
6607                         dback->disk_bytenr);
6608                 ret = -EINVAL;
6609                 goto out;
6610         }
6611
6612         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6613                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6614         } else if (dback->disk_bytenr > entry->bytenr) {
6615                 u64 off_diff, offset;
6616
6617                 off_diff = dback->disk_bytenr - entry->bytenr;
6618                 offset = btrfs_file_extent_offset(leaf, fi);
6619                 if (dback->disk_bytenr + offset +
6620                     btrfs_file_extent_num_bytes(leaf, fi) >
6621                     entry->bytenr + entry->bytes) {
6622                         fprintf(stderr, "Ref is past the entry end, please "
6623                                 "take a btrfs-image of this file system and "
6624                                 "send it to a btrfs developer, ref %Lu\n",
6625                                 dback->disk_bytenr);
6626                         ret = -EINVAL;
6627                         goto out;
6628                 }
6629                 offset += off_diff;
6630                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6631                 btrfs_set_file_extent_offset(leaf, fi, offset);
6632         } else if (dback->disk_bytenr < entry->bytenr) {
6633                 u64 offset;
6634
6635                 offset = btrfs_file_extent_offset(leaf, fi);
6636                 if (dback->disk_bytenr + offset < entry->bytenr) {
6637                         fprintf(stderr, "Ref is before the entry start, please"
6638                                 " take a btrfs-image of this file system and "
6639                                 "send it to a btrfs developer, ref %Lu\n",
6640                                 dback->disk_bytenr);
6641                         ret = -EINVAL;
6642                         goto out;
6643                 }
6644
6645                 offset += dback->disk_bytenr;
6646                 offset -= entry->bytenr;
6647                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6648                 btrfs_set_file_extent_offset(leaf, fi, offset);
6649         }
6650
6651         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6652
6653         /*
6654          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6655          * only do this if we aren't using compression, otherwise it's a
6656          * trickier case.
6657          */
6658         if (!btrfs_file_extent_compression(leaf, fi))
6659                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6660         else
6661                 printf("ram bytes may be wrong?\n");
6662         btrfs_mark_buffer_dirty(leaf);
6663 out:
6664         err = btrfs_commit_transaction(trans, root);
6665         btrfs_release_path(path);
6666         return ret ? ret : err;
6667 }
6668
6669 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6670                            struct extent_record *rec)
6671 {
6672         struct extent_backref *back;
6673         struct data_backref *dback;
6674         struct extent_entry *entry, *best = NULL;
6675         LIST_HEAD(entries);
6676         int nr_entries = 0;
6677         int broken_entries = 0;
6678         int ret = 0;
6679         short mismatch = 0;
6680
6681         /*
6682          * Metadata is easy and the backrefs should always agree on bytenr and
6683          * size, if not we've got bigger issues.
6684          */
6685         if (rec->metadata)
6686                 return 0;
6687
6688         list_for_each_entry(back, &rec->backrefs, list) {
6689                 if (back->full_backref || !back->is_data)
6690                         continue;
6691
6692                 dback = (struct data_backref *)back;
6693
6694                 /*
6695                  * We only pay attention to backrefs that we found a real
6696                  * backref for.
6697                  */
6698                 if (dback->found_ref == 0)
6699                         continue;
6700
6701                 /*
6702                  * For now we only catch when the bytes don't match, not the
6703                  * bytenr.  We can easily do this at the same time, but I want
6704                  * to have a fs image to test on before we just add repair
6705                  * functionality willy-nilly so we know we won't screw up the
6706                  * repair.
6707                  */
6708
6709                 entry = find_entry(&entries, dback->disk_bytenr,
6710                                    dback->bytes);
6711                 if (!entry) {
6712                         entry = malloc(sizeof(struct extent_entry));
6713                         if (!entry) {
6714                                 ret = -ENOMEM;
6715                                 goto out;
6716                         }
6717                         memset(entry, 0, sizeof(*entry));
6718                         entry->bytenr = dback->disk_bytenr;
6719                         entry->bytes = dback->bytes;
6720                         list_add_tail(&entry->list, &entries);
6721                         nr_entries++;
6722                 }
6723
6724                 /*
6725                  * If we only have on entry we may think the entries agree when
6726                  * in reality they don't so we have to do some extra checking.
6727                  */
6728                 if (dback->disk_bytenr != rec->start ||
6729                     dback->bytes != rec->nr || back->broken)
6730                         mismatch = 1;
6731
6732                 if (back->broken) {
6733                         entry->broken++;
6734                         broken_entries++;
6735                 }
6736
6737                 entry->count++;
6738         }
6739
6740         /* Yay all the backrefs agree, carry on good sir */
6741         if (nr_entries <= 1 && !mismatch)
6742                 goto out;
6743
6744         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6745                 "%Lu\n", rec->start);
6746
6747         /*
6748          * First we want to see if the backrefs can agree amongst themselves who
6749          * is right, so figure out which one of the entries has the highest
6750          * count.
6751          */
6752         best = find_most_right_entry(&entries);
6753
6754         /*
6755          * Ok so we may have an even split between what the backrefs think, so
6756          * this is where we use the extent ref to see what it thinks.
6757          */
6758         if (!best) {
6759                 entry = find_entry(&entries, rec->start, rec->nr);
6760                 if (!entry && (!broken_entries || !rec->found_rec)) {
6761                         fprintf(stderr, "Backrefs don't agree with each other "
6762                                 "and extent record doesn't agree with anybody,"
6763                                 " so we can't fix bytenr %Lu bytes %Lu\n",
6764                                 rec->start, rec->nr);
6765                         ret = -EINVAL;
6766                         goto out;
6767                 } else if (!entry) {
6768                         /*
6769                          * Ok our backrefs were broken, we'll assume this is the
6770                          * correct value and add an entry for this range.
6771                          */
6772                         entry = malloc(sizeof(struct extent_entry));
6773                         if (!entry) {
6774                                 ret = -ENOMEM;
6775                                 goto out;
6776                         }
6777                         memset(entry, 0, sizeof(*entry));
6778                         entry->bytenr = rec->start;
6779                         entry->bytes = rec->nr;
6780                         list_add_tail(&entry->list, &entries);
6781                         nr_entries++;
6782                 }
6783                 entry->count++;
6784                 best = find_most_right_entry(&entries);
6785                 if (!best) {
6786                         fprintf(stderr, "Backrefs and extent record evenly "
6787                                 "split on who is right, this is going to "
6788                                 "require user input to fix bytenr %Lu bytes "
6789                                 "%Lu\n", rec->start, rec->nr);
6790                         ret = -EINVAL;
6791                         goto out;
6792                 }
6793         }
6794
6795         /*
6796          * I don't think this can happen currently as we'll abort() if we catch
6797          * this case higher up, but in case somebody removes that we still can't
6798          * deal with it properly here yet, so just bail out of that's the case.
6799          */
6800         if (best->bytenr != rec->start) {
6801                 fprintf(stderr, "Extent start and backref starts don't match, "
6802                         "please use btrfs-image on this file system and send "
6803                         "it to a btrfs developer so they can make fsck fix "
6804                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
6805                         rec->start, rec->nr);
6806                 ret = -EINVAL;
6807                 goto out;
6808         }
6809
6810         /*
6811          * Ok great we all agreed on an extent record, let's go find the real
6812          * references and fix up the ones that don't match.
6813          */
6814         list_for_each_entry(back, &rec->backrefs, list) {
6815                 if (back->full_backref || !back->is_data)
6816                         continue;
6817
6818                 dback = (struct data_backref *)back;
6819
6820                 /*
6821                  * Still ignoring backrefs that don't have a real ref attached
6822                  * to them.
6823                  */
6824                 if (dback->found_ref == 0)
6825                         continue;
6826
6827                 if (dback->bytes == best->bytes &&
6828                     dback->disk_bytenr == best->bytenr)
6829                         continue;
6830
6831                 ret = repair_ref(info, path, dback, best);
6832                 if (ret)
6833                         goto out;
6834         }
6835
6836         /*
6837          * Ok we messed with the actual refs, which means we need to drop our
6838          * entire cache and go back and rescan.  I know this is a huge pain and
6839          * adds a lot of extra work, but it's the only way to be safe.  Once all
6840          * the backrefs agree we may not need to do anything to the extent
6841          * record itself.
6842          */
6843         ret = -EAGAIN;
6844 out:
6845         while (!list_empty(&entries)) {
6846                 entry = list_entry(entries.next, struct extent_entry, list);
6847                 list_del_init(&entry->list);
6848                 free(entry);
6849         }
6850         return ret;
6851 }
6852
6853 static int process_duplicates(struct btrfs_root *root,
6854                               struct cache_tree *extent_cache,
6855                               struct extent_record *rec)
6856 {
6857         struct extent_record *good, *tmp;
6858         struct cache_extent *cache;
6859         int ret;
6860
6861         /*
6862          * If we found a extent record for this extent then return, or if we
6863          * have more than one duplicate we are likely going to need to delete
6864          * something.
6865          */
6866         if (rec->found_rec || rec->num_duplicates > 1)
6867                 return 0;
6868
6869         /* Shouldn't happen but just in case */
6870         BUG_ON(!rec->num_duplicates);
6871
6872         /*
6873          * So this happens if we end up with a backref that doesn't match the
6874          * actual extent entry.  So either the backref is bad or the extent
6875          * entry is bad.  Either way we want to have the extent_record actually
6876          * reflect what we found in the extent_tree, so we need to take the
6877          * duplicate out and use that as the extent_record since the only way we
6878          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6879          */
6880         remove_cache_extent(extent_cache, &rec->cache);
6881
6882         good = list_entry(rec->dups.next, struct extent_record, list);
6883         list_del_init(&good->list);
6884         INIT_LIST_HEAD(&good->backrefs);
6885         INIT_LIST_HEAD(&good->dups);
6886         good->cache.start = good->start;
6887         good->cache.size = good->nr;
6888         good->content_checked = 0;
6889         good->owner_ref_checked = 0;
6890         good->num_duplicates = 0;
6891         good->refs = rec->refs;
6892         list_splice_init(&rec->backrefs, &good->backrefs);
6893         while (1) {
6894                 cache = lookup_cache_extent(extent_cache, good->start,
6895                                             good->nr);
6896                 if (!cache)
6897                         break;
6898                 tmp = container_of(cache, struct extent_record, cache);
6899
6900                 /*
6901                  * If we find another overlapping extent and it's found_rec is
6902                  * set then it's a duplicate and we need to try and delete
6903                  * something.
6904                  */
6905                 if (tmp->found_rec || tmp->num_duplicates > 0) {
6906                         if (list_empty(&good->list))
6907                                 list_add_tail(&good->list,
6908                                               &duplicate_extents);
6909                         good->num_duplicates += tmp->num_duplicates + 1;
6910                         list_splice_init(&tmp->dups, &good->dups);
6911                         list_del_init(&tmp->list);
6912                         list_add_tail(&tmp->list, &good->dups);
6913                         remove_cache_extent(extent_cache, &tmp->cache);
6914                         continue;
6915                 }
6916
6917                 /*
6918                  * Ok we have another non extent item backed extent rec, so lets
6919                  * just add it to this extent and carry on like we did above.
6920                  */
6921                 good->refs += tmp->refs;
6922                 list_splice_init(&tmp->backrefs, &good->backrefs);
6923                 remove_cache_extent(extent_cache, &tmp->cache);
6924                 free(tmp);
6925         }
6926         ret = insert_cache_extent(extent_cache, &good->cache);
6927         BUG_ON(ret);
6928         free(rec);
6929         return good->num_duplicates ? 0 : 1;
6930 }
6931
6932 static int delete_duplicate_records(struct btrfs_root *root,
6933                                     struct extent_record *rec)
6934 {
6935         struct btrfs_trans_handle *trans;
6936         LIST_HEAD(delete_list);
6937         struct btrfs_path *path;
6938         struct extent_record *tmp, *good, *n;
6939         int nr_del = 0;
6940         int ret = 0, err;
6941         struct btrfs_key key;
6942
6943         path = btrfs_alloc_path();
6944         if (!path) {
6945                 ret = -ENOMEM;
6946                 goto out;
6947         }
6948
6949         good = rec;
6950         /* Find the record that covers all of the duplicates. */
6951         list_for_each_entry(tmp, &rec->dups, list) {
6952                 if (good->start < tmp->start)
6953                         continue;
6954                 if (good->nr > tmp->nr)
6955                         continue;
6956
6957                 if (tmp->start + tmp->nr < good->start + good->nr) {
6958                         fprintf(stderr, "Ok we have overlapping extents that "
6959                                 "aren't completely covered by eachother, this "
6960                                 "is going to require more careful thought.  "
6961                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
6962                                 tmp->start, tmp->nr, good->start, good->nr);
6963                         abort();
6964                 }
6965                 good = tmp;
6966         }
6967
6968         if (good != rec)
6969                 list_add_tail(&rec->list, &delete_list);
6970
6971         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
6972                 if (tmp == good)
6973                         continue;
6974                 list_move_tail(&tmp->list, &delete_list);
6975         }
6976
6977         root = root->fs_info->extent_root;
6978         trans = btrfs_start_transaction(root, 1);
6979         if (IS_ERR(trans)) {
6980                 ret = PTR_ERR(trans);
6981                 goto out;
6982         }
6983
6984         list_for_each_entry(tmp, &delete_list, list) {
6985                 if (tmp->found_rec == 0)
6986                         continue;
6987                 key.objectid = tmp->start;
6988                 key.type = BTRFS_EXTENT_ITEM_KEY;
6989                 key.offset = tmp->nr;
6990
6991                 /* Shouldn't happen but just in case */
6992                 if (tmp->metadata) {
6993                         fprintf(stderr, "Well this shouldn't happen, extent "
6994                                 "record overlaps but is metadata? "
6995                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
6996                         abort();
6997                 }
6998
6999                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7000                 if (ret) {
7001                         if (ret > 0)
7002                                 ret = -EINVAL;
7003                         break;
7004                 }
7005                 ret = btrfs_del_item(trans, root, path);
7006                 if (ret)
7007                         break;
7008                 btrfs_release_path(path);
7009                 nr_del++;
7010         }
7011         err = btrfs_commit_transaction(trans, root);
7012         if (err && !ret)
7013                 ret = err;
7014 out:
7015         while (!list_empty(&delete_list)) {
7016                 tmp = list_entry(delete_list.next, struct extent_record, list);
7017                 list_del_init(&tmp->list);
7018                 if (tmp == rec)
7019                         continue;
7020                 free(tmp);
7021         }
7022
7023         while (!list_empty(&rec->dups)) {
7024                 tmp = list_entry(rec->dups.next, struct extent_record, list);
7025                 list_del_init(&tmp->list);
7026                 free(tmp);
7027         }
7028
7029         btrfs_free_path(path);
7030
7031         if (!ret && !nr_del)
7032                 rec->num_duplicates = 0;
7033
7034         return ret ? ret : nr_del;
7035 }
7036
7037 static int find_possible_backrefs(struct btrfs_fs_info *info,
7038                                   struct btrfs_path *path,
7039                                   struct cache_tree *extent_cache,
7040                                   struct extent_record *rec)
7041 {
7042         struct btrfs_root *root;
7043         struct extent_backref *back;
7044         struct data_backref *dback;
7045         struct cache_extent *cache;
7046         struct btrfs_file_extent_item *fi;
7047         struct btrfs_key key;
7048         u64 bytenr, bytes;
7049         int ret;
7050
7051         list_for_each_entry(back, &rec->backrefs, list) {
7052                 /* Don't care about full backrefs (poor unloved backrefs) */
7053                 if (back->full_backref || !back->is_data)
7054                         continue;
7055
7056                 dback = (struct data_backref *)back;
7057
7058                 /* We found this one, we don't need to do a lookup */
7059                 if (dback->found_ref)
7060                         continue;
7061
7062                 key.objectid = dback->root;
7063                 key.type = BTRFS_ROOT_ITEM_KEY;
7064                 key.offset = (u64)-1;
7065
7066                 root = btrfs_read_fs_root(info, &key);
7067
7068                 /* No root, definitely a bad ref, skip */
7069                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7070                         continue;
7071                 /* Other err, exit */
7072                 if (IS_ERR(root))
7073                         return PTR_ERR(root);
7074
7075                 key.objectid = dback->owner;
7076                 key.type = BTRFS_EXTENT_DATA_KEY;
7077                 key.offset = dback->offset;
7078                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7079                 if (ret) {
7080                         btrfs_release_path(path);
7081                         if (ret < 0)
7082                                 return ret;
7083                         /* Didn't find it, we can carry on */
7084                         ret = 0;
7085                         continue;
7086                 }
7087
7088                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7089                                     struct btrfs_file_extent_item);
7090                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7091                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7092                 btrfs_release_path(path);
7093                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7094                 if (cache) {
7095                         struct extent_record *tmp;
7096                         tmp = container_of(cache, struct extent_record, cache);
7097
7098                         /*
7099                          * If we found an extent record for the bytenr for this
7100                          * particular backref then we can't add it to our
7101                          * current extent record.  We only want to add backrefs
7102                          * that don't have a corresponding extent item in the
7103                          * extent tree since they likely belong to this record
7104                          * and we need to fix it if it doesn't match bytenrs.
7105                          */
7106                         if  (tmp->found_rec)
7107                                 continue;
7108                 }
7109
7110                 dback->found_ref += 1;
7111                 dback->disk_bytenr = bytenr;
7112                 dback->bytes = bytes;
7113
7114                 /*
7115                  * Set this so the verify backref code knows not to trust the
7116                  * values in this backref.
7117                  */
7118                 back->broken = 1;
7119         }
7120
7121         return 0;
7122 }
7123
7124 /*
7125  * Record orphan data ref into corresponding root.
7126  *
7127  * Return 0 if the extent item contains data ref and recorded.
7128  * Return 1 if the extent item contains no useful data ref
7129  *   On that case, it may contains only shared_dataref or metadata backref
7130  *   or the file extent exists(this should be handled by the extent bytenr
7131  *   recovery routine)
7132  * Return <0 if something goes wrong.
7133  */
7134 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7135                                       struct extent_record *rec)
7136 {
7137         struct btrfs_key key;
7138         struct btrfs_root *dest_root;
7139         struct extent_backref *back;
7140         struct data_backref *dback;
7141         struct orphan_data_extent *orphan;
7142         struct btrfs_path *path;
7143         int recorded_data_ref = 0;
7144         int ret = 0;
7145
7146         if (rec->metadata)
7147                 return 1;
7148         path = btrfs_alloc_path();
7149         if (!path)
7150                 return -ENOMEM;
7151         list_for_each_entry(back, &rec->backrefs, list) {
7152                 if (back->full_backref || !back->is_data ||
7153                     !back->found_extent_tree)
7154                         continue;
7155                 dback = (struct data_backref *)back;
7156                 if (dback->found_ref)
7157                         continue;
7158                 key.objectid = dback->root;
7159                 key.type = BTRFS_ROOT_ITEM_KEY;
7160                 key.offset = (u64)-1;
7161
7162                 dest_root = btrfs_read_fs_root(fs_info, &key);
7163
7164                 /* For non-exist root we just skip it */
7165                 if (IS_ERR(dest_root) || !dest_root)
7166                         continue;
7167
7168                 key.objectid = dback->owner;
7169                 key.type = BTRFS_EXTENT_DATA_KEY;
7170                 key.offset = dback->offset;
7171
7172                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7173                 /*
7174                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7175                  * we need to record it for inode/file extent rebuild.
7176                  * For ret > 0, we record it only for file extent rebuild.
7177                  * For ret == 0, the file extent exists but only bytenr
7178                  * mismatch, let the original bytenr fix routine to handle,
7179                  * don't record it.
7180                  */
7181                 if (ret == 0)
7182                         continue;
7183                 ret = 0;
7184                 orphan = malloc(sizeof(*orphan));
7185                 if (!orphan) {
7186                         ret = -ENOMEM;
7187                         goto out;
7188                 }
7189                 INIT_LIST_HEAD(&orphan->list);
7190                 orphan->root = dback->root;
7191                 orphan->objectid = dback->owner;
7192                 orphan->offset = dback->offset;
7193                 orphan->disk_bytenr = rec->cache.start;
7194                 orphan->disk_len = rec->cache.size;
7195                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7196                 recorded_data_ref = 1;
7197         }
7198 out:
7199         btrfs_free_path(path);
7200         if (!ret)
7201                 return !recorded_data_ref;
7202         else
7203                 return ret;
7204 }
7205
7206 /*
7207  * when an incorrect extent item is found, this will delete
7208  * all of the existing entries for it and recreate them
7209  * based on what the tree scan found.
7210  */
7211 static int fixup_extent_refs(struct btrfs_fs_info *info,
7212                              struct cache_tree *extent_cache,
7213                              struct extent_record *rec)
7214 {
7215         struct btrfs_trans_handle *trans = NULL;
7216         int ret;
7217         struct btrfs_path *path;
7218         struct list_head *cur = rec->backrefs.next;
7219         struct cache_extent *cache;
7220         struct extent_backref *back;
7221         int allocated = 0;
7222         u64 flags = 0;
7223
7224         if (rec->flag_block_full_backref)
7225                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7226
7227         path = btrfs_alloc_path();
7228         if (!path)
7229                 return -ENOMEM;
7230
7231         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7232                 /*
7233                  * Sometimes the backrefs themselves are so broken they don't
7234                  * get attached to any meaningful rec, so first go back and
7235                  * check any of our backrefs that we couldn't find and throw
7236                  * them into the list if we find the backref so that
7237                  * verify_backrefs can figure out what to do.
7238                  */
7239                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7240                 if (ret < 0)
7241                         goto out;
7242         }
7243
7244         /* step one, make sure all of the backrefs agree */
7245         ret = verify_backrefs(info, path, rec);
7246         if (ret < 0)
7247                 goto out;
7248
7249         trans = btrfs_start_transaction(info->extent_root, 1);
7250         if (IS_ERR(trans)) {
7251                 ret = PTR_ERR(trans);
7252                 goto out;
7253         }
7254
7255         /* step two, delete all the existing records */
7256         ret = delete_extent_records(trans, info->extent_root, path,
7257                                     rec->start, rec->max_size);
7258
7259         if (ret < 0)
7260                 goto out;
7261
7262         /* was this block corrupt?  If so, don't add references to it */
7263         cache = lookup_cache_extent(info->corrupt_blocks,
7264                                     rec->start, rec->max_size);
7265         if (cache) {
7266                 ret = 0;
7267                 goto out;
7268         }
7269
7270         /* step three, recreate all the refs we did find */
7271         while(cur != &rec->backrefs) {
7272                 back = list_entry(cur, struct extent_backref, list);
7273                 cur = cur->next;
7274
7275                 /*
7276                  * if we didn't find any references, don't create a
7277                  * new extent record
7278                  */
7279                 if (!back->found_ref)
7280                         continue;
7281
7282                 rec->bad_full_backref = 0;
7283                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7284                 allocated = 1;
7285
7286                 if (ret)
7287                         goto out;
7288         }
7289 out:
7290         if (trans) {
7291                 int err = btrfs_commit_transaction(trans, info->extent_root);
7292                 if (!ret)
7293                         ret = err;
7294         }
7295
7296         btrfs_free_path(path);
7297         return ret;
7298 }
7299
7300 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7301                               struct extent_record *rec)
7302 {
7303         struct btrfs_trans_handle *trans;
7304         struct btrfs_root *root = fs_info->extent_root;
7305         struct btrfs_path *path;
7306         struct btrfs_extent_item *ei;
7307         struct btrfs_key key;
7308         u64 flags;
7309         int ret = 0;
7310
7311         key.objectid = rec->start;
7312         if (rec->metadata) {
7313                 key.type = BTRFS_METADATA_ITEM_KEY;
7314                 key.offset = rec->info_level;
7315         } else {
7316                 key.type = BTRFS_EXTENT_ITEM_KEY;
7317                 key.offset = rec->max_size;
7318         }
7319
7320         path = btrfs_alloc_path();
7321         if (!path)
7322                 return -ENOMEM;
7323
7324         trans = btrfs_start_transaction(root, 0);
7325         if (IS_ERR(trans)) {
7326                 btrfs_free_path(path);
7327                 return PTR_ERR(trans);
7328         }
7329
7330         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7331         if (ret < 0) {
7332                 btrfs_free_path(path);
7333                 btrfs_commit_transaction(trans, root);
7334                 return ret;
7335         } else if (ret) {
7336                 fprintf(stderr, "Didn't find extent for %llu\n",
7337                         (unsigned long long)rec->start);
7338                 btrfs_free_path(path);
7339                 btrfs_commit_transaction(trans, root);
7340                 return -ENOENT;
7341         }
7342
7343         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7344                             struct btrfs_extent_item);
7345         flags = btrfs_extent_flags(path->nodes[0], ei);
7346         if (rec->flag_block_full_backref) {
7347                 fprintf(stderr, "setting full backref on %llu\n",
7348                         (unsigned long long)key.objectid);
7349                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7350         } else {
7351                 fprintf(stderr, "clearing full backref on %llu\n",
7352                         (unsigned long long)key.objectid);
7353                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7354         }
7355         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7356         btrfs_mark_buffer_dirty(path->nodes[0]);
7357         btrfs_free_path(path);
7358         return btrfs_commit_transaction(trans, root);
7359 }
7360
7361 /* right now we only prune from the extent allocation tree */
7362 static int prune_one_block(struct btrfs_trans_handle *trans,
7363                            struct btrfs_fs_info *info,
7364                            struct btrfs_corrupt_block *corrupt)
7365 {
7366         int ret;
7367         struct btrfs_path path;
7368         struct extent_buffer *eb;
7369         u64 found;
7370         int slot;
7371         int nritems;
7372         int level = corrupt->level + 1;
7373
7374         btrfs_init_path(&path);
7375 again:
7376         /* we want to stop at the parent to our busted block */
7377         path.lowest_level = level;
7378
7379         ret = btrfs_search_slot(trans, info->extent_root,
7380                                 &corrupt->key, &path, -1, 1);
7381
7382         if (ret < 0)
7383                 goto out;
7384
7385         eb = path.nodes[level];
7386         if (!eb) {
7387                 ret = -ENOENT;
7388                 goto out;
7389         }
7390
7391         /*
7392          * hopefully the search gave us the block we want to prune,
7393          * lets try that first
7394          */
7395         slot = path.slots[level];
7396         found =  btrfs_node_blockptr(eb, slot);
7397         if (found == corrupt->cache.start)
7398                 goto del_ptr;
7399
7400         nritems = btrfs_header_nritems(eb);
7401
7402         /* the search failed, lets scan this node and hope we find it */
7403         for (slot = 0; slot < nritems; slot++) {
7404                 found =  btrfs_node_blockptr(eb, slot);
7405                 if (found == corrupt->cache.start)
7406                         goto del_ptr;
7407         }
7408         /*
7409          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7410          * to this block
7411          */
7412         if (eb == info->extent_root->node) {
7413                 ret = -ENOENT;
7414                 goto out;
7415         } else {
7416                 level++;
7417                 btrfs_release_path(&path);
7418                 goto again;
7419         }
7420
7421 del_ptr:
7422         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7423         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7424
7425 out:
7426         btrfs_release_path(&path);
7427         return ret;
7428 }
7429
7430 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7431 {
7432         struct btrfs_trans_handle *trans = NULL;
7433         struct cache_extent *cache;
7434         struct btrfs_corrupt_block *corrupt;
7435
7436         while (1) {
7437                 cache = search_cache_extent(info->corrupt_blocks, 0);
7438                 if (!cache)
7439                         break;
7440                 if (!trans) {
7441                         trans = btrfs_start_transaction(info->extent_root, 1);
7442                         if (IS_ERR(trans))
7443                                 return PTR_ERR(trans);
7444                 }
7445                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7446                 prune_one_block(trans, info, corrupt);
7447                 remove_cache_extent(info->corrupt_blocks, cache);
7448         }
7449         if (trans)
7450                 return btrfs_commit_transaction(trans, info->extent_root);
7451         return 0;
7452 }
7453
7454 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7455 {
7456         struct btrfs_block_group_cache *cache;
7457         u64 start, end;
7458         int ret;
7459
7460         while (1) {
7461                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7462                                             &start, &end, EXTENT_DIRTY);
7463                 if (ret)
7464                         break;
7465                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7466                                    GFP_NOFS);
7467         }
7468
7469         start = 0;
7470         while (1) {
7471                 cache = btrfs_lookup_first_block_group(fs_info, start);
7472                 if (!cache)
7473                         break;
7474                 if (cache->cached)
7475                         cache->cached = 0;
7476                 start = cache->key.objectid + cache->key.offset;
7477         }
7478 }
7479
7480 static int check_extent_refs(struct btrfs_root *root,
7481                              struct cache_tree *extent_cache)
7482 {
7483         struct extent_record *rec;
7484         struct cache_extent *cache;
7485         int err = 0;
7486         int ret = 0;
7487         int fixed = 0;
7488         int had_dups = 0;
7489         int recorded = 0;
7490
7491         if (repair) {
7492                 /*
7493                  * if we're doing a repair, we have to make sure
7494                  * we don't allocate from the problem extents.
7495                  * In the worst case, this will be all the
7496                  * extents in the FS
7497                  */
7498                 cache = search_cache_extent(extent_cache, 0);
7499                 while(cache) {
7500                         rec = container_of(cache, struct extent_record, cache);
7501                         set_extent_dirty(root->fs_info->excluded_extents,
7502                                          rec->start,
7503                                          rec->start + rec->max_size - 1,
7504                                          GFP_NOFS);
7505                         cache = next_cache_extent(cache);
7506                 }
7507
7508                 /* pin down all the corrupted blocks too */
7509                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7510                 while(cache) {
7511                         set_extent_dirty(root->fs_info->excluded_extents,
7512                                          cache->start,
7513                                          cache->start + cache->size - 1,
7514                                          GFP_NOFS);
7515                         cache = next_cache_extent(cache);
7516                 }
7517                 prune_corrupt_blocks(root->fs_info);
7518                 reset_cached_block_groups(root->fs_info);
7519         }
7520
7521         reset_cached_block_groups(root->fs_info);
7522
7523         /*
7524          * We need to delete any duplicate entries we find first otherwise we
7525          * could mess up the extent tree when we have backrefs that actually
7526          * belong to a different extent item and not the weird duplicate one.
7527          */
7528         while (repair && !list_empty(&duplicate_extents)) {
7529                 rec = list_entry(duplicate_extents.next, struct extent_record,
7530                                  list);
7531                 list_del_init(&rec->list);
7532
7533                 /* Sometimes we can find a backref before we find an actual
7534                  * extent, so we need to process it a little bit to see if there
7535                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7536                  * if this is a backref screwup.  If we need to delete stuff
7537                  * process_duplicates() will return 0, otherwise it will return
7538                  * 1 and we
7539                  */
7540                 if (process_duplicates(root, extent_cache, rec))
7541                         continue;
7542                 ret = delete_duplicate_records(root, rec);
7543                 if (ret < 0)
7544                         return ret;
7545                 /*
7546                  * delete_duplicate_records will return the number of entries
7547                  * deleted, so if it's greater than 0 then we know we actually
7548                  * did something and we need to remove.
7549                  */
7550                 if (ret)
7551                         had_dups = 1;
7552         }
7553
7554         if (had_dups)
7555                 return -EAGAIN;
7556
7557         while(1) {
7558                 int cur_err = 0;
7559
7560                 fixed = 0;
7561                 recorded = 0;
7562                 cache = search_cache_extent(extent_cache, 0);
7563                 if (!cache)
7564                         break;
7565                 rec = container_of(cache, struct extent_record, cache);
7566                 if (rec->num_duplicates) {
7567                         fprintf(stderr, "extent item %llu has multiple extent "
7568                                 "items\n", (unsigned long long)rec->start);
7569                         err = 1;
7570                         cur_err = 1;
7571                 }
7572
7573                 if (rec->refs != rec->extent_item_refs) {
7574                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7575                                 (unsigned long long)rec->start,
7576                                 (unsigned long long)rec->nr);
7577                         fprintf(stderr, "extent item %llu, found %llu\n",
7578                                 (unsigned long long)rec->extent_item_refs,
7579                                 (unsigned long long)rec->refs);
7580                         ret = record_orphan_data_extents(root->fs_info, rec);
7581                         if (ret < 0)
7582                                 goto repair_abort;
7583                         if (ret == 0) {
7584                                 recorded = 1;
7585                         } else {
7586                                 /*
7587                                  * we can't use the extent to repair file
7588                                  * extent, let the fallback method handle it.
7589                                  */
7590                                 if (!fixed && repair) {
7591                                         ret = fixup_extent_refs(
7592                                                         root->fs_info,
7593                                                         extent_cache, rec);
7594                                         if (ret)
7595                                                 goto repair_abort;
7596                                         fixed = 1;
7597                                 }
7598                         }
7599                         err = 1;
7600                         cur_err = 1;
7601                 }
7602                 if (all_backpointers_checked(rec, 1)) {
7603                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7604                                 (unsigned long long)rec->start,
7605                                 (unsigned long long)rec->nr);
7606
7607                         if (!fixed && !recorded && repair) {
7608                                 ret = fixup_extent_refs(root->fs_info,
7609                                                         extent_cache, rec);
7610                                 if (ret)
7611                                         goto repair_abort;
7612                                 fixed = 1;
7613                         }
7614                         cur_err = 1;
7615                         err = 1;
7616                 }
7617                 if (!rec->owner_ref_checked) {
7618                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7619                                 (unsigned long long)rec->start,
7620                                 (unsigned long long)rec->nr);
7621                         if (!fixed && !recorded && repair) {
7622                                 ret = fixup_extent_refs(root->fs_info,
7623                                                         extent_cache, rec);
7624                                 if (ret)
7625                                         goto repair_abort;
7626                                 fixed = 1;
7627                         }
7628                         err = 1;
7629                         cur_err = 1;
7630                 }
7631                 if (rec->bad_full_backref) {
7632                         fprintf(stderr, "bad full backref, on [%llu]\n",
7633                                 (unsigned long long)rec->start);
7634                         if (repair) {
7635                                 ret = fixup_extent_flags(root->fs_info, rec);
7636                                 if (ret)
7637                                         goto repair_abort;
7638                                 fixed = 1;
7639                         }
7640                         err = 1;
7641                         cur_err = 1;
7642                 }
7643                 /*
7644                  * Although it's not a extent ref's problem, we reuse this
7645                  * routine for error reporting.
7646                  * No repair function yet.
7647                  */
7648                 if (rec->crossing_stripes) {
7649                         fprintf(stderr,
7650                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7651                                 rec->start, rec->start + rec->max_size);
7652                         err = 1;
7653                         cur_err = 1;
7654                 }
7655
7656                 if (rec->wrong_chunk_type) {
7657                         fprintf(stderr,
7658                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7659                                 rec->start, rec->start + rec->max_size);
7660                         err = 1;
7661                         cur_err = 1;
7662                 }
7663
7664                 remove_cache_extent(extent_cache, cache);
7665                 free_all_extent_backrefs(rec);
7666                 if (!init_extent_tree && repair && (!cur_err || fixed))
7667                         clear_extent_dirty(root->fs_info->excluded_extents,
7668                                            rec->start,
7669                                            rec->start + rec->max_size - 1,
7670                                            GFP_NOFS);
7671                 free(rec);
7672         }
7673 repair_abort:
7674         if (repair) {
7675                 if (ret && ret != -EAGAIN) {
7676                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7677                         exit(1);
7678                 } else if (!ret) {
7679                         struct btrfs_trans_handle *trans;
7680
7681                         root = root->fs_info->extent_root;
7682                         trans = btrfs_start_transaction(root, 1);
7683                         if (IS_ERR(trans)) {
7684                                 ret = PTR_ERR(trans);
7685                                 goto repair_abort;
7686                         }
7687
7688                         btrfs_fix_block_accounting(trans, root);
7689                         ret = btrfs_commit_transaction(trans, root);
7690                         if (ret)
7691                                 goto repair_abort;
7692                 }
7693                 if (err)
7694                         fprintf(stderr, "repaired damaged extent references\n");
7695                 return ret;
7696         }
7697         return err;
7698 }
7699
7700 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7701 {
7702         u64 stripe_size;
7703
7704         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7705                 stripe_size = length;
7706                 stripe_size /= num_stripes;
7707         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7708                 stripe_size = length * 2;
7709                 stripe_size /= num_stripes;
7710         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7711                 stripe_size = length;
7712                 stripe_size /= (num_stripes - 1);
7713         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7714                 stripe_size = length;
7715                 stripe_size /= (num_stripes - 2);
7716         } else {
7717                 stripe_size = length;
7718         }
7719         return stripe_size;
7720 }
7721
7722 /*
7723  * Check the chunk with its block group/dev list ref:
7724  * Return 0 if all refs seems valid.
7725  * Return 1 if part of refs seems valid, need later check for rebuild ref
7726  * like missing block group and needs to search extent tree to rebuild them.
7727  * Return -1 if essential refs are missing and unable to rebuild.
7728  */
7729 static int check_chunk_refs(struct chunk_record *chunk_rec,
7730                             struct block_group_tree *block_group_cache,
7731                             struct device_extent_tree *dev_extent_cache,
7732                             int silent)
7733 {
7734         struct cache_extent *block_group_item;
7735         struct block_group_record *block_group_rec;
7736         struct cache_extent *dev_extent_item;
7737         struct device_extent_record *dev_extent_rec;
7738         u64 devid;
7739         u64 offset;
7740         u64 length;
7741         int metadump_v2 = 0;
7742         int i;
7743         int ret = 0;
7744
7745         block_group_item = lookup_cache_extent(&block_group_cache->tree,
7746                                                chunk_rec->offset,
7747                                                chunk_rec->length);
7748         if (block_group_item) {
7749                 block_group_rec = container_of(block_group_item,
7750                                                struct block_group_record,
7751                                                cache);
7752                 if (chunk_rec->length != block_group_rec->offset ||
7753                     chunk_rec->offset != block_group_rec->objectid ||
7754                     (!metadump_v2 &&
7755                      chunk_rec->type_flags != block_group_rec->flags)) {
7756                         if (!silent)
7757                                 fprintf(stderr,
7758                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7759                                         chunk_rec->objectid,
7760                                         chunk_rec->type,
7761                                         chunk_rec->offset,
7762                                         chunk_rec->length,
7763                                         chunk_rec->offset,
7764                                         chunk_rec->type_flags,
7765                                         block_group_rec->objectid,
7766                                         block_group_rec->type,
7767                                         block_group_rec->offset,
7768                                         block_group_rec->offset,
7769                                         block_group_rec->objectid,
7770                                         block_group_rec->flags);
7771                         ret = -1;
7772                 } else {
7773                         list_del_init(&block_group_rec->list);
7774                         chunk_rec->bg_rec = block_group_rec;
7775                 }
7776         } else {
7777                 if (!silent)
7778                         fprintf(stderr,
7779                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7780                                 chunk_rec->objectid,
7781                                 chunk_rec->type,
7782                                 chunk_rec->offset,
7783                                 chunk_rec->length,
7784                                 chunk_rec->offset,
7785                                 chunk_rec->type_flags);
7786                 ret = 1;
7787         }
7788
7789         if (metadump_v2)
7790                 return ret;
7791
7792         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7793                                     chunk_rec->num_stripes);
7794         for (i = 0; i < chunk_rec->num_stripes; ++i) {
7795                 devid = chunk_rec->stripes[i].devid;
7796                 offset = chunk_rec->stripes[i].offset;
7797                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7798                                                        devid, offset, length);
7799                 if (dev_extent_item) {
7800                         dev_extent_rec = container_of(dev_extent_item,
7801                                                 struct device_extent_record,
7802                                                 cache);
7803                         if (dev_extent_rec->objectid != devid ||
7804                             dev_extent_rec->offset != offset ||
7805                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
7806                             dev_extent_rec->length != length) {
7807                                 if (!silent)
7808                                         fprintf(stderr,
7809                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7810                                                 chunk_rec->objectid,
7811                                                 chunk_rec->type,
7812                                                 chunk_rec->offset,
7813                                                 chunk_rec->stripes[i].devid,
7814                                                 chunk_rec->stripes[i].offset,
7815                                                 dev_extent_rec->objectid,
7816                                                 dev_extent_rec->offset,
7817                                                 dev_extent_rec->length);
7818                                 ret = -1;
7819                         } else {
7820                                 list_move(&dev_extent_rec->chunk_list,
7821                                           &chunk_rec->dextents);
7822                         }
7823                 } else {
7824                         if (!silent)
7825                                 fprintf(stderr,
7826                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7827                                         chunk_rec->objectid,
7828                                         chunk_rec->type,
7829                                         chunk_rec->offset,
7830                                         chunk_rec->stripes[i].devid,
7831                                         chunk_rec->stripes[i].offset);
7832                         ret = -1;
7833                 }
7834         }
7835         return ret;
7836 }
7837
7838 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7839 int check_chunks(struct cache_tree *chunk_cache,
7840                  struct block_group_tree *block_group_cache,
7841                  struct device_extent_tree *dev_extent_cache,
7842                  struct list_head *good, struct list_head *bad,
7843                  struct list_head *rebuild, int silent)
7844 {
7845         struct cache_extent *chunk_item;
7846         struct chunk_record *chunk_rec;
7847         struct block_group_record *bg_rec;
7848         struct device_extent_record *dext_rec;
7849         int err;
7850         int ret = 0;
7851
7852         chunk_item = first_cache_extent(chunk_cache);
7853         while (chunk_item) {
7854                 chunk_rec = container_of(chunk_item, struct chunk_record,
7855                                          cache);
7856                 err = check_chunk_refs(chunk_rec, block_group_cache,
7857                                        dev_extent_cache, silent);
7858                 if (err < 0)
7859                         ret = err;
7860                 if (err == 0 && good)
7861                         list_add_tail(&chunk_rec->list, good);
7862                 if (err > 0 && rebuild)
7863                         list_add_tail(&chunk_rec->list, rebuild);
7864                 if (err < 0 && bad)
7865                         list_add_tail(&chunk_rec->list, bad);
7866                 chunk_item = next_cache_extent(chunk_item);
7867         }
7868
7869         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7870                 if (!silent)
7871                         fprintf(stderr,
7872                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7873                                 bg_rec->objectid,
7874                                 bg_rec->offset,
7875                                 bg_rec->flags);
7876                 if (!ret)
7877                         ret = 1;
7878         }
7879
7880         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7881                             chunk_list) {
7882                 if (!silent)
7883                         fprintf(stderr,
7884                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7885                                 dext_rec->objectid,
7886                                 dext_rec->offset,
7887                                 dext_rec->length);
7888                 if (!ret)
7889                         ret = 1;
7890         }
7891         return ret;
7892 }
7893
7894
7895 static int check_device_used(struct device_record *dev_rec,
7896                              struct device_extent_tree *dext_cache)
7897 {
7898         struct cache_extent *cache;
7899         struct device_extent_record *dev_extent_rec;
7900         u64 total_byte = 0;
7901
7902         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7903         while (cache) {
7904                 dev_extent_rec = container_of(cache,
7905                                               struct device_extent_record,
7906                                               cache);
7907                 if (dev_extent_rec->objectid != dev_rec->devid)
7908                         break;
7909
7910                 list_del_init(&dev_extent_rec->device_list);
7911                 total_byte += dev_extent_rec->length;
7912                 cache = next_cache_extent(cache);
7913         }
7914
7915         if (total_byte != dev_rec->byte_used) {
7916                 fprintf(stderr,
7917                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7918                         total_byte, dev_rec->byte_used, dev_rec->objectid,
7919                         dev_rec->type, dev_rec->offset);
7920                 return -1;
7921         } else {
7922                 return 0;
7923         }
7924 }
7925
7926 /* check btrfs_dev_item -> btrfs_dev_extent */
7927 static int check_devices(struct rb_root *dev_cache,
7928                          struct device_extent_tree *dev_extent_cache)
7929 {
7930         struct rb_node *dev_node;
7931         struct device_record *dev_rec;
7932         struct device_extent_record *dext_rec;
7933         int err;
7934         int ret = 0;
7935
7936         dev_node = rb_first(dev_cache);
7937         while (dev_node) {
7938                 dev_rec = container_of(dev_node, struct device_record, node);
7939                 err = check_device_used(dev_rec, dev_extent_cache);
7940                 if (err)
7941                         ret = err;
7942
7943                 dev_node = rb_next(dev_node);
7944         }
7945         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7946                             device_list) {
7947                 fprintf(stderr,
7948                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7949                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
7950                 if (!ret)
7951                         ret = 1;
7952         }
7953         return ret;
7954 }
7955
7956 static int add_root_item_to_list(struct list_head *head,
7957                                   u64 objectid, u64 bytenr, u64 last_snapshot,
7958                                   u8 level, u8 drop_level,
7959                                   int level_size, struct btrfs_key *drop_key)
7960 {
7961
7962         struct root_item_record *ri_rec;
7963         ri_rec = malloc(sizeof(*ri_rec));
7964         if (!ri_rec)
7965                 return -ENOMEM;
7966         ri_rec->bytenr = bytenr;
7967         ri_rec->objectid = objectid;
7968         ri_rec->level = level;
7969         ri_rec->level_size = level_size;
7970         ri_rec->drop_level = drop_level;
7971         ri_rec->last_snapshot = last_snapshot;
7972         if (drop_key)
7973                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
7974         list_add_tail(&ri_rec->list, head);
7975
7976         return 0;
7977 }
7978
7979 static void free_root_item_list(struct list_head *list)
7980 {
7981         struct root_item_record *ri_rec;
7982
7983         while (!list_empty(list)) {
7984                 ri_rec = list_first_entry(list, struct root_item_record,
7985                                           list);
7986                 list_del_init(&ri_rec->list);
7987                 free(ri_rec);
7988         }
7989 }
7990
7991 static int deal_root_from_list(struct list_head *list,
7992                                struct btrfs_root *root,
7993                                struct block_info *bits,
7994                                int bits_nr,
7995                                struct cache_tree *pending,
7996                                struct cache_tree *seen,
7997                                struct cache_tree *reada,
7998                                struct cache_tree *nodes,
7999                                struct cache_tree *extent_cache,
8000                                struct cache_tree *chunk_cache,
8001                                struct rb_root *dev_cache,
8002                                struct block_group_tree *block_group_cache,
8003                                struct device_extent_tree *dev_extent_cache)
8004 {
8005         int ret = 0;
8006         u64 last;
8007
8008         while (!list_empty(list)) {
8009                 struct root_item_record *rec;
8010                 struct extent_buffer *buf;
8011                 rec = list_entry(list->next,
8012                                  struct root_item_record, list);
8013                 last = 0;
8014                 buf = read_tree_block(root->fs_info->tree_root,
8015                                       rec->bytenr, rec->level_size, 0);
8016                 if (!extent_buffer_uptodate(buf)) {
8017                         free_extent_buffer(buf);
8018                         ret = -EIO;
8019                         break;
8020                 }
8021                 add_root_to_pending(buf, extent_cache, pending,
8022                                     seen, nodes, rec->objectid);
8023                 /*
8024                  * To rebuild extent tree, we need deal with snapshot
8025                  * one by one, otherwise we deal with node firstly which
8026                  * can maximize readahead.
8027                  */
8028                 while (1) {
8029                         ret = run_next_block(root, bits, bits_nr, &last,
8030                                              pending, seen, reada, nodes,
8031                                              extent_cache, chunk_cache,
8032                                              dev_cache, block_group_cache,
8033                                              dev_extent_cache, rec);
8034                         if (ret != 0)
8035                                 break;
8036                 }
8037                 free_extent_buffer(buf);
8038                 list_del(&rec->list);
8039                 free(rec);
8040                 if (ret < 0)
8041                         break;
8042         }
8043         while (ret >= 0) {
8044                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8045                                      reada, nodes, extent_cache, chunk_cache,
8046                                      dev_cache, block_group_cache,
8047                                      dev_extent_cache, NULL);
8048                 if (ret != 0) {
8049                         if (ret > 0)
8050                                 ret = 0;
8051                         break;
8052                 }
8053         }
8054         return ret;
8055 }
8056
8057 static int check_chunks_and_extents(struct btrfs_root *root)
8058 {
8059         struct rb_root dev_cache;
8060         struct cache_tree chunk_cache;
8061         struct block_group_tree block_group_cache;
8062         struct device_extent_tree dev_extent_cache;
8063         struct cache_tree extent_cache;
8064         struct cache_tree seen;
8065         struct cache_tree pending;
8066         struct cache_tree reada;
8067         struct cache_tree nodes;
8068         struct extent_io_tree excluded_extents;
8069         struct cache_tree corrupt_blocks;
8070         struct btrfs_path path;
8071         struct btrfs_key key;
8072         struct btrfs_key found_key;
8073         int ret, err = 0;
8074         struct block_info *bits;
8075         int bits_nr;
8076         struct extent_buffer *leaf;
8077         int slot;
8078         struct btrfs_root_item ri;
8079         struct list_head dropping_trees;
8080         struct list_head normal_trees;
8081         struct btrfs_root *root1;
8082         u64 objectid;
8083         u32 level_size;
8084         u8 level;
8085
8086         dev_cache = RB_ROOT;
8087         cache_tree_init(&chunk_cache);
8088         block_group_tree_init(&block_group_cache);
8089         device_extent_tree_init(&dev_extent_cache);
8090
8091         cache_tree_init(&extent_cache);
8092         cache_tree_init(&seen);
8093         cache_tree_init(&pending);
8094         cache_tree_init(&nodes);
8095         cache_tree_init(&reada);
8096         cache_tree_init(&corrupt_blocks);
8097         extent_io_tree_init(&excluded_extents);
8098         INIT_LIST_HEAD(&dropping_trees);
8099         INIT_LIST_HEAD(&normal_trees);
8100
8101         if (repair) {
8102                 root->fs_info->excluded_extents = &excluded_extents;
8103                 root->fs_info->fsck_extent_cache = &extent_cache;
8104                 root->fs_info->free_extent_hook = free_extent_hook;
8105                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8106         }
8107
8108         bits_nr = 1024;
8109         bits = malloc(bits_nr * sizeof(struct block_info));
8110         if (!bits) {
8111                 perror("malloc");
8112                 exit(1);
8113         }
8114
8115         if (ctx.progress_enabled) {
8116                 ctx.tp = TASK_EXTENTS;
8117                 task_start(ctx.info);
8118         }
8119
8120 again:
8121         root1 = root->fs_info->tree_root;
8122         level = btrfs_header_level(root1->node);
8123         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8124                                     root1->node->start, 0, level, 0,
8125                                     btrfs_level_size(root1, level), NULL);
8126         if (ret < 0)
8127                 goto out;
8128         root1 = root->fs_info->chunk_root;
8129         level = btrfs_header_level(root1->node);
8130         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8131                                     root1->node->start, 0, level, 0,
8132                                     btrfs_level_size(root1, level), NULL);
8133         if (ret < 0)
8134                 goto out;
8135         btrfs_init_path(&path);
8136         key.offset = 0;
8137         key.objectid = 0;
8138         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8139         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8140                                         &key, &path, 0, 0);
8141         if (ret < 0)
8142                 goto out;
8143         while(1) {
8144                 leaf = path.nodes[0];
8145                 slot = path.slots[0];
8146                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8147                         ret = btrfs_next_leaf(root, &path);
8148                         if (ret != 0)
8149                                 break;
8150                         leaf = path.nodes[0];
8151                         slot = path.slots[0];
8152                 }
8153                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8154                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8155                         unsigned long offset;
8156                         u64 last_snapshot;
8157
8158                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8159                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8160                         last_snapshot = btrfs_root_last_snapshot(&ri);
8161                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8162                                 level = btrfs_root_level(&ri);
8163                                 level_size = btrfs_level_size(root, level);
8164                                 ret = add_root_item_to_list(&normal_trees,
8165                                                 found_key.objectid,
8166                                                 btrfs_root_bytenr(&ri),
8167                                                 last_snapshot, level,
8168                                                 0, level_size, NULL);
8169                                 if (ret < 0)
8170                                         goto out;
8171                         } else {
8172                                 level = btrfs_root_level(&ri);
8173                                 level_size = btrfs_level_size(root, level);
8174                                 objectid = found_key.objectid;
8175                                 btrfs_disk_key_to_cpu(&found_key,
8176                                                       &ri.drop_progress);
8177                                 ret = add_root_item_to_list(&dropping_trees,
8178                                                 objectid,
8179                                                 btrfs_root_bytenr(&ri),
8180                                                 last_snapshot, level,
8181                                                 ri.drop_level,
8182                                                 level_size, &found_key);
8183                                 if (ret < 0)
8184                                         goto out;
8185                         }
8186                 }
8187                 path.slots[0]++;
8188         }
8189         btrfs_release_path(&path);
8190
8191         /*
8192          * check_block can return -EAGAIN if it fixes something, please keep
8193          * this in mind when dealing with return values from these functions, if
8194          * we get -EAGAIN we want to fall through and restart the loop.
8195          */
8196         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8197                                   &seen, &reada, &nodes, &extent_cache,
8198                                   &chunk_cache, &dev_cache, &block_group_cache,
8199                                   &dev_extent_cache);
8200         if (ret < 0) {
8201                 if (ret == -EAGAIN)
8202                         goto loop;
8203                 goto out;
8204         }
8205         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8206                                   &pending, &seen, &reada, &nodes,
8207                                   &extent_cache, &chunk_cache, &dev_cache,
8208                                   &block_group_cache, &dev_extent_cache);
8209         if (ret < 0) {
8210                 if (ret == -EAGAIN)
8211                         goto loop;
8212                 goto out;
8213         }
8214
8215         ret = check_chunks(&chunk_cache, &block_group_cache,
8216                            &dev_extent_cache, NULL, NULL, NULL, 0);
8217         if (ret) {
8218                 if (ret == -EAGAIN)
8219                         goto loop;
8220                 err = ret;
8221         }
8222
8223         ret = check_extent_refs(root, &extent_cache);
8224         if (ret < 0) {
8225                 if (ret == -EAGAIN)
8226                         goto loop;
8227                 goto out;
8228         }
8229
8230         ret = check_devices(&dev_cache, &dev_extent_cache);
8231         if (ret && err)
8232                 ret = err;
8233
8234 out:
8235         task_stop(ctx.info);
8236         if (repair) {
8237                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8238                 extent_io_tree_cleanup(&excluded_extents);
8239                 root->fs_info->fsck_extent_cache = NULL;
8240                 root->fs_info->free_extent_hook = NULL;
8241                 root->fs_info->corrupt_blocks = NULL;
8242                 root->fs_info->excluded_extents = NULL;
8243         }
8244         free(bits);
8245         free_chunk_cache_tree(&chunk_cache);
8246         free_device_cache_tree(&dev_cache);
8247         free_block_group_tree(&block_group_cache);
8248         free_device_extent_tree(&dev_extent_cache);
8249         free_extent_cache_tree(&seen);
8250         free_extent_cache_tree(&pending);
8251         free_extent_cache_tree(&reada);
8252         free_extent_cache_tree(&nodes);
8253         return ret;
8254 loop:
8255         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8256         free_extent_cache_tree(&seen);
8257         free_extent_cache_tree(&pending);
8258         free_extent_cache_tree(&reada);
8259         free_extent_cache_tree(&nodes);
8260         free_chunk_cache_tree(&chunk_cache);
8261         free_block_group_tree(&block_group_cache);
8262         free_device_cache_tree(&dev_cache);
8263         free_device_extent_tree(&dev_extent_cache);
8264         free_extent_record_cache(root->fs_info, &extent_cache);
8265         free_root_item_list(&normal_trees);
8266         free_root_item_list(&dropping_trees);
8267         extent_io_tree_cleanup(&excluded_extents);
8268         goto again;
8269 }
8270
8271 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8272                            struct btrfs_root *root, int overwrite)
8273 {
8274         struct extent_buffer *c;
8275         struct extent_buffer *old = root->node;
8276         int level;
8277         int ret;
8278         struct btrfs_disk_key disk_key = {0,0,0};
8279
8280         level = 0;
8281
8282         if (overwrite) {
8283                 c = old;
8284                 extent_buffer_get(c);
8285                 goto init;
8286         }
8287         c = btrfs_alloc_free_block(trans, root,
8288                                    btrfs_level_size(root, 0),
8289                                    root->root_key.objectid,
8290                                    &disk_key, level, 0, 0);
8291         if (IS_ERR(c)) {
8292                 c = old;
8293                 extent_buffer_get(c);
8294                 overwrite = 1;
8295         }
8296 init:
8297         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8298         btrfs_set_header_level(c, level);
8299         btrfs_set_header_bytenr(c, c->start);
8300         btrfs_set_header_generation(c, trans->transid);
8301         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8302         btrfs_set_header_owner(c, root->root_key.objectid);
8303
8304         write_extent_buffer(c, root->fs_info->fsid,
8305                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8306
8307         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8308                             btrfs_header_chunk_tree_uuid(c),
8309                             BTRFS_UUID_SIZE);
8310
8311         btrfs_mark_buffer_dirty(c);
8312         /*
8313          * this case can happen in the following case:
8314          *
8315          * 1.overwrite previous root.
8316          *
8317          * 2.reinit reloc data root, this is because we skip pin
8318          * down reloc data tree before which means we can allocate
8319          * same block bytenr here.
8320          */
8321         if (old->start == c->start) {
8322                 btrfs_set_root_generation(&root->root_item,
8323                                           trans->transid);
8324                 root->root_item.level = btrfs_header_level(root->node);
8325                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8326                                         &root->root_key, &root->root_item);
8327                 if (ret) {
8328                         free_extent_buffer(c);
8329                         return ret;
8330                 }
8331         }
8332         free_extent_buffer(old);
8333         root->node = c;
8334         add_root_to_dirty_list(root);
8335         return 0;
8336 }
8337
8338 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8339                                 struct extent_buffer *eb, int tree_root)
8340 {
8341         struct extent_buffer *tmp;
8342         struct btrfs_root_item *ri;
8343         struct btrfs_key key;
8344         u64 bytenr;
8345         u32 leafsize;
8346         int level = btrfs_header_level(eb);
8347         int nritems;
8348         int ret;
8349         int i;
8350
8351         /*
8352          * If we have pinned this block before, don't pin it again.
8353          * This can not only avoid forever loop with broken filesystem
8354          * but also give us some speedups.
8355          */
8356         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8357                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8358                 return 0;
8359
8360         btrfs_pin_extent(fs_info, eb->start, eb->len);
8361
8362         leafsize = btrfs_super_leafsize(fs_info->super_copy);
8363         nritems = btrfs_header_nritems(eb);
8364         for (i = 0; i < nritems; i++) {
8365                 if (level == 0) {
8366                         btrfs_item_key_to_cpu(eb, &key, i);
8367                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8368                                 continue;
8369                         /* Skip the extent root and reloc roots */
8370                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8371                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8372                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8373                                 continue;
8374                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8375                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8376
8377                         /*
8378                          * If at any point we start needing the real root we
8379                          * will have to build a stump root for the root we are
8380                          * in, but for now this doesn't actually use the root so
8381                          * just pass in extent_root.
8382                          */
8383                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8384                                               leafsize, 0);
8385                         if (!extent_buffer_uptodate(tmp)) {
8386                                 fprintf(stderr, "Error reading root block\n");
8387                                 return -EIO;
8388                         }
8389                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8390                         free_extent_buffer(tmp);
8391                         if (ret)
8392                                 return ret;
8393                 } else {
8394                         bytenr = btrfs_node_blockptr(eb, i);
8395
8396                         /* If we aren't the tree root don't read the block */
8397                         if (level == 1 && !tree_root) {
8398                                 btrfs_pin_extent(fs_info, bytenr, leafsize);
8399                                 continue;
8400                         }
8401
8402                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8403                                               leafsize, 0);
8404                         if (!extent_buffer_uptodate(tmp)) {
8405                                 fprintf(stderr, "Error reading tree block\n");
8406                                 return -EIO;
8407                         }
8408                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8409                         free_extent_buffer(tmp);
8410                         if (ret)
8411                                 return ret;
8412                 }
8413         }
8414
8415         return 0;
8416 }
8417
8418 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8419 {
8420         int ret;
8421
8422         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8423         if (ret)
8424                 return ret;
8425
8426         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8427 }
8428
8429 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8430 {
8431         struct btrfs_block_group_cache *cache;
8432         struct btrfs_path *path;
8433         struct extent_buffer *leaf;
8434         struct btrfs_chunk *chunk;
8435         struct btrfs_key key;
8436         int ret;
8437         u64 start;
8438
8439         path = btrfs_alloc_path();
8440         if (!path)
8441                 return -ENOMEM;
8442
8443         key.objectid = 0;
8444         key.type = BTRFS_CHUNK_ITEM_KEY;
8445         key.offset = 0;
8446
8447         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8448         if (ret < 0) {
8449                 btrfs_free_path(path);
8450                 return ret;
8451         }
8452
8453         /*
8454          * We do this in case the block groups were screwed up and had alloc
8455          * bits that aren't actually set on the chunks.  This happens with
8456          * restored images every time and could happen in real life I guess.
8457          */
8458         fs_info->avail_data_alloc_bits = 0;
8459         fs_info->avail_metadata_alloc_bits = 0;
8460         fs_info->avail_system_alloc_bits = 0;
8461
8462         /* First we need to create the in-memory block groups */
8463         while (1) {
8464                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8465                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
8466                         if (ret < 0) {
8467                                 btrfs_free_path(path);
8468                                 return ret;
8469                         }
8470                         if (ret) {
8471                                 ret = 0;
8472                                 break;
8473                         }
8474                 }
8475                 leaf = path->nodes[0];
8476                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8477                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8478                         path->slots[0]++;
8479                         continue;
8480                 }
8481
8482                 chunk = btrfs_item_ptr(leaf, path->slots[0],
8483                                        struct btrfs_chunk);
8484                 btrfs_add_block_group(fs_info, 0,
8485                                       btrfs_chunk_type(leaf, chunk),
8486                                       key.objectid, key.offset,
8487                                       btrfs_chunk_length(leaf, chunk));
8488                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8489                                  key.offset + btrfs_chunk_length(leaf, chunk),
8490                                  GFP_NOFS);
8491                 path->slots[0]++;
8492         }
8493         start = 0;
8494         while (1) {
8495                 cache = btrfs_lookup_first_block_group(fs_info, start);
8496                 if (!cache)
8497                         break;
8498                 cache->cached = 1;
8499                 start = cache->key.objectid + cache->key.offset;
8500         }
8501
8502         btrfs_free_path(path);
8503         return 0;
8504 }
8505
8506 static int reset_balance(struct btrfs_trans_handle *trans,
8507                          struct btrfs_fs_info *fs_info)
8508 {
8509         struct btrfs_root *root = fs_info->tree_root;
8510         struct btrfs_path *path;
8511         struct extent_buffer *leaf;
8512         struct btrfs_key key;
8513         int del_slot, del_nr = 0;
8514         int ret;
8515         int found = 0;
8516
8517         path = btrfs_alloc_path();
8518         if (!path)
8519                 return -ENOMEM;
8520
8521         key.objectid = BTRFS_BALANCE_OBJECTID;
8522         key.type = BTRFS_BALANCE_ITEM_KEY;
8523         key.offset = 0;
8524
8525         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8526         if (ret) {
8527                 if (ret > 0)
8528                         ret = 0;
8529                 if (!ret)
8530                         goto reinit_data_reloc;
8531                 else
8532                         goto out;
8533         }
8534
8535         ret = btrfs_del_item(trans, root, path);
8536         if (ret)
8537                 goto out;
8538         btrfs_release_path(path);
8539
8540         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8541         key.type = BTRFS_ROOT_ITEM_KEY;
8542         key.offset = 0;
8543
8544         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8545         if (ret < 0)
8546                 goto out;
8547         while (1) {
8548                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8549                         if (!found)
8550                                 break;
8551
8552                         if (del_nr) {
8553                                 ret = btrfs_del_items(trans, root, path,
8554                                                       del_slot, del_nr);
8555                                 del_nr = 0;
8556                                 if (ret)
8557                                         goto out;
8558                         }
8559                         key.offset++;
8560                         btrfs_release_path(path);
8561
8562                         found = 0;
8563                         ret = btrfs_search_slot(trans, root, &key, path,
8564                                                 -1, 1);
8565                         if (ret < 0)
8566                                 goto out;
8567                         continue;
8568                 }
8569                 found = 1;
8570                 leaf = path->nodes[0];
8571                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8572                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8573                         break;
8574                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8575                         path->slots[0]++;
8576                         continue;
8577                 }
8578                 if (!del_nr) {
8579                         del_slot = path->slots[0];
8580                         del_nr = 1;
8581                 } else {
8582                         del_nr++;
8583                 }
8584                 path->slots[0]++;
8585         }
8586
8587         if (del_nr) {
8588                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8589                 if (ret)
8590                         goto out;
8591         }
8592         btrfs_release_path(path);
8593
8594 reinit_data_reloc:
8595         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8596         key.type = BTRFS_ROOT_ITEM_KEY;
8597         key.offset = (u64)-1;
8598         root = btrfs_read_fs_root(fs_info, &key);
8599         if (IS_ERR(root)) {
8600                 fprintf(stderr, "Error reading data reloc tree\n");
8601                 ret = PTR_ERR(root);
8602                 goto out;
8603         }
8604         record_root_in_trans(trans, root);
8605         ret = btrfs_fsck_reinit_root(trans, root, 0);
8606         if (ret)
8607                 goto out;
8608         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8609 out:
8610         btrfs_free_path(path);
8611         return ret;
8612 }
8613
8614 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8615                               struct btrfs_fs_info *fs_info)
8616 {
8617         u64 start = 0;
8618         int ret;
8619
8620         /*
8621          * The only reason we don't do this is because right now we're just
8622          * walking the trees we find and pinning down their bytes, we don't look
8623          * at any of the leaves.  In order to do mixed groups we'd have to check
8624          * the leaves of any fs roots and pin down the bytes for any file
8625          * extents we find.  Not hard but why do it if we don't have to?
8626          */
8627         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8628                 fprintf(stderr, "We don't support re-initing the extent tree "
8629                         "for mixed block groups yet, please notify a btrfs "
8630                         "developer you want to do this so they can add this "
8631                         "functionality.\n");
8632                 return -EINVAL;
8633         }
8634
8635         /*
8636          * first we need to walk all of the trees except the extent tree and pin
8637          * down the bytes that are in use so we don't overwrite any existing
8638          * metadata.
8639          */
8640         ret = pin_metadata_blocks(fs_info);
8641         if (ret) {
8642                 fprintf(stderr, "error pinning down used bytes\n");
8643                 return ret;
8644         }
8645
8646         /*
8647          * Need to drop all the block groups since we're going to recreate all
8648          * of them again.
8649          */
8650         btrfs_free_block_groups(fs_info);
8651         ret = reset_block_groups(fs_info);
8652         if (ret) {
8653                 fprintf(stderr, "error resetting the block groups\n");
8654                 return ret;
8655         }
8656
8657         /* Ok we can allocate now, reinit the extent root */
8658         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8659         if (ret) {
8660                 fprintf(stderr, "extent root initialization failed\n");
8661                 /*
8662                  * When the transaction code is updated we should end the
8663                  * transaction, but for now progs only knows about commit so
8664                  * just return an error.
8665                  */
8666                 return ret;
8667         }
8668
8669         /*
8670          * Now we have all the in-memory block groups setup so we can make
8671          * allocations properly, and the metadata we care about is safe since we
8672          * pinned all of it above.
8673          */
8674         while (1) {
8675                 struct btrfs_block_group_cache *cache;
8676
8677                 cache = btrfs_lookup_first_block_group(fs_info, start);
8678                 if (!cache)
8679                         break;
8680                 start = cache->key.objectid + cache->key.offset;
8681                 ret = btrfs_insert_item(trans, fs_info->extent_root,
8682                                         &cache->key, &cache->item,
8683                                         sizeof(cache->item));
8684                 if (ret) {
8685                         fprintf(stderr, "Error adding block group\n");
8686                         return ret;
8687                 }
8688                 btrfs_extent_post_op(trans, fs_info->extent_root);
8689         }
8690
8691         ret = reset_balance(trans, fs_info);
8692         if (ret)
8693                 fprintf(stderr, "error reseting the pending balance\n");
8694
8695         return ret;
8696 }
8697
8698 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8699 {
8700         struct btrfs_path *path;
8701         struct btrfs_trans_handle *trans;
8702         struct btrfs_key key;
8703         int ret;
8704
8705         printf("Recowing metadata block %llu\n", eb->start);
8706         key.objectid = btrfs_header_owner(eb);
8707         key.type = BTRFS_ROOT_ITEM_KEY;
8708         key.offset = (u64)-1;
8709
8710         root = btrfs_read_fs_root(root->fs_info, &key);
8711         if (IS_ERR(root)) {
8712                 fprintf(stderr, "Couldn't find owner root %llu\n",
8713                         key.objectid);
8714                 return PTR_ERR(root);
8715         }
8716
8717         path = btrfs_alloc_path();
8718         if (!path)
8719                 return -ENOMEM;
8720
8721         trans = btrfs_start_transaction(root, 1);
8722         if (IS_ERR(trans)) {
8723                 btrfs_free_path(path);
8724                 return PTR_ERR(trans);
8725         }
8726
8727         path->lowest_level = btrfs_header_level(eb);
8728         if (path->lowest_level)
8729                 btrfs_node_key_to_cpu(eb, &key, 0);
8730         else
8731                 btrfs_item_key_to_cpu(eb, &key, 0);
8732
8733         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8734         btrfs_commit_transaction(trans, root);
8735         btrfs_free_path(path);
8736         return ret;
8737 }
8738
8739 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8740 {
8741         struct btrfs_path *path;
8742         struct btrfs_trans_handle *trans;
8743         struct btrfs_key key;
8744         int ret;
8745
8746         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8747                bad->key.type, bad->key.offset);
8748         key.objectid = bad->root_id;
8749         key.type = BTRFS_ROOT_ITEM_KEY;
8750         key.offset = (u64)-1;
8751
8752         root = btrfs_read_fs_root(root->fs_info, &key);
8753         if (IS_ERR(root)) {
8754                 fprintf(stderr, "Couldn't find owner root %llu\n",
8755                         key.objectid);
8756                 return PTR_ERR(root);
8757         }
8758
8759         path = btrfs_alloc_path();
8760         if (!path)
8761                 return -ENOMEM;
8762
8763         trans = btrfs_start_transaction(root, 1);
8764         if (IS_ERR(trans)) {
8765                 btrfs_free_path(path);
8766                 return PTR_ERR(trans);
8767         }
8768
8769         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8770         if (ret) {
8771                 if (ret > 0)
8772                         ret = 0;
8773                 goto out;
8774         }
8775         ret = btrfs_del_item(trans, root, path);
8776 out:
8777         btrfs_commit_transaction(trans, root);
8778         btrfs_free_path(path);
8779         return ret;
8780 }
8781
8782 static int zero_log_tree(struct btrfs_root *root)
8783 {
8784         struct btrfs_trans_handle *trans;
8785         int ret;
8786
8787         trans = btrfs_start_transaction(root, 1);
8788         if (IS_ERR(trans)) {
8789                 ret = PTR_ERR(trans);
8790                 return ret;
8791         }
8792         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8793         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8794         ret = btrfs_commit_transaction(trans, root);
8795         return ret;
8796 }
8797
8798 static int populate_csum(struct btrfs_trans_handle *trans,
8799                          struct btrfs_root *csum_root, char *buf, u64 start,
8800                          u64 len)
8801 {
8802         u64 offset = 0;
8803         u64 sectorsize;
8804         int ret = 0;
8805
8806         while (offset < len) {
8807                 sectorsize = csum_root->sectorsize;
8808                 ret = read_extent_data(csum_root, buf, start + offset,
8809                                        &sectorsize, 0);
8810                 if (ret)
8811                         break;
8812                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8813                                             start + offset, buf, sectorsize);
8814                 if (ret)
8815                         break;
8816                 offset += sectorsize;
8817         }
8818         return ret;
8819 }
8820
8821 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8822                                       struct btrfs_root *csum_root,
8823                                       struct btrfs_root *cur_root)
8824 {
8825         struct btrfs_path *path;
8826         struct btrfs_key key;
8827         struct extent_buffer *node;
8828         struct btrfs_file_extent_item *fi;
8829         char *buf = NULL;
8830         u64 start = 0;
8831         u64 len = 0;
8832         int slot = 0;
8833         int ret = 0;
8834
8835         path = btrfs_alloc_path();
8836         if (!path)
8837                 return -ENOMEM;
8838         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8839         if (!buf) {
8840                 ret = -ENOMEM;
8841                 goto out;
8842         }
8843
8844         key.objectid = 0;
8845         key.offset = 0;
8846         key.type = 0;
8847
8848         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8849         if (ret < 0)
8850                 goto out;
8851         /* Iterate all regular file extents and fill its csum */
8852         while (1) {
8853                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8854
8855                 if (key.type != BTRFS_EXTENT_DATA_KEY)
8856                         goto next;
8857                 node = path->nodes[0];
8858                 slot = path->slots[0];
8859                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8860                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8861                         goto next;
8862                 start = btrfs_file_extent_disk_bytenr(node, fi);
8863                 len = btrfs_file_extent_disk_num_bytes(node, fi);
8864
8865                 ret = populate_csum(trans, csum_root, buf, start, len);
8866                 if (ret == -EEXIST)
8867                         ret = 0;
8868                 if (ret < 0)
8869                         goto out;
8870 next:
8871                 /*
8872                  * TODO: if next leaf is corrupted, jump to nearest next valid
8873                  * leaf.
8874                  */
8875                 ret = btrfs_next_item(cur_root, path);
8876                 if (ret < 0)
8877                         goto out;
8878                 if (ret > 0) {
8879                         ret = 0;
8880                         goto out;
8881                 }
8882         }
8883
8884 out:
8885         btrfs_free_path(path);
8886         free(buf);
8887         return ret;
8888 }
8889
8890 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8891                                   struct btrfs_root *csum_root)
8892 {
8893         struct btrfs_fs_info *fs_info = csum_root->fs_info;
8894         struct btrfs_path *path;
8895         struct btrfs_root *tree_root = fs_info->tree_root;
8896         struct btrfs_root *cur_root;
8897         struct extent_buffer *node;
8898         struct btrfs_key key;
8899         int slot = 0;
8900         int ret = 0;
8901
8902         path = btrfs_alloc_path();
8903         if (!path)
8904                 return -ENOMEM;
8905
8906         key.objectid = BTRFS_FS_TREE_OBJECTID;
8907         key.offset = 0;
8908         key.type = BTRFS_ROOT_ITEM_KEY;
8909
8910         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
8911         if (ret < 0)
8912                 goto out;
8913         if (ret > 0) {
8914                 ret = -ENOENT;
8915                 goto out;
8916         }
8917
8918         while (1) {
8919                 node = path->nodes[0];
8920                 slot = path->slots[0];
8921                 btrfs_item_key_to_cpu(node, &key, slot);
8922                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8923                         goto out;
8924                 if (key.type != BTRFS_ROOT_ITEM_KEY)
8925                         goto next;
8926                 if (!is_fstree(key.objectid))
8927                         goto next;
8928                 key.offset = (u64)-1;
8929
8930                 cur_root = btrfs_read_fs_root(fs_info, &key);
8931                 if (IS_ERR(cur_root) || !cur_root) {
8932                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8933                                 key.objectid);
8934                         goto out;
8935                 }
8936                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8937                                 cur_root);
8938                 if (ret < 0)
8939                         goto out;
8940 next:
8941                 ret = btrfs_next_item(tree_root, path);
8942                 if (ret > 0) {
8943                         ret = 0;
8944                         goto out;
8945                 }
8946                 if (ret < 0)
8947                         goto out;
8948         }
8949
8950 out:
8951         btrfs_free_path(path);
8952         return ret;
8953 }
8954
8955 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
8956                                       struct btrfs_root *csum_root)
8957 {
8958         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
8959         struct btrfs_path *path;
8960         struct btrfs_extent_item *ei;
8961         struct extent_buffer *leaf;
8962         char *buf;
8963         struct btrfs_key key;
8964         int ret;
8965
8966         path = btrfs_alloc_path();
8967         if (!path)
8968                 return -ENOMEM;
8969
8970         key.objectid = 0;
8971         key.type = BTRFS_EXTENT_ITEM_KEY;
8972         key.offset = 0;
8973
8974         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
8975         if (ret < 0) {
8976                 btrfs_free_path(path);
8977                 return ret;
8978         }
8979
8980         buf = malloc(csum_root->sectorsize);
8981         if (!buf) {
8982                 btrfs_free_path(path);
8983                 return -ENOMEM;
8984         }
8985
8986         while (1) {
8987                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8988                         ret = btrfs_next_leaf(extent_root, path);
8989                         if (ret < 0)
8990                                 break;
8991                         if (ret) {
8992                                 ret = 0;
8993                                 break;
8994                         }
8995                 }
8996                 leaf = path->nodes[0];
8997
8998                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8999                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9000                         path->slots[0]++;
9001                         continue;
9002                 }
9003
9004                 ei = btrfs_item_ptr(leaf, path->slots[0],
9005                                     struct btrfs_extent_item);
9006                 if (!(btrfs_extent_flags(leaf, ei) &
9007                       BTRFS_EXTENT_FLAG_DATA)) {
9008                         path->slots[0]++;
9009                         continue;
9010                 }
9011
9012                 ret = populate_csum(trans, csum_root, buf, key.objectid,
9013                                     key.offset);
9014                 if (ret)
9015                         break;
9016                 path->slots[0]++;
9017         }
9018
9019         btrfs_free_path(path);
9020         free(buf);
9021         return ret;
9022 }
9023
9024 /*
9025  * Recalculate the csum and put it into the csum tree.
9026  *
9027  * Extent tree init will wipe out all the extent info, so in that case, we
9028  * can't depend on extent tree, but use fs tree.  If search_fs_tree is set, we
9029  * will use fs/subvol trees to init the csum tree.
9030  */
9031 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9032                           struct btrfs_root *csum_root,
9033                           int search_fs_tree)
9034 {
9035         if (search_fs_tree)
9036                 return fill_csum_tree_from_fs(trans, csum_root);
9037         else
9038                 return fill_csum_tree_from_extent(trans, csum_root);
9039 }
9040
9041 struct root_item_info {
9042         /* level of the root */
9043         u8 level;
9044         /* number of nodes at this level, must be 1 for a root */
9045         int node_count;
9046         u64 bytenr;
9047         u64 gen;
9048         struct cache_extent cache_extent;
9049 };
9050
9051 static struct cache_tree *roots_info_cache = NULL;
9052
9053 static void free_roots_info_cache(void)
9054 {
9055         if (!roots_info_cache)
9056                 return;
9057
9058         while (!cache_tree_empty(roots_info_cache)) {
9059                 struct cache_extent *entry;
9060                 struct root_item_info *rii;
9061
9062                 entry = first_cache_extent(roots_info_cache);
9063                 if (!entry)
9064                         break;
9065                 remove_cache_extent(roots_info_cache, entry);
9066                 rii = container_of(entry, struct root_item_info, cache_extent);
9067                 free(rii);
9068         }
9069
9070         free(roots_info_cache);
9071         roots_info_cache = NULL;
9072 }
9073
9074 static int build_roots_info_cache(struct btrfs_fs_info *info)
9075 {
9076         int ret = 0;
9077         struct btrfs_key key;
9078         struct extent_buffer *leaf;
9079         struct btrfs_path *path;
9080
9081         if (!roots_info_cache) {
9082                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9083                 if (!roots_info_cache)
9084                         return -ENOMEM;
9085                 cache_tree_init(roots_info_cache);
9086         }
9087
9088         path = btrfs_alloc_path();
9089         if (!path)
9090                 return -ENOMEM;
9091
9092         key.objectid = 0;
9093         key.type = BTRFS_EXTENT_ITEM_KEY;
9094         key.offset = 0;
9095
9096         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9097         if (ret < 0)
9098                 goto out;
9099         leaf = path->nodes[0];
9100
9101         while (1) {
9102                 struct btrfs_key found_key;
9103                 struct btrfs_extent_item *ei;
9104                 struct btrfs_extent_inline_ref *iref;
9105                 int slot = path->slots[0];
9106                 int type;
9107                 u64 flags;
9108                 u64 root_id;
9109                 u8 level;
9110                 struct cache_extent *entry;
9111                 struct root_item_info *rii;
9112
9113                 if (slot >= btrfs_header_nritems(leaf)) {
9114                         ret = btrfs_next_leaf(info->extent_root, path);
9115                         if (ret < 0) {
9116                                 break;
9117                         } else if (ret) {
9118                                 ret = 0;
9119                                 break;
9120                         }
9121                         leaf = path->nodes[0];
9122                         slot = path->slots[0];
9123                 }
9124
9125                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9126
9127                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9128                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9129                         goto next;
9130
9131                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9132                 flags = btrfs_extent_flags(leaf, ei);
9133
9134                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9135                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9136                         goto next;
9137
9138                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9139                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9140                         level = found_key.offset;
9141                 } else {
9142                         struct btrfs_tree_block_info *info;
9143
9144                         info = (struct btrfs_tree_block_info *)(ei + 1);
9145                         iref = (struct btrfs_extent_inline_ref *)(info + 1);
9146                         level = btrfs_tree_block_level(leaf, info);
9147                 }
9148
9149                 /*
9150                  * For a root extent, it must be of the following type and the
9151                  * first (and only one) iref in the item.
9152                  */
9153                 type = btrfs_extent_inline_ref_type(leaf, iref);
9154                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9155                         goto next;
9156
9157                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9158                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9159                 if (!entry) {
9160                         rii = malloc(sizeof(struct root_item_info));
9161                         if (!rii) {
9162                                 ret = -ENOMEM;
9163                                 goto out;
9164                         }
9165                         rii->cache_extent.start = root_id;
9166                         rii->cache_extent.size = 1;
9167                         rii->level = (u8)-1;
9168                         entry = &rii->cache_extent;
9169                         ret = insert_cache_extent(roots_info_cache, entry);
9170                         ASSERT(ret == 0);
9171                 } else {
9172                         rii = container_of(entry, struct root_item_info,
9173                                            cache_extent);
9174                 }
9175
9176                 ASSERT(rii->cache_extent.start == root_id);
9177                 ASSERT(rii->cache_extent.size == 1);
9178
9179                 if (level > rii->level || rii->level == (u8)-1) {
9180                         rii->level = level;
9181                         rii->bytenr = found_key.objectid;
9182                         rii->gen = btrfs_extent_generation(leaf, ei);
9183                         rii->node_count = 1;
9184                 } else if (level == rii->level) {
9185                         rii->node_count++;
9186                 }
9187 next:
9188                 path->slots[0]++;
9189         }
9190
9191 out:
9192         btrfs_free_path(path);
9193
9194         return ret;
9195 }
9196
9197 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9198                                   struct btrfs_path *path,
9199                                   const struct btrfs_key *root_key,
9200                                   const int read_only_mode)
9201 {
9202         const u64 root_id = root_key->objectid;
9203         struct cache_extent *entry;
9204         struct root_item_info *rii;
9205         struct btrfs_root_item ri;
9206         unsigned long offset;
9207
9208         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9209         if (!entry) {
9210                 fprintf(stderr,
9211                         "Error: could not find extent items for root %llu\n",
9212                         root_key->objectid);
9213                 return -ENOENT;
9214         }
9215
9216         rii = container_of(entry, struct root_item_info, cache_extent);
9217         ASSERT(rii->cache_extent.start == root_id);
9218         ASSERT(rii->cache_extent.size == 1);
9219
9220         if (rii->node_count != 1) {
9221                 fprintf(stderr,
9222                         "Error: could not find btree root extent for root %llu\n",
9223                         root_id);
9224                 return -ENOENT;
9225         }
9226
9227         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9228         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9229
9230         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9231             btrfs_root_level(&ri) != rii->level ||
9232             btrfs_root_generation(&ri) != rii->gen) {
9233
9234                 /*
9235                  * If we're in repair mode but our caller told us to not update
9236                  * the root item, i.e. just check if it needs to be updated, don't
9237                  * print this message, since the caller will call us again shortly
9238                  * for the same root item without read only mode (the caller will
9239                  * open a transaction first).
9240                  */
9241                 if (!(read_only_mode && repair))
9242                         fprintf(stderr,
9243                                 "%sroot item for root %llu,"
9244                                 " current bytenr %llu, current gen %llu, current level %u,"
9245                                 " new bytenr %llu, new gen %llu, new level %u\n",
9246                                 (read_only_mode ? "" : "fixing "),
9247                                 root_id,
9248                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9249                                 btrfs_root_level(&ri),
9250                                 rii->bytenr, rii->gen, rii->level);
9251
9252                 if (btrfs_root_generation(&ri) > rii->gen) {
9253                         fprintf(stderr,
9254                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9255                                 root_id, btrfs_root_generation(&ri), rii->gen);
9256                         return -EINVAL;
9257                 }
9258
9259                 if (!read_only_mode) {
9260                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9261                         btrfs_set_root_level(&ri, rii->level);
9262                         btrfs_set_root_generation(&ri, rii->gen);
9263                         write_extent_buffer(path->nodes[0], &ri,
9264                                             offset, sizeof(ri));
9265                 }
9266
9267                 return 1;
9268         }
9269
9270         return 0;
9271 }
9272
9273 /*
9274  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9275  * caused read-only snapshots to be corrupted if they were created at a moment
9276  * when the source subvolume/snapshot had orphan items. The issue was that the
9277  * on-disk root items became incorrect, referring to the pre orphan cleanup root
9278  * node instead of the post orphan cleanup root node.
9279  * So this function, and its callees, just detects and fixes those cases. Even
9280  * though the regression was for read-only snapshots, this function applies to
9281  * any snapshot/subvolume root.
9282  * This must be run before any other repair code - not doing it so, makes other
9283  * repair code delete or modify backrefs in the extent tree for example, which
9284  * will result in an inconsistent fs after repairing the root items.
9285  */
9286 static int repair_root_items(struct btrfs_fs_info *info)
9287 {
9288         struct btrfs_path *path = NULL;
9289         struct btrfs_key key;
9290         struct extent_buffer *leaf;
9291         struct btrfs_trans_handle *trans = NULL;
9292         int ret = 0;
9293         int bad_roots = 0;
9294         int need_trans = 0;
9295
9296         ret = build_roots_info_cache(info);
9297         if (ret)
9298                 goto out;
9299
9300         path = btrfs_alloc_path();
9301         if (!path) {
9302                 ret = -ENOMEM;
9303                 goto out;
9304         }
9305
9306         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9307         key.type = BTRFS_ROOT_ITEM_KEY;
9308         key.offset = 0;
9309
9310 again:
9311         /*
9312          * Avoid opening and committing transactions if a leaf doesn't have
9313          * any root items that need to be fixed, so that we avoid rotating
9314          * backup roots unnecessarily.
9315          */
9316         if (need_trans) {
9317                 trans = btrfs_start_transaction(info->tree_root, 1);
9318                 if (IS_ERR(trans)) {
9319                         ret = PTR_ERR(trans);
9320                         goto out;
9321                 }
9322         }
9323
9324         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9325                                 0, trans ? 1 : 0);
9326         if (ret < 0)
9327                 goto out;
9328         leaf = path->nodes[0];
9329
9330         while (1) {
9331                 struct btrfs_key found_key;
9332
9333                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9334                         int no_more_keys = find_next_key(path, &key);
9335
9336                         btrfs_release_path(path);
9337                         if (trans) {
9338                                 ret = btrfs_commit_transaction(trans,
9339                                                                info->tree_root);
9340                                 trans = NULL;
9341                                 if (ret < 0)
9342                                         goto out;
9343                         }
9344                         need_trans = 0;
9345                         if (no_more_keys)
9346                                 break;
9347                         goto again;
9348                 }
9349
9350                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9351
9352                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9353                         goto next;
9354                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9355                         goto next;
9356
9357                 ret = maybe_repair_root_item(info, path, &found_key,
9358                                              trans ? 0 : 1);
9359                 if (ret < 0)
9360                         goto out;
9361                 if (ret) {
9362                         if (!trans && repair) {
9363                                 need_trans = 1;
9364                                 key = found_key;
9365                                 btrfs_release_path(path);
9366                                 goto again;
9367                         }
9368                         bad_roots++;
9369                 }
9370 next:
9371                 path->slots[0]++;
9372         }
9373         ret = 0;
9374 out:
9375         free_roots_info_cache();
9376         btrfs_free_path(path);
9377         if (trans)
9378                 btrfs_commit_transaction(trans, info->tree_root);
9379         if (ret < 0)
9380                 return ret;
9381
9382         return bad_roots;
9383 }
9384
9385 const char * const cmd_check_usage[] = {
9386         "btrfs check [options] <device>",
9387         "Check structural inegrity of a filesystem (unmounted).",
9388         "Check structural inegrity of an unmounted filesystem. Verify internal",
9389         "trees' consistency and item connectivity. In the repair mode try to",
9390         "fix the problems found.",
9391         "WARNING: the repair mode is considered dangerous",
9392         "",
9393         "-s|--super <superblock>     use this superblock copy",
9394         "-b|--backup                 use the backup root copy",
9395         "--repair                    try to repair the filesystem",
9396         "--readonly                  run in read-only mode (default)",
9397         "--init-csum-tree            create a new CRC tree",
9398         "--init-extent-tree          create a new extent tree",
9399         "--check-data-csum           verify checkums of data blocks",
9400         "-Q|--qgroup-report           print a report on qgroup consistency",
9401         "-E|--subvol-extents <subvolid>",
9402         "                            print subvolume extents and sharing state",
9403         "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
9404         "-p|--progress               indicate progress",
9405         NULL
9406 };
9407
9408 int cmd_check(int argc, char **argv)
9409 {
9410         struct cache_tree root_cache;
9411         struct btrfs_root *root;
9412         struct btrfs_fs_info *info;
9413         u64 bytenr = 0;
9414         u64 subvolid = 0;
9415         u64 tree_root_bytenr = 0;
9416         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9417         int ret;
9418         u64 num;
9419         int init_csum_tree = 0;
9420         int readonly = 0;
9421         int qgroup_report = 0;
9422         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9423
9424         while(1) {
9425                 int c;
9426                 enum { OPT_REPAIR = 257, OPT_INIT_CSUM, OPT_INIT_EXTENT,
9427                         OPT_CHECK_CSUM, OPT_READONLY };
9428                 static const struct option long_options[] = {
9429                         { "super", required_argument, NULL, 's' },
9430                         { "repair", no_argument, NULL, OPT_REPAIR },
9431                         { "readonly", no_argument, NULL, OPT_READONLY },
9432                         { "init-csum-tree", no_argument, NULL, OPT_INIT_CSUM },
9433                         { "init-extent-tree", no_argument, NULL, OPT_INIT_EXTENT },
9434                         { "check-data-csum", no_argument, NULL, OPT_CHECK_CSUM },
9435                         { "backup", no_argument, NULL, 'b' },
9436                         { "subvol-extents", required_argument, NULL, 'E' },
9437                         { "qgroup-report", no_argument, NULL, 'Q' },
9438                         { "tree-root", required_argument, NULL, 'r' },
9439                         { "progress", no_argument, NULL, 'p' },
9440                         { NULL, 0, NULL, 0}
9441                 };
9442
9443                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9444                 if (c < 0)
9445                         break;
9446                 switch(c) {
9447                         case 'a': /* ignored */ break;
9448                         case 'b':
9449                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9450                                 break;
9451                         case 's':
9452                                 num = arg_strtou64(optarg);
9453                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9454                                         fprintf(stderr,
9455                                                 "ERROR: super mirror should be less than: %d\n",
9456                                                 BTRFS_SUPER_MIRROR_MAX);
9457                                         exit(1);
9458                                 }
9459                                 bytenr = btrfs_sb_offset(((int)num));
9460                                 printf("using SB copy %llu, bytenr %llu\n", num,
9461                                        (unsigned long long)bytenr);
9462                                 break;
9463                         case 'Q':
9464                                 qgroup_report = 1;
9465                                 break;
9466                         case 'E':
9467                                 subvolid = arg_strtou64(optarg);
9468                                 break;
9469                         case 'r':
9470                                 tree_root_bytenr = arg_strtou64(optarg);
9471                                 break;
9472                         case 'p':
9473                                 ctx.progress_enabled = true;
9474                                 break;
9475                         case '?':
9476                         case 'h':
9477                                 usage(cmd_check_usage);
9478                         case OPT_REPAIR:
9479                                 printf("enabling repair mode\n");
9480                                 repair = 1;
9481                                 ctree_flags |= OPEN_CTREE_WRITES;
9482                                 break;
9483                         case OPT_READONLY:
9484                                 readonly = 1;
9485                                 break;
9486                         case OPT_INIT_CSUM:
9487                                 printf("Creating a new CRC tree\n");
9488                                 init_csum_tree = 1;
9489                                 repair = 1;
9490                                 ctree_flags |= OPEN_CTREE_WRITES;
9491                                 break;
9492                         case OPT_INIT_EXTENT:
9493                                 init_extent_tree = 1;
9494                                 ctree_flags |= (OPEN_CTREE_WRITES |
9495                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9496                                 repair = 1;
9497                                 break;
9498                         case OPT_CHECK_CSUM:
9499                                 check_data_csum = 1;
9500                                 break;
9501                 }
9502         }
9503         argc = argc - optind;
9504
9505         if (check_argc_exact(argc, 1))
9506                 usage(cmd_check_usage);
9507
9508         if (ctx.progress_enabled) {
9509                 ctx.tp = TASK_NOTHING;
9510                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9511         }
9512
9513         /* This check is the only reason for --readonly to exist */
9514         if (readonly && repair) {
9515                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9516                 exit(1);
9517         }
9518
9519         radix_tree_init();
9520         cache_tree_init(&root_cache);
9521
9522         if((ret = check_mounted(argv[optind])) < 0) {
9523                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9524                 goto err_out;
9525         } else if(ret) {
9526                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9527                 ret = -EBUSY;
9528                 goto err_out;
9529         }
9530
9531         /* only allow partial opening under repair mode */
9532         if (repair)
9533                 ctree_flags |= OPEN_CTREE_PARTIAL;
9534
9535         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9536                                   ctree_flags);
9537         if (!info) {
9538                 fprintf(stderr, "Couldn't open file system\n");
9539                 ret = -EIO;
9540                 goto err_out;
9541         }
9542
9543         global_info = info;
9544         root = info->fs_root;
9545
9546         /*
9547          * repair mode will force us to commit transaction which
9548          * will make us fail to load log tree when mounting.
9549          */
9550         if (repair && btrfs_super_log_root(info->super_copy)) {
9551                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9552                 if (!ret) {
9553                         ret = 1;
9554                         goto close_out;
9555                 }
9556                 ret = zero_log_tree(root);
9557                 if (ret) {
9558                         fprintf(stderr, "fail to zero log tree\n");
9559                         goto close_out;
9560                 }
9561         }
9562
9563         uuid_unparse(info->super_copy->fsid, uuidbuf);
9564         if (qgroup_report) {
9565                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9566                        uuidbuf);
9567                 ret = qgroup_verify_all(info);
9568                 if (ret == 0)
9569                         print_qgroup_report(1);
9570                 goto close_out;
9571         }
9572         if (subvolid) {
9573                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9574                        subvolid, argv[optind], uuidbuf);
9575                 ret = print_extent_state(info, subvolid);
9576                 goto close_out;
9577         }
9578         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9579
9580         if (!extent_buffer_uptodate(info->tree_root->node) ||
9581             !extent_buffer_uptodate(info->dev_root->node) ||
9582             !extent_buffer_uptodate(info->chunk_root->node)) {
9583                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9584                 ret = -EIO;
9585                 goto close_out;
9586         }
9587
9588         if (init_extent_tree || init_csum_tree) {
9589                 struct btrfs_trans_handle *trans;
9590
9591                 trans = btrfs_start_transaction(info->extent_root, 0);
9592                 if (IS_ERR(trans)) {
9593                         fprintf(stderr, "Error starting transaction\n");
9594                         ret = PTR_ERR(trans);
9595                         goto close_out;
9596                 }
9597
9598                 if (init_extent_tree) {
9599                         printf("Creating a new extent tree\n");
9600                         ret = reinit_extent_tree(trans, info);
9601                         if (ret)
9602                                 goto close_out;
9603                 }
9604
9605                 if (init_csum_tree) {
9606                         fprintf(stderr, "Reinit crc root\n");
9607                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9608                         if (ret) {
9609                                 fprintf(stderr, "crc root initialization failed\n");
9610                                 ret = -EIO;
9611                                 goto close_out;
9612                         }
9613
9614                         ret = fill_csum_tree(trans, info->csum_root,
9615                                              init_extent_tree);
9616                         if (ret) {
9617                                 fprintf(stderr, "crc refilling failed\n");
9618                                 return -EIO;
9619                         }
9620                 }
9621                 /*
9622                  * Ok now we commit and run the normal fsck, which will add
9623                  * extent entries for all of the items it finds.
9624                  */
9625                 ret = btrfs_commit_transaction(trans, info->extent_root);
9626                 if (ret)
9627                         goto close_out;
9628         }
9629         if (!extent_buffer_uptodate(info->extent_root->node)) {
9630                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9631                 ret = -EIO;
9632                 goto close_out;
9633         }
9634         if (!extent_buffer_uptodate(info->csum_root->node)) {
9635                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9636                 ret = -EIO;
9637                 goto close_out;
9638         }
9639
9640         if (!ctx.progress_enabled)
9641                 fprintf(stderr, "checking extents\n");
9642         ret = check_chunks_and_extents(root);
9643         if (ret)
9644                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9645
9646         ret = repair_root_items(info);
9647         if (ret < 0)
9648                 goto close_out;
9649         if (repair) {
9650                 fprintf(stderr, "Fixed %d roots.\n", ret);
9651                 ret = 0;
9652         } else if (ret > 0) {
9653                 fprintf(stderr,
9654                        "Found %d roots with an outdated root item.\n",
9655                        ret);
9656                 fprintf(stderr,
9657                         "Please run a filesystem check with the option --repair to fix them.\n");
9658                 ret = 1;
9659                 goto close_out;
9660         }
9661
9662         if (!ctx.progress_enabled)
9663                 fprintf(stderr, "checking free space cache\n");
9664         ret = check_space_cache(root);
9665         if (ret)
9666                 goto out;
9667
9668         /*
9669          * We used to have to have these hole extents in between our real
9670          * extents so if we don't have this flag set we need to make sure there
9671          * are no gaps in the file extents for inodes, otherwise we can just
9672          * ignore it when this happens.
9673          */
9674         no_holes = btrfs_fs_incompat(root->fs_info,
9675                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9676         if (!ctx.progress_enabled)
9677                 fprintf(stderr, "checking fs roots\n");
9678         ret = check_fs_roots(root, &root_cache);
9679         if (ret)
9680                 goto out;
9681
9682         fprintf(stderr, "checking csums\n");
9683         ret = check_csums(root);
9684         if (ret)
9685                 goto out;
9686
9687         fprintf(stderr, "checking root refs\n");
9688         ret = check_root_refs(root, &root_cache);
9689         if (ret)
9690                 goto out;
9691
9692         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9693                 struct extent_buffer *eb;
9694
9695                 eb = list_first_entry(&root->fs_info->recow_ebs,
9696                                       struct extent_buffer, recow);
9697                 list_del_init(&eb->recow);
9698                 ret = recow_extent_buffer(root, eb);
9699                 if (ret)
9700                         break;
9701         }
9702
9703         while (!list_empty(&delete_items)) {
9704                 struct bad_item *bad;
9705
9706                 bad = list_first_entry(&delete_items, struct bad_item, list);
9707                 list_del_init(&bad->list);
9708                 if (repair)
9709                         ret = delete_bad_item(root, bad);
9710                 free(bad);
9711         }
9712
9713         if (info->quota_enabled) {
9714                 int err;
9715                 fprintf(stderr, "checking quota groups\n");
9716                 err = qgroup_verify_all(info);
9717                 if (err)
9718                         goto out;
9719         }
9720
9721         if (!list_empty(&root->fs_info->recow_ebs)) {
9722                 fprintf(stderr, "Transid errors in file system\n");
9723                 ret = 1;
9724         }
9725 out:
9726         print_qgroup_report(0);
9727         if (found_old_backref) { /*
9728                  * there was a disk format change when mixed
9729                  * backref was in testing tree. The old format
9730                  * existed about one week.
9731                  */
9732                 printf("\n * Found old mixed backref format. "
9733                        "The old format is not supported! *"
9734                        "\n * Please mount the FS in readonly mode, "
9735                        "backup data and re-format the FS. *\n\n");
9736                 ret = 1;
9737         }
9738         printf("found %llu bytes used err is %d\n",
9739                (unsigned long long)bytes_used, ret);
9740         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9741         printf("total tree bytes: %llu\n",
9742                (unsigned long long)total_btree_bytes);
9743         printf("total fs tree bytes: %llu\n",
9744                (unsigned long long)total_fs_tree_bytes);
9745         printf("total extent tree bytes: %llu\n",
9746                (unsigned long long)total_extent_tree_bytes);
9747         printf("btree space waste bytes: %llu\n",
9748                (unsigned long long)btree_space_waste);
9749         printf("file data blocks allocated: %llu\n referenced %llu\n",
9750                 (unsigned long long)data_bytes_allocated,
9751                 (unsigned long long)data_bytes_referenced);
9752
9753         free_root_recs_tree(&root_cache);
9754 close_out:
9755         close_ctree(root);
9756 err_out:
9757         if (ctx.progress_enabled)
9758                 task_deinit(ctx.info);
9759
9760         return ret;
9761 }