btrfs-progs: install to /usr/local by default again
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "btrfsck.h"
39 #include "qgroup-verify.h"
40 #include "rbtree-utils.h"
41 #include "backref.h"
42 #include "ulist.h"
43
/*
 * Phases that the progress indicator can report.
 *
 * The values are used by print_status_check() as indexes into its table
 * of phase names, which has one entry per phase except TASK_NOTHING.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* have to be the last element */
};
50
/* Context handed (as void *) to the progress-indicator callbacks. */
struct task_ctx {
        int progress_enabled;
        enum task_position tp;  /* phase printed by print_status_check() */

        struct task_info *info; /* passed to task_period_start()/_wait() */
};
57
/*
 * File-scope state of the checker.
 *
 * NOTE(review): most of these are not referenced by the code near here;
 * the u64 counters look like running totals filled in by later passes and
 * the int flags like command-line options -- confirm against their users.
 */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static int found_old_backref = 0;
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
static int repair = 0;
/* When non-zero, maybe_free_inode_rec() does not flag file extent gaps. */
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
static struct task_ctx ctx = { 0 };
75
76 static void *print_status_check(void *p)
77 {
78         struct task_ctx *priv = p;
79         const char work_indicator[] = { '.', 'o', 'O', 'o' };
80         uint32_t count = 0;
81         static char *task_position_string[] = {
82                 "checking extents",
83                 "checking free space cache",
84                 "checking fs roots",
85         };
86
87         task_period_start(priv->info, 1000 /* 1s */);
88
89         if (priv->tp == TASK_NOTHING)
90                 return NULL;
91
92         while (1) {
93                 printf("%s [%c]\r", task_position_string[priv->tp],
94                                 work_indicator[count % 4]);
95                 count++;
96                 fflush(stdout);
97                 task_period_wait(priv->info);
98         }
99         return NULL;
100 }
101
/*
 * Progress-indicator completion callback: terminate the status line with
 * a newline and flush stdout.  @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
        fputs("\n", stdout);
        fflush(stdout);

        return 0;
}
109
/*
 * Common part of a back reference.  It is embedded as the first member
 * ("node") of both struct data_backref and struct tree_backref.
 * NOTE(review): is_data presumably tells which of the two contains this
 * header -- confirm against the code that allocates backrefs.
 */
struct extent_backref {
        struct list_head list;
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
118
/* Back reference to a data extent; starts with the common header. */
struct data_backref {
        struct extent_backref node;
        /*
         * parent and root share storage.
         * NOTE(review): presumably node.full_backref selects which one is
         * meaningful -- confirm.
         */
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        u32 found_ref;
};
133
/*
 * Much like data_backref, but with the undetermined members removed and
 * linked through a plain list_head.
 * During the extent scan it is stored in root->orphan_data_extent.
 * During the fs tree scan it is then moved to the owning inode record's
 * orphan_extents list.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        u64 objectid;           /* printed as "inode" in error reports */
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
148
/* Back reference to a tree block; starts with the common header. */
struct tree_backref {
        struct extent_backref node;
        /* parent and root share storage; only one of them is valid. */
        union {
                u64 parent;
                u64 root;
        };
};
156
/*
 * Bookkeeping for one extent.
 *
 * NOTE(review): none of the users of this structure are in the nearby
 * code, so the members are left undocumented rather than guessed at;
 * backrefs is presumably a list of struct extent_backref -- confirm.
 */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        int flag_block_full_backref;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
183
184 struct inode_backref {
185         struct list_head list;
186         unsigned int found_dir_item:1;
187         unsigned int found_dir_index:1;
188         unsigned int found_inode_ref:1;
189         unsigned int filetype:8;
190         int errors;
191         unsigned int ref_type;
192         u64 dir;
193         u64 index;
194         u16 namelen;
195         char name[0];
196 };
197
/*
 * Values recorded for one root item.
 * NOTE(review): the code that fills this in is not nearby; the members
 * presumably mirror the on-disk root item fields of the same names --
 * confirm.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        int level_size;
        struct btrfs_key drop_key;
};
208
/*
 * Error bits OR-ed into the errors field of a backref record.
 * print_ref_error() prints one message per bit.
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
222
/*
 * One hole, i.e. the byte range [start, start + len), kept in a
 * red-black tree ordered by compare_hole().
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
228
229 /* Compatible function to allow reuse of old codes */
230 static u64 first_extent_gap(struct rb_root *holes)
231 {
232         struct file_extent_hole *hole;
233
234         if (RB_EMPTY_ROOT(holes))
235                 return (u64)-1;
236
237         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
238         return hole->start;
239 }
240
241 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
242 {
243         struct file_extent_hole *hole1;
244         struct file_extent_hole *hole2;
245
246         hole1 = rb_entry(node1, struct file_extent_hole, node);
247         hole2 = rb_entry(node2, struct file_extent_hole, node);
248
249         if (hole1->start > hole2->start)
250                 return -1;
251         if (hole1->start < hole2->start)
252                 return 1;
253         /* Now hole1->start == hole2->start */
254         if (hole1->len >= hole2->len)
255                 /*
256                  * Hole 1 will be merge center
257                  * Same hole will be merged later
258                  */
259                 return -1;
260         /* Hole 2 will be merge center */
261         return 1;
262 }
263
264 /*
265  * Add a hole to the record
266  *
267  * This will do hole merge for copy_file_extent_holes(),
268  * which will ensure there won't be continuous holes.
269  */
270 static int add_file_extent_hole(struct rb_root *holes,
271                                 u64 start, u64 len)
272 {
273         struct file_extent_hole *hole;
274         struct file_extent_hole *prev = NULL;
275         struct file_extent_hole *next = NULL;
276
277         hole = malloc(sizeof(*hole));
278         if (!hole)
279                 return -ENOMEM;
280         hole->start = start;
281         hole->len = len;
282         /* Since compare will not return 0, no -EEXIST will happen */
283         rb_insert(holes, &hole->node, compare_hole);
284
285         /* simple merge with previous hole */
286         if (rb_prev(&hole->node))
287                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
288                                 node);
289         if (prev && prev->start + prev->len >= hole->start) {
290                 hole->len = hole->start + hole->len - prev->start;
291                 hole->start = prev->start;
292                 rb_erase(&prev->node, holes);
293                 free(prev);
294                 prev = NULL;
295         }
296
297         /* iterate merge with next holes */
298         while (1) {
299                 if (!rb_next(&hole->node))
300                         break;
301                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
302                                         node);
303                 if (hole->start + hole->len >= next->start) {
304                         if (hole->start + hole->len <= next->start + next->len)
305                                 hole->len = next->start + next->len -
306                                             hole->start;
307                         rb_erase(&next->node, holes);
308                         free(next);
309                         next = NULL;
310                 } else
311                         break;
312         }
313         return 0;
314 }
315
316 static int compare_hole_range(struct rb_node *node, void *data)
317 {
318         struct file_extent_hole *hole;
319         u64 start;
320
321         hole = (struct file_extent_hole *)data;
322         start = hole->start;
323
324         hole = rb_entry(node, struct file_extent_hole, node);
325         if (start < hole->start)
326                 return -1;
327         if (start >= hole->start && start < hole->start + hole->len)
328                 return 0;
329         return 1;
330 }
331
332 /*
333  * Delete a hole in the record
334  *
335  * This will do the hole split and is much restrict than add.
336  */
337 static int del_file_extent_hole(struct rb_root *holes,
338                                 u64 start, u64 len)
339 {
340         struct file_extent_hole *hole;
341         struct file_extent_hole tmp;
342         u64 prev_start = 0;
343         u64 prev_len = 0;
344         u64 next_start = 0;
345         u64 next_len = 0;
346         struct rb_node *node;
347         int have_prev = 0;
348         int have_next = 0;
349         int ret = 0;
350
351         tmp.start = start;
352         tmp.len = len;
353         node = rb_search(holes, &tmp, compare_hole_range, NULL);
354         if (!node)
355                 return -EEXIST;
356         hole = rb_entry(node, struct file_extent_hole, node);
357         if (start + len > hole->start + hole->len)
358                 return -EEXIST;
359
360         /*
361          * Now there will be no overflap, delete the hole and re-add the
362          * split(s) if they exists.
363          */
364         if (start > hole->start) {
365                 prev_start = hole->start;
366                 prev_len = start - hole->start;
367                 have_prev = 1;
368         }
369         if (hole->start + hole->len > start + len) {
370                 next_start = start + len;
371                 next_len = hole->start + hole->len - start - len;
372                 have_next = 1;
373         }
374         rb_erase(node, holes);
375         free(hole);
376         if (have_prev) {
377                 ret = add_file_extent_hole(holes, prev_start, prev_len);
378                 if (ret < 0)
379                         return ret;
380         }
381         if (have_next) {
382                 ret = add_file_extent_hole(holes, next_start, next_len);
383                 if (ret < 0)
384                         return ret;
385         }
386         return 0;
387 }
388
389 static int copy_file_extent_holes(struct rb_root *dst,
390                                   struct rb_root *src)
391 {
392         struct file_extent_hole *hole;
393         struct rb_node *node;
394         int ret = 0;
395
396         node = rb_first(src);
397         while (node) {
398                 hole = rb_entry(node, struct file_extent_hole, node);
399                 ret = add_file_extent_hole(dst, hole->start, hole->len);
400                 if (ret)
401                         break;
402                 node = rb_next(node);
403         }
404         return ret;
405 }
406
407 static void free_file_extent_holes(struct rb_root *holes)
408 {
409         struct rb_node *node;
410         struct file_extent_hole *hole;
411
412         node = rb_first(holes);
413         while (node) {
414                 hole = rb_entry(node, struct file_extent_hole, node);
415                 rb_erase(node, holes);
416                 free(hole);
417                 node = rb_first(holes);
418         }
419 }
420
/*
 * Everything collected about one inode while a tree is walked.
 *
 * A record can be shared: get_inode_rec() clones it before modification
 * when refs is above 1, and free_inode_rec() only releases the memory
 * once refs drops to 0.
 */
struct inode_record {
        struct list_head backrefs;      /* struct inode_backref entries */
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;       /* inode has BTRFS_INODE_NODATASUM */
        int errors;                     /* I_ERR_* bits */

        /* ino is the lookup key; the rest is read from the inode item. */
        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        u32 found_link;
        u64 found_size;
        u64 extent_start;               /* (u64)-1 in a new record */
        u64 extent_end;
        struct rb_root holes;           /* struct file_extent_hole nodes */
        struct list_head orphan_extents; /* struct orphan_data_extent */

        u32 refs;                       /* number of sharers, see above */
};
448
/*
 * Error bits OR-ed into inode_record.errors.
 * print_inode_error() prints one message per bit.
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
464
465 struct root_backref {
466         struct list_head list;
467         unsigned int found_dir_item:1;
468         unsigned int found_dir_index:1;
469         unsigned int found_back_ref:1;
470         unsigned int found_forward_ref:1;
471         unsigned int reachable:1;
472         int errors;
473         u64 ref_root;
474         u64 dir;
475         u64 index;
476         u16 namelen;
477         char name[0];
478 };
479
/*
 * Per-root bookkeeping, linked into a cache tree through "cache".
 * NOTE(review): backrefs presumably holds struct root_backref entries --
 * confirm against the code that adds them.
 */
struct root_record {
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
487
/*
 * Cache tree entry that carries an opaque pointer.  get_inode_rec() uses
 * it with cache.start set to the inode number, cache.size set to 1 and
 * data pointing to the struct inode_record.
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
492
/*
 * Caches attached to one node while walking a tree.
 * inode_cache holds ptr_node entries pointing to inode records; current
 * is the record that process_inode_item() fills in.
 * NOTE(review): refs presumably counts the trees sharing the node --
 * confirm.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
500
/* A start/size pair. NOTE(review): presumably describes one block -- confirm. */
struct block_info {
        u64 start;
        u32 size;
};
505
/*
 * State of a tree walk.  nodes has one slot per possible tree level
 * (BTRFS_MAX_LEVEL entries).
 * NOTE(review): active_node presumably indexes nodes -- confirm.
 */
struct walk_control {
        struct cache_tree shared;
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        int active_node;
        int root_level;
};
512
/*
 * A key together with a root id, linked into a list.
 * NOTE(review): presumably queued on the file-scope delete_items list --
 * confirm.
 */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
518
/* Forward declaration; the definition is not in the nearby code. */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
520
521 static void record_root_in_trans(struct btrfs_trans_handle *trans,
522                                  struct btrfs_root *root)
523 {
524         if (root->last_trans != trans->transid) {
525                 root->track_dirty = 1;
526                 root->last_trans = trans->transid;
527                 root->commit_root = root->node;
528                 extent_buffer_get(root->node);
529         }
530 }
531
532 static u8 imode_to_type(u32 imode)
533 {
534 #define S_SHIFT 12
535         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
536                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
537                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
538                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
539                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
540                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
541                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
542                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
543         };
544
545         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
546 #undef S_SHIFT
547 }
548
549 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
550 {
551         struct device_record *rec1;
552         struct device_record *rec2;
553
554         rec1 = rb_entry(node1, struct device_record, node);
555         rec2 = rb_entry(node2, struct device_record, node);
556         if (rec1->devid > rec2->devid)
557                 return -1;
558         else if (rec1->devid < rec2->devid)
559                 return 1;
560         else
561                 return 0;
562 }
563
564 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
565 {
566         struct inode_record *rec;
567         struct inode_backref *backref;
568         struct inode_backref *orig;
569         struct orphan_data_extent *src_orphan;
570         struct orphan_data_extent *dst_orphan;
571         size_t size;
572         int ret;
573
574         rec = malloc(sizeof(*rec));
575         memcpy(rec, orig_rec, sizeof(*rec));
576         rec->refs = 1;
577         INIT_LIST_HEAD(&rec->backrefs);
578         INIT_LIST_HEAD(&rec->orphan_extents);
579         rec->holes = RB_ROOT;
580
581         list_for_each_entry(orig, &orig_rec->backrefs, list) {
582                 size = sizeof(*orig) + orig->namelen + 1;
583                 backref = malloc(size);
584                 memcpy(backref, orig, size);
585                 list_add_tail(&backref->list, &rec->backrefs);
586         }
587         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
588                 dst_orphan = malloc(sizeof(*dst_orphan));
589                 /* TODO: Fix all the HELL of un-catched -ENOMEM case */
590                 BUG_ON(!dst_orphan);
591                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
592                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
593         }
594         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
595         BUG_ON(ret < 0);
596
597         return rec;
598 }
599
600 static void print_orphan_data_extents(struct list_head *orphan_extents,
601                                       u64 objectid)
602 {
603         struct orphan_data_extent *orphan;
604
605         if (list_empty(orphan_extents))
606                 return;
607         printf("The following data extent is lost in tree %llu:\n",
608                objectid);
609         list_for_each_entry(orphan, orphan_extents, list) {
610                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
611                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
612                        orphan->disk_len);
613         }
614 }
615
616 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
617 {
618         u64 root_objectid = root->root_key.objectid;
619         int errors = rec->errors;
620
621         if (!errors)
622                 return;
623         /* reloc root errors, we print its corresponding fs root objectid*/
624         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
625                 root_objectid = root->root_key.offset;
626                 fprintf(stderr, "reloc");
627         }
628         fprintf(stderr, "root %llu inode %llu errors %x",
629                 (unsigned long long) root_objectid,
630                 (unsigned long long) rec->ino, rec->errors);
631
632         if (errors & I_ERR_NO_INODE_ITEM)
633                 fprintf(stderr, ", no inode item");
634         if (errors & I_ERR_NO_ORPHAN_ITEM)
635                 fprintf(stderr, ", no orphan item");
636         if (errors & I_ERR_DUP_INODE_ITEM)
637                 fprintf(stderr, ", dup inode item");
638         if (errors & I_ERR_DUP_DIR_INDEX)
639                 fprintf(stderr, ", dup dir index");
640         if (errors & I_ERR_ODD_DIR_ITEM)
641                 fprintf(stderr, ", odd dir item");
642         if (errors & I_ERR_ODD_FILE_EXTENT)
643                 fprintf(stderr, ", odd file extent");
644         if (errors & I_ERR_BAD_FILE_EXTENT)
645                 fprintf(stderr, ", bad file extent");
646         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
647                 fprintf(stderr, ", file extent overlap");
648         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
649                 fprintf(stderr, ", file extent discount");
650         if (errors & I_ERR_DIR_ISIZE_WRONG)
651                 fprintf(stderr, ", dir isize wrong");
652         if (errors & I_ERR_FILE_NBYTES_WRONG)
653                 fprintf(stderr, ", nbytes wrong");
654         if (errors & I_ERR_ODD_CSUM_ITEM)
655                 fprintf(stderr, ", odd csum item");
656         if (errors & I_ERR_SOME_CSUM_MISSING)
657                 fprintf(stderr, ", some csum missing");
658         if (errors & I_ERR_LINK_COUNT_WRONG)
659                 fprintf(stderr, ", link count wrong");
660         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
661                 fprintf(stderr, ", orphan file extent");
662         fprintf(stderr, "\n");
663         /* Print the orphan extents if needed */
664         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
665                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
666
667         /* Print the holes if needed */
668         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
669                 struct file_extent_hole *hole;
670                 struct rb_node *node;
671                 int found = 0;
672
673                 node = rb_first(&rec->holes);
674                 fprintf(stderr, "Found file extent holes:\n");
675                 while (node) {
676                         found = 1;
677                         hole = rb_entry(node, struct file_extent_hole, node);
678                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
679                                 hole->start, hole->len);
680                         node = rb_next(node);
681                 }
682                 if (!found)
683                         fprintf(stderr, "\tstart: 0, len: %llu\n",
684                                 round_up(rec->isize, root->sectorsize));
685         }
686 }
687
688 static void print_ref_error(int errors)
689 {
690         if (errors & REF_ERR_NO_DIR_ITEM)
691                 fprintf(stderr, ", no dir item");
692         if (errors & REF_ERR_NO_DIR_INDEX)
693                 fprintf(stderr, ", no dir index");
694         if (errors & REF_ERR_NO_INODE_REF)
695                 fprintf(stderr, ", no inode ref");
696         if (errors & REF_ERR_DUP_DIR_ITEM)
697                 fprintf(stderr, ", dup dir item");
698         if (errors & REF_ERR_DUP_DIR_INDEX)
699                 fprintf(stderr, ", dup dir index");
700         if (errors & REF_ERR_DUP_INODE_REF)
701                 fprintf(stderr, ", dup inode ref");
702         if (errors & REF_ERR_INDEX_UNMATCH)
703                 fprintf(stderr, ", index unmatch");
704         if (errors & REF_ERR_FILETYPE_UNMATCH)
705                 fprintf(stderr, ", filetype unmatch");
706         if (errors & REF_ERR_NAME_TOO_LONG)
707                 fprintf(stderr, ", name too long");
708         if (errors & REF_ERR_NO_ROOT_REF)
709                 fprintf(stderr, ", no root ref");
710         if (errors & REF_ERR_NO_ROOT_BACKREF)
711                 fprintf(stderr, ", no root backref");
712         if (errors & REF_ERR_DUP_ROOT_REF)
713                 fprintf(stderr, ", dup root ref");
714         if (errors & REF_ERR_DUP_ROOT_BACKREF)
715                 fprintf(stderr, ", dup root backref");
716         fprintf(stderr, "\n");
717 }
718
719 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
720                                           u64 ino, int mod)
721 {
722         struct ptr_node *node;
723         struct cache_extent *cache;
724         struct inode_record *rec = NULL;
725         int ret;
726
727         cache = lookup_cache_extent(inode_cache, ino, 1);
728         if (cache) {
729                 node = container_of(cache, struct ptr_node, cache);
730                 rec = node->data;
731                 if (mod && rec->refs > 1) {
732                         node->data = clone_inode_rec(rec);
733                         rec->refs--;
734                         rec = node->data;
735                 }
736         } else if (mod) {
737                 rec = calloc(1, sizeof(*rec));
738                 rec->ino = ino;
739                 rec->extent_start = (u64)-1;
740                 rec->refs = 1;
741                 INIT_LIST_HEAD(&rec->backrefs);
742                 INIT_LIST_HEAD(&rec->orphan_extents);
743                 rec->holes = RB_ROOT;
744
745                 node = malloc(sizeof(*node));
746                 node->cache.start = ino;
747                 node->cache.size = 1;
748                 node->data = rec;
749
750                 if (ino == BTRFS_FREE_INO_OBJECTID)
751                         rec->found_link = 1;
752
753                 ret = insert_cache_extent(inode_cache, &node->cache);
754                 BUG_ON(ret);
755         }
756         return rec;
757 }
758
759 static void free_orphan_data_extents(struct list_head *orphan_extents)
760 {
761         struct orphan_data_extent *orphan;
762
763         while (!list_empty(orphan_extents)) {
764                 orphan = list_entry(orphan_extents->next,
765                                     struct orphan_data_extent, list);
766                 list_del(&orphan->list);
767                 free(orphan);
768         }
769 }
770
771 static void free_inode_rec(struct inode_record *rec)
772 {
773         struct inode_backref *backref;
774
775         if (--rec->refs > 0)
776                 return;
777
778         while (!list_empty(&rec->backrefs)) {
779                 backref = list_entry(rec->backrefs.next,
780                                      struct inode_backref, list);
781                 list_del(&backref->list);
782                 free(backref);
783         }
784         free_orphan_data_extents(&rec->orphan_extents);
785         free_file_extent_holes(&rec->holes);
786         free(rec);
787 }
788
789 static int can_free_inode_rec(struct inode_record *rec)
790 {
791         if (!rec->errors && rec->checked && rec->found_inode_item &&
792             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
793                 return 1;
794         return 0;
795 }
796
797 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
798                                  struct inode_record *rec)
799 {
800         struct cache_extent *cache;
801         struct inode_backref *tmp, *backref;
802         struct ptr_node *node;
803         unsigned char filetype;
804
805         if (!rec->found_inode_item)
806                 return;
807
808         filetype = imode_to_type(rec->imode);
809         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
810                 if (backref->found_dir_item && backref->found_dir_index) {
811                         if (backref->filetype != filetype)
812                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
813                         if (!backref->errors && backref->found_inode_ref) {
814                                 list_del(&backref->list);
815                                 free(backref);
816                         }
817                 }
818         }
819
820         if (!rec->checked || rec->merging)
821                 return;
822
823         if (S_ISDIR(rec->imode)) {
824                 if (rec->found_size != rec->isize)
825                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
826                 if (rec->found_file_extent)
827                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
828         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
829                 if (rec->found_dir_item)
830                         rec->errors |= I_ERR_ODD_DIR_ITEM;
831                 if (rec->found_size != rec->nbytes)
832                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
833                 if (rec->nlink > 0 && !no_holes &&
834                     (rec->extent_end < rec->isize ||
835                      first_extent_gap(&rec->holes) < rec->isize))
836                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
837         }
838
839         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
840                 if (rec->found_csum_item && rec->nodatasum)
841                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
842                 if (rec->some_csum_missing && !rec->nodatasum)
843                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
844         }
845
846         BUG_ON(rec->refs != 1);
847         if (can_free_inode_rec(rec)) {
848                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
849                 node = container_of(cache, struct ptr_node, cache);
850                 BUG_ON(node->data != rec);
851                 remove_cache_extent(inode_cache, &node->cache);
852                 free(node);
853                 free_inode_rec(rec);
854         }
855 }
856
857 static int check_orphan_item(struct btrfs_root *root, u64 ino)
858 {
859         struct btrfs_path path;
860         struct btrfs_key key;
861         int ret;
862
863         key.objectid = BTRFS_ORPHAN_OBJECTID;
864         key.type = BTRFS_ORPHAN_ITEM_KEY;
865         key.offset = ino;
866
867         btrfs_init_path(&path);
868         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
869         btrfs_release_path(&path);
870         if (ret > 0)
871                 ret = -ENOENT;
872         return ret;
873 }
874
875 static int process_inode_item(struct extent_buffer *eb,
876                               int slot, struct btrfs_key *key,
877                               struct shared_node *active_node)
878 {
879         struct inode_record *rec;
880         struct btrfs_inode_item *item;
881
882         rec = active_node->current;
883         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
884         if (rec->found_inode_item) {
885                 rec->errors |= I_ERR_DUP_INODE_ITEM;
886                 return 1;
887         }
888         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
889         rec->nlink = btrfs_inode_nlink(eb, item);
890         rec->isize = btrfs_inode_size(eb, item);
891         rec->nbytes = btrfs_inode_nbytes(eb, item);
892         rec->imode = btrfs_inode_mode(eb, item);
893         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
894                 rec->nodatasum = 1;
895         rec->found_inode_item = 1;
896         if (rec->nlink == 0)
897                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
898         maybe_free_inode_rec(&active_node->inode_cache, rec);
899         return 0;
900 }
901
902 static struct inode_backref *get_inode_backref(struct inode_record *rec,
903                                                 const char *name,
904                                                 int namelen, u64 dir)
905 {
906         struct inode_backref *backref;
907
908         list_for_each_entry(backref, &rec->backrefs, list) {
909                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
910                         break;
911                 if (backref->dir != dir || backref->namelen != namelen)
912                         continue;
913                 if (memcmp(name, backref->name, namelen))
914                         continue;
915                 return backref;
916         }
917
918         backref = malloc(sizeof(*backref) + namelen + 1);
919         memset(backref, 0, sizeof(*backref));
920         backref->dir = dir;
921         backref->namelen = namelen;
922         memcpy(backref->name, name, namelen);
923         backref->name[namelen] = '\0';
924         list_add_tail(&backref->list, &rec->backrefs);
925         return backref;
926 }
927
928 static int add_inode_backref(struct cache_tree *inode_cache,
929                              u64 ino, u64 dir, u64 index,
930                              const char *name, int namelen,
931                              int filetype, int itemtype, int errors)
932 {
933         struct inode_record *rec;
934         struct inode_backref *backref;
935
936         rec = get_inode_rec(inode_cache, ino, 1);
937         backref = get_inode_backref(rec, name, namelen, dir);
938         if (errors)
939                 backref->errors |= errors;
940         if (itemtype == BTRFS_DIR_INDEX_KEY) {
941                 if (backref->found_dir_index)
942                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
943                 if (backref->found_inode_ref && backref->index != index)
944                         backref->errors |= REF_ERR_INDEX_UNMATCH;
945                 if (backref->found_dir_item && backref->filetype != filetype)
946                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
947
948                 backref->index = index;
949                 backref->filetype = filetype;
950                 backref->found_dir_index = 1;
951         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
952                 rec->found_link++;
953                 if (backref->found_dir_item)
954                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
955                 if (backref->found_dir_index && backref->filetype != filetype)
956                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
957
958                 backref->filetype = filetype;
959                 backref->found_dir_item = 1;
960         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
961                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
962                 if (backref->found_inode_ref)
963                         backref->errors |= REF_ERR_DUP_INODE_REF;
964                 if (backref->found_dir_index && backref->index != index)
965                         backref->errors |= REF_ERR_INDEX_UNMATCH;
966                 else
967                         backref->index = index;
968
969                 backref->ref_type = itemtype;
970                 backref->found_inode_ref = 1;
971         } else {
972                 BUG_ON(1);
973         }
974
975         maybe_free_inode_rec(inode_cache, rec);
976         return 0;
977 }
978
979 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
980                             struct cache_tree *dst_cache)
981 {
982         struct inode_backref *backref;
983         u32 dir_count = 0;
984         int ret = 0;
985
986         dst->merging = 1;
987         list_for_each_entry(backref, &src->backrefs, list) {
988                 if (backref->found_dir_index) {
989                         add_inode_backref(dst_cache, dst->ino, backref->dir,
990                                         backref->index, backref->name,
991                                         backref->namelen, backref->filetype,
992                                         BTRFS_DIR_INDEX_KEY, backref->errors);
993                 }
994                 if (backref->found_dir_item) {
995                         dir_count++;
996                         add_inode_backref(dst_cache, dst->ino,
997                                         backref->dir, 0, backref->name,
998                                         backref->namelen, backref->filetype,
999                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1000                 }
1001                 if (backref->found_inode_ref) {
1002                         add_inode_backref(dst_cache, dst->ino,
1003                                         backref->dir, backref->index,
1004                                         backref->name, backref->namelen, 0,
1005                                         backref->ref_type, backref->errors);
1006                 }
1007         }
1008
1009         if (src->found_dir_item)
1010                 dst->found_dir_item = 1;
1011         if (src->found_file_extent)
1012                 dst->found_file_extent = 1;
1013         if (src->found_csum_item)
1014                 dst->found_csum_item = 1;
1015         if (src->some_csum_missing)
1016                 dst->some_csum_missing = 1;
1017         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1018                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1019                 if (ret < 0)
1020                         return ret;
1021         }
1022
1023         BUG_ON(src->found_link < dir_count);
1024         dst->found_link += src->found_link - dir_count;
1025         dst->found_size += src->found_size;
1026         if (src->extent_start != (u64)-1) {
1027                 if (dst->extent_start == (u64)-1) {
1028                         dst->extent_start = src->extent_start;
1029                         dst->extent_end = src->extent_end;
1030                 } else {
1031                         if (dst->extent_end > src->extent_start)
1032                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1033                         else if (dst->extent_end < src->extent_start) {
1034                                 ret = add_file_extent_hole(&dst->holes,
1035                                         dst->extent_end,
1036                                         src->extent_start - dst->extent_end);
1037                         }
1038                         if (dst->extent_end < src->extent_end)
1039                                 dst->extent_end = src->extent_end;
1040                 }
1041         }
1042
1043         dst->errors |= src->errors;
1044         if (src->found_inode_item) {
1045                 if (!dst->found_inode_item) {
1046                         dst->nlink = src->nlink;
1047                         dst->isize = src->isize;
1048                         dst->nbytes = src->nbytes;
1049                         dst->imode = src->imode;
1050                         dst->nodatasum = src->nodatasum;
1051                         dst->found_inode_item = 1;
1052                 } else {
1053                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1054                 }
1055         }
1056         dst->merging = 0;
1057
1058         return 0;
1059 }
1060
1061 static int splice_shared_node(struct shared_node *src_node,
1062                               struct shared_node *dst_node)
1063 {
1064         struct cache_extent *cache;
1065         struct ptr_node *node, *ins;
1066         struct cache_tree *src, *dst;
1067         struct inode_record *rec, *conflict;
1068         u64 current_ino = 0;
1069         int splice = 0;
1070         int ret;
1071
1072         if (--src_node->refs == 0)
1073                 splice = 1;
1074         if (src_node->current)
1075                 current_ino = src_node->current->ino;
1076
1077         src = &src_node->root_cache;
1078         dst = &dst_node->root_cache;
1079 again:
1080         cache = search_cache_extent(src, 0);
1081         while (cache) {
1082                 node = container_of(cache, struct ptr_node, cache);
1083                 rec = node->data;
1084                 cache = next_cache_extent(cache);
1085
1086                 if (splice) {
1087                         remove_cache_extent(src, &node->cache);
1088                         ins = node;
1089                 } else {
1090                         ins = malloc(sizeof(*ins));
1091                         ins->cache.start = node->cache.start;
1092                         ins->cache.size = node->cache.size;
1093                         ins->data = rec;
1094                         rec->refs++;
1095                 }
1096                 ret = insert_cache_extent(dst, &ins->cache);
1097                 if (ret == -EEXIST) {
1098                         conflict = get_inode_rec(dst, rec->ino, 1);
1099                         merge_inode_recs(rec, conflict, dst);
1100                         if (rec->checked) {
1101                                 conflict->checked = 1;
1102                                 if (dst_node->current == conflict)
1103                                         dst_node->current = NULL;
1104                         }
1105                         maybe_free_inode_rec(dst, conflict);
1106                         free_inode_rec(rec);
1107                         free(ins);
1108                 } else {
1109                         BUG_ON(ret);
1110                 }
1111         }
1112
1113         if (src == &src_node->root_cache) {
1114                 src = &src_node->inode_cache;
1115                 dst = &dst_node->inode_cache;
1116                 goto again;
1117         }
1118
1119         if (current_ino > 0 && (!dst_node->current ||
1120             current_ino > dst_node->current->ino)) {
1121                 if (dst_node->current) {
1122                         dst_node->current->checked = 1;
1123                         maybe_free_inode_rec(dst, dst_node->current);
1124                 }
1125                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1126         }
1127         return 0;
1128 }
1129
1130 static void free_inode_ptr(struct cache_extent *cache)
1131 {
1132         struct ptr_node *node;
1133         struct inode_record *rec;
1134
1135         node = container_of(cache, struct ptr_node, cache);
1136         rec = node->data;
1137         free_inode_rec(rec);
1138         free(node);
1139 }
1140
1141 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1142
1143 static struct shared_node *find_shared_node(struct cache_tree *shared,
1144                                             u64 bytenr)
1145 {
1146         struct cache_extent *cache;
1147         struct shared_node *node;
1148
1149         cache = lookup_cache_extent(shared, bytenr, 1);
1150         if (cache) {
1151                 node = container_of(cache, struct shared_node, cache);
1152                 return node;
1153         }
1154         return NULL;
1155 }
1156
1157 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1158 {
1159         int ret;
1160         struct shared_node *node;
1161
1162         node = calloc(1, sizeof(*node));
1163         node->cache.start = bytenr;
1164         node->cache.size = 1;
1165         cache_tree_init(&node->root_cache);
1166         cache_tree_init(&node->inode_cache);
1167         node->refs = refs;
1168
1169         ret = insert_cache_extent(shared, &node->cache);
1170         BUG_ON(ret);
1171         return 0;
1172 }
1173
1174 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1175                              struct walk_control *wc, int level)
1176 {
1177         struct shared_node *node;
1178         struct shared_node *dest;
1179
1180         if (level == wc->active_node)
1181                 return 0;
1182
1183         BUG_ON(wc->active_node <= level);
1184         node = find_shared_node(&wc->shared, bytenr);
1185         if (!node) {
1186                 add_shared_node(&wc->shared, bytenr, refs);
1187                 node = find_shared_node(&wc->shared, bytenr);
1188                 wc->nodes[level] = node;
1189                 wc->active_node = level;
1190                 return 0;
1191         }
1192
1193         if (wc->root_level == wc->active_node &&
1194             btrfs_root_refs(&root->root_item) == 0) {
1195                 if (--node->refs == 0) {
1196                         free_inode_recs_tree(&node->root_cache);
1197                         free_inode_recs_tree(&node->inode_cache);
1198                         remove_cache_extent(&wc->shared, &node->cache);
1199                         free(node);
1200                 }
1201                 return 1;
1202         }
1203
1204         dest = wc->nodes[wc->active_node];
1205         splice_shared_node(node, dest);
1206         if (node->refs == 0) {
1207                 remove_cache_extent(&wc->shared, &node->cache);
1208                 free(node);
1209         }
1210         return 1;
1211 }
1212
1213 static int leave_shared_node(struct btrfs_root *root,
1214                              struct walk_control *wc, int level)
1215 {
1216         struct shared_node *node;
1217         struct shared_node *dest;
1218         int i;
1219
1220         if (level == wc->root_level)
1221                 return 0;
1222
1223         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1224                 if (wc->nodes[i])
1225                         break;
1226         }
1227         BUG_ON(i >= BTRFS_MAX_LEVEL);
1228
1229         node = wc->nodes[wc->active_node];
1230         wc->nodes[wc->active_node] = NULL;
1231         wc->active_node = i;
1232
1233         dest = wc->nodes[wc->active_node];
1234         if (wc->active_node < wc->root_level ||
1235             btrfs_root_refs(&root->root_item) > 0) {
1236                 BUG_ON(node->refs <= 1);
1237                 splice_shared_node(node, dest);
1238         } else {
1239                 BUG_ON(node->refs < 2);
1240                 node->refs--;
1241         }
1242         return 0;
1243 }
1244
1245 /*
1246  * Returns:
1247  * < 0 - on error
1248  * 1   - if the root with id child_root_id is a child of root parent_root_id
1249  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1250  *       has other root(s) as parent(s)
1251  * 2   - if the root child_root_id doesn't have any parent roots
1252  */
1253 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1254                          u64 child_root_id)
1255 {
1256         struct btrfs_path path;
1257         struct btrfs_key key;
1258         struct extent_buffer *leaf;
1259         int has_parent = 0;
1260         int ret;
1261
1262         btrfs_init_path(&path);
1263
1264         key.objectid = parent_root_id;
1265         key.type = BTRFS_ROOT_REF_KEY;
1266         key.offset = child_root_id;
1267         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1268                                 0, 0);
1269         if (ret < 0)
1270                 return ret;
1271         btrfs_release_path(&path);
1272         if (!ret)
1273                 return 1;
1274
1275         key.objectid = child_root_id;
1276         key.type = BTRFS_ROOT_BACKREF_KEY;
1277         key.offset = 0;
1278         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1279                                 0, 0);
1280         if (ret < 0)
1281                 goto out;
1282
1283         while (1) {
1284                 leaf = path.nodes[0];
1285                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1286                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1287                         if (ret)
1288                                 break;
1289                         leaf = path.nodes[0];
1290                 }
1291
1292                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1293                 if (key.objectid != child_root_id ||
1294                     key.type != BTRFS_ROOT_BACKREF_KEY)
1295                         break;
1296
1297                 has_parent = 1;
1298
1299                 if (key.offset == parent_root_id) {
1300                         btrfs_release_path(&path);
1301                         return 1;
1302                 }
1303
1304                 path.slots[0]++;
1305         }
1306 out:
1307         btrfs_release_path(&path);
1308         if (ret < 0)
1309                 return ret;
1310         return has_parent ? 0 : 2;
1311 }
1312
1313 static int process_dir_item(struct btrfs_root *root,
1314                             struct extent_buffer *eb,
1315                             int slot, struct btrfs_key *key,
1316                             struct shared_node *active_node)
1317 {
1318         u32 total;
1319         u32 cur = 0;
1320         u32 len;
1321         u32 name_len;
1322         u32 data_len;
1323         int error;
1324         int nritems = 0;
1325         int filetype;
1326         struct btrfs_dir_item *di;
1327         struct inode_record *rec;
1328         struct cache_tree *root_cache;
1329         struct cache_tree *inode_cache;
1330         struct btrfs_key location;
1331         char namebuf[BTRFS_NAME_LEN];
1332
1333         root_cache = &active_node->root_cache;
1334         inode_cache = &active_node->inode_cache;
1335         rec = active_node->current;
1336         rec->found_dir_item = 1;
1337
1338         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1339         total = btrfs_item_size_nr(eb, slot);
1340         while (cur < total) {
1341                 nritems++;
1342                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1343                 name_len = btrfs_dir_name_len(eb, di);
1344                 data_len = btrfs_dir_data_len(eb, di);
1345                 filetype = btrfs_dir_type(eb, di);
1346
1347                 rec->found_size += name_len;
1348                 if (name_len <= BTRFS_NAME_LEN) {
1349                         len = name_len;
1350                         error = 0;
1351                 } else {
1352                         len = BTRFS_NAME_LEN;
1353                         error = REF_ERR_NAME_TOO_LONG;
1354                 }
1355                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1356
1357                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1358                         add_inode_backref(inode_cache, location.objectid,
1359                                           key->objectid, key->offset, namebuf,
1360                                           len, filetype, key->type, error);
1361                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1362                         add_inode_backref(root_cache, location.objectid,
1363                                           key->objectid, key->offset,
1364                                           namebuf, len, filetype,
1365                                           key->type, error);
1366                 } else {
1367                         fprintf(stderr, "invalid location in dir item %u\n",
1368                                 location.type);
1369                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1370                                           key->objectid, key->offset, namebuf,
1371                                           len, filetype, key->type, error);
1372                 }
1373
1374                 len = sizeof(*di) + name_len + data_len;
1375                 di = (struct btrfs_dir_item *)((char *)di + len);
1376                 cur += len;
1377         }
1378         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1379                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1380
1381         return 0;
1382 }
1383
1384 static int process_inode_ref(struct extent_buffer *eb,
1385                              int slot, struct btrfs_key *key,
1386                              struct shared_node *active_node)
1387 {
1388         u32 total;
1389         u32 cur = 0;
1390         u32 len;
1391         u32 name_len;
1392         u64 index;
1393         int error;
1394         struct cache_tree *inode_cache;
1395         struct btrfs_inode_ref *ref;
1396         char namebuf[BTRFS_NAME_LEN];
1397
1398         inode_cache = &active_node->inode_cache;
1399
1400         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1401         total = btrfs_item_size_nr(eb, slot);
1402         while (cur < total) {
1403                 name_len = btrfs_inode_ref_name_len(eb, ref);
1404                 index = btrfs_inode_ref_index(eb, ref);
1405                 if (name_len <= BTRFS_NAME_LEN) {
1406                         len = name_len;
1407                         error = 0;
1408                 } else {
1409                         len = BTRFS_NAME_LEN;
1410                         error = REF_ERR_NAME_TOO_LONG;
1411                 }
1412                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1413                 add_inode_backref(inode_cache, key->objectid, key->offset,
1414                                   index, namebuf, len, 0, key->type, error);
1415
1416                 len = sizeof(*ref) + name_len;
1417                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1418                 cur += len;
1419         }
1420         return 0;
1421 }
1422
1423 static int process_inode_extref(struct extent_buffer *eb,
1424                                 int slot, struct btrfs_key *key,
1425                                 struct shared_node *active_node)
1426 {
1427         u32 total;
1428         u32 cur = 0;
1429         u32 len;
1430         u32 name_len;
1431         u64 index;
1432         u64 parent;
1433         int error;
1434         struct cache_tree *inode_cache;
1435         struct btrfs_inode_extref *extref;
1436         char namebuf[BTRFS_NAME_LEN];
1437
1438         inode_cache = &active_node->inode_cache;
1439
1440         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1441         total = btrfs_item_size_nr(eb, slot);
1442         while (cur < total) {
1443                 name_len = btrfs_inode_extref_name_len(eb, extref);
1444                 index = btrfs_inode_extref_index(eb, extref);
1445                 parent = btrfs_inode_extref_parent(eb, extref);
1446                 if (name_len <= BTRFS_NAME_LEN) {
1447                         len = name_len;
1448                         error = 0;
1449                 } else {
1450                         len = BTRFS_NAME_LEN;
1451                         error = REF_ERR_NAME_TOO_LONG;
1452                 }
1453                 read_extent_buffer(eb, namebuf,
1454                                    (unsigned long)(extref + 1), len);
1455                 add_inode_backref(inode_cache, key->objectid, parent,
1456                                   index, namebuf, len, 0, key->type, error);
1457
1458                 len = sizeof(*extref) + name_len;
1459                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1460                 cur += len;
1461         }
1462         return 0;
1463
1464 }
1465
1466 static int count_csum_range(struct btrfs_root *root, u64 start,
1467                             u64 len, u64 *found)
1468 {
1469         struct btrfs_key key;
1470         struct btrfs_path path;
1471         struct extent_buffer *leaf;
1472         int ret;
1473         size_t size;
1474         *found = 0;
1475         u64 csum_end;
1476         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1477
1478         btrfs_init_path(&path);
1479
1480         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1481         key.offset = start;
1482         key.type = BTRFS_EXTENT_CSUM_KEY;
1483
1484         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1485                                 &key, &path, 0, 0);
1486         if (ret < 0)
1487                 goto out;
1488         if (ret > 0 && path.slots[0] > 0) {
1489                 leaf = path.nodes[0];
1490                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1491                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1492                     key.type == BTRFS_EXTENT_CSUM_KEY)
1493                         path.slots[0]--;
1494         }
1495
1496         while (len > 0) {
1497                 leaf = path.nodes[0];
1498                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1499                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1500                         if (ret > 0)
1501                                 break;
1502                         else if (ret < 0)
1503                                 goto out;
1504                         leaf = path.nodes[0];
1505                 }
1506
1507                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1508                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1509                     key.type != BTRFS_EXTENT_CSUM_KEY)
1510                         break;
1511
1512                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1513                 if (key.offset >= start + len)
1514                         break;
1515
1516                 if (key.offset > start)
1517                         start = key.offset;
1518
1519                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1520                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1521                 if (csum_end > start) {
1522                         size = min(csum_end - start, len);
1523                         len -= size;
1524                         start += size;
1525                         *found += size;
1526                 }
1527
1528                 path.slots[0]++;
1529         }
1530 out:
1531         btrfs_release_path(&path);
1532         if (ret < 0)
1533                 return ret;
1534         return 0;
1535 }
1536
1537 static int process_file_extent(struct btrfs_root *root,
1538                                 struct extent_buffer *eb,
1539                                 int slot, struct btrfs_key *key,
1540                                 struct shared_node *active_node)
1541 {
1542         struct inode_record *rec;
1543         struct btrfs_file_extent_item *fi;
1544         u64 num_bytes = 0;
1545         u64 disk_bytenr = 0;
1546         u64 extent_offset = 0;
1547         u64 mask = root->sectorsize - 1;
1548         int extent_type;
1549         int ret;
1550
1551         rec = active_node->current;
1552         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1553         rec->found_file_extent = 1;
1554
1555         if (rec->extent_start == (u64)-1) {
1556                 rec->extent_start = key->offset;
1557                 rec->extent_end = key->offset;
1558         }
1559
1560         if (rec->extent_end > key->offset)
1561                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1562         else if (rec->extent_end < key->offset) {
1563                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1564                                            key->offset - rec->extent_end);
1565                 if (ret < 0)
1566                         return ret;
1567         }
1568
1569         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1570         extent_type = btrfs_file_extent_type(eb, fi);
1571
1572         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1573                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1574                 if (num_bytes == 0)
1575                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1576                 rec->found_size += num_bytes;
1577                 num_bytes = (num_bytes + mask) & ~mask;
1578         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1579                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1580                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1581                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1582                 extent_offset = btrfs_file_extent_offset(eb, fi);
1583                 if (num_bytes == 0 || (num_bytes & mask))
1584                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1585                 if (num_bytes + extent_offset >
1586                     btrfs_file_extent_ram_bytes(eb, fi))
1587                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1588                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1589                     (btrfs_file_extent_compression(eb, fi) ||
1590                      btrfs_file_extent_encryption(eb, fi) ||
1591                      btrfs_file_extent_other_encoding(eb, fi)))
1592                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1593                 if (disk_bytenr > 0)
1594                         rec->found_size += num_bytes;
1595         } else {
1596                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1597         }
1598         rec->extent_end = key->offset + num_bytes;
1599
1600         /*
1601          * The data reloc tree will copy full extents into its inode and then
1602          * copy the corresponding csums.  Because the extent it copied could be
1603          * a preallocated extent that hasn't been written to yet there may be no
1604          * csums to copy, ergo we won't have csums for our file extent.  This is
1605          * ok so just don't bother checking csums if the inode belongs to the
1606          * data reloc tree.
1607          */
1608         if (disk_bytenr > 0 &&
1609             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1610                 u64 found;
1611                 if (btrfs_file_extent_compression(eb, fi))
1612                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1613                 else
1614                         disk_bytenr += extent_offset;
1615
1616                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1617                 if (ret < 0)
1618                         return ret;
1619                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1620                         if (found > 0)
1621                                 rec->found_csum_item = 1;
1622                         if (found < num_bytes)
1623                                 rec->some_csum_missing = 1;
1624                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1625                         if (found > 0)
1626                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1627                 }
1628         }
1629         return 0;
1630 }
1631
1632 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1633                             struct walk_control *wc)
1634 {
1635         struct btrfs_key key;
1636         u32 nritems;
1637         int i;
1638         int ret = 0;
1639         struct cache_tree *inode_cache;
1640         struct shared_node *active_node;
1641
1642         if (wc->root_level == wc->active_node &&
1643             btrfs_root_refs(&root->root_item) == 0)
1644                 return 0;
1645
1646         active_node = wc->nodes[wc->active_node];
1647         inode_cache = &active_node->inode_cache;
1648         nritems = btrfs_header_nritems(eb);
1649         for (i = 0; i < nritems; i++) {
1650                 btrfs_item_key_to_cpu(eb, &key, i);
1651
1652                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1653                         continue;
1654                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1655                         continue;
1656
1657                 if (active_node->current == NULL ||
1658                     active_node->current->ino < key.objectid) {
1659                         if (active_node->current) {
1660                                 active_node->current->checked = 1;
1661                                 maybe_free_inode_rec(inode_cache,
1662                                                      active_node->current);
1663                         }
1664                         active_node->current = get_inode_rec(inode_cache,
1665                                                              key.objectid, 1);
1666                 }
1667                 switch (key.type) {
1668                 case BTRFS_DIR_ITEM_KEY:
1669                 case BTRFS_DIR_INDEX_KEY:
1670                         ret = process_dir_item(root, eb, i, &key, active_node);
1671                         break;
1672                 case BTRFS_INODE_REF_KEY:
1673                         ret = process_inode_ref(eb, i, &key, active_node);
1674                         break;
1675                 case BTRFS_INODE_EXTREF_KEY:
1676                         ret = process_inode_extref(eb, i, &key, active_node);
1677                         break;
1678                 case BTRFS_INODE_ITEM_KEY:
1679                         ret = process_inode_item(eb, i, &key, active_node);
1680                         break;
1681                 case BTRFS_EXTENT_DATA_KEY:
1682                         ret = process_file_extent(root, eb, i, &key,
1683                                                   active_node);
1684                         break;
1685                 default:
1686                         break;
1687                 };
1688         }
1689         return ret;
1690 }
1691
1692 static void reada_walk_down(struct btrfs_root *root,
1693                             struct extent_buffer *node, int slot)
1694 {
1695         u64 bytenr;
1696         u64 ptr_gen;
1697         u32 nritems;
1698         u32 blocksize;
1699         int i;
1700         int level;
1701
1702         level = btrfs_header_level(node);
1703         if (level != 1)
1704                 return;
1705
1706         nritems = btrfs_header_nritems(node);
1707         blocksize = btrfs_level_size(root, level - 1);
1708         for (i = slot; i < nritems; i++) {
1709                 bytenr = btrfs_node_blockptr(node, i);
1710                 ptr_gen = btrfs_node_ptr_generation(node, i);
1711                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1712         }
1713 }
1714
1715 /*
1716  * Check the child node/leaf by the following condition:
1717  * 1. the first item key of the node/leaf should be the same with the one
1718  *    in parent.
1719  * 2. block in parent node should match the child node/leaf.
1720  * 3. generation of parent node and child's header should be consistent.
1721  *
1722  * Or the child node/leaf pointed by the key in parent is not valid.
1723  *
1724  * We hope to check leaf owner too, but since subvol may share leaves,
1725  * which makes leaf owner check not so strong, key check should be
1726  * sufficient enough for that case.
1727  */
1728 static int check_child_node(struct btrfs_root *root,
1729                             struct extent_buffer *parent, int slot,
1730                             struct extent_buffer *child)
1731 {
1732         struct btrfs_key parent_key;
1733         struct btrfs_key child_key;
1734         int ret = 0;
1735
1736         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1737         if (btrfs_header_level(child) == 0)
1738                 btrfs_item_key_to_cpu(child, &child_key, 0);
1739         else
1740                 btrfs_node_key_to_cpu(child, &child_key, 0);
1741
1742         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1743                 ret = -EINVAL;
1744                 fprintf(stderr,
1745                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1746                         parent_key.objectid, parent_key.type, parent_key.offset,
1747                         child_key.objectid, child_key.type, child_key.offset);
1748         }
1749         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1750                 ret = -EINVAL;
1751                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1752                         btrfs_node_blockptr(parent, slot),
1753                         btrfs_header_bytenr(child));
1754         }
1755         if (btrfs_node_ptr_generation(parent, slot) !=
1756             btrfs_header_generation(child)) {
1757                 ret = -EINVAL;
1758                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1759                         btrfs_header_generation(child),
1760                         btrfs_node_ptr_generation(parent, slot));
1761         }
1762         return ret;
1763 }
1764
1765 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1766                           struct walk_control *wc, int *level)
1767 {
1768         enum btrfs_tree_block_status status;
1769         u64 bytenr;
1770         u64 ptr_gen;
1771         struct extent_buffer *next;
1772         struct extent_buffer *cur;
1773         u32 blocksize;
1774         int ret, err = 0;
1775         u64 refs;
1776
1777         WARN_ON(*level < 0);
1778         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1779         ret = btrfs_lookup_extent_info(NULL, root,
1780                                        path->nodes[*level]->start,
1781                                        *level, 1, &refs, NULL);
1782         if (ret < 0) {
1783                 err = ret;
1784                 goto out;
1785         }
1786
1787         if (refs > 1) {
1788                 ret = enter_shared_node(root, path->nodes[*level]->start,
1789                                         refs, wc, *level);
1790                 if (ret > 0) {
1791                         err = ret;
1792                         goto out;
1793                 }
1794         }
1795
1796         while (*level >= 0) {
1797                 WARN_ON(*level < 0);
1798                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1799                 cur = path->nodes[*level];
1800
1801                 if (btrfs_header_level(cur) != *level)
1802                         WARN_ON(1);
1803
1804                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1805                         break;
1806                 if (*level == 0) {
1807                         ret = process_one_leaf(root, cur, wc);
1808                         if (ret < 0)
1809                                 err = ret;
1810                         break;
1811                 }
1812                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1813                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1814                 blocksize = btrfs_level_size(root, *level - 1);
1815                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1816                                                1, &refs, NULL);
1817                 if (ret < 0)
1818                         refs = 0;
1819
1820                 if (refs > 1) {
1821                         ret = enter_shared_node(root, bytenr, refs,
1822                                                 wc, *level - 1);
1823                         if (ret > 0) {
1824                                 path->slots[*level]++;
1825                                 continue;
1826                         }
1827                 }
1828
1829                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1830                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1831                         free_extent_buffer(next);
1832                         reada_walk_down(root, cur, path->slots[*level]);
1833                         next = read_tree_block(root, bytenr, blocksize,
1834                                                ptr_gen);
1835                         if (!extent_buffer_uptodate(next)) {
1836                                 struct btrfs_key node_key;
1837
1838                                 btrfs_node_key_to_cpu(path->nodes[*level],
1839                                                       &node_key,
1840                                                       path->slots[*level]);
1841                                 btrfs_add_corrupt_extent_record(root->fs_info,
1842                                                 &node_key,
1843                                                 path->nodes[*level]->start,
1844                                                 root->leafsize, *level);
1845                                 err = -EIO;
1846                                 goto out;
1847                         }
1848                 }
1849
1850                 ret = check_child_node(root, cur, path->slots[*level], next);
1851                 if (ret) {
1852                         err = ret;
1853                         goto out;
1854                 }
1855
1856                 if (btrfs_is_leaf(next))
1857                         status = btrfs_check_leaf(root, NULL, next);
1858                 else
1859                         status = btrfs_check_node(root, NULL, next);
1860                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1861                         free_extent_buffer(next);
1862                         err = -EIO;
1863                         goto out;
1864                 }
1865
1866                 *level = *level - 1;
1867                 free_extent_buffer(path->nodes[*level]);
1868                 path->nodes[*level] = next;
1869                 path->slots[*level] = 0;
1870         }
1871 out:
1872         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1873         return err;
1874 }
1875
1876 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1877                         struct walk_control *wc, int *level)
1878 {
1879         int i;
1880         struct extent_buffer *leaf;
1881
1882         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1883                 leaf = path->nodes[i];
1884                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1885                         path->slots[i]++;
1886                         *level = i;
1887                         return 0;
1888                 } else {
1889                         free_extent_buffer(path->nodes[*level]);
1890                         path->nodes[*level] = NULL;
1891                         BUG_ON(*level > wc->active_node);
1892                         if (*level == wc->active_node)
1893                                 leave_shared_node(root, wc, *level);
1894                         *level = i + 1;
1895                 }
1896         }
1897         return 1;
1898 }
1899
1900 static int check_root_dir(struct inode_record *rec)
1901 {
1902         struct inode_backref *backref;
1903         int ret = -1;
1904
1905         if (!rec->found_inode_item || rec->errors)
1906                 goto out;
1907         if (rec->nlink != 1 || rec->found_link != 0)
1908                 goto out;
1909         if (list_empty(&rec->backrefs))
1910                 goto out;
1911         backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1912         if (!backref->found_inode_ref)
1913                 goto out;
1914         if (backref->index != 0 || backref->namelen != 2 ||
1915             memcmp(backref->name, "..", 2))
1916                 goto out;
1917         if (backref->found_dir_index || backref->found_dir_item)
1918                 goto out;
1919         ret = 0;
1920 out:
1921         return ret;
1922 }
1923
1924 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1925                               struct btrfs_root *root, struct btrfs_path *path,
1926                               struct inode_record *rec)
1927 {
1928         struct btrfs_inode_item *ei;
1929         struct btrfs_key key;
1930         int ret;
1931
1932         key.objectid = rec->ino;
1933         key.type = BTRFS_INODE_ITEM_KEY;
1934         key.offset = (u64)-1;
1935
1936         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1937         if (ret < 0)
1938                 goto out;
1939         if (ret) {
1940                 if (!path->slots[0]) {
1941                         ret = -ENOENT;
1942                         goto out;
1943                 }
1944                 path->slots[0]--;
1945                 ret = 0;
1946         }
1947         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1948         if (key.objectid != rec->ino) {
1949                 ret = -ENOENT;
1950                 goto out;
1951         }
1952
1953         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1954                             struct btrfs_inode_item);
1955         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1956         btrfs_mark_buffer_dirty(path->nodes[0]);
1957         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1958         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
1959                root->root_key.objectid);
1960 out:
1961         btrfs_release_path(path);
1962         return ret;
1963 }
1964
1965 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1966                                     struct btrfs_root *root,
1967                                     struct btrfs_path *path,
1968                                     struct inode_record *rec)
1969 {
1970         int ret;
1971
1972         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
1973         btrfs_release_path(path);
1974         if (!ret)
1975                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1976         return ret;
1977 }
1978
1979 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
1980                                struct btrfs_root *root,
1981                                struct btrfs_path *path,
1982                                struct inode_record *rec)
1983 {
1984         struct btrfs_inode_item *ei;
1985         struct btrfs_key key;
1986         int ret = 0;
1987
1988         key.objectid = rec->ino;
1989         key.type = BTRFS_INODE_ITEM_KEY;
1990         key.offset = 0;
1991
1992         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1993         if (ret) {
1994                 if (ret > 0)
1995                         ret = -ENOENT;
1996                 goto out;
1997         }
1998
1999         /* Since ret == 0, no need to check anything */
2000         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2001                             struct btrfs_inode_item);
2002         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2003         btrfs_mark_buffer_dirty(path->nodes[0]);
2004         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2005         printf("reset nbytes for ino %llu root %llu\n",
2006                rec->ino, root->root_key.objectid);
2007 out:
2008         btrfs_release_path(path);
2009         return ret;
2010 }
2011
2012 static int add_missing_dir_index(struct btrfs_root *root,
2013                                  struct cache_tree *inode_cache,
2014                                  struct inode_record *rec,
2015                                  struct inode_backref *backref)
2016 {
2017         struct btrfs_path *path;
2018         struct btrfs_trans_handle *trans;
2019         struct btrfs_dir_item *dir_item;
2020         struct extent_buffer *leaf;
2021         struct btrfs_key key;
2022         struct btrfs_disk_key disk_key;
2023         struct inode_record *dir_rec;
2024         unsigned long name_ptr;
2025         u32 data_size = sizeof(*dir_item) + backref->namelen;
2026         int ret;
2027
2028         path = btrfs_alloc_path();
2029         if (!path)
2030                 return -ENOMEM;
2031
2032         trans = btrfs_start_transaction(root, 1);
2033         if (IS_ERR(trans)) {
2034                 btrfs_free_path(path);
2035                 return PTR_ERR(trans);
2036         }
2037
2038         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2039                 (unsigned long long)rec->ino);
2040         key.objectid = backref->dir;
2041         key.type = BTRFS_DIR_INDEX_KEY;
2042         key.offset = backref->index;
2043
2044         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2045         BUG_ON(ret);
2046
2047         leaf = path->nodes[0];
2048         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2049
2050         disk_key.objectid = cpu_to_le64(rec->ino);
2051         disk_key.type = BTRFS_INODE_ITEM_KEY;
2052         disk_key.offset = 0;
2053
2054         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2055         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2056         btrfs_set_dir_data_len(leaf, dir_item, 0);
2057         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2058         name_ptr = (unsigned long)(dir_item + 1);
2059         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2060         btrfs_mark_buffer_dirty(leaf);
2061         btrfs_free_path(path);
2062         btrfs_commit_transaction(trans, root);
2063
2064         backref->found_dir_index = 1;
2065         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2066         if (!dir_rec)
2067                 return 0;
2068         dir_rec->found_size += backref->namelen;
2069         if (dir_rec->found_size == dir_rec->isize &&
2070             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2071                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2072         if (dir_rec->found_size != dir_rec->isize)
2073                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2074
2075         return 0;
2076 }
2077
2078 static int delete_dir_index(struct btrfs_root *root,
2079                             struct cache_tree *inode_cache,
2080                             struct inode_record *rec,
2081                             struct inode_backref *backref)
2082 {
2083         struct btrfs_trans_handle *trans;
2084         struct btrfs_dir_item *di;
2085         struct btrfs_path *path;
2086         int ret = 0;
2087
2088         path = btrfs_alloc_path();
2089         if (!path)
2090                 return -ENOMEM;
2091
2092         trans = btrfs_start_transaction(root, 1);
2093         if (IS_ERR(trans)) {
2094                 btrfs_free_path(path);
2095                 return PTR_ERR(trans);
2096         }
2097
2098
2099         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2100                 (unsigned long long)backref->dir,
2101                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2102                 (unsigned long long)root->objectid);
2103
2104         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2105                                     backref->name, backref->namelen,
2106                                     backref->index, -1);
2107         if (IS_ERR(di)) {
2108                 ret = PTR_ERR(di);
2109                 btrfs_free_path(path);
2110                 btrfs_commit_transaction(trans, root);
2111                 if (ret == -ENOENT)
2112                         return 0;
2113                 return ret;
2114         }
2115
2116         if (!di)
2117                 ret = btrfs_del_item(trans, root, path);
2118         else
2119                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2120         BUG_ON(ret);
2121         btrfs_free_path(path);
2122         btrfs_commit_transaction(trans, root);
2123         return ret;
2124 }
2125
2126 static int create_inode_item(struct btrfs_root *root,
2127                              struct inode_record *rec,
2128                              struct inode_backref *backref, int root_dir)
2129 {
2130         struct btrfs_trans_handle *trans;
2131         struct btrfs_inode_item inode_item;
2132         time_t now = time(NULL);
2133         int ret;
2134
2135         trans = btrfs_start_transaction(root, 1);
2136         if (IS_ERR(trans)) {
2137                 ret = PTR_ERR(trans);
2138                 return ret;
2139         }
2140
2141         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2142                 "be incomplete, please check permissions and content after "
2143                 "the fsck completes.\n", (unsigned long long)root->objectid,
2144                 (unsigned long long)rec->ino);
2145
2146         memset(&inode_item, 0, sizeof(inode_item));
2147         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2148         if (root_dir)
2149                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2150         else
2151                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2152         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2153         if (rec->found_dir_item) {
2154                 if (rec->found_file_extent)
2155                         fprintf(stderr, "root %llu inode %llu has both a dir "
2156                                 "item and extents, unsure if it is a dir or a "
2157                                 "regular file so setting it as a directory\n",
2158                                 (unsigned long long)root->objectid,
2159                                 (unsigned long long)rec->ino);
2160                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2161                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2162         } else if (!rec->found_dir_item) {
2163                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2164                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2165         }
2166         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2167         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2168         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2169         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2170         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2171         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2172         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2173         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2174
2175         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2176         BUG_ON(ret);
2177         btrfs_commit_transaction(trans, root);
2178         return 0;
2179 }
2180
2181 static int repair_inode_backrefs(struct btrfs_root *root,
2182                                  struct inode_record *rec,
2183                                  struct cache_tree *inode_cache,
2184                                  int delete)
2185 {
2186         struct inode_backref *tmp, *backref;
2187         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2188         int ret = 0;
2189         int repaired = 0;
2190
2191         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2192                 if (!delete && rec->ino == root_dirid) {
2193                         if (!rec->found_inode_item) {
2194                                 ret = create_inode_item(root, rec, backref, 1);
2195                                 if (ret)
2196                                         break;
2197                                 repaired++;
2198                         }
2199                 }
2200
2201                 /* Index 0 for root dir's are special, don't mess with it */
2202                 if (rec->ino == root_dirid && backref->index == 0)
2203                         continue;
2204
2205                 if (delete &&
2206                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2207                      (backref->found_dir_index && backref->found_inode_ref &&
2208                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2209                         ret = delete_dir_index(root, inode_cache, rec, backref);
2210                         if (ret)
2211                                 break;
2212                         repaired++;
2213                         list_del(&backref->list);
2214                         free(backref);
2215                 }
2216
2217                 if (!delete && !backref->found_dir_index &&
2218                     backref->found_dir_item && backref->found_inode_ref) {
2219                         ret = add_missing_dir_index(root, inode_cache, rec,
2220                                                     backref);
2221                         if (ret)
2222                                 break;
2223                         repaired++;
2224                         if (backref->found_dir_item &&
2225                             backref->found_dir_index &&
2226                             backref->found_dir_index) {
2227                                 if (!backref->errors &&
2228                                     backref->found_inode_ref) {
2229                                         list_del(&backref->list);
2230                                         free(backref);
2231                                 }
2232                         }
2233                 }
2234
2235                 if (!delete && (!backref->found_dir_index &&
2236                                 !backref->found_dir_item &&
2237                                 backref->found_inode_ref)) {
2238                         struct btrfs_trans_handle *trans;
2239                         struct btrfs_key location;
2240
2241                         ret = check_dir_conflict(root, backref->name,
2242                                                  backref->namelen,
2243                                                  backref->dir,
2244                                                  backref->index);
2245                         if (ret) {
2246                                 /*
2247                                  * let nlink fixing routine to handle it,
2248                                  * which can do it better.
2249                                  */
2250                                 ret = 0;
2251                                 break;
2252                         }
2253                         location.objectid = rec->ino;
2254                         location.type = BTRFS_INODE_ITEM_KEY;
2255                         location.offset = 0;
2256
2257                         trans = btrfs_start_transaction(root, 1);
2258                         if (IS_ERR(trans)) {
2259                                 ret = PTR_ERR(trans);
2260                                 break;
2261                         }
2262                         fprintf(stderr, "adding missing dir index/item pair "
2263                                 "for inode %llu\n",
2264                                 (unsigned long long)rec->ino);
2265                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2266                                                     backref->namelen,
2267                                                     backref->dir, &location,
2268                                                     imode_to_type(rec->imode),
2269                                                     backref->index);
2270                         BUG_ON(ret);
2271                         btrfs_commit_transaction(trans, root);
2272                         repaired++;
2273                 }
2274
2275                 if (!delete && (backref->found_inode_ref &&
2276                                 backref->found_dir_index &&
2277                                 backref->found_dir_item &&
2278                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2279                                 !rec->found_inode_item)) {
2280                         ret = create_inode_item(root, rec, backref, 0);
2281                         if (ret)
2282                                 break;
2283                         repaired++;
2284                 }
2285
2286         }
2287         return ret ? ret : repaired;
2288 }
2289
2290 /*
2291  * To determine the file type for nlink/inode_item repair
2292  *
2293  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2294  * Return -ENOENT if file type is not found.
2295  */
2296 static int find_file_type(struct inode_record *rec, u8 *type)
2297 {
2298         struct inode_backref *backref;
2299
2300         /* For inode item recovered case */
2301         if (rec->found_inode_item) {
2302                 *type = imode_to_type(rec->imode);
2303                 return 0;
2304         }
2305
2306         list_for_each_entry(backref, &rec->backrefs, list) {
2307                 if (backref->found_dir_index || backref->found_dir_item) {
2308                         *type = backref->filetype;
2309                         return 0;
2310                 }
2311         }
2312         return -ENOENT;
2313 }
2314
2315 /*
2316  * To determine the file name for nlink repair
2317  *
2318  * Return 0 if file name is found, set name and namelen.
2319  * Return -ENOENT if file name is not found.
2320  */
2321 static int find_file_name(struct inode_record *rec,
2322                           char *name, int *namelen)
2323 {
2324         struct inode_backref *backref;
2325
2326         list_for_each_entry(backref, &rec->backrefs, list) {
2327                 if (backref->found_dir_index || backref->found_dir_item ||
2328                     backref->found_inode_ref) {
2329                         memcpy(name, backref->name, backref->namelen);
2330                         *namelen = backref->namelen;
2331                         return 0;
2332                 }
2333         }
2334         return -ENOENT;
2335 }
2336
2337 /* Reset the nlink of the inode to the correct one */
2338 static int reset_nlink(struct btrfs_trans_handle *trans,
2339                        struct btrfs_root *root,
2340                        struct btrfs_path *path,
2341                        struct inode_record *rec)
2342 {
2343         struct inode_backref *backref;
2344         struct inode_backref *tmp;
2345         struct btrfs_key key;
2346         struct btrfs_inode_item *inode_item;
2347         int ret = 0;
2348
2349         /* We don't believe this either, reset it and iterate backref */
2350         rec->found_link = 0;
2351
2352         /* Remove all backref including the valid ones */
2353         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2354                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2355                                    backref->index, backref->name,
2356                                    backref->namelen, 0);
2357                 if (ret < 0)
2358                         goto out;
2359
2360                 /* remove invalid backref, so it won't be added back */
2361                 if (!(backref->found_dir_index &&
2362                       backref->found_dir_item &&
2363                       backref->found_inode_ref)) {
2364                         list_del(&backref->list);
2365                         free(backref);
2366                 } else {
2367                         rec->found_link++;
2368                 }
2369         }
2370
2371         /* Set nlink to 0 */
2372         key.objectid = rec->ino;
2373         key.type = BTRFS_INODE_ITEM_KEY;
2374         key.offset = 0;
2375         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2376         if (ret < 0)
2377                 goto out;
2378         if (ret > 0) {
2379                 ret = -ENOENT;
2380                 goto out;
2381         }
2382         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2383                                     struct btrfs_inode_item);
2384         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2385         btrfs_mark_buffer_dirty(path->nodes[0]);
2386         btrfs_release_path(path);
2387
2388         /*
2389          * Add back valid inode_ref/dir_item/dir_index,
2390          * add_link() will handle the nlink inc, so new nlink must be correct
2391          */
2392         list_for_each_entry(backref, &rec->backrefs, list) {
2393                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2394                                      backref->name, backref->namelen,
2395                                      backref->ref_type, &backref->index, 1);
2396                 if (ret < 0)
2397                         goto out;
2398         }
2399 out:
2400         btrfs_release_path(path);
2401         return ret;
2402 }
2403
2404 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2405                                struct btrfs_root *root,
2406                                struct btrfs_path *path,
2407                                struct inode_record *rec)
2408 {
2409         char *dir_name = "lost+found";
2410         char namebuf[BTRFS_NAME_LEN] = {0};
2411         u64 lost_found_ino;
2412         u32 mode = 0700;
2413         u8 type = 0;
2414         int namelen = 0;
2415         int name_recovered = 0;
2416         int type_recovered = 0;
2417         int ret = 0;
2418
2419         /*
2420          * Get file name and type first before these invalid inode ref
2421          * are deleted by remove_all_invalid_backref()
2422          */
2423         name_recovered = !find_file_name(rec, namebuf, &namelen);
2424         type_recovered = !find_file_type(rec, &type);
2425
2426         if (!name_recovered) {
2427                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2428                        rec->ino, rec->ino);
2429                 namelen = count_digits(rec->ino);
2430                 sprintf(namebuf, "%llu", rec->ino);
2431                 name_recovered = 1;
2432         }
2433         if (!type_recovered) {
2434                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2435                        rec->ino);
2436                 type = BTRFS_FT_REG_FILE;
2437                 type_recovered = 1;
2438         }
2439
2440         ret = reset_nlink(trans, root, path, rec);
2441         if (ret < 0) {
2442                 fprintf(stderr,
2443                         "Failed to reset nlink for inode %llu: %s\n",
2444                         rec->ino, strerror(-ret));
2445                 goto out;
2446         }
2447
2448         if (rec->found_link == 0) {
2449                 lost_found_ino = root->highest_inode;
2450                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2451                         ret = -EOVERFLOW;
2452                         goto out;
2453                 }
2454                 lost_found_ino++;
2455                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2456                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2457                                   mode);
2458                 if (ret < 0) {
2459                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2460                                 dir_name, strerror(-ret));
2461                         goto out;
2462                 }
2463                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2464                                      namebuf, namelen, type, NULL, 1);
2465                 /*
2466                  * Add ".INO" suffix several times to handle case where
2467                  * "FILENAME.INO" is already taken by another file.
2468                  */
2469                 while (ret == -EEXIST) {
2470                         /*
2471                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2472                          */
2473                         if (namelen + count_digits(rec->ino) + 1 >
2474                             BTRFS_NAME_LEN) {
2475                                 ret = -EFBIG;
2476                                 goto out;
2477                         }
2478                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2479                                  ".%llu", rec->ino);
2480                         namelen += count_digits(rec->ino) + 1;
2481                         ret = btrfs_add_link(trans, root, rec->ino,
2482                                              lost_found_ino, namebuf,
2483                                              namelen, type, NULL, 1);
2484                 }
2485                 if (ret < 0) {
2486                         fprintf(stderr,
2487                                 "Failed to link the inode %llu to %s dir: %s\n",
2488                                 rec->ino, dir_name, strerror(-ret));
2489                         goto out;
2490                 }
2491                 /*
2492                  * Just increase the found_link, don't actually add the
2493                  * backref. This will make things easier and this inode
2494                  * record will be freed after the repair is done.
2495                  * So fsck will not report problem about this inode.
2496                  */
2497                 rec->found_link++;
2498                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2499                        namelen, namebuf, dir_name);
2500         }
2501         printf("Fixed the nlink of inode %llu\n", rec->ino);
2502 out:
2503         /*
2504          * Clear the flag anyway, or we will loop forever for the same inode
2505          * as it will not be removed from the bad inode list and the dead loop
2506          * happens.
2507          */
2508         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2509         btrfs_release_path(path);
2510         return ret;
2511 }
2512
2513 /*
2514  * Check if there is any normal(reg or prealloc) file extent for given
2515  * ino.
2516  * This is used to determine the file type when neither its dir_index/item or
2517  * inode_item exists.
2518  *
2519  * This will *NOT* report error, if any error happens, just consider it does
2520  * not have any normal file extent.
2521  */
2522 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2523 {
2524         struct btrfs_path *path;
2525         struct btrfs_key key;
2526         struct btrfs_key found_key;
2527         struct btrfs_file_extent_item *fi;
2528         u8 type;
2529         int ret = 0;
2530
2531         path = btrfs_alloc_path();
2532         if (!path)
2533                 goto out;
2534         key.objectid = ino;
2535         key.type = BTRFS_EXTENT_DATA_KEY;
2536         key.offset = 0;
2537
2538         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2539         if (ret < 0) {
2540                 ret = 0;
2541                 goto out;
2542         }
2543         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2544                 ret = btrfs_next_leaf(root, path);
2545                 if (ret) {
2546                         ret = 0;
2547                         goto out;
2548                 }
2549         }
2550         while (1) {
2551                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2552                                       path->slots[0]);
2553                 if (found_key.objectid != ino ||
2554                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2555                         break;
2556                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2557                                     struct btrfs_file_extent_item);
2558                 type = btrfs_file_extent_type(path->nodes[0], fi);
2559                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2560                         ret = 1;
2561                         goto out;
2562                 }
2563         }
2564 out:
2565         btrfs_free_path(path);
2566         return ret;
2567 }
2568
2569 static u32 btrfs_type_to_imode(u8 type)
2570 {
2571         static u32 imode_by_btrfs_type[] = {
2572                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2573                 [BTRFS_FT_DIR]          = S_IFDIR,
2574                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2575                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2576                 [BTRFS_FT_FIFO]         = S_IFIFO,
2577                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2578                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2579         };
2580
2581         return imode_by_btrfs_type[(type)];
2582 }
2583
2584 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2585                                 struct btrfs_root *root,
2586                                 struct btrfs_path *path,
2587                                 struct inode_record *rec)
2588 {
2589         u8 filetype;
2590         u32 mode = 0700;
2591         int type_recovered = 0;
2592         int ret = 0;
2593
2594         printf("Trying to rebuild inode:%llu\n", rec->ino);
2595
2596         type_recovered = !find_file_type(rec, &filetype);
2597
2598         /*
2599          * Try to determine inode type if type not found.
2600          *
2601          * For found regular file extent, it must be FILE.
2602          * For found dir_item/index, it must be DIR.
2603          *
2604          * For undetermined one, use FILE as fallback.
2605          *
2606          * TODO:
2607          * 1. If found backref(inode_index/item is already handled) to it,
2608          *    it must be DIR.
2609          *    Need new inode-inode ref structure to allow search for that.
2610          */
2611         if (!type_recovered) {
2612                 if (rec->found_file_extent &&
2613                     find_normal_file_extent(root, rec->ino)) {
2614                         type_recovered = 1;
2615                         filetype = BTRFS_FT_REG_FILE;
2616                 } else if (rec->found_dir_item) {
2617                         type_recovered = 1;
2618                         filetype = BTRFS_FT_DIR;
2619                 } else if (!list_empty(&rec->orphan_extents)) {
2620                         type_recovered = 1;
2621                         filetype = BTRFS_FT_REG_FILE;
2622                 } else{
2623                         printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2624                                rec->ino);
2625                         type_recovered = 1;
2626                         filetype = BTRFS_FT_REG_FILE;
2627                 }
2628         }
2629
2630         ret = btrfs_new_inode(trans, root, rec->ino,
2631                               mode | btrfs_type_to_imode(filetype));
2632         if (ret < 0)
2633                 goto out;
2634
2635         /*
2636          * Here inode rebuild is done, we only rebuild the inode item,
2637          * don't repair the nlink(like move to lost+found).
2638          * That is the job of nlink repair.
2639          *
2640          * We just fill the record and return
2641          */
2642         rec->found_dir_item = 1;
2643         rec->imode = mode | btrfs_type_to_imode(filetype);
2644         rec->nlink = 0;
2645         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2646         /* Ensure the inode_nlinks repair function will be called */
2647         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2648 out:
2649         return ret;
2650 }
2651
2652 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2653                                       struct btrfs_root *root,
2654                                       struct btrfs_path *path,
2655                                       struct inode_record *rec)
2656 {
2657         struct orphan_data_extent *orphan;
2658         struct orphan_data_extent *tmp;
2659         int ret = 0;
2660
2661         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2662                 /*
2663                  * Check for conflicting file extents
2664                  *
2665                  * Here we don't know whether the extents is compressed or not,
2666                  * so we can only assume it not compressed nor data offset,
2667                  * and use its disk_len as extent length.
2668                  */
2669                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2670                                        orphan->offset, orphan->disk_len, 0);
2671                 btrfs_release_path(path);
2672                 if (ret < 0)
2673                         goto out;
2674                 if (!ret) {
2675                         fprintf(stderr,
2676                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2677                                 orphan->disk_bytenr, orphan->disk_len);
2678                         ret = btrfs_free_extent(trans,
2679                                         root->fs_info->extent_root,
2680                                         orphan->disk_bytenr, orphan->disk_len,
2681                                         0, root->objectid, orphan->objectid,
2682                                         orphan->offset);
2683                         if (ret < 0)
2684                                 goto out;
2685                 }
2686                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2687                                 orphan->offset, orphan->disk_bytenr,
2688                                 orphan->disk_len, orphan->disk_len);
2689                 if (ret < 0)
2690                         goto out;
2691
2692                 /* Update file size info */
2693                 rec->found_size += orphan->disk_len;
2694                 if (rec->found_size == rec->nbytes)
2695                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2696
2697                 /* Update the file extent hole info too */
2698                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2699                                            orphan->disk_len);
2700                 if (ret < 0)
2701                         goto out;
2702                 if (RB_EMPTY_ROOT(&rec->holes))
2703                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2704
2705                 list_del(&orphan->list);
2706                 free(orphan);
2707         }
2708         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2709 out:
2710         return ret;
2711 }
2712
2713 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2714                                         struct btrfs_root *root,
2715                                         struct btrfs_path *path,
2716                                         struct inode_record *rec)
2717 {
2718         struct rb_node *node;
2719         struct file_extent_hole *hole;
2720         int found = 0;
2721         int ret = 0;
2722
2723         node = rb_first(&rec->holes);
2724
2725         while (node) {
2726                 found = 1;
2727                 hole = rb_entry(node, struct file_extent_hole, node);
2728                 ret = btrfs_punch_hole(trans, root, rec->ino,
2729                                        hole->start, hole->len);
2730                 if (ret < 0)
2731                         goto out;
2732                 ret = del_file_extent_hole(&rec->holes, hole->start,
2733                                            hole->len);
2734                 if (ret < 0)
2735                         goto out;
2736                 if (RB_EMPTY_ROOT(&rec->holes))
2737                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2738                 node = rb_first(&rec->holes);
2739         }
2740         /* special case for a file losing all its file extent */
2741         if (!found) {
2742                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2743                                        round_up(rec->isize, root->sectorsize));
2744                 if (ret < 0)
2745                         goto out;
2746         }
2747         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2748                rec->ino, root->objectid);
2749 out:
2750         return ret;
2751 }
2752
2753 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2754 {
2755         struct btrfs_trans_handle *trans;
2756         struct btrfs_path *path;
2757         int ret = 0;
2758
2759         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2760                              I_ERR_NO_ORPHAN_ITEM |
2761                              I_ERR_LINK_COUNT_WRONG |
2762                              I_ERR_NO_INODE_ITEM |
2763                              I_ERR_FILE_EXTENT_ORPHAN |
2764                              I_ERR_FILE_EXTENT_DISCOUNT|
2765                              I_ERR_FILE_NBYTES_WRONG)))
2766                 return rec->errors;
2767
2768         path = btrfs_alloc_path();
2769         if (!path)
2770                 return -ENOMEM;
2771
2772         /*
2773          * For nlink repair, it may create a dir and add link, so
2774          * 2 for parent(256)'s dir_index and dir_item
2775          * 2 for lost+found dir's inode_item and inode_ref
2776          * 1 for the new inode_ref of the file
2777          * 2 for lost+found dir's dir_index and dir_item for the file
2778          */
2779         trans = btrfs_start_transaction(root, 7);
2780         if (IS_ERR(trans)) {
2781                 btrfs_free_path(path);
2782                 return PTR_ERR(trans);
2783         }
2784
2785         if (rec->errors & I_ERR_NO_INODE_ITEM)
2786                 ret = repair_inode_no_item(trans, root, path, rec);
2787         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2788                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2789         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2790                 ret = repair_inode_discount_extent(trans, root, path, rec);
2791         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2792                 ret = repair_inode_isize(trans, root, path, rec);
2793         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2794                 ret = repair_inode_orphan_item(trans, root, path, rec);
2795         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2796                 ret = repair_inode_nlinks(trans, root, path, rec);
2797         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2798                 ret = repair_inode_nbytes(trans, root, path, rec);
2799         btrfs_commit_transaction(trans, root);
2800         btrfs_free_path(path);
2801         return ret;
2802 }
2803
/*
 * Check every inode record collected for @root in @inode_cache, print the
 * ones that are still inconsistent and, when the `repair` flag (declared
 * outside this function) is set, try to fix them.
 *
 * Steps:
 *   1. A root whose root item has 0 refs is skipped (returns 0).
 *   2. root->highest_inode is raised to the last record's inode number.
 *   3. With `repair` set, repair_inode_backrefs() is run over all records in
 *      up to two passes (delete, then add); a third pass that frees every
 *      record only runs when one of the first two set err.
 *   4. The root directory record is checked, or recreated when missing and
 *      `repair` is set.
 *   5. All remaining records are removed from the cache, optionally repaired
 *      with try_repair_inode(), reported and freed.
 *
 * Returns 0 when nothing was counted as an error, -1 when at least one error
 * was counted, -EAGAIN when a backref repair returned > 0 or the root dir was
 * recreated, or another negative value from the backref repair or from
 * starting a transaction.
 */
2804 static int check_inode_recs(struct btrfs_root *root,
2805                             struct cache_tree *inode_cache)
2806 {
2807         struct cache_extent *cache;
2808         struct ptr_node *node;
2809         struct inode_record *rec;
2810         struct inode_backref *backref;
2811         int stage = 0;
2812         int ret = 0;
2813         int err = 0;
2814         u64 error = 0;
2815         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2816
2817         if (btrfs_root_refs(&root->root_item) == 0) {
2818                 if (!cache_tree_empty(inode_cache))
2819                         fprintf(stderr, "warning line %d\n", __LINE__);
2820                 return 0;
2821         }
2822
2823         /*
2824          * We need to record the highest inode number for later 'lost+found'
2825          * dir creation.
2826          * We must select a ino not used/refered by any existing inode, or
2827          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2828          * this may cause 'lost+found' dir has wrong nlinks.
2829          */
2830         cache = last_cache_extent(inode_cache);
2831         if (cache) {
2832                 node = container_of(cache, struct ptr_node, cache);
2833                 rec = node->data;
2834                 if (rec->ino > root->highest_inode)
2835                         root->highest_inode = rec->ino;
2836         }
2837
2838         /*
2839          * We need to repair backrefs first because we could change some of the
2840          * errors in the inode recs.
2841          *
2842          * We also need to go through and delete invalid backrefs first and then
2843          * add the correct ones second.  We do this because we may get EEXIST
2844          * when adding back the correct index because we hadn't yet deleted the
2845          * invalid index.
2846          *
2847          * For example, if we were missing a dir index then the directories
2848          * isize would be wrong, so if we fixed the isize to what we thought it
2849          * would be and then fixed the backref we'd still have a invalid fs, so
2850          * we need to add back the dir index and then check to see if the isize
2851          * is still wrong.
2852          */
2853         while (stage < 3) {
2854                 stage++;
                  /* Stage 3 is only entered when an earlier stage set err. */
2855                 if (stage == 3 && !err)
2856                         break;
2857
2858                 cache = search_cache_extent(inode_cache, 0);
2859                 while (repair && cache) {
2860                         node = container_of(cache, struct ptr_node, cache);
2861                         rec = node->data;
                          /* Advance first: the current node may be freed below. */
2862                         cache = next_cache_extent(cache);
2863
2864                         /* Need to free everything up and rescan */
2865                         if (stage == 3) {
2866                                 remove_cache_extent(inode_cache, &node->cache);
2867                                 free(node);
2868                                 free_inode_rec(rec);
2869                                 continue;
2870                         }
2871
2872                         if (list_empty(&rec->backrefs))
2873                                 continue;
2874
2875                         ret = repair_inode_backrefs(root, rec, inode_cache,
2876                                                     stage == 1);
                          /*
                           * A failure jumps straight to stage 3 (stage is bumped
                           * again at the top of the outer loop); a positive
                           * return requests a rescan through -EAGAIN.
                           */
2877                         if (ret < 0) {
2878                                 err = ret;
2879                                 stage = 2;
2880                                 break;
2881                         } if (ret > 0) {
2882                                 err = -EAGAIN;
2883                         }
2884                 }
2885         }
2886         if (err)
2887                 return err;
2888
2889         rec = get_inode_rec(inode_cache, root_dirid, 0);
2890         if (rec) {
2891                 ret = check_root_dir(rec);
2892                 if (ret) {
2893                         fprintf(stderr, "root %llu root dir %llu error\n",
2894                                 (unsigned long long)root->root_key.objectid,
2895                                 (unsigned long long)root_dirid);
2896                         print_inode_error(root, rec);
2897                         error++;
2898                 }
2899         } else {
2900                 if (repair) {
2901                         struct btrfs_trans_handle *trans;
2902
2903                         trans = btrfs_start_transaction(root, 1);
2904                         if (IS_ERR(trans)) {
2905                                 err = PTR_ERR(trans);
2906                                 return err;
2907                         }
2908
2909                         fprintf(stderr,
2910                                 "root %llu missing its root dir, recreating\n",
2911                                 (unsigned long long)root->objectid);
2912
2913                         ret = btrfs_make_root_dir(trans, root, root_dirid);
2914                         BUG_ON(ret);
2915
2916                         btrfs_commit_transaction(trans, root);
2917                         return -EAGAIN;
2918                 }
2919
2920                 fprintf(stderr, "root %llu root dir %llu not found\n",
2921                         (unsigned long long)root->root_key.objectid,
2922                         (unsigned long long)root_dirid);
2923         }
2924
          /*
           * Drain the cache: every record is removed, checked, optionally
           * repaired, reported if still bad, and freed.
           */
2925         while (1) {
2926                 cache = search_cache_extent(inode_cache, 0);
2927                 if (!cache)
2928                         break;
2929                 node = container_of(cache, struct ptr_node, cache);
2930                 rec = node->data;
2931                 remove_cache_extent(inode_cache, &node->cache);
2932                 free(node);
2933                 if (rec->ino == root_dirid ||
2934                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
2935                         free_inode_rec(rec);
2936                         continue;
2937                 }
2938
2939                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2940                         ret = check_orphan_item(root, rec->ino);
2941                         if (ret == 0)
2942                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2943                         if (can_free_inode_rec(rec)) {
2944                                 free_inode_rec(rec);
2945                                 continue;
2946                         }
2947                 }
2948
2949                 if (!rec->found_inode_item)
2950                         rec->errors |= I_ERR_NO_INODE_ITEM;
2951                 if (rec->found_link != rec->nlink)
2952                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2953                 if (repair) {
2954                         ret = try_repair_inode(root, rec);
2955                         if (ret == 0 && can_free_inode_rec(rec)) {
2956                                 free_inode_rec(rec);
2957                                 continue;
2958                         }
                          /*
                           * NOTE(review): ret is cleared here, so the test below
                           * never counts an error while `repair` is set, even
                           * when try_repair_inode() returned non-zero -- confirm
                           * this is intended.
                           */
2959                         ret = 0;
2960                 }
2961
2962                 if (!(repair && ret == 0))
2963                         error++;
2964                 print_inode_error(root, rec);
2965                 list_for_each_entry(backref, &rec->backrefs, list) {
2966                         if (!backref->found_dir_item)
2967                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
2968                         if (!backref->found_dir_index)
2969                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
2970                         if (!backref->found_inode_ref)
2971                                 backref->errors |= REF_ERR_NO_INODE_REF;
2972                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
2973                                 " namelen %u name %s filetype %d errors %x",
2974                                 (unsigned long long)backref->dir,
2975                                 (unsigned long long)backref->index,
2976                                 backref->namelen, backref->name,
2977                                 backref->filetype, backref->errors);
2978                         print_ref_error(backref->errors);
2979                 }
2980                 free_inode_rec(rec);
2981         }
2982         return (error > 0) ? -1 : 0;
2983 }
2984
2985 static struct root_record *get_root_rec(struct cache_tree *root_cache,
2986                                         u64 objectid)
2987 {
2988         struct cache_extent *cache;
2989         struct root_record *rec = NULL;
2990         int ret;
2991
2992         cache = lookup_cache_extent(root_cache, objectid, 1);
2993         if (cache) {
2994                 rec = container_of(cache, struct root_record, cache);
2995         } else {
2996                 rec = calloc(1, sizeof(*rec));
2997                 rec->objectid = objectid;
2998                 INIT_LIST_HEAD(&rec->backrefs);
2999                 rec->cache.start = objectid;
3000                 rec->cache.size = 1;
3001
3002                 ret = insert_cache_extent(root_cache, &rec->cache);
3003                 BUG_ON(ret);
3004         }
3005         return rec;
3006 }
3007
3008 static struct root_backref *get_root_backref(struct root_record *rec,
3009                                              u64 ref_root, u64 dir, u64 index,
3010                                              const char *name, int namelen)
3011 {
3012         struct root_backref *backref;
3013
3014         list_for_each_entry(backref, &rec->backrefs, list) {
3015                 if (backref->ref_root != ref_root || backref->dir != dir ||
3016                     backref->namelen != namelen)
3017                         continue;
3018                 if (memcmp(name, backref->name, namelen))
3019                         continue;
3020                 return backref;
3021         }
3022
3023         backref = calloc(1, sizeof(*backref) + namelen + 1);
3024         backref->ref_root = ref_root;
3025         backref->dir = dir;
3026         backref->index = index;
3027         backref->namelen = namelen;
3028         memcpy(backref->name, name, namelen);
3029         backref->name[namelen] = '\0';
3030         list_add_tail(&backref->list, &rec->backrefs);
3031         return backref;
3032 }
3033
3034 static void free_root_record(struct cache_extent *cache)
3035 {
3036         struct root_record *rec;
3037         struct root_backref *backref;
3038
3039         rec = container_of(cache, struct root_record, cache);
3040         while (!list_empty(&rec->backrefs)) {
3041                 backref = list_entry(rec->backrefs.next,
3042                                      struct root_backref, list);
3043                 list_del(&backref->list);
3044                 free(backref);
3045         }
3046
3047         kfree(rec);
3048 }
3049
3050 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3051
3052 static int add_root_backref(struct cache_tree *root_cache,
3053                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3054                             const char *name, int namelen,
3055                             int item_type, int errors)
3056 {
3057         struct root_record *rec;
3058         struct root_backref *backref;
3059
3060         rec = get_root_rec(root_cache, root_id);
3061         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3062
3063         backref->errors |= errors;
3064
3065         if (item_type != BTRFS_DIR_ITEM_KEY) {
3066                 if (backref->found_dir_index || backref->found_back_ref ||
3067                     backref->found_forward_ref) {
3068                         if (backref->index != index)
3069                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3070                 } else {
3071                         backref->index = index;
3072                 }
3073         }
3074
3075         if (item_type == BTRFS_DIR_ITEM_KEY) {
3076                 if (backref->found_forward_ref)
3077                         rec->found_ref++;
3078                 backref->found_dir_item = 1;
3079         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3080                 backref->found_dir_index = 1;
3081         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3082                 if (backref->found_forward_ref)
3083                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3084                 else if (backref->found_dir_item)
3085                         rec->found_ref++;
3086                 backref->found_forward_ref = 1;
3087         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3088                 if (backref->found_back_ref)
3089                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3090                 backref->found_back_ref = 1;
3091         } else {
3092                 BUG_ON(1);
3093         }
3094
3095         if (backref->found_forward_ref && backref->found_dir_item)
3096                 backref->reachable = 1;
3097         return 0;
3098 }
3099
3100 static int merge_root_recs(struct btrfs_root *root,
3101                            struct cache_tree *src_cache,
3102                            struct cache_tree *dst_cache)
3103 {
3104         struct cache_extent *cache;
3105         struct ptr_node *node;
3106         struct inode_record *rec;
3107         struct inode_backref *backref;
3108         int ret = 0;
3109
3110         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3111                 free_inode_recs_tree(src_cache);
3112                 return 0;
3113         }
3114
3115         while (1) {
3116                 cache = search_cache_extent(src_cache, 0);
3117                 if (!cache)
3118                         break;
3119                 node = container_of(cache, struct ptr_node, cache);
3120                 rec = node->data;
3121                 remove_cache_extent(src_cache, &node->cache);
3122                 free(node);
3123
3124                 ret = is_child_root(root, root->objectid, rec->ino);
3125                 if (ret < 0)
3126                         break;
3127                 else if (ret == 0)
3128                         goto skip;
3129
3130                 list_for_each_entry(backref, &rec->backrefs, list) {
3131                         BUG_ON(backref->found_inode_ref);
3132                         if (backref->found_dir_item)
3133                                 add_root_backref(dst_cache, rec->ino,
3134                                         root->root_key.objectid, backref->dir,
3135                                         backref->index, backref->name,
3136                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3137                                         backref->errors);
3138                         if (backref->found_dir_index)
3139                                 add_root_backref(dst_cache, rec->ino,
3140                                         root->root_key.objectid, backref->dir,
3141                                         backref->index, backref->name,
3142                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3143                                         backref->errors);
3144                 }
3145 skip:
3146                 free_inode_rec(rec);
3147         }
3148         if (ret < 0)
3149                 return ret;
3150         return 0;
3151 }
3152
3153 static int check_root_refs(struct btrfs_root *root,
3154                            struct cache_tree *root_cache)
3155 {
3156         struct root_record *rec;
3157         struct root_record *ref_root;
3158         struct root_backref *backref;
3159         struct cache_extent *cache;
3160         int loop = 1;
3161         int ret;
3162         int error;
3163         int errors = 0;
3164
3165         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3166         rec->found_ref = 1;
3167
3168         /* fixme: this can not detect circular references */
3169         while (loop) {
3170                 loop = 0;
3171                 cache = search_cache_extent(root_cache, 0);
3172                 while (1) {
3173                         if (!cache)
3174                                 break;
3175                         rec = container_of(cache, struct root_record, cache);
3176                         cache = next_cache_extent(cache);
3177
3178                         if (rec->found_ref == 0)
3179                                 continue;
3180
3181                         list_for_each_entry(backref, &rec->backrefs, list) {
3182                                 if (!backref->reachable)
3183                                         continue;
3184
3185                                 ref_root = get_root_rec(root_cache,
3186                                                         backref->ref_root);
3187                                 if (ref_root->found_ref > 0)
3188                                         continue;
3189
3190                                 backref->reachable = 0;
3191                                 rec->found_ref--;
3192                                 if (rec->found_ref == 0)
3193                                         loop = 1;
3194                         }
3195                 }
3196         }
3197
3198         cache = search_cache_extent(root_cache, 0);
3199         while (1) {
3200                 if (!cache)
3201                         break;
3202                 rec = container_of(cache, struct root_record, cache);
3203                 cache = next_cache_extent(cache);
3204
3205                 if (rec->found_ref == 0 &&
3206                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3207                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3208                         ret = check_orphan_item(root->fs_info->tree_root,
3209                                                 rec->objectid);
3210                         if (ret == 0)
3211                                 continue;
3212
3213                         /*
3214                          * If we don't have a root item then we likely just have
3215                          * a dir item in a snapshot for this root but no actual
3216                          * ref key or anything so it's meaningless.
3217                          */
3218                         if (!rec->found_root_item)
3219                                 continue;
3220                         errors++;
3221                         fprintf(stderr, "fs tree %llu not referenced\n",
3222                                 (unsigned long long)rec->objectid);
3223                 }
3224
3225                 error = 0;
3226                 if (rec->found_ref > 0 && !rec->found_root_item)
3227                         error = 1;
3228                 list_for_each_entry(backref, &rec->backrefs, list) {
3229                         if (!backref->found_dir_item)
3230                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3231                         if (!backref->found_dir_index)
3232                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3233                         if (!backref->found_back_ref)
3234                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3235                         if (!backref->found_forward_ref)
3236                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3237                         if (backref->reachable && backref->errors)
3238                                 error = 1;
3239                 }
3240                 if (!error)
3241                         continue;
3242
3243                 errors++;
3244                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3245                         (unsigned long long)rec->objectid, rec->found_ref,
3246                          rec->found_root_item ? "" : "not found");
3247
3248                 list_for_each_entry(backref, &rec->backrefs, list) {
3249                         if (!backref->reachable)
3250                                 continue;
3251                         if (!backref->errors && rec->found_root_item)
3252                                 continue;
3253                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3254                                 " index %llu namelen %u name %s errors %x\n",
3255                                 (unsigned long long)backref->ref_root,
3256                                 (unsigned long long)backref->dir,
3257                                 (unsigned long long)backref->index,
3258                                 backref->namelen, backref->name,
3259                                 backref->errors);
3260                         print_ref_error(backref->errors);
3261                 }
3262         }
3263         return errors > 0 ? 1 : 0;
3264 }
3265
3266 static int process_root_ref(struct extent_buffer *eb, int slot,
3267                             struct btrfs_key *key,
3268                             struct cache_tree *root_cache)
3269 {
3270         u64 dirid;
3271         u64 index;
3272         u32 len;
3273         u32 name_len;
3274         struct btrfs_root_ref *ref;
3275         char namebuf[BTRFS_NAME_LEN];
3276         int error;
3277
3278         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3279
3280         dirid = btrfs_root_ref_dirid(eb, ref);
3281         index = btrfs_root_ref_sequence(eb, ref);
3282         name_len = btrfs_root_ref_name_len(eb, ref);
3283
3284         if (name_len <= BTRFS_NAME_LEN) {
3285                 len = name_len;
3286                 error = 0;
3287         } else {
3288                 len = BTRFS_NAME_LEN;
3289                 error = REF_ERR_NAME_TOO_LONG;
3290         }
3291         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3292
3293         if (key->type == BTRFS_ROOT_REF_KEY) {
3294                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3295                                  index, namebuf, len, key->type, error);
3296         } else {
3297                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3298                                  index, namebuf, len, key->type, error);
3299         }
3300         return 0;
3301 }
3302
3303 static void free_corrupt_block(struct cache_extent *cache)
3304 {
3305         struct btrfs_corrupt_block *corrupt;
3306
3307         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3308         free(corrupt);
3309 }
3310
3311 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3312
3313 /*
3314  * Repair the btree of the given root.
3315  *
3316  * The fix is to remove the node key in corrupt_blocks cache_tree.
3317  * and rebalance the tree.
3318  * After the fix, the btree should be writeable.
3319  */
3320 static int repair_btree(struct btrfs_root *root,
3321                         struct cache_tree *corrupt_blocks)
3322 {
3323         struct btrfs_trans_handle *trans;
3324         struct btrfs_path *path;
3325         struct btrfs_corrupt_block *corrupt;
3326         struct cache_extent *cache;
3327         struct btrfs_key key;
3328         u64 offset;
3329         int level;
3330         int ret = 0;
3331
3332         if (cache_tree_empty(corrupt_blocks))
3333                 return 0;
3334
3335         path = btrfs_alloc_path();
3336         if (!path)
3337                 return -ENOMEM;
3338
3339         trans = btrfs_start_transaction(root, 1);
3340         if (IS_ERR(trans)) {
3341                 ret = PTR_ERR(trans);
3342                 fprintf(stderr, "Error starting transaction: %s\n",
3343                         strerror(-ret));
3344                 goto out_free_path;
3345         }
3346         cache = first_cache_extent(corrupt_blocks);
3347         while (cache) {
3348                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3349                                        cache);
3350                 level = corrupt->level;
3351                 path->lowest_level = level;
3352                 key.objectid = corrupt->key.objectid;
3353                 key.type = corrupt->key.type;
3354                 key.offset = corrupt->key.offset;
3355
3356                 /*
3357                  * Here we don't want to do any tree balance, since it may
3358                  * cause a balance with corrupted brother leaf/node,
3359                  * so ins_len set to 0 here.
3360                  * Balance will be done after all corrupt node/leaf is deleted.
3361                  */
3362                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3363                 if (ret < 0)
3364                         goto out;
3365                 offset = btrfs_node_blockptr(path->nodes[level],
3366                                              path->slots[level]);
3367
3368                 /* Remove the ptr */
3369                 ret = btrfs_del_ptr(trans, root, path, level,
3370                                     path->slots[level]);
3371                 if (ret < 0)
3372                         goto out;
3373                 /*
3374                  * Remove the corresponding extent
3375                  * return value is not concerned.
3376                  */
3377                 btrfs_release_path(path);
3378                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3379                                         0, root->root_key.objectid,
3380                                         level - 1, 0);
3381                 cache = next_cache_extent(cache);
3382         }
3383
3384         /* Balance the btree using btrfs_search_slot() */
3385         cache = first_cache_extent(corrupt_blocks);
3386         while (cache) {
3387                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3388                                        cache);
3389                 memcpy(&key, &corrupt->key, sizeof(key));
3390                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3391                 if (ret < 0)
3392                         goto out;
3393                 /* return will always >0 since it won't find the item */
3394                 ret = 0;
3395                 btrfs_release_path(path);
3396                 cache = next_cache_extent(cache);
3397         }
3398 out:
3399         btrfs_commit_transaction(trans, root);
3400 out_free_path:
3401         btrfs_free_path(path);
3402         return ret;
3403 }
3404
3405 static int check_fs_root(struct btrfs_root *root,
3406                          struct cache_tree *root_cache,
3407                          struct walk_control *wc)
3408 {
3409         int ret = 0;
3410         int err = 0;
3411         int wret;
3412         int level;
3413         struct btrfs_path path;
3414         struct shared_node root_node;
3415         struct root_record *rec;
3416         struct btrfs_root_item *root_item = &root->root_item;
3417         struct cache_tree corrupt_blocks;
3418         struct orphan_data_extent *orphan;
3419         struct orphan_data_extent *tmp;
3420         enum btrfs_tree_block_status status;
3421
3422         /*
3423          * Reuse the corrupt_block cache tree to record corrupted tree block
3424          *
3425          * Unlike the usage in extent tree check, here we do it in a per
3426          * fs/subvol tree base.
3427          */
3428         cache_tree_init(&corrupt_blocks);
3429         root->fs_info->corrupt_blocks = &corrupt_blocks;
3430
3431         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3432                 rec = get_root_rec(root_cache, root->root_key.objectid);
3433                 if (btrfs_root_refs(root_item) > 0)
3434                         rec->found_root_item = 1;
3435         }
3436
3437         btrfs_init_path(&path);
3438         memset(&root_node, 0, sizeof(root_node));
3439         cache_tree_init(&root_node.root_cache);
3440         cache_tree_init(&root_node.inode_cache);
3441
3442         /* Move the orphan extent record to corresponding inode_record */
3443         list_for_each_entry_safe(orphan, tmp,
3444                                  &root->orphan_data_extents, list) {
3445                 struct inode_record *inode;
3446
3447                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3448                                       1);
3449                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3450                 list_move(&orphan->list, &inode->orphan_extents);
3451         }
3452
3453         level = btrfs_header_level(root->node);
3454         memset(wc->nodes, 0, sizeof(wc->nodes));
3455         wc->nodes[level] = &root_node;
3456         wc->active_node = level;
3457         wc->root_level = level;
3458
3459         /* We may not have checked the root block, lets do that now */
3460         if (btrfs_is_leaf(root->node))
3461                 status = btrfs_check_leaf(root, NULL, root->node);
3462         else
3463                 status = btrfs_check_node(root, NULL, root->node);
3464         if (status != BTRFS_TREE_BLOCK_CLEAN)
3465                 return -EIO;
3466
3467         if (btrfs_root_refs(root_item) > 0 ||
3468             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3469                 path.nodes[level] = root->node;
3470                 extent_buffer_get(root->node);
3471                 path.slots[level] = 0;
3472         } else {
3473                 struct btrfs_key key;
3474                 struct btrfs_disk_key found_key;
3475
3476                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3477                 level = root_item->drop_level;
3478                 path.lowest_level = level;
3479                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3480                 if (wret < 0)
3481                         goto skip_walking;
3482                 btrfs_node_key(path.nodes[level], &found_key,
3483                                 path.slots[level]);
3484                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3485                                         sizeof(found_key)));
3486         }
3487
3488         while (1) {
3489                 wret = walk_down_tree(root, &path, wc, &level);
3490                 if (wret < 0)
3491                         ret = wret;
3492                 if (wret != 0)
3493                         break;
3494
3495                 wret = walk_up_tree(root, &path, wc, &level);
3496                 if (wret < 0)
3497                         ret = wret;
3498                 if (wret != 0)
3499                         break;
3500         }
3501 skip_walking:
3502         btrfs_release_path(&path);
3503
3504         if (!cache_tree_empty(&corrupt_blocks)) {
3505                 struct cache_extent *cache;
3506                 struct btrfs_corrupt_block *corrupt;
3507
3508                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3509                        root->root_key.objectid);
3510                 cache = first_cache_extent(&corrupt_blocks);
3511                 while (cache) {
3512                         corrupt = container_of(cache,
3513                                                struct btrfs_corrupt_block,
3514                                                cache);
3515                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3516                                cache->start, corrupt->level,
3517                                corrupt->key.objectid, corrupt->key.type,
3518                                corrupt->key.offset);
3519                         cache = next_cache_extent(cache);
3520                 }
3521                 if (repair) {
3522                         printf("Try to repair the btree for root %llu\n",
3523                                root->root_key.objectid);
3524                         ret = repair_btree(root, &corrupt_blocks);
3525                         if (ret < 0)
3526                                 fprintf(stderr, "Failed to repair btree: %s\n",
3527                                         strerror(-ret));
3528                         if (!ret)
3529                                 printf("Btree for root %llu is fixed\n",
3530                                        root->root_key.objectid);
3531                 }
3532         }
3533
3534         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3535         if (err < 0)
3536                 ret = err;
3537
3538         if (root_node.current) {
3539                 root_node.current->checked = 1;
3540                 maybe_free_inode_rec(&root_node.inode_cache,
3541                                 root_node.current);
3542         }
3543
3544         err = check_inode_recs(root, &root_node.inode_cache);
3545         if (!ret)
3546                 ret = err;
3547
3548         free_corrupt_blocks_tree(&corrupt_blocks);
3549         root->fs_info->corrupt_blocks = NULL;
3550         free_orphan_data_extents(&root->orphan_data_extents);
3551         return ret;
3552 }
3553
3554 static int fs_root_objectid(u64 objectid)
3555 {
3556         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3557             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3558                 return 1;
3559         return is_fstree(objectid);
3560 }
3561
3562 static int check_fs_roots(struct btrfs_root *root,
3563                           struct cache_tree *root_cache)
3564 {
3565         struct btrfs_path path;
3566         struct btrfs_key key;
3567         struct walk_control wc;
3568         struct extent_buffer *leaf, *tree_node;
3569         struct btrfs_root *tmp_root;
3570         struct btrfs_root *tree_root = root->fs_info->tree_root;
3571         int ret;
3572         int err = 0;
3573
3574         if (ctx.progress_enabled) {
3575                 ctx.tp = TASK_FS_ROOTS;
3576                 task_start(ctx.info);
3577         }
3578
3579         /*
3580          * Just in case we made any changes to the extent tree that weren't
3581          * reflected into the free space cache yet.
3582          */
3583         if (repair)
3584                 reset_cached_block_groups(root->fs_info);
3585         memset(&wc, 0, sizeof(wc));
3586         cache_tree_init(&wc.shared);
3587         btrfs_init_path(&path);
3588
3589 again:
3590         key.offset = 0;
3591         key.objectid = 0;
3592         key.type = BTRFS_ROOT_ITEM_KEY;
3593         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3594         if (ret < 0) {
3595                 err = 1;
3596                 goto out;
3597         }
3598         tree_node = tree_root->node;
3599         while (1) {
3600                 if (tree_node != tree_root->node) {
3601                         free_root_recs_tree(root_cache);
3602                         btrfs_release_path(&path);
3603                         goto again;
3604                 }
3605                 leaf = path.nodes[0];
3606                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3607                         ret = btrfs_next_leaf(tree_root, &path);
3608                         if (ret) {
3609                                 if (ret < 0)
3610                                         err = 1;
3611                                 break;
3612                         }
3613                         leaf = path.nodes[0];
3614                 }
3615                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3616                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3617                     fs_root_objectid(key.objectid)) {
3618                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3619                                 tmp_root = btrfs_read_fs_root_no_cache(
3620                                                 root->fs_info, &key);
3621                         } else {
3622                                 key.offset = (u64)-1;
3623                                 tmp_root = btrfs_read_fs_root(
3624                                                 root->fs_info, &key);
3625                         }
3626                         if (IS_ERR(tmp_root)) {
3627                                 err = 1;
3628                                 goto next;
3629                         }
3630                         ret = check_fs_root(tmp_root, root_cache, &wc);
3631                         if (ret == -EAGAIN) {
3632                                 free_root_recs_tree(root_cache);
3633                                 btrfs_release_path(&path);
3634                                 goto again;
3635                         }
3636                         if (ret)
3637                                 err = 1;
3638                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3639                                 btrfs_free_fs_root(tmp_root);
3640                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3641                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3642                         process_root_ref(leaf, path.slots[0], &key,
3643                                          root_cache);
3644                 }
3645 next:
3646                 path.slots[0]++;
3647         }
3648 out:
3649         btrfs_release_path(&path);
3650         if (err)
3651                 free_extent_cache_tree(&wc.shared);
3652         if (!cache_tree_empty(&wc.shared))
3653                 fprintf(stderr, "warning line %d\n", __LINE__);
3654
3655         task_stop(ctx.info);
3656
3657         return err;
3658 }
3659
3660 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3661 {
3662         struct list_head *cur = rec->backrefs.next;
3663         struct extent_backref *back;
3664         struct tree_backref *tback;
3665         struct data_backref *dback;
3666         u64 found = 0;
3667         int err = 0;
3668
3669         while(cur != &rec->backrefs) {
3670                 back = list_entry(cur, struct extent_backref, list);
3671                 cur = cur->next;
3672                 if (!back->found_extent_tree) {
3673                         err = 1;
3674                         if (!print_errs)
3675                                 goto out;
3676                         if (back->is_data) {
3677                                 dback = (struct data_backref *)back;
3678                                 fprintf(stderr, "Backref %llu %s %llu"
3679                                         " owner %llu offset %llu num_refs %lu"
3680                                         " not found in extent tree\n",
3681                                         (unsigned long long)rec->start,
3682                                         back->full_backref ?
3683                                         "parent" : "root",
3684                                         back->full_backref ?
3685                                         (unsigned long long)dback->parent:
3686                                         (unsigned long long)dback->root,
3687                                         (unsigned long long)dback->owner,
3688                                         (unsigned long long)dback->offset,
3689                                         (unsigned long)dback->num_refs);
3690                         } else {
3691                                 tback = (struct tree_backref *)back;
3692                                 fprintf(stderr, "Backref %llu parent %llu"
3693                                         " root %llu not found in extent tree\n",
3694                                         (unsigned long long)rec->start,
3695                                         (unsigned long long)tback->parent,
3696                                         (unsigned long long)tback->root);
3697                         }
3698                 }
3699                 if (!back->is_data && !back->found_ref) {
3700                         err = 1;
3701                         if (!print_errs)
3702                                 goto out;
3703                         tback = (struct tree_backref *)back;
3704                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3705                                 (unsigned long long)rec->start,
3706                                 back->full_backref ? "parent" : "root",
3707                                 back->full_backref ?
3708                                 (unsigned long long)tback->parent :
3709                                 (unsigned long long)tback->root, back);
3710                 }
3711                 if (back->is_data) {
3712                         dback = (struct data_backref *)back;
3713                         if (dback->found_ref != dback->num_refs) {
3714                                 err = 1;
3715                                 if (!print_errs)
3716                                         goto out;
3717                                 fprintf(stderr, "Incorrect local backref count"
3718                                         " on %llu %s %llu owner %llu"
3719                                         " offset %llu found %u wanted %u back %p\n",
3720                                         (unsigned long long)rec->start,
3721                                         back->full_backref ?
3722                                         "parent" : "root",
3723                                         back->full_backref ?
3724                                         (unsigned long long)dback->parent:
3725                                         (unsigned long long)dback->root,
3726                                         (unsigned long long)dback->owner,
3727                                         (unsigned long long)dback->offset,
3728                                         dback->found_ref, dback->num_refs, back);
3729                         }
3730                         if (dback->disk_bytenr != rec->start) {
3731                                 err = 1;
3732                                 if (!print_errs)
3733                                         goto out;
3734                                 fprintf(stderr, "Backref disk bytenr does not"
3735                                         " match extent record, bytenr=%llu, "
3736                                         "ref bytenr=%llu\n",
3737                                         (unsigned long long)rec->start,
3738                                         (unsigned long long)dback->disk_bytenr);
3739                         }
3740
3741                         if (dback->bytes != rec->nr) {
3742                                 err = 1;
3743                                 if (!print_errs)
3744                                         goto out;
3745                                 fprintf(stderr, "Backref bytes do not match "
3746                                         "extent backref, bytenr=%llu, ref "
3747                                         "bytes=%llu, backref bytes=%llu\n",
3748                                         (unsigned long long)rec->start,
3749                                         (unsigned long long)rec->nr,
3750                                         (unsigned long long)dback->bytes);
3751                         }
3752                 }
3753                 if (!back->is_data) {
3754                         found += 1;
3755                 } else {
3756                         dback = (struct data_backref *)back;
3757                         found += dback->found_ref;
3758                 }
3759         }
3760         if (found != rec->refs) {
3761                 err = 1;
3762                 if (!print_errs)
3763                         goto out;
3764                 fprintf(stderr, "Incorrect global backref count "
3765                         "on %llu found %llu wanted %llu\n",
3766                         (unsigned long long)rec->start,
3767                         (unsigned long long)found,
3768                         (unsigned long long)rec->refs);
3769         }
3770 out:
3771         return err;
3772 }
3773
3774 static int free_all_extent_backrefs(struct extent_record *rec)
3775 {
3776         struct extent_backref *back;
3777         struct list_head *cur;
3778         while (!list_empty(&rec->backrefs)) {
3779                 cur = rec->backrefs.next;
3780                 back = list_entry(cur, struct extent_backref, list);
3781                 list_del(cur);
3782                 free(back);
3783         }
3784         return 0;
3785 }
3786
3787 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3788                                      struct cache_tree *extent_cache)
3789 {
3790         struct cache_extent *cache;
3791         struct extent_record *rec;
3792
3793         while (1) {
3794                 cache = first_cache_extent(extent_cache);
3795                 if (!cache)
3796                         break;
3797                 rec = container_of(cache, struct extent_record, cache);
3798                 remove_cache_extent(extent_cache, cache);
3799                 free_all_extent_backrefs(rec);
3800                 free(rec);
3801         }
3802 }
3803
3804 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3805                                  struct extent_record *rec)
3806 {
3807         if (rec->content_checked && rec->owner_ref_checked &&
3808             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3809             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3810             !rec->bad_full_backref && !rec->crossing_stripes &&
3811             !rec->wrong_chunk_type) {
3812                 remove_cache_extent(extent_cache, &rec->cache);
3813                 free_all_extent_backrefs(rec);
3814                 list_del_init(&rec->list);
3815                 free(rec);
3816         }
3817         return 0;
3818 }
3819
3820 static int check_owner_ref(struct btrfs_root *root,
3821                             struct extent_record *rec,
3822                             struct extent_buffer *buf)
3823 {
3824         struct extent_backref *node;
3825         struct tree_backref *back;
3826         struct btrfs_root *ref_root;
3827         struct btrfs_key key;
3828         struct btrfs_path path;
3829         struct extent_buffer *parent;
3830         int level;
3831         int found = 0;
3832         int ret;
3833
3834         list_for_each_entry(node, &rec->backrefs, list) {
3835                 if (node->is_data)
3836                         continue;
3837                 if (!node->found_ref)
3838                         continue;
3839                 if (node->full_backref)
3840                         continue;
3841                 back = (struct tree_backref *)node;
3842                 if (btrfs_header_owner(buf) == back->root)
3843                         return 0;
3844         }
3845         BUG_ON(rec->is_root);
3846
3847         /* try to find the block by search corresponding fs tree */
3848         key.objectid = btrfs_header_owner(buf);
3849         key.type = BTRFS_ROOT_ITEM_KEY;
3850         key.offset = (u64)-1;
3851
3852         ref_root = btrfs_read_fs_root(root->fs_info, &key);
3853         if (IS_ERR(ref_root))
3854                 return 1;
3855
3856         level = btrfs_header_level(buf);
3857         if (level == 0)
3858                 btrfs_item_key_to_cpu(buf, &key, 0);
3859         else
3860                 btrfs_node_key_to_cpu(buf, &key, 0);
3861
3862         btrfs_init_path(&path);
3863         path.lowest_level = level + 1;
3864         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3865         if (ret < 0)
3866                 return 0;
3867
3868         parent = path.nodes[level + 1];
3869         if (parent && buf->start == btrfs_node_blockptr(parent,
3870                                                         path.slots[level + 1]))
3871                 found = 1;
3872
3873         btrfs_release_path(&path);
3874         return found ? 0 : 1;
3875 }
3876
3877 static int is_extent_tree_record(struct extent_record *rec)
3878 {
3879         struct list_head *cur = rec->backrefs.next;
3880         struct extent_backref *node;
3881         struct tree_backref *back;
3882         int is_extent = 0;
3883
3884         while(cur != &rec->backrefs) {
3885                 node = list_entry(cur, struct extent_backref, list);
3886                 cur = cur->next;
3887                 if (node->is_data)
3888                         return 0;
3889                 back = (struct tree_backref *)node;
3890                 if (node->full_backref)
3891                         return 0;
3892                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3893                         is_extent = 1;
3894         }
3895         return is_extent;
3896 }
3897
3898
3899 static int record_bad_block_io(struct btrfs_fs_info *info,
3900                                struct cache_tree *extent_cache,
3901                                u64 start, u64 len)
3902 {
3903         struct extent_record *rec;
3904         struct cache_extent *cache;
3905         struct btrfs_key key;
3906
3907         cache = lookup_cache_extent(extent_cache, start, len);
3908         if (!cache)
3909                 return 0;
3910
3911         rec = container_of(cache, struct extent_record, cache);
3912         if (!is_extent_tree_record(rec))
3913                 return 0;
3914
3915         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3916         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
3917 }
3918
3919 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3920                        struct extent_buffer *buf, int slot)
3921 {
3922         if (btrfs_header_level(buf)) {
3923                 struct btrfs_key_ptr ptr1, ptr2;
3924
3925                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3926                                    sizeof(struct btrfs_key_ptr));
3927                 read_extent_buffer(buf, &ptr2,
3928                                    btrfs_node_key_ptr_offset(slot + 1),
3929                                    sizeof(struct btrfs_key_ptr));
3930                 write_extent_buffer(buf, &ptr1,
3931                                     btrfs_node_key_ptr_offset(slot + 1),
3932                                     sizeof(struct btrfs_key_ptr));
3933                 write_extent_buffer(buf, &ptr2,
3934                                     btrfs_node_key_ptr_offset(slot),
3935                                     sizeof(struct btrfs_key_ptr));
3936                 if (slot == 0) {
3937                         struct btrfs_disk_key key;
3938                         btrfs_node_key(buf, &key, 0);
3939                         btrfs_fixup_low_keys(root, path, &key,
3940                                              btrfs_header_level(buf) + 1);
3941                 }
3942         } else {
3943                 struct btrfs_item *item1, *item2;
3944                 struct btrfs_key k1, k2;
3945                 char *item1_data, *item2_data;
3946                 u32 item1_offset, item2_offset, item1_size, item2_size;
3947
3948                 item1 = btrfs_item_nr(slot);
3949                 item2 = btrfs_item_nr(slot + 1);
3950                 btrfs_item_key_to_cpu(buf, &k1, slot);
3951                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
3952                 item1_offset = btrfs_item_offset(buf, item1);
3953                 item2_offset = btrfs_item_offset(buf, item2);
3954                 item1_size = btrfs_item_size(buf, item1);
3955                 item2_size = btrfs_item_size(buf, item2);
3956
3957                 item1_data = malloc(item1_size);
3958                 if (!item1_data)
3959                         return -ENOMEM;
3960                 item2_data = malloc(item2_size);
3961                 if (!item2_data) {
3962                         free(item1_data);
3963                         return -ENOMEM;
3964                 }
3965
3966                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
3967                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
3968
3969                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
3970                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
3971                 free(item1_data);
3972                 free(item2_data);
3973
3974                 btrfs_set_item_offset(buf, item1, item2_offset);
3975                 btrfs_set_item_offset(buf, item2, item1_offset);
3976                 btrfs_set_item_size(buf, item1, item2_size);
3977                 btrfs_set_item_size(buf, item2, item1_size);
3978
3979                 path->slots[0] = slot;
3980                 btrfs_set_item_key_unsafe(root, path, &k2);
3981                 path->slots[0] = slot + 1;
3982                 btrfs_set_item_key_unsafe(root, path, &k1);
3983         }
3984         return 0;
3985 }
3986
3987 static int fix_key_order(struct btrfs_trans_handle *trans,
3988                          struct btrfs_root *root,
3989                          struct btrfs_path *path)
3990 {
3991         struct extent_buffer *buf;
3992         struct btrfs_key k1, k2;
3993         int i;
3994         int level = path->lowest_level;
3995         int ret = -EIO;
3996
3997         buf = path->nodes[level];
3998         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
3999                 if (level) {
4000                         btrfs_node_key_to_cpu(buf, &k1, i);
4001                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4002                 } else {
4003                         btrfs_item_key_to_cpu(buf, &k1, i);
4004                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4005                 }
4006                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4007                         continue;
4008                 ret = swap_values(root, path, buf, i);
4009                 if (ret)
4010                         break;
4011                 btrfs_mark_buffer_dirty(buf);
4012                 i = 0;
4013         }
4014         return ret;
4015 }
4016
4017 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4018                              struct btrfs_root *root,
4019                              struct btrfs_path *path,
4020                              struct extent_buffer *buf, int slot)
4021 {
4022         struct btrfs_key key;
4023         int nritems = btrfs_header_nritems(buf);
4024
4025         btrfs_item_key_to_cpu(buf, &key, slot);
4026
4027         /* These are all the keys we can deal with missing. */
4028         if (key.type != BTRFS_DIR_INDEX_KEY &&
4029             key.type != BTRFS_EXTENT_ITEM_KEY &&
4030             key.type != BTRFS_METADATA_ITEM_KEY &&
4031             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4032             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4033                 return -1;
4034
4035         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4036                (unsigned long long)key.objectid, key.type,
4037                (unsigned long long)key.offset, slot, buf->start);
4038         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4039                               btrfs_item_nr_offset(slot + 1),
4040                               sizeof(struct btrfs_item) *
4041                               (nritems - slot - 1));
4042         btrfs_set_header_nritems(buf, nritems - 1);
4043         if (slot == 0) {
4044                 struct btrfs_disk_key disk_key;
4045
4046                 btrfs_item_key(buf, &disk_key, 0);
4047                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4048         }
4049         btrfs_mark_buffer_dirty(buf);
4050         return 0;
4051 }
4052
4053 static int fix_item_offset(struct btrfs_trans_handle *trans,
4054                            struct btrfs_root *root,
4055                            struct btrfs_path *path)
4056 {
4057         struct extent_buffer *buf;
4058         int i;
4059         int ret = 0;
4060
4061         /* We should only get this for leaves */
4062         BUG_ON(path->lowest_level);
4063         buf = path->nodes[0];
4064 again:
4065         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4066                 unsigned int shift = 0, offset;
4067
4068                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4069                     BTRFS_LEAF_DATA_SIZE(root)) {
4070                         if (btrfs_item_end_nr(buf, i) >
4071                             BTRFS_LEAF_DATA_SIZE(root)) {
4072                                 ret = delete_bogus_item(trans, root, path,
4073                                                         buf, i);
4074                                 if (!ret)
4075                                         goto again;
4076                                 fprintf(stderr, "item is off the end of the "
4077                                         "leaf, can't fix\n");
4078                                 ret = -EIO;
4079                                 break;
4080                         }
4081                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4082                                 btrfs_item_end_nr(buf, i);
4083                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4084                            btrfs_item_offset_nr(buf, i - 1)) {
4085                         if (btrfs_item_end_nr(buf, i) >
4086                             btrfs_item_offset_nr(buf, i - 1)) {
4087                                 ret = delete_bogus_item(trans, root, path,
4088                                                         buf, i);
4089                                 if (!ret)
4090                                         goto again;
4091                                 fprintf(stderr, "items overlap, can't fix\n");
4092                                 ret = -EIO;
4093                                 break;
4094                         }
4095                         shift = btrfs_item_offset_nr(buf, i - 1) -
4096                                 btrfs_item_end_nr(buf, i);
4097                 }
4098                 if (!shift)
4099                         continue;
4100
4101                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4102                        i, shift, (unsigned long long)buf->start);
4103                 offset = btrfs_item_offset_nr(buf, i);
4104                 memmove_extent_buffer(buf,
4105                                       btrfs_leaf_data(buf) + offset + shift,
4106                                       btrfs_leaf_data(buf) + offset,
4107                                       btrfs_item_size_nr(buf, i));
4108                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4109                                       offset + shift);
4110                 btrfs_mark_buffer_dirty(buf);
4111         }
4112
4113         /*
4114          * We may have moved things, in which case we want to exit so we don't
4115          * write those changes out.  Once we have proper abort functionality in
4116          * progs this can be changed to something nicer.
4117          */
4118         BUG_ON(ret);
4119         return ret;
4120 }
4121
4122 /*
4123  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4124  * then just return -EIO.
4125  */
4126 static int try_to_fix_bad_block(struct btrfs_root *root,
4127                                 struct extent_buffer *buf,
4128                                 enum btrfs_tree_block_status status)
4129 {
4130         struct btrfs_trans_handle *trans;
4131         struct ulist *roots;
4132         struct ulist_node *node;
4133         struct btrfs_root *search_root;
4134         struct btrfs_path *path;
4135         struct ulist_iterator iter;
4136         struct btrfs_key root_key, key;
4137         int ret;
4138
4139         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4140             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4141                 return -EIO;
4142
4143         path = btrfs_alloc_path();
4144         if (!path)
4145                 return -EIO;
4146
4147         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4148                                    0, &roots);
4149         if (ret) {
4150                 btrfs_free_path(path);
4151                 return -EIO;
4152         }
4153
4154         ULIST_ITER_INIT(&iter);
4155         while ((node = ulist_next(roots, &iter))) {
4156                 root_key.objectid = node->val;
4157                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4158                 root_key.offset = (u64)-1;
4159
4160                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4161                 if (IS_ERR(root)) {
4162                         ret = -EIO;
4163                         break;
4164                 }
4165
4166
4167                 trans = btrfs_start_transaction(search_root, 0);
4168                 if (IS_ERR(trans)) {
4169                         ret = PTR_ERR(trans);
4170                         break;
4171                 }
4172
4173                 path->lowest_level = btrfs_header_level(buf);
4174                 path->skip_check_block = 1;
4175                 if (path->lowest_level)
4176                         btrfs_node_key_to_cpu(buf, &key, 0);
4177                 else
4178                         btrfs_item_key_to_cpu(buf, &key, 0);
4179                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4180                 if (ret) {
4181                         ret = -EIO;
4182                         btrfs_commit_transaction(trans, search_root);
4183                         break;
4184                 }
4185                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4186                         ret = fix_key_order(trans, search_root, path);
4187                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4188                         ret = fix_item_offset(trans, search_root, path);
4189                 if (ret) {
4190                         btrfs_commit_transaction(trans, search_root);
4191                         break;
4192                 }
4193                 btrfs_release_path(path);
4194                 btrfs_commit_transaction(trans, search_root);
4195         }
4196         ulist_free(roots);
4197         btrfs_free_path(path);
4198         return ret;
4199 }
4200
4201 static int check_block(struct btrfs_root *root,
4202                        struct cache_tree *extent_cache,
4203                        struct extent_buffer *buf, u64 flags)
4204 {
4205         struct extent_record *rec;
4206         struct cache_extent *cache;
4207         struct btrfs_key key;
4208         enum btrfs_tree_block_status status;
4209         int ret = 0;
4210         int level;
4211
4212         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4213         if (!cache)
4214                 return 1;
4215         rec = container_of(cache, struct extent_record, cache);
4216         rec->generation = btrfs_header_generation(buf);
4217
4218         level = btrfs_header_level(buf);
4219         if (btrfs_header_nritems(buf) > 0) {
4220
4221                 if (level == 0)
4222                         btrfs_item_key_to_cpu(buf, &key, 0);
4223                 else
4224                         btrfs_node_key_to_cpu(buf, &key, 0);
4225
4226                 rec->info_objectid = key.objectid;
4227         }
4228         rec->info_level = level;
4229
4230         if (btrfs_is_leaf(buf))
4231                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4232         else
4233                 status = btrfs_check_node(root, &rec->parent_key, buf);
4234
4235         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4236                 if (repair)
4237                         status = try_to_fix_bad_block(root, buf, status);
4238                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4239                         ret = -EIO;
4240                         fprintf(stderr, "bad block %llu\n",
4241                                 (unsigned long long)buf->start);
4242                 } else {
4243                         /*
4244                          * Signal to callers we need to start the scan over
4245                          * again since we'll have cow'ed blocks.
4246                          */
4247                         ret = -EAGAIN;
4248                 }
4249         } else {
4250                 rec->content_checked = 1;
4251                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4252                         rec->owner_ref_checked = 1;
4253                 else {
4254                         ret = check_owner_ref(root, rec, buf);
4255                         if (!ret)
4256                                 rec->owner_ref_checked = 1;
4257                 }
4258         }
4259         if (!ret)
4260                 maybe_free_extent_rec(extent_cache, rec);
4261         return ret;
4262 }
4263
4264 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4265                                                 u64 parent, u64 root)
4266 {
4267         struct list_head *cur = rec->backrefs.next;
4268         struct extent_backref *node;
4269         struct tree_backref *back;
4270
4271         while(cur != &rec->backrefs) {
4272                 node = list_entry(cur, struct extent_backref, list);
4273                 cur = cur->next;
4274                 if (node->is_data)
4275                         continue;
4276                 back = (struct tree_backref *)node;
4277                 if (parent > 0) {
4278                         if (!node->full_backref)
4279                                 continue;
4280                         if (parent == back->parent)
4281                                 return back;
4282                 } else {
4283                         if (node->full_backref)
4284                                 continue;
4285                         if (back->root == root)
4286                                 return back;
4287                 }
4288         }
4289         return NULL;
4290 }
4291
4292 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4293                                                 u64 parent, u64 root)
4294 {
4295         struct tree_backref *ref = malloc(sizeof(*ref));
4296         memset(&ref->node, 0, sizeof(ref->node));
4297         if (parent > 0) {
4298                 ref->parent = parent;
4299                 ref->node.full_backref = 1;
4300         } else {
4301                 ref->root = root;
4302                 ref->node.full_backref = 0;
4303         }
4304         list_add_tail(&ref->node.list, &rec->backrefs);
4305
4306         return ref;
4307 }
4308
4309 static struct data_backref *find_data_backref(struct extent_record *rec,
4310                                                 u64 parent, u64 root,
4311                                                 u64 owner, u64 offset,
4312                                                 int found_ref,
4313                                                 u64 disk_bytenr, u64 bytes)
4314 {
4315         struct list_head *cur = rec->backrefs.next;
4316         struct extent_backref *node;
4317         struct data_backref *back;
4318
4319         while(cur != &rec->backrefs) {
4320                 node = list_entry(cur, struct extent_backref, list);
4321                 cur = cur->next;
4322                 if (!node->is_data)
4323                         continue;
4324                 back = (struct data_backref *)node;
4325                 if (parent > 0) {
4326                         if (!node->full_backref)
4327                                 continue;
4328                         if (parent == back->parent)
4329                                 return back;
4330                 } else {
4331                         if (node->full_backref)
4332                                 continue;
4333                         if (back->root == root && back->owner == owner &&
4334                             back->offset == offset) {
4335                                 if (found_ref && node->found_ref &&
4336                                     (back->bytes != bytes ||
4337                                     back->disk_bytenr != disk_bytenr))
4338                                         continue;
4339                                 return back;
4340                         }
4341                 }
4342         }
4343         return NULL;
4344 }
4345
4346 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4347                                                 u64 parent, u64 root,
4348                                                 u64 owner, u64 offset,
4349                                                 u64 max_size)
4350 {
4351         struct data_backref *ref = malloc(sizeof(*ref));
4352         memset(&ref->node, 0, sizeof(ref->node));
4353         ref->node.is_data = 1;
4354
4355         if (parent > 0) {
4356                 ref->parent = parent;
4357                 ref->owner = 0;
4358                 ref->offset = 0;
4359                 ref->node.full_backref = 1;
4360         } else {
4361                 ref->root = root;
4362                 ref->owner = owner;
4363                 ref->offset = offset;
4364                 ref->node.full_backref = 0;
4365         }
4366         ref->bytes = max_size;
4367         ref->found_ref = 0;
4368         ref->num_refs = 0;
4369         list_add_tail(&ref->node.list, &rec->backrefs);
4370         if (max_size > rec->max_size)
4371                 rec->max_size = max_size;
4372         return ref;
4373 }
4374
4375 /* Check if the type of extent matches with its chunk */
4376 static void check_extent_type(struct extent_record *rec)
4377 {
4378         struct btrfs_block_group_cache *bg_cache;
4379
4380         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4381         if (!bg_cache)
4382                 return;
4383
4384         /* data extent, check chunk directly*/
4385         if (!rec->metadata) {
4386                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4387                         rec->wrong_chunk_type = 1;
4388                 return;
4389         }
4390
4391         /* metadata extent, check the obvious case first */
4392         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4393                                  BTRFS_BLOCK_GROUP_METADATA))) {
4394                 rec->wrong_chunk_type = 1;
4395                 return;
4396         }
4397
4398         /*
4399          * Check SYSTEM extent, as it's also marked as metadata, we can only
4400          * make sure it's a SYSTEM extent by its backref
4401          */
4402         if (!list_empty(&rec->backrefs)) {
4403                 struct extent_backref *node;
4404                 struct tree_backref *tback;
4405                 u64 bg_type;
4406
4407                 node = list_entry(rec->backrefs.next, struct extent_backref,
4408                                   list);
4409                 if (node->is_data) {
4410                         /* tree block shouldn't have data backref */
4411                         rec->wrong_chunk_type = 1;
4412                         return;
4413                 }
4414                 tback = container_of(node, struct tree_backref, node);
4415
4416                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4417                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4418                 else
4419                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4420                 if (!(bg_cache->flags & bg_type))
4421                         rec->wrong_chunk_type = 1;
4422         }
4423 }
4424
4425 static int add_extent_rec(struct cache_tree *extent_cache,
4426                           struct btrfs_key *parent_key, u64 parent_gen,
4427                           u64 start, u64 nr, u64 extent_item_refs,
4428                           int is_root, int inc_ref, int set_checked,
4429                           int metadata, int extent_rec, u64 max_size)
4430 {
4431         struct extent_record *rec;
4432         struct cache_extent *cache;
4433         int ret = 0;
4434         int dup = 0;
4435
4436         cache = lookup_cache_extent(extent_cache, start, nr);
4437         if (cache) {
4438                 rec = container_of(cache, struct extent_record, cache);
4439                 if (inc_ref)
4440                         rec->refs++;
4441                 if (rec->nr == 1)
4442                         rec->nr = max(nr, max_size);
4443
4444                 /*
4445                  * We need to make sure to reset nr to whatever the extent
4446                  * record says was the real size, this way we can compare it to
4447                  * the backrefs.
4448                  */
4449                 if (extent_rec) {
4450                         if (start != rec->start || rec->found_rec) {
4451                                 struct extent_record *tmp;
4452
4453                                 dup = 1;
4454                                 if (list_empty(&rec->list))
4455                                         list_add_tail(&rec->list,
4456                                                       &duplicate_extents);
4457
4458                                 /*
4459                                  * We have to do this song and dance in case we
4460                                  * find an extent record that falls inside of
4461                                  * our current extent record but does not have
4462                                  * the same objectid.
4463                                  */
4464                                 tmp = malloc(sizeof(*tmp));
4465                                 if (!tmp)
4466                                         return -ENOMEM;
4467                                 tmp->start = start;
4468                                 tmp->max_size = max_size;
4469                                 tmp->nr = nr;
4470                                 tmp->found_rec = 1;
4471                                 tmp->metadata = metadata;
4472                                 tmp->extent_item_refs = extent_item_refs;
4473                                 INIT_LIST_HEAD(&tmp->list);
4474                                 list_add_tail(&tmp->list, &rec->dups);
4475                                 rec->num_duplicates++;
4476                         } else {
4477                                 rec->nr = nr;
4478                                 rec->found_rec = 1;
4479                         }
4480                 }
4481
4482                 if (extent_item_refs && !dup) {
4483                         if (rec->extent_item_refs) {
4484                                 fprintf(stderr, "block %llu rec "
4485                                         "extent_item_refs %llu, passed %llu\n",
4486                                         (unsigned long long)start,
4487                                         (unsigned long long)
4488                                                         rec->extent_item_refs,
4489                                         (unsigned long long)extent_item_refs);
4490                         }
4491                         rec->extent_item_refs = extent_item_refs;
4492                 }
4493                 if (is_root)
4494                         rec->is_root = 1;
4495                 if (set_checked) {
4496                         rec->content_checked = 1;
4497                         rec->owner_ref_checked = 1;
4498                 }
4499
4500                 if (parent_key)
4501                         btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4502                 if (parent_gen)
4503                         rec->parent_generation = parent_gen;
4504
4505                 if (rec->max_size < max_size)
4506                         rec->max_size = max_size;
4507
4508                 /*
4509                  * A metadata extent can't cross stripe_len boundary, otherwise
4510                  * kernel scrub won't be able to handle it.
4511                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4512                  * it.
4513                  */
4514                 if (metadata && check_crossing_stripes(rec->start,
4515                                                        rec->max_size))
4516                                 rec->crossing_stripes = 1;
4517                 check_extent_type(rec);
4518                 maybe_free_extent_rec(extent_cache, rec);
4519                 return ret;
4520         }
4521         rec = malloc(sizeof(*rec));
4522         rec->start = start;
4523         rec->max_size = max_size;
4524         rec->nr = max(nr, max_size);
4525         rec->found_rec = !!extent_rec;
4526         rec->content_checked = 0;
4527         rec->owner_ref_checked = 0;
4528         rec->num_duplicates = 0;
4529         rec->metadata = metadata;
4530         rec->flag_block_full_backref = -1;
4531         rec->bad_full_backref = 0;
4532         rec->crossing_stripes = 0;
4533         rec->wrong_chunk_type = 0;
4534         INIT_LIST_HEAD(&rec->backrefs);
4535         INIT_LIST_HEAD(&rec->dups);
4536         INIT_LIST_HEAD(&rec->list);
4537
4538         if (is_root)
4539                 rec->is_root = 1;
4540         else
4541                 rec->is_root = 0;
4542
4543         if (inc_ref)
4544                 rec->refs = 1;
4545         else
4546                 rec->refs = 0;
4547
4548         if (extent_item_refs)
4549                 rec->extent_item_refs = extent_item_refs;
4550         else
4551                 rec->extent_item_refs = 0;
4552
4553         if (parent_key)
4554                 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4555         else
4556                 memset(&rec->parent_key, 0, sizeof(*parent_key));
4557
4558         if (parent_gen)
4559                 rec->parent_generation = parent_gen;
4560         else
4561                 rec->parent_generation = 0;
4562
4563         rec->cache.start = start;
4564         rec->cache.size = nr;
4565         ret = insert_cache_extent(extent_cache, &rec->cache);
4566         BUG_ON(ret);
4567         bytes_used += nr;
4568         if (set_checked) {
4569                 rec->content_checked = 1;
4570                 rec->owner_ref_checked = 1;
4571         }
4572
4573         if (metadata)
4574                 if (check_crossing_stripes(rec->start, rec->max_size))
4575                         rec->crossing_stripes = 1;
4576         check_extent_type(rec);
4577         return ret;
4578 }
4579
4580 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4581                             u64 parent, u64 root, int found_ref)
4582 {
4583         struct extent_record *rec;
4584         struct tree_backref *back;
4585         struct cache_extent *cache;
4586
4587         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4588         if (!cache) {
4589                 add_extent_rec(extent_cache, NULL, 0, bytenr,
4590                                1, 0, 0, 0, 0, 1, 0, 0);
4591                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4592                 if (!cache)
4593                         abort();
4594         }
4595
4596         rec = container_of(cache, struct extent_record, cache);
4597         if (rec->start != bytenr) {
4598                 abort();
4599         }
4600
4601         back = find_tree_backref(rec, parent, root);
4602         if (!back)
4603                 back = alloc_tree_backref(rec, parent, root);
4604
4605         if (found_ref) {
4606                 if (back->node.found_ref) {
4607                         fprintf(stderr, "Extent back ref already exists "
4608                                 "for %llu parent %llu root %llu \n",
4609                                 (unsigned long long)bytenr,
4610                                 (unsigned long long)parent,
4611                                 (unsigned long long)root);
4612                 }
4613                 back->node.found_ref = 1;
4614         } else {
4615                 if (back->node.found_extent_tree) {
4616                         fprintf(stderr, "Extent back ref already exists "
4617                                 "for %llu parent %llu root %llu \n",
4618                                 (unsigned long long)bytenr,
4619                                 (unsigned long long)parent,
4620                                 (unsigned long long)root);
4621                 }
4622                 back->node.found_extent_tree = 1;
4623         }
4624         check_extent_type(rec);
4625         maybe_free_extent_rec(extent_cache, rec);
4626         return 0;
4627 }
4628
4629 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4630                             u64 parent, u64 root, u64 owner, u64 offset,
4631                             u32 num_refs, int found_ref, u64 max_size)
4632 {
4633         struct extent_record *rec;
4634         struct data_backref *back;
4635         struct cache_extent *cache;
4636
4637         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4638         if (!cache) {
4639                 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
4640                                0, 0, max_size);
4641                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4642                 if (!cache)
4643                         abort();
4644         }
4645
4646         rec = container_of(cache, struct extent_record, cache);
4647         if (rec->max_size < max_size)
4648                 rec->max_size = max_size;
4649
4650         /*
4651          * If found_ref is set then max_size is the real size and must match the
4652          * existing refs.  So if we have already found a ref then we need to
4653          * make sure that this ref matches the existing one, otherwise we need
4654          * to add a new backref so we can notice that the backrefs don't match
4655          * and we need to figure out who is telling the truth.  This is to
4656          * account for that awful fsync bug I introduced where we'd end up with
4657          * a btrfs_file_extent_item that would have its length include multiple
4658          * prealloc extents or point inside of a prealloc extent.
4659          */
4660         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4661                                  bytenr, max_size);
4662         if (!back)
4663                 back = alloc_data_backref(rec, parent, root, owner, offset,
4664                                           max_size);
4665
4666         if (found_ref) {
4667                 BUG_ON(num_refs != 1);
4668                 if (back->node.found_ref)
4669                         BUG_ON(back->bytes != max_size);
4670                 back->node.found_ref = 1;
4671                 back->found_ref += 1;
4672                 back->bytes = max_size;
4673                 back->disk_bytenr = bytenr;
4674                 rec->refs += 1;
4675                 rec->content_checked = 1;
4676                 rec->owner_ref_checked = 1;
4677         } else {
4678                 if (back->node.found_extent_tree) {
4679                         fprintf(stderr, "Extent back ref already exists "
4680                                 "for %llu parent %llu root %llu "
4681                                 "owner %llu offset %llu num_refs %lu\n",
4682                                 (unsigned long long)bytenr,
4683                                 (unsigned long long)parent,
4684                                 (unsigned long long)root,
4685                                 (unsigned long long)owner,
4686                                 (unsigned long long)offset,
4687                                 (unsigned long)num_refs);
4688                 }
4689                 back->num_refs = num_refs;
4690                 back->node.found_extent_tree = 1;
4691         }
4692         maybe_free_extent_rec(extent_cache, rec);
4693         return 0;
4694 }
4695
4696 static int add_pending(struct cache_tree *pending,
4697                        struct cache_tree *seen, u64 bytenr, u32 size)
4698 {
4699         int ret;
4700         ret = add_cache_extent(seen, bytenr, size);
4701         if (ret)
4702                 return ret;
4703         add_cache_extent(pending, bytenr, size);
4704         return 0;
4705 }
4706
4707 static int pick_next_pending(struct cache_tree *pending,
4708                         struct cache_tree *reada,
4709                         struct cache_tree *nodes,
4710                         u64 last, struct block_info *bits, int bits_nr,
4711                         int *reada_bits)
4712 {
4713         unsigned long node_start = last;
4714         struct cache_extent *cache;
4715         int ret;
4716
4717         cache = search_cache_extent(reada, 0);
4718         if (cache) {
4719                 bits[0].start = cache->start;
4720                 bits[0].size = cache->size;
4721                 *reada_bits = 1;
4722                 return 1;
4723         }
4724         *reada_bits = 0;
4725         if (node_start > 32768)
4726                 node_start -= 32768;
4727
4728         cache = search_cache_extent(nodes, node_start);
4729         if (!cache)
4730                 cache = search_cache_extent(nodes, 0);
4731
4732         if (!cache) {
4733                  cache = search_cache_extent(pending, 0);
4734                  if (!cache)
4735                          return 0;
4736                  ret = 0;
4737                  do {
4738                          bits[ret].start = cache->start;
4739                          bits[ret].size = cache->size;
4740                          cache = next_cache_extent(cache);
4741                          ret++;
4742                  } while (cache && ret < bits_nr);
4743                  return ret;
4744         }
4745
4746         ret = 0;
4747         do {
4748                 bits[ret].start = cache->start;
4749                 bits[ret].size = cache->size;
4750                 cache = next_cache_extent(cache);
4751                 ret++;
4752         } while (cache && ret < bits_nr);
4753
4754         if (bits_nr - ret > 8) {
4755                 u64 lookup = bits[0].start + bits[0].size;
4756                 struct cache_extent *next;
4757                 next = search_cache_extent(pending, lookup);
4758                 while(next) {
4759                         if (next->start - lookup > 32768)
4760                                 break;
4761                         bits[ret].start = next->start;
4762                         bits[ret].size = next->size;
4763                         lookup = next->start + next->size;
4764                         ret++;
4765                         if (ret == bits_nr)
4766                                 break;
4767                         next = next_cache_extent(next);
4768                         if (!next)
4769                                 break;
4770                 }
4771         }
4772         return ret;
4773 }
4774
4775 static void free_chunk_record(struct cache_extent *cache)
4776 {
4777         struct chunk_record *rec;
4778
4779         rec = container_of(cache, struct chunk_record, cache);
4780         list_del_init(&rec->list);
4781         list_del_init(&rec->dextents);
4782         free(rec);
4783 }
4784
4785 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4786 {
4787         cache_tree_free_extents(chunk_cache, free_chunk_record);
4788 }
4789
4790 static void free_device_record(struct rb_node *node)
4791 {
4792         struct device_record *rec;
4793
4794         rec = container_of(node, struct device_record, node);
4795         free(rec);
4796 }
4797
4798 FREE_RB_BASED_TREE(device_cache, free_device_record);
4799
4800 int insert_block_group_record(struct block_group_tree *tree,
4801                               struct block_group_record *bg_rec)
4802 {
4803         int ret;
4804
4805         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4806         if (ret)
4807                 return ret;
4808
4809         list_add_tail(&bg_rec->list, &tree->block_groups);
4810         return 0;
4811 }
4812
4813 static void free_block_group_record(struct cache_extent *cache)
4814 {
4815         struct block_group_record *rec;
4816
4817         rec = container_of(cache, struct block_group_record, cache);
4818         list_del_init(&rec->list);
4819         free(rec);
4820 }
4821
4822 void free_block_group_tree(struct block_group_tree *tree)
4823 {
4824         cache_tree_free_extents(&tree->tree, free_block_group_record);
4825 }
4826
4827 int insert_device_extent_record(struct device_extent_tree *tree,
4828                                 struct device_extent_record *de_rec)
4829 {
4830         int ret;
4831
4832         /*
4833          * Device extent is a bit different from the other extents, because
4834          * the extents which belong to the different devices may have the
4835          * same start and size, so we need use the special extent cache
4836          * search/insert functions.
4837          */
4838         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4839         if (ret)
4840                 return ret;
4841
4842         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4843         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4844         return 0;
4845 }
4846
4847 static void free_device_extent_record(struct cache_extent *cache)
4848 {
4849         struct device_extent_record *rec;
4850
4851         rec = container_of(cache, struct device_extent_record, cache);
4852         if (!list_empty(&rec->chunk_list))
4853                 list_del_init(&rec->chunk_list);
4854         if (!list_empty(&rec->device_list))
4855                 list_del_init(&rec->device_list);
4856         free(rec);
4857 }
4858
4859 void free_device_extent_tree(struct device_extent_tree *tree)
4860 {
4861         cache_tree_free_extents(&tree->tree, free_device_extent_record);
4862 }
4863
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref in
 * @extent_cache.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref, anything else as a data backref carrying the item's ref
 * count.  The item key supplies the extent start (objectid) and the parent
 * (offset).  Always returns 0.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID) {
                add_data_backref(extent_cache, key.objectid, key.offset, 0,
                                 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
                return 0;
        }

        add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
        return 0;
}
#endif
4882
4883 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4884                                             struct btrfs_key *key,
4885                                             int slot)
4886 {
4887         struct btrfs_chunk *ptr;
4888         struct chunk_record *rec;
4889         int num_stripes, i;
4890
4891         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4892         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4893
4894         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4895         if (!rec) {
4896                 fprintf(stderr, "memory allocation failed\n");
4897                 exit(-1);
4898         }
4899
4900         INIT_LIST_HEAD(&rec->list);
4901         INIT_LIST_HEAD(&rec->dextents);
4902         rec->bg_rec = NULL;
4903
4904         rec->cache.start = key->offset;
4905         rec->cache.size = btrfs_chunk_length(leaf, ptr);
4906
4907         rec->generation = btrfs_header_generation(leaf);
4908
4909         rec->objectid = key->objectid;
4910         rec->type = key->type;
4911         rec->offset = key->offset;
4912
4913         rec->length = rec->cache.size;
4914         rec->owner = btrfs_chunk_owner(leaf, ptr);
4915         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4916         rec->type_flags = btrfs_chunk_type(leaf, ptr);
4917         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4918         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4919         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4920         rec->num_stripes = num_stripes;
4921         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4922
4923         for (i = 0; i < rec->num_stripes; ++i) {
4924                 rec->stripes[i].devid =
4925                         btrfs_stripe_devid_nr(leaf, ptr, i);
4926                 rec->stripes[i].offset =
4927                         btrfs_stripe_offset_nr(leaf, ptr, i);
4928                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4929                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
4930                                 BTRFS_UUID_SIZE);
4931         }
4932
4933         return rec;
4934 }
4935
4936 static int process_chunk_item(struct cache_tree *chunk_cache,
4937                               struct btrfs_key *key, struct extent_buffer *eb,
4938                               int slot)
4939 {
4940         struct chunk_record *rec;
4941         int ret = 0;
4942
4943         rec = btrfs_new_chunk_record(eb, key, slot);
4944         ret = insert_cache_extent(chunk_cache, &rec->cache);
4945         if (ret) {
4946                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
4947                         rec->offset, rec->length);
4948                 free(rec);
4949         }
4950
4951         return ret;
4952 }
4953
4954 static int process_device_item(struct rb_root *dev_cache,
4955                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
4956 {
4957         struct btrfs_dev_item *ptr;
4958         struct device_record *rec;
4959         int ret = 0;
4960
4961         ptr = btrfs_item_ptr(eb,
4962                 slot, struct btrfs_dev_item);
4963
4964         rec = malloc(sizeof(*rec));
4965         if (!rec) {
4966                 fprintf(stderr, "memory allocation failed\n");
4967                 return -ENOMEM;
4968         }
4969
4970         rec->devid = key->offset;
4971         rec->generation = btrfs_header_generation(eb);
4972
4973         rec->objectid = key->objectid;
4974         rec->type = key->type;
4975         rec->offset = key->offset;
4976
4977         rec->devid = btrfs_device_id(eb, ptr);
4978         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
4979         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
4980
4981         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
4982         if (ret) {
4983                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
4984                 free(rec);
4985         }
4986
4987         return ret;
4988 }
4989
4990 struct block_group_record *
4991 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
4992                              int slot)
4993 {
4994         struct btrfs_block_group_item *ptr;
4995         struct block_group_record *rec;
4996
4997         rec = calloc(1, sizeof(*rec));
4998         if (!rec) {
4999                 fprintf(stderr, "memory allocation failed\n");
5000                 exit(-1);
5001         }
5002
5003         rec->cache.start = key->objectid;
5004         rec->cache.size = key->offset;
5005
5006         rec->generation = btrfs_header_generation(leaf);
5007
5008         rec->objectid = key->objectid;
5009         rec->type = key->type;
5010         rec->offset = key->offset;
5011
5012         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5013         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5014
5015         INIT_LIST_HEAD(&rec->list);
5016
5017         return rec;
5018 }
5019
5020 static int process_block_group_item(struct block_group_tree *block_group_cache,
5021                                     struct btrfs_key *key,
5022                                     struct extent_buffer *eb, int slot)
5023 {
5024         struct block_group_record *rec;
5025         int ret = 0;
5026
5027         rec = btrfs_new_block_group_record(eb, key, slot);
5028         ret = insert_block_group_record(block_group_cache, rec);
5029         if (ret) {
5030                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5031                         rec->objectid, rec->offset);
5032                 free(rec);
5033         }
5034
5035         return ret;
5036 }
5037
5038 struct device_extent_record *
5039 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5040                                struct btrfs_key *key, int slot)
5041 {
5042         struct device_extent_record *rec;
5043         struct btrfs_dev_extent *ptr;
5044
5045         rec = calloc(1, sizeof(*rec));
5046         if (!rec) {
5047                 fprintf(stderr, "memory allocation failed\n");
5048                 exit(-1);
5049         }
5050
5051         rec->cache.objectid = key->objectid;
5052         rec->cache.start = key->offset;
5053
5054         rec->generation = btrfs_header_generation(leaf);
5055
5056         rec->objectid = key->objectid;
5057         rec->type = key->type;
5058         rec->offset = key->offset;
5059
5060         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5061         rec->chunk_objecteid =
5062                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5063         rec->chunk_offset =
5064                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5065         rec->length = btrfs_dev_extent_length(leaf, ptr);
5066         rec->cache.size = rec->length;
5067
5068         INIT_LIST_HEAD(&rec->chunk_list);
5069         INIT_LIST_HEAD(&rec->device_list);
5070
5071         return rec;
5072 }
5073
5074 static int
5075 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5076                            struct btrfs_key *key, struct extent_buffer *eb,
5077                            int slot)
5078 {
5079         struct device_extent_record *rec;
5080         int ret;
5081
5082         rec = btrfs_new_device_extent_record(eb, key, slot);
5083         ret = insert_device_extent_record(dev_extent_cache, rec);
5084         if (ret) {
5085                 fprintf(stderr,
5086                         "Device extent[%llu, %llu, %llu] existed.\n",
5087                         rec->objectid, rec->offset, rec->length);
5088                 free(rec);
5089         }
5090
5091         return ret;
5092 }
5093
5094 static int process_extent_item(struct btrfs_root *root,
5095                                struct cache_tree *extent_cache,
5096                                struct extent_buffer *eb, int slot)
5097 {
5098         struct btrfs_extent_item *ei;
5099         struct btrfs_extent_inline_ref *iref;
5100         struct btrfs_extent_data_ref *dref;
5101         struct btrfs_shared_data_ref *sref;
5102         struct btrfs_key key;
5103         unsigned long end;
5104         unsigned long ptr;
5105         int type;
5106         u32 item_size = btrfs_item_size_nr(eb, slot);
5107         u64 refs = 0;
5108         u64 offset;
5109         u64 num_bytes;
5110         int metadata = 0;
5111
5112         btrfs_item_key_to_cpu(eb, &key, slot);
5113
5114         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5115                 metadata = 1;
5116                 num_bytes = root->leafsize;
5117         } else {
5118                 num_bytes = key.offset;
5119         }
5120
5121         if (item_size < sizeof(*ei)) {
5122 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5123                 struct btrfs_extent_item_v0 *ei0;
5124                 BUG_ON(item_size != sizeof(*ei0));
5125                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5126                 refs = btrfs_extent_refs_v0(eb, ei0);
5127 #else
5128                 BUG();
5129 #endif
5130                 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5131                                       num_bytes, refs, 0, 0, 0, metadata, 1,
5132                                       num_bytes);
5133         }
5134
5135         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5136         refs = btrfs_extent_refs(eb, ei);
5137         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5138                 metadata = 1;
5139         else
5140                 metadata = 0;
5141
5142         add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5143                        refs, 0, 0, 0, metadata, 1, num_bytes);
5144
5145         ptr = (unsigned long)(ei + 1);
5146         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5147             key.type == BTRFS_EXTENT_ITEM_KEY)
5148                 ptr += sizeof(struct btrfs_tree_block_info);
5149
5150         end = (unsigned long)ei + item_size;
5151         while (ptr < end) {
5152                 iref = (struct btrfs_extent_inline_ref *)ptr;
5153                 type = btrfs_extent_inline_ref_type(eb, iref);
5154                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5155                 switch (type) {
5156                 case BTRFS_TREE_BLOCK_REF_KEY:
5157                         add_tree_backref(extent_cache, key.objectid,
5158                                          0, offset, 0);
5159                         break;
5160                 case BTRFS_SHARED_BLOCK_REF_KEY:
5161                         add_tree_backref(extent_cache, key.objectid,
5162                                          offset, 0, 0);
5163                         break;
5164                 case BTRFS_EXTENT_DATA_REF_KEY:
5165                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5166                         add_data_backref(extent_cache, key.objectid, 0,
5167                                         btrfs_extent_data_ref_root(eb, dref),
5168                                         btrfs_extent_data_ref_objectid(eb,
5169                                                                        dref),
5170                                         btrfs_extent_data_ref_offset(eb, dref),
5171                                         btrfs_extent_data_ref_count(eb, dref),
5172                                         0, num_bytes);
5173                         break;
5174                 case BTRFS_SHARED_DATA_REF_KEY:
5175                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5176                         add_data_backref(extent_cache, key.objectid, offset,
5177                                         0, 0, 0,
5178                                         btrfs_shared_data_ref_count(eb, sref),
5179                                         0, num_bytes);
5180                         break;
5181                 default:
5182                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5183                                 key.objectid, key.type, num_bytes);
5184                         goto out;
5185                 }
5186                 ptr += btrfs_extent_inline_ref_size(type);
5187         }
5188         WARN_ON(ptr > end);
5189 out:
5190         return 0;
5191 }
5192
5193 static int check_cache_range(struct btrfs_root *root,
5194                              struct btrfs_block_group_cache *cache,
5195                              u64 offset, u64 bytes)
5196 {
5197         struct btrfs_free_space *entry;
5198         u64 *logical;
5199         u64 bytenr;
5200         int stripe_len;
5201         int i, nr, ret;
5202
5203         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5204                 bytenr = btrfs_sb_offset(i);
5205                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5206                                        cache->key.objectid, bytenr, 0,
5207                                        &logical, &nr, &stripe_len);
5208                 if (ret)
5209                         return ret;
5210
5211                 while (nr--) {
5212                         if (logical[nr] + stripe_len <= offset)
5213                                 continue;
5214                         if (offset + bytes <= logical[nr])
5215                                 continue;
5216                         if (logical[nr] == offset) {
5217                                 if (stripe_len >= bytes) {
5218                                         kfree(logical);
5219                                         return 0;
5220                                 }
5221                                 bytes -= stripe_len;
5222                                 offset += stripe_len;
5223                         } else if (logical[nr] < offset) {
5224                                 if (logical[nr] + stripe_len >=
5225                                     offset + bytes) {
5226                                         kfree(logical);
5227                                         return 0;
5228                                 }
5229                                 bytes = (offset + bytes) -
5230                                         (logical[nr] + stripe_len);
5231                                 offset = logical[nr] + stripe_len;
5232                         } else {
5233                                 /*
5234                                  * Could be tricky, the super may land in the
5235                                  * middle of the area we're checking.  First
5236                                  * check the easiest case, it's at the end.
5237                                  */
5238                                 if (logical[nr] + stripe_len >=
5239                                     bytes + offset) {
5240                                         bytes = logical[nr] - offset;
5241                                         continue;
5242                                 }
5243
5244                                 /* Check the left side */
5245                                 ret = check_cache_range(root, cache,
5246                                                         offset,
5247                                                         logical[nr] - offset);
5248                                 if (ret) {
5249                                         kfree(logical);
5250                                         return ret;
5251                                 }
5252
5253                                 /* Now we continue with the right side */
5254                                 bytes = (offset + bytes) -
5255                                         (logical[nr] + stripe_len);
5256                                 offset = logical[nr] + stripe_len;
5257                         }
5258                 }
5259
5260                 kfree(logical);
5261         }
5262
5263         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5264         if (!entry) {
5265                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5266                         offset, offset+bytes);
5267                 return -EINVAL;
5268         }
5269
5270         if (entry->offset != offset) {
5271                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5272                         entry->offset);
5273                 return -EINVAL;
5274         }
5275
5276         if (entry->bytes != bytes) {
5277                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5278                         bytes, entry->bytes, offset);
5279                 return -EINVAL;
5280         }
5281
5282         unlink_free_space(cache->free_space_ctl, entry);
5283         free(entry);
5284         return 0;
5285 }
5286
5287 static int verify_space_cache(struct btrfs_root *root,
5288                               struct btrfs_block_group_cache *cache)
5289 {
5290         struct btrfs_path *path;
5291         struct extent_buffer *leaf;
5292         struct btrfs_key key;
5293         u64 last;
5294         int ret = 0;
5295
5296         path = btrfs_alloc_path();
5297         if (!path)
5298                 return -ENOMEM;
5299
5300         root = root->fs_info->extent_root;
5301
5302         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5303
5304         key.objectid = last;
5305         key.offset = 0;
5306         key.type = BTRFS_EXTENT_ITEM_KEY;
5307
5308         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5309         if (ret < 0)
5310                 goto out;
5311         ret = 0;
5312         while (1) {
5313                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5314                         ret = btrfs_next_leaf(root, path);
5315                         if (ret < 0)
5316                                 goto out;
5317                         if (ret > 0) {
5318                                 ret = 0;
5319                                 break;
5320                         }
5321                 }
5322                 leaf = path->nodes[0];
5323                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5324                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5325                         break;
5326                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5327                     key.type != BTRFS_METADATA_ITEM_KEY) {
5328                         path->slots[0]++;
5329                         continue;
5330                 }
5331
5332                 if (last == key.objectid) {
5333                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5334                                 last = key.objectid + key.offset;
5335                         else
5336                                 last = key.objectid + root->leafsize;
5337                         path->slots[0]++;
5338                         continue;
5339                 }
5340
5341                 ret = check_cache_range(root, cache, last,
5342                                         key.objectid - last);
5343                 if (ret)
5344                         break;
5345                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5346                         last = key.objectid + key.offset;
5347                 else
5348                         last = key.objectid + root->leafsize;
5349                 path->slots[0]++;
5350         }
5351
5352         if (last < cache->key.objectid + cache->key.offset)
5353                 ret = check_cache_range(root, cache, last,
5354                                         cache->key.objectid +
5355                                         cache->key.offset - last);
5356
5357 out:
5358         btrfs_free_path(path);
5359
5360         if (!ret &&
5361             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5362                 fprintf(stderr, "There are still entries left in the space "
5363                         "cache\n");
5364                 ret = -EINVAL;
5365         }
5366
5367         return ret;
5368 }
5369
5370 static int check_space_cache(struct btrfs_root *root)
5371 {
5372         struct btrfs_block_group_cache *cache;
5373         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5374         int ret;
5375         int error = 0;
5376
5377         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5378             btrfs_super_generation(root->fs_info->super_copy) !=
5379             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5380                 printf("cache and super generation don't match, space cache "
5381                        "will be invalidated\n");
5382                 return 0;
5383         }
5384
5385         if (ctx.progress_enabled) {
5386                 ctx.tp = TASK_FREE_SPACE;
5387                 task_start(ctx.info);
5388         }
5389
5390         while (1) {
5391                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5392                 if (!cache)
5393                         break;
5394
5395                 start = cache->key.objectid + cache->key.offset;
5396                 if (!cache->free_space_ctl) {
5397                         if (btrfs_init_free_space_ctl(cache,
5398                                                       root->sectorsize)) {
5399                                 ret = -ENOMEM;
5400                                 break;
5401                         }
5402                 } else {
5403                         btrfs_remove_free_space_cache(cache);
5404                 }
5405
5406                 ret = load_free_space_cache(root->fs_info, cache);
5407                 if (!ret)
5408                         continue;
5409
5410                 ret = verify_space_cache(root, cache);
5411                 if (ret) {
5412                         fprintf(stderr, "cache appears valid but isnt %Lu\n",
5413                                 cache->key.objectid);
5414                         error++;
5415                 }
5416         }
5417
5418         task_stop(ctx.info);
5419
5420         return error ? -EINVAL : 0;
5421 }
5422
5423 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5424                         u64 num_bytes, unsigned long leaf_offset,
5425                         struct extent_buffer *eb) {
5426
5427         u64 offset = 0;
5428         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5429         char *data;
5430         unsigned long csum_offset;
5431         u32 csum;
5432         u32 csum_expected;
5433         u64 read_len;
5434         u64 data_checked = 0;
5435         u64 tmp;
5436         int ret = 0;
5437         int mirror;
5438         int num_copies;
5439
5440         if (num_bytes % root->sectorsize)
5441                 return -EINVAL;
5442
5443         data = malloc(num_bytes);
5444         if (!data)
5445                 return -ENOMEM;
5446
5447         while (offset < num_bytes) {
5448                 mirror = 0;
5449 again:
5450                 read_len = num_bytes - offset;
5451                 /* read as much space once a time */
5452                 ret = read_extent_data(root, data + offset,
5453                                 bytenr + offset, &read_len, mirror);
5454                 if (ret)
5455                         goto out;
5456                 data_checked = 0;
5457                 /* verify every 4k data's checksum */
5458                 while (data_checked < read_len) {
5459                         csum = ~(u32)0;
5460                         tmp = offset + data_checked;
5461
5462                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5463                                                csum, root->sectorsize);
5464                         btrfs_csum_final(csum, (char *)&csum);
5465
5466                         csum_offset = leaf_offset +
5467                                  tmp / root->sectorsize * csum_size;
5468                         read_extent_buffer(eb, (char *)&csum_expected,
5469                                            csum_offset, csum_size);
5470                         /* try another mirror */
5471                         if (csum != csum_expected) {
5472                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5473                                                 mirror, bytenr + tmp,
5474                                                 csum, csum_expected);
5475                                 num_copies = btrfs_num_copies(
5476                                                 &root->fs_info->mapping_tree,
5477                                                 bytenr, num_bytes);
5478                                 if (mirror < num_copies - 1) {
5479                                         mirror += 1;
5480                                         goto again;
5481                                 }
5482                         }
5483                         data_checked += root->sectorsize;
5484                 }
5485                 offset += read_len;
5486         }
5487 out:
5488         free(data);
5489         return ret;
5490 }
5491
5492 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5493                                u64 num_bytes)
5494 {
5495         struct btrfs_path *path;
5496         struct extent_buffer *leaf;
5497         struct btrfs_key key;
5498         int ret;
5499
5500         path = btrfs_alloc_path();
5501         if (!path) {
5502                 fprintf(stderr, "Error allocing path\n");
5503                 return -ENOMEM;
5504         }
5505
5506         key.objectid = bytenr;
5507         key.type = BTRFS_EXTENT_ITEM_KEY;
5508         key.offset = (u64)-1;
5509
5510 again:
5511         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5512                                 0, 0);
5513         if (ret < 0) {
5514                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5515                 btrfs_free_path(path);
5516                 return ret;
5517         } else if (ret) {
5518                 if (path->slots[0] > 0) {
5519                         path->slots[0]--;
5520                 } else {
5521                         ret = btrfs_prev_leaf(root, path);
5522                         if (ret < 0) {
5523                                 goto out;
5524                         } else if (ret > 0) {
5525                                 ret = 0;
5526                                 goto out;
5527                         }
5528                 }
5529         }
5530
5531         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5532
5533         /*
5534          * Block group items come before extent items if they have the same
5535          * bytenr, so walk back one more just in case.  Dear future traveler,
5536          * first congrats on mastering time travel.  Now if it's not too much
5537          * trouble could you go back to 2006 and tell Chris to make the
5538          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5539          * EXTENT_ITEM_KEY please?
5540          */
5541         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5542                 if (path->slots[0] > 0) {
5543                         path->slots[0]--;
5544                 } else {
5545                         ret = btrfs_prev_leaf(root, path);
5546                         if (ret < 0) {
5547                                 goto out;
5548                         } else if (ret > 0) {
5549                                 ret = 0;
5550                                 goto out;
5551                         }
5552                 }
5553                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5554         }
5555
5556         while (num_bytes) {
5557                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5558                         ret = btrfs_next_leaf(root, path);
5559                         if (ret < 0) {
5560                                 fprintf(stderr, "Error going to next leaf "
5561                                         "%d\n", ret);
5562                                 btrfs_free_path(path);
5563                                 return ret;
5564                         } else if (ret) {
5565                                 break;
5566                         }
5567                 }
5568                 leaf = path->nodes[0];
5569                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5570                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5571                         path->slots[0]++;
5572                         continue;
5573                 }
5574                 if (key.objectid + key.offset < bytenr) {
5575                         path->slots[0]++;
5576                         continue;
5577                 }
5578                 if (key.objectid > bytenr + num_bytes)
5579                         break;
5580
5581                 if (key.objectid == bytenr) {
5582                         if (key.offset >= num_bytes) {
5583                                 num_bytes = 0;
5584                                 break;
5585                         }
5586                         num_bytes -= key.offset;
5587                         bytenr += key.offset;
5588                 } else if (key.objectid < bytenr) {
5589                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5590                                 num_bytes = 0;
5591                                 break;
5592                         }
5593                         num_bytes = (bytenr + num_bytes) -
5594                                 (key.objectid + key.offset);
5595                         bytenr = key.objectid + key.offset;
5596                 } else {
5597                         if (key.objectid + key.offset < bytenr + num_bytes) {
5598                                 u64 new_start = key.objectid + key.offset;
5599                                 u64 new_bytes = bytenr + num_bytes - new_start;
5600
5601                                 /*
5602                                  * Weird case, the extent is in the middle of
5603                                  * our range, we'll have to search one side
5604                                  * and then the other.  Not sure if this happens
5605                                  * in real life, but no harm in coding it up
5606                                  * anyway just in case.
5607                                  */
5608                                 btrfs_release_path(path);
5609                                 ret = check_extent_exists(root, new_start,
5610                                                           new_bytes);
5611                                 if (ret) {
5612                                         fprintf(stderr, "Right section didn't "
5613                                                 "have a record\n");
5614                                         break;
5615                                 }
5616                                 num_bytes = key.objectid - bytenr;
5617                                 goto again;
5618                         }
5619                         num_bytes = key.objectid - bytenr;
5620                 }
5621                 path->slots[0]++;
5622         }
5623         ret = 0;
5624
5625 out:
5626         if (num_bytes && !ret) {
5627                 fprintf(stderr, "There are no extents for csum range "
5628                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5629                 ret = 1;
5630         }
5631
5632         btrfs_free_path(path);
5633         return ret;
5634 }
5635
5636 static int check_csums(struct btrfs_root *root)
5637 {
5638         struct btrfs_path *path;
5639         struct extent_buffer *leaf;
5640         struct btrfs_key key;
5641         u64 offset = 0, num_bytes = 0;
5642         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5643         int errors = 0;
5644         int ret;
5645         u64 data_len;
5646         unsigned long leaf_offset;
5647
5648         root = root->fs_info->csum_root;
5649         if (!extent_buffer_uptodate(root->node)) {
5650                 fprintf(stderr, "No valid csum tree found\n");
5651                 return -ENOENT;
5652         }
5653
5654         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5655         key.type = BTRFS_EXTENT_CSUM_KEY;
5656         key.offset = 0;
5657
5658         path = btrfs_alloc_path();
5659         if (!path)
5660                 return -ENOMEM;
5661
5662         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5663         if (ret < 0) {
5664                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5665                 btrfs_free_path(path);
5666                 return ret;
5667         }
5668
5669         if (ret > 0 && path->slots[0])
5670                 path->slots[0]--;
5671         ret = 0;
5672
5673         while (1) {
5674                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5675                         ret = btrfs_next_leaf(root, path);
5676                         if (ret < 0) {
5677                                 fprintf(stderr, "Error going to next leaf "
5678                                         "%d\n", ret);
5679                                 break;
5680                         }
5681                         if (ret)
5682                                 break;
5683                 }
5684                 leaf = path->nodes[0];
5685
5686                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5687                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5688                         path->slots[0]++;
5689                         continue;
5690                 }
5691
5692                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5693                               csum_size) * root->sectorsize;
5694                 if (!check_data_csum)
5695                         goto skip_csum_check;
5696                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5697                 ret = check_extent_csums(root, key.offset, data_len,
5698                                          leaf_offset, leaf);
5699                 if (ret)
5700                         break;
5701 skip_csum_check:
5702                 if (!num_bytes) {
5703                         offset = key.offset;
5704                 } else if (key.offset != offset + num_bytes) {
5705                         ret = check_extent_exists(root, offset, num_bytes);
5706                         if (ret) {
5707                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5708                                         "there is no extent record\n",
5709                                         offset, offset+num_bytes);
5710                                 errors++;
5711                         }
5712                         offset = key.offset;
5713                         num_bytes = 0;
5714                 }
5715                 num_bytes += data_len;
5716                 path->slots[0]++;
5717         }
5718
5719         btrfs_free_path(path);
5720         return errors;
5721 }
5722
5723 static int is_dropped_key(struct btrfs_key *key,
5724                           struct btrfs_key *drop_key) {
5725         if (key->objectid < drop_key->objectid)
5726                 return 1;
5727         else if (key->objectid == drop_key->objectid) {
5728                 if (key->type < drop_key->type)
5729                         return 1;
5730                 else if (key->type == drop_key->type) {
5731                         if (key->offset < drop_key->offset)
5732                                 return 1;
5733                 }
5734         }
5735         return 0;
5736 }
5737
5738 /*
5739  * Here are the rules for FULL_BACKREF.
5740  *
5741  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5742  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5743  *      FULL_BACKREF set.
5744  * 3) We cow'ed the block walking down a reloc tree.  This is impossible to tell
5745  *    if it happened after the relocation occurred since we'll have dropped the
5746  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5747  *    have no real way to know for sure.
5748  *
5749  * We process the blocks one root at a time, and we start from the lowest root
5750  * objectid and go to the highest.  So we can just lookup the owner backref for
5751  * the record and if we don't find it then we know it doesn't exist and we have
5752  * a FULL BACKREF.
5753  *
5754  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5755  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5756  * be set or not and then we can check later once we've gathered all the refs.
5757  */
5758 static int calc_extent_flag(struct btrfs_root *root,
5759                            struct cache_tree *extent_cache,
5760                            struct extent_buffer *buf,
5761                            struct root_item_record *ri,
5762                            u64 *flags)
5763 {
5764         struct extent_record *rec;
5765         struct cache_extent *cache;
5766         struct tree_backref *tback;
5767         u64 owner = 0;
5768
5769         cache = lookup_cache_extent(extent_cache, buf->start, 1);
5770         /* we have added this extent before */
5771         BUG_ON(!cache);
5772         rec = container_of(cache, struct extent_record, cache);
5773
5774         /*
5775          * Except file/reloc tree, we can not have
5776          * FULL BACKREF MODE
5777          */
5778         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5779                 goto normal;
5780         /*
5781          * root node
5782          */
5783         if (buf->start == ri->bytenr)
5784                 goto normal;
5785
5786         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5787                 goto full_backref;
5788
5789         owner = btrfs_header_owner(buf);
5790         if (owner == ri->objectid)
5791                 goto normal;
5792
5793         tback = find_tree_backref(rec, 0, owner);
5794         if (!tback)
5795                 goto full_backref;
5796 normal:
5797         *flags = 0;
5798         if (rec->flag_block_full_backref != -1 &&
5799             rec->flag_block_full_backref != 0)
5800                 rec->bad_full_backref = 1;
5801         return 0;
5802 full_backref:
5803         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5804         if (rec->flag_block_full_backref != -1 &&
5805             rec->flag_block_full_backref != 1)
5806                 rec->bad_full_backref = 1;
5807         return 0;
5808 }
5809
5810 static int run_next_block(struct btrfs_root *root,
5811                           struct block_info *bits,
5812                           int bits_nr,
5813                           u64 *last,
5814                           struct cache_tree *pending,
5815                           struct cache_tree *seen,
5816                           struct cache_tree *reada,
5817                           struct cache_tree *nodes,
5818                           struct cache_tree *extent_cache,
5819                           struct cache_tree *chunk_cache,
5820                           struct rb_root *dev_cache,
5821                           struct block_group_tree *block_group_cache,
5822                           struct device_extent_tree *dev_extent_cache,
5823                           struct root_item_record *ri)
5824 {
5825         struct extent_buffer *buf;
5826         struct extent_record *rec = NULL;
5827         u64 bytenr;
5828         u32 size;
5829         u64 parent;
5830         u64 owner;
5831         u64 flags;
5832         u64 ptr;
5833         u64 gen = 0;
5834         int ret = 0;
5835         int i;
5836         int nritems;
5837         struct btrfs_key key;
5838         struct cache_extent *cache;
5839         int reada_bits;
5840
5841         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5842                                     bits_nr, &reada_bits);
5843         if (nritems == 0)
5844                 return 1;
5845
5846         if (!reada_bits) {
5847                 for(i = 0; i < nritems; i++) {
5848                         ret = add_cache_extent(reada, bits[i].start,
5849                                                bits[i].size);
5850                         if (ret == -EEXIST)
5851                                 continue;
5852
5853                         /* fixme, get the parent transid */
5854                         readahead_tree_block(root, bits[i].start,
5855                                              bits[i].size, 0);
5856                 }
5857         }
5858         *last = bits[0].start;
5859         bytenr = bits[0].start;
5860         size = bits[0].size;
5861
5862         cache = lookup_cache_extent(pending, bytenr, size);
5863         if (cache) {
5864                 remove_cache_extent(pending, cache);
5865                 free(cache);
5866         }
5867         cache = lookup_cache_extent(reada, bytenr, size);
5868         if (cache) {
5869                 remove_cache_extent(reada, cache);
5870                 free(cache);
5871         }
5872         cache = lookup_cache_extent(nodes, bytenr, size);
5873         if (cache) {
5874                 remove_cache_extent(nodes, cache);
5875                 free(cache);
5876         }
5877         cache = lookup_cache_extent(extent_cache, bytenr, size);
5878         if (cache) {
5879                 rec = container_of(cache, struct extent_record, cache);
5880                 gen = rec->parent_generation;
5881         }
5882
5883         /* fixme, get the real parent transid */
5884         buf = read_tree_block(root, bytenr, size, gen);
5885         if (!extent_buffer_uptodate(buf)) {
5886                 record_bad_block_io(root->fs_info,
5887                                     extent_cache, bytenr, size);
5888                 goto out;
5889         }
5890
5891         nritems = btrfs_header_nritems(buf);
5892
5893         flags = 0;
5894         if (!init_extent_tree) {
5895                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5896                                        btrfs_header_level(buf), 1, NULL,
5897                                        &flags);
5898                 if (ret < 0) {
5899                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5900                         if (ret < 0) {
5901                                 fprintf(stderr, "Couldn't calc extent flags\n");
5902                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5903                         }
5904                 }
5905         } else {
5906                 flags = 0;
5907                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5908                 if (ret < 0) {
5909                         fprintf(stderr, "Couldn't calc extent flags\n");
5910                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5911                 }
5912         }
5913
5914         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5915                 if (ri != NULL &&
5916                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5917                     ri->objectid == btrfs_header_owner(buf)) {
5918                         /*
5919                          * Ok we got to this block from it's original owner and
5920                          * we have FULL_BACKREF set.  Relocation can leave
5921                          * converted blocks over so this is altogether possible,
5922                          * however it's not possible if the generation > the
5923                          * last snapshot, so check for this case.
5924                          */
5925                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5926                             btrfs_header_generation(buf) > ri->last_snapshot) {
5927                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5928                                 rec->bad_full_backref = 1;
5929                         }
5930                 }
5931         } else {
5932                 if (ri != NULL &&
5933                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
5934                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
5935                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5936                         rec->bad_full_backref = 1;
5937                 }
5938         }
5939
5940         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5941                 rec->flag_block_full_backref = 1;
5942                 parent = bytenr;
5943                 owner = 0;
5944         } else {
5945                 rec->flag_block_full_backref = 0;
5946                 parent = 0;
5947                 owner = btrfs_header_owner(buf);
5948         }
5949
5950         ret = check_block(root, extent_cache, buf, flags);
5951         if (ret)
5952                 goto out;
5953
5954         if (btrfs_is_leaf(buf)) {
5955                 btree_space_waste += btrfs_leaf_free_space(root, buf);
5956                 for (i = 0; i < nritems; i++) {
5957                         struct btrfs_file_extent_item *fi;
5958                         btrfs_item_key_to_cpu(buf, &key, i);
5959                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
5960                                 process_extent_item(root, extent_cache, buf,
5961                                                     i);
5962                                 continue;
5963                         }
5964                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5965                                 process_extent_item(root, extent_cache, buf,
5966                                                     i);
5967                                 continue;
5968                         }
5969                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
5970                                 total_csum_bytes +=
5971                                         btrfs_item_size_nr(buf, i);
5972                                 continue;
5973                         }
5974                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
5975                                 process_chunk_item(chunk_cache, &key, buf, i);
5976                                 continue;
5977                         }
5978                         if (key.type == BTRFS_DEV_ITEM_KEY) {
5979                                 process_device_item(dev_cache, &key, buf, i);
5980                                 continue;
5981                         }
5982                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
5983                                 process_block_group_item(block_group_cache,
5984                                         &key, buf, i);
5985                                 continue;
5986                         }
5987                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
5988                                 process_device_extent_item(dev_extent_cache,
5989                                         &key, buf, i);
5990                                 continue;
5991
5992                         }
5993                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
5994 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5995                                 process_extent_ref_v0(extent_cache, buf, i);
5996 #else
5997                                 BUG();
5998 #endif
5999                                 continue;
6000                         }
6001
6002                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6003                                 add_tree_backref(extent_cache, key.objectid, 0,
6004                                                  key.offset, 0);
6005                                 continue;
6006                         }
6007                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6008                                 add_tree_backref(extent_cache, key.objectid,
6009                                                  key.offset, 0, 0);
6010                                 continue;
6011                         }
6012                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6013                                 struct btrfs_extent_data_ref *ref;
6014                                 ref = btrfs_item_ptr(buf, i,
6015                                                 struct btrfs_extent_data_ref);
6016                                 add_data_backref(extent_cache,
6017                                         key.objectid, 0,
6018                                         btrfs_extent_data_ref_root(buf, ref),
6019                                         btrfs_extent_data_ref_objectid(buf,
6020                                                                        ref),
6021                                         btrfs_extent_data_ref_offset(buf, ref),
6022                                         btrfs_extent_data_ref_count(buf, ref),
6023                                         0, root->sectorsize);
6024                                 continue;
6025                         }
6026                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6027                                 struct btrfs_shared_data_ref *ref;
6028                                 ref = btrfs_item_ptr(buf, i,
6029                                                 struct btrfs_shared_data_ref);
6030                                 add_data_backref(extent_cache,
6031                                         key.objectid, key.offset, 0, 0, 0,
6032                                         btrfs_shared_data_ref_count(buf, ref),
6033                                         0, root->sectorsize);
6034                                 continue;
6035                         }
6036                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6037                                 struct bad_item *bad;
6038
6039                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6040                                         continue;
6041                                 if (!owner)
6042                                         continue;
6043                                 bad = malloc(sizeof(struct bad_item));
6044                                 if (!bad)
6045                                         continue;
6046                                 INIT_LIST_HEAD(&bad->list);
6047                                 memcpy(&bad->key, &key,
6048                                        sizeof(struct btrfs_key));
6049                                 bad->root_id = owner;
6050                                 list_add_tail(&bad->list, &delete_items);
6051                                 continue;
6052                         }
6053                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6054                                 continue;
6055                         fi = btrfs_item_ptr(buf, i,
6056                                             struct btrfs_file_extent_item);
6057                         if (btrfs_file_extent_type(buf, fi) ==
6058                             BTRFS_FILE_EXTENT_INLINE)
6059                                 continue;
6060                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6061                                 continue;
6062
6063                         data_bytes_allocated +=
6064                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6065                         if (data_bytes_allocated < root->sectorsize) {
6066                                 abort();
6067                         }
6068                         data_bytes_referenced +=
6069                                 btrfs_file_extent_num_bytes(buf, fi);
6070                         add_data_backref(extent_cache,
6071                                 btrfs_file_extent_disk_bytenr(buf, fi),
6072                                 parent, owner, key.objectid, key.offset -
6073                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6074                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6075                 }
6076         } else {
6077                 int level;
6078                 struct btrfs_key first_key;
6079
6080                 first_key.objectid = 0;
6081
6082                 if (nritems > 0)
6083                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6084                 level = btrfs_header_level(buf);
6085                 for (i = 0; i < nritems; i++) {
6086                         ptr = btrfs_node_blockptr(buf, i);
6087                         size = btrfs_level_size(root, level - 1);
6088                         btrfs_node_key_to_cpu(buf, &key, i);
6089                         if (ri != NULL) {
6090                                 if ((level == ri->drop_level)
6091                                     && is_dropped_key(&key, &ri->drop_key)) {
6092                                         continue;
6093                                 }
6094                         }
6095                         ret = add_extent_rec(extent_cache, &key,
6096                                              btrfs_node_ptr_generation(buf, i),
6097                                              ptr, size, 0, 0, 1, 0, 1, 0,
6098                                              size);
6099                         BUG_ON(ret);
6100
6101                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6102
6103                         if (level > 1) {
6104                                 add_pending(nodes, seen, ptr, size);
6105                         } else {
6106                                 add_pending(pending, seen, ptr, size);
6107                         }
6108                 }
6109                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6110                                       nritems) * sizeof(struct btrfs_key_ptr);
6111         }
6112         total_btree_bytes += buf->len;
6113         if (fs_root_objectid(btrfs_header_owner(buf)))
6114                 total_fs_tree_bytes += buf->len;
6115         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6116                 total_extent_tree_bytes += buf->len;
6117         if (!found_old_backref &&
6118             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6119             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6120             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6121                 found_old_backref = 1;
6122 out:
6123         free_extent_buffer(buf);
6124         return ret;
6125 }
6126
6127 static int add_root_to_pending(struct extent_buffer *buf,
6128                                struct cache_tree *extent_cache,
6129                                struct cache_tree *pending,
6130                                struct cache_tree *seen,
6131                                struct cache_tree *nodes,
6132                                u64 objectid)
6133 {
6134         if (btrfs_header_level(buf) > 0)
6135                 add_pending(nodes, seen, buf->start, buf->len);
6136         else
6137                 add_pending(pending, seen, buf->start, buf->len);
6138         add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6139                        0, 1, 1, 0, 1, 0, buf->len);
6140
6141         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6142             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6143                 add_tree_backref(extent_cache, buf->start, buf->start,
6144                                  0, 1);
6145         else
6146                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6147         return 0;
6148 }
6149
6150 /* as we fix the tree, we might be deleting blocks that
6151  * we're tracking for repair.  This hook makes sure we
6152  * remove any backrefs for blocks as we are fixing them.
6153  */
6154 static int free_extent_hook(struct btrfs_trans_handle *trans,
6155                             struct btrfs_root *root,
6156                             u64 bytenr, u64 num_bytes, u64 parent,
6157                             u64 root_objectid, u64 owner, u64 offset,
6158                             int refs_to_drop)
6159 {
6160         struct extent_record *rec;
6161         struct cache_extent *cache;
6162         int is_data;
6163         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6164
6165         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6166         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6167         if (!cache)
6168                 return 0;
6169
6170         rec = container_of(cache, struct extent_record, cache);
6171         if (is_data) {
6172                 struct data_backref *back;
6173                 back = find_data_backref(rec, parent, root_objectid, owner,
6174                                          offset, 1, bytenr, num_bytes);
6175                 if (!back)
6176                         goto out;
6177                 if (back->node.found_ref) {
6178                         back->found_ref -= refs_to_drop;
6179                         if (rec->refs)
6180                                 rec->refs -= refs_to_drop;
6181                 }
6182                 if (back->node.found_extent_tree) {
6183                         back->num_refs -= refs_to_drop;
6184                         if (rec->extent_item_refs)
6185                                 rec->extent_item_refs -= refs_to_drop;
6186                 }
6187                 if (back->found_ref == 0)
6188                         back->node.found_ref = 0;
6189                 if (back->num_refs == 0)
6190                         back->node.found_extent_tree = 0;
6191
6192                 if (!back->node.found_extent_tree && back->node.found_ref) {
6193                         list_del(&back->node.list);
6194                         free(back);
6195                 }
6196         } else {
6197                 struct tree_backref *back;
6198                 back = find_tree_backref(rec, parent, root_objectid);
6199                 if (!back)
6200                         goto out;
6201                 if (back->node.found_ref) {
6202                         if (rec->refs)
6203                                 rec->refs--;
6204                         back->node.found_ref = 0;
6205                 }
6206                 if (back->node.found_extent_tree) {
6207                         if (rec->extent_item_refs)
6208                                 rec->extent_item_refs--;
6209                         back->node.found_extent_tree = 0;
6210                 }
6211                 if (!back->node.found_extent_tree && back->node.found_ref) {
6212                         list_del(&back->node.list);
6213                         free(back);
6214                 }
6215         }
6216         maybe_free_extent_rec(extent_cache, rec);
6217 out:
6218         return 0;
6219 }
6220
6221 static int delete_extent_records(struct btrfs_trans_handle *trans,
6222                                  struct btrfs_root *root,
6223                                  struct btrfs_path *path,
6224                                  u64 bytenr, u64 new_len)
6225 {
6226         struct btrfs_key key;
6227         struct btrfs_key found_key;
6228         struct extent_buffer *leaf;
6229         int ret;
6230         int slot;
6231
6232
6233         key.objectid = bytenr;
6234         key.type = (u8)-1;
6235         key.offset = (u64)-1;
6236
6237         while(1) {
6238                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6239                                         &key, path, 0, 1);
6240                 if (ret < 0)
6241                         break;
6242
6243                 if (ret > 0) {
6244                         ret = 0;
6245                         if (path->slots[0] == 0)
6246                                 break;
6247                         path->slots[0]--;
6248                 }
6249                 ret = 0;
6250
6251                 leaf = path->nodes[0];
6252                 slot = path->slots[0];
6253
6254                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6255                 if (found_key.objectid != bytenr)
6256                         break;
6257
6258                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6259                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6260                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6261                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6262                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6263                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6264                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6265                         btrfs_release_path(path);
6266                         if (found_key.type == 0) {
6267                                 if (found_key.offset == 0)
6268                                         break;
6269                                 key.offset = found_key.offset - 1;
6270                                 key.type = found_key.type;
6271                         }
6272                         key.type = found_key.type - 1;
6273                         key.offset = (u64)-1;
6274                         continue;
6275                 }
6276
6277                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6278                         found_key.objectid, found_key.type, found_key.offset);
6279
6280                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6281                 if (ret)
6282                         break;
6283                 btrfs_release_path(path);
6284
6285                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6286                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6287                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6288                                 found_key.offset : root->leafsize;
6289
6290                         ret = btrfs_update_block_group(trans, root, bytenr,
6291                                                        bytes, 0, 0);
6292                         if (ret)
6293                                 break;
6294                 }
6295         }
6296
6297         btrfs_release_path(path);
6298         return ret;
6299 }
6300
6301 /*
6302  * for a single backref, this will allocate a new extent
6303  * and add the backref to it.
6304  */
6305 static int record_extent(struct btrfs_trans_handle *trans,
6306                          struct btrfs_fs_info *info,
6307                          struct btrfs_path *path,
6308                          struct extent_record *rec,
6309                          struct extent_backref *back,
6310                          int allocated, u64 flags)
6311 {
6312         int ret;
6313         struct btrfs_root *extent_root = info->extent_root;
6314         struct extent_buffer *leaf;
6315         struct btrfs_key ins_key;
6316         struct btrfs_extent_item *ei;
6317         struct tree_backref *tback;
6318         struct data_backref *dback;
6319         struct btrfs_tree_block_info *bi;
6320
6321         if (!back->is_data)
6322                 rec->max_size = max_t(u64, rec->max_size,
6323                                     info->extent_root->leafsize);
6324
6325         if (!allocated) {
6326                 u32 item_size = sizeof(*ei);
6327
6328                 if (!back->is_data)
6329                         item_size += sizeof(*bi);
6330
6331                 ins_key.objectid = rec->start;
6332                 ins_key.offset = rec->max_size;
6333                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6334
6335                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6336                                         &ins_key, item_size);
6337                 if (ret)
6338                         goto fail;
6339
6340                 leaf = path->nodes[0];
6341                 ei = btrfs_item_ptr(leaf, path->slots[0],
6342                                     struct btrfs_extent_item);
6343
6344                 btrfs_set_extent_refs(leaf, ei, 0);
6345                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6346
6347                 if (back->is_data) {
6348                         btrfs_set_extent_flags(leaf, ei,
6349                                                BTRFS_EXTENT_FLAG_DATA);
6350                 } else {
6351                         struct btrfs_disk_key copy_key;;
6352
6353                         tback = (struct tree_backref *)back;
6354                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6355                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6356                                              sizeof(*bi));
6357
6358                         btrfs_set_disk_key_objectid(&copy_key,
6359                                                     rec->info_objectid);
6360                         btrfs_set_disk_key_type(&copy_key, 0);
6361                         btrfs_set_disk_key_offset(&copy_key, 0);
6362
6363                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6364                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6365
6366                         btrfs_set_extent_flags(leaf, ei,
6367                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6368                 }
6369
6370                 btrfs_mark_buffer_dirty(leaf);
6371                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6372                                                rec->max_size, 1, 0);
6373                 if (ret)
6374                         goto fail;
6375                 btrfs_release_path(path);
6376         }
6377
6378         if (back->is_data) {
6379                 u64 parent;
6380                 int i;
6381
6382                 dback = (struct data_backref *)back;
6383                 if (back->full_backref)
6384                         parent = dback->parent;
6385                 else
6386                         parent = 0;
6387
6388                 for (i = 0; i < dback->found_ref; i++) {
6389                         /* if parent != 0, we're doing a full backref
6390                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6391                          * just makes the backref allocator create a data
6392                          * backref
6393                          */
6394                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6395                                                    rec->start, rec->max_size,
6396                                                    parent,
6397                                                    dback->root,
6398                                                    parent ?
6399                                                    BTRFS_FIRST_FREE_OBJECTID :
6400                                                    dback->owner,
6401                                                    dback->offset);
6402                         if (ret)
6403                                 break;
6404                 }
6405                 fprintf(stderr, "adding new data backref"
6406                                 " on %llu %s %llu owner %llu"
6407                                 " offset %llu found %d\n",
6408                                 (unsigned long long)rec->start,
6409                                 back->full_backref ?
6410                                 "parent" : "root",
6411                                 back->full_backref ?
6412                                 (unsigned long long)parent :
6413                                 (unsigned long long)dback->root,
6414                                 (unsigned long long)dback->owner,
6415                                 (unsigned long long)dback->offset,
6416                                 dback->found_ref);
6417         } else {
6418                 u64 parent;
6419
6420                 tback = (struct tree_backref *)back;
6421                 if (back->full_backref)
6422                         parent = tback->parent;
6423                 else
6424                         parent = 0;
6425
6426                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6427                                            rec->start, rec->max_size,
6428                                            parent, tback->root, 0, 0);
6429                 fprintf(stderr, "adding new tree backref on "
6430                         "start %llu len %llu parent %llu root %llu\n",
6431                         rec->start, rec->max_size, parent, tback->root);
6432         }
6433 fail:
6434         btrfs_release_path(path);
6435         return ret;
6436 }
6437
/*
 * One candidate (bytenr, bytes) pair for a data extent, tallied from the
 * backrefs that claim it while deciding which pair is correct.
 */
struct extent_entry {
        u64 bytenr;             /* start of the candidate extent */
        u64 bytes;              /* length of the candidate extent */
        int count;              /* number of refs agreeing on this pair */
        int broken;             /* how many of those refs are marked broken */
        struct list_head list;  /* link in the list of candidates */
};
6445
6446 static struct extent_entry *find_entry(struct list_head *entries,
6447                                        u64 bytenr, u64 bytes)
6448 {
6449         struct extent_entry *entry = NULL;
6450
6451         list_for_each_entry(entry, entries, list) {
6452                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6453                         return entry;
6454         }
6455
6456         return NULL;
6457 }
6458
6459 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6460 {
6461         struct extent_entry *entry, *best = NULL, *prev = NULL;
6462
6463         list_for_each_entry(entry, entries, list) {
6464                 if (!prev) {
6465                         prev = entry;
6466                         continue;
6467                 }
6468
6469                 /*
6470                  * If there are as many broken entries as entries then we know
6471                  * not to trust this particular entry.
6472                  */
6473                 if (entry->broken == entry->count)
6474                         continue;
6475
6476                 /*
6477                  * If our current entry == best then we can't be sure our best
6478                  * is really the best, so we need to keep searching.
6479                  */
6480                 if (best && best->count == entry->count) {
6481                         prev = entry;
6482                         best = NULL;
6483                         continue;
6484                 }
6485
6486                 /* Prev == entry, not good enough, have to keep searching */
6487                 if (!prev->broken && prev->count == entry->count)
6488                         continue;
6489
6490                 if (!best)
6491                         best = (prev->count > entry->count) ? prev : entry;
6492                 else if (best->count < entry->count)
6493                         best = entry;
6494                 prev = entry;
6495         }
6496
6497         return best;
6498 }
6499
6500 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6501                       struct data_backref *dback, struct extent_entry *entry)
6502 {
6503         struct btrfs_trans_handle *trans;
6504         struct btrfs_root *root;
6505         struct btrfs_file_extent_item *fi;
6506         struct extent_buffer *leaf;
6507         struct btrfs_key key;
6508         u64 bytenr, bytes;
6509         int ret, err;
6510
6511         key.objectid = dback->root;
6512         key.type = BTRFS_ROOT_ITEM_KEY;
6513         key.offset = (u64)-1;
6514         root = btrfs_read_fs_root(info, &key);
6515         if (IS_ERR(root)) {
6516                 fprintf(stderr, "Couldn't find root for our ref\n");
6517                 return -EINVAL;
6518         }
6519
6520         /*
6521          * The backref points to the original offset of the extent if it was
6522          * split, so we need to search down to the offset we have and then walk
6523          * forward until we find the backref we're looking for.
6524          */
6525         key.objectid = dback->owner;
6526         key.type = BTRFS_EXTENT_DATA_KEY;
6527         key.offset = dback->offset;
6528         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6529         if (ret < 0) {
6530                 fprintf(stderr, "Error looking up ref %d\n", ret);
6531                 return ret;
6532         }
6533
6534         while (1) {
6535                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6536                         ret = btrfs_next_leaf(root, path);
6537                         if (ret) {
6538                                 fprintf(stderr, "Couldn't find our ref, next\n");
6539                                 return -EINVAL;
6540                         }
6541                 }
6542                 leaf = path->nodes[0];
6543                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6544                 if (key.objectid != dback->owner ||
6545                     key.type != BTRFS_EXTENT_DATA_KEY) {
6546                         fprintf(stderr, "Couldn't find our ref, search\n");
6547                         return -EINVAL;
6548                 }
6549                 fi = btrfs_item_ptr(leaf, path->slots[0],
6550                                     struct btrfs_file_extent_item);
6551                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6552                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6553
6554                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6555                         break;
6556                 path->slots[0]++;
6557         }
6558
6559         btrfs_release_path(path);
6560
6561         trans = btrfs_start_transaction(root, 1);
6562         if (IS_ERR(trans))
6563                 return PTR_ERR(trans);
6564
6565         /*
6566          * Ok we have the key of the file extent we want to fix, now we can cow
6567          * down to the thing and fix it.
6568          */
6569         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6570         if (ret < 0) {
6571                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6572                         key.objectid, key.type, key.offset, ret);
6573                 goto out;
6574         }
6575         if (ret > 0) {
6576                 fprintf(stderr, "Well that's odd, we just found this key "
6577                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6578                         key.offset);
6579                 ret = -EINVAL;
6580                 goto out;
6581         }
6582         leaf = path->nodes[0];
6583         fi = btrfs_item_ptr(leaf, path->slots[0],
6584                             struct btrfs_file_extent_item);
6585
6586         if (btrfs_file_extent_compression(leaf, fi) &&
6587             dback->disk_bytenr != entry->bytenr) {
6588                 fprintf(stderr, "Ref doesn't match the record start and is "
6589                         "compressed, please take a btrfs-image of this file "
6590                         "system and send it to a btrfs developer so they can "
6591                         "complete this functionality for bytenr %Lu\n",
6592                         dback->disk_bytenr);
6593                 ret = -EINVAL;
6594                 goto out;
6595         }
6596
6597         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6598                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6599         } else if (dback->disk_bytenr > entry->bytenr) {
6600                 u64 off_diff, offset;
6601
6602                 off_diff = dback->disk_bytenr - entry->bytenr;
6603                 offset = btrfs_file_extent_offset(leaf, fi);
6604                 if (dback->disk_bytenr + offset +
6605                     btrfs_file_extent_num_bytes(leaf, fi) >
6606                     entry->bytenr + entry->bytes) {
6607                         fprintf(stderr, "Ref is past the entry end, please "
6608                                 "take a btrfs-image of this file system and "
6609                                 "send it to a btrfs developer, ref %Lu\n",
6610                                 dback->disk_bytenr);
6611                         ret = -EINVAL;
6612                         goto out;
6613                 }
6614                 offset += off_diff;
6615                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6616                 btrfs_set_file_extent_offset(leaf, fi, offset);
6617         } else if (dback->disk_bytenr < entry->bytenr) {
6618                 u64 offset;
6619
6620                 offset = btrfs_file_extent_offset(leaf, fi);
6621                 if (dback->disk_bytenr + offset < entry->bytenr) {
6622                         fprintf(stderr, "Ref is before the entry start, please"
6623                                 " take a btrfs-image of this file system and "
6624                                 "send it to a btrfs developer, ref %Lu\n",
6625                                 dback->disk_bytenr);
6626                         ret = -EINVAL;
6627                         goto out;
6628                 }
6629
6630                 offset += dback->disk_bytenr;
6631                 offset -= entry->bytenr;
6632                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6633                 btrfs_set_file_extent_offset(leaf, fi, offset);
6634         }
6635
6636         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6637
6638         /*
6639          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6640          * only do this if we aren't using compression, otherwise it's a
6641          * trickier case.
6642          */
6643         if (!btrfs_file_extent_compression(leaf, fi))
6644                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6645         else
6646                 printf("ram bytes may be wrong?\n");
6647         btrfs_mark_buffer_dirty(leaf);
6648 out:
6649         err = btrfs_commit_transaction(trans, root);
6650         btrfs_release_path(path);
6651         return ret ? ret : err;
6652 }
6653
6654 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6655                            struct extent_record *rec)
6656 {
6657         struct extent_backref *back;
6658         struct data_backref *dback;
6659         struct extent_entry *entry, *best = NULL;
6660         LIST_HEAD(entries);
6661         int nr_entries = 0;
6662         int broken_entries = 0;
6663         int ret = 0;
6664         short mismatch = 0;
6665
6666         /*
6667          * Metadata is easy and the backrefs should always agree on bytenr and
6668          * size, if not we've got bigger issues.
6669          */
6670         if (rec->metadata)
6671                 return 0;
6672
6673         list_for_each_entry(back, &rec->backrefs, list) {
6674                 if (back->full_backref || !back->is_data)
6675                         continue;
6676
6677                 dback = (struct data_backref *)back;
6678
6679                 /*
6680                  * We only pay attention to backrefs that we found a real
6681                  * backref for.
6682                  */
6683                 if (dback->found_ref == 0)
6684                         continue;
6685
6686                 /*
6687                  * For now we only catch when the bytes don't match, not the
6688                  * bytenr.  We can easily do this at the same time, but I want
6689                  * to have a fs image to test on before we just add repair
6690                  * functionality willy-nilly so we know we won't screw up the
6691                  * repair.
6692                  */
6693
6694                 entry = find_entry(&entries, dback->disk_bytenr,
6695                                    dback->bytes);
6696                 if (!entry) {
6697                         entry = malloc(sizeof(struct extent_entry));
6698                         if (!entry) {
6699                                 ret = -ENOMEM;
6700                                 goto out;
6701                         }
6702                         memset(entry, 0, sizeof(*entry));
6703                         entry->bytenr = dback->disk_bytenr;
6704                         entry->bytes = dback->bytes;
6705                         list_add_tail(&entry->list, &entries);
6706                         nr_entries++;
6707                 }
6708
6709                 /*
6710                  * If we only have on entry we may think the entries agree when
6711                  * in reality they don't so we have to do some extra checking.
6712                  */
6713                 if (dback->disk_bytenr != rec->start ||
6714                     dback->bytes != rec->nr || back->broken)
6715                         mismatch = 1;
6716
6717                 if (back->broken) {
6718                         entry->broken++;
6719                         broken_entries++;
6720                 }
6721
6722                 entry->count++;
6723         }
6724
6725         /* Yay all the backrefs agree, carry on good sir */
6726         if (nr_entries <= 1 && !mismatch)
6727                 goto out;
6728
6729         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6730                 "%Lu\n", rec->start);
6731
6732         /*
6733          * First we want to see if the backrefs can agree amongst themselves who
6734          * is right, so figure out which one of the entries has the highest
6735          * count.
6736          */
6737         best = find_most_right_entry(&entries);
6738
6739         /*
6740          * Ok so we may have an even split between what the backrefs think, so
6741          * this is where we use the extent ref to see what it thinks.
6742          */
6743         if (!best) {
6744                 entry = find_entry(&entries, rec->start, rec->nr);
6745                 if (!entry && (!broken_entries || !rec->found_rec)) {
6746                         fprintf(stderr, "Backrefs don't agree with each other "
6747                                 "and extent record doesn't agree with anybody,"
6748                                 " so we can't fix bytenr %Lu bytes %Lu\n",
6749                                 rec->start, rec->nr);
6750                         ret = -EINVAL;
6751                         goto out;
6752                 } else if (!entry) {
6753                         /*
6754                          * Ok our backrefs were broken, we'll assume this is the
6755                          * correct value and add an entry for this range.
6756                          */
6757                         entry = malloc(sizeof(struct extent_entry));
6758                         if (!entry) {
6759                                 ret = -ENOMEM;
6760                                 goto out;
6761                         }
6762                         memset(entry, 0, sizeof(*entry));
6763                         entry->bytenr = rec->start;
6764                         entry->bytes = rec->nr;
6765                         list_add_tail(&entry->list, &entries);
6766                         nr_entries++;
6767                 }
6768                 entry->count++;
6769                 best = find_most_right_entry(&entries);
6770                 if (!best) {
6771                         fprintf(stderr, "Backrefs and extent record evenly "
6772                                 "split on who is right, this is going to "
6773                                 "require user input to fix bytenr %Lu bytes "
6774                                 "%Lu\n", rec->start, rec->nr);
6775                         ret = -EINVAL;
6776                         goto out;
6777                 }
6778         }
6779
6780         /*
6781          * I don't think this can happen currently as we'll abort() if we catch
6782          * this case higher up, but in case somebody removes that we still can't
6783          * deal with it properly here yet, so just bail out of that's the case.
6784          */
6785         if (best->bytenr != rec->start) {
6786                 fprintf(stderr, "Extent start and backref starts don't match, "
6787                         "please use btrfs-image on this file system and send "
6788                         "it to a btrfs developer so they can make fsck fix "
6789                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
6790                         rec->start, rec->nr);
6791                 ret = -EINVAL;
6792                 goto out;
6793         }
6794
6795         /*
6796          * Ok great we all agreed on an extent record, let's go find the real
6797          * references and fix up the ones that don't match.
6798          */
6799         list_for_each_entry(back, &rec->backrefs, list) {
6800                 if (back->full_backref || !back->is_data)
6801                         continue;
6802
6803                 dback = (struct data_backref *)back;
6804
6805                 /*
6806                  * Still ignoring backrefs that don't have a real ref attached
6807                  * to them.
6808                  */
6809                 if (dback->found_ref == 0)
6810                         continue;
6811
6812                 if (dback->bytes == best->bytes &&
6813                     dback->disk_bytenr == best->bytenr)
6814                         continue;
6815
6816                 ret = repair_ref(info, path, dback, best);
6817                 if (ret)
6818                         goto out;
6819         }
6820
6821         /*
6822          * Ok we messed with the actual refs, which means we need to drop our
6823          * entire cache and go back and rescan.  I know this is a huge pain and
6824          * adds a lot of extra work, but it's the only way to be safe.  Once all
6825          * the backrefs agree we may not need to do anything to the extent
6826          * record itself.
6827          */
6828         ret = -EAGAIN;
6829 out:
6830         while (!list_empty(&entries)) {
6831                 entry = list_entry(entries.next, struct extent_entry, list);
6832                 list_del_init(&entry->list);
6833                 free(entry);
6834         }
6835         return ret;
6836 }
6837
6838 static int process_duplicates(struct btrfs_root *root,
6839                               struct cache_tree *extent_cache,
6840                               struct extent_record *rec)
6841 {
6842         struct extent_record *good, *tmp;
6843         struct cache_extent *cache;
6844         int ret;
6845
6846         /*
6847          * If we found a extent record for this extent then return, or if we
6848          * have more than one duplicate we are likely going to need to delete
6849          * something.
6850          */
6851         if (rec->found_rec || rec->num_duplicates > 1)
6852                 return 0;
6853
6854         /* Shouldn't happen but just in case */
6855         BUG_ON(!rec->num_duplicates);
6856
6857         /*
6858          * So this happens if we end up with a backref that doesn't match the
6859          * actual extent entry.  So either the backref is bad or the extent
6860          * entry is bad.  Either way we want to have the extent_record actually
6861          * reflect what we found in the extent_tree, so we need to take the
6862          * duplicate out and use that as the extent_record since the only way we
6863          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6864          */
6865         remove_cache_extent(extent_cache, &rec->cache);
6866
6867         good = list_entry(rec->dups.next, struct extent_record, list);
6868         list_del_init(&good->list);
6869         INIT_LIST_HEAD(&good->backrefs);
6870         INIT_LIST_HEAD(&good->dups);
6871         good->cache.start = good->start;
6872         good->cache.size = good->nr;
6873         good->content_checked = 0;
6874         good->owner_ref_checked = 0;
6875         good->num_duplicates = 0;
6876         good->refs = rec->refs;
6877         list_splice_init(&rec->backrefs, &good->backrefs);
6878         while (1) {
6879                 cache = lookup_cache_extent(extent_cache, good->start,
6880                                             good->nr);
6881                 if (!cache)
6882                         break;
6883                 tmp = container_of(cache, struct extent_record, cache);
6884
6885                 /*
6886                  * If we find another overlapping extent and it's found_rec is
6887                  * set then it's a duplicate and we need to try and delete
6888                  * something.
6889                  */
6890                 if (tmp->found_rec || tmp->num_duplicates > 0) {
6891                         if (list_empty(&good->list))
6892                                 list_add_tail(&good->list,
6893                                               &duplicate_extents);
6894                         good->num_duplicates += tmp->num_duplicates + 1;
6895                         list_splice_init(&tmp->dups, &good->dups);
6896                         list_del_init(&tmp->list);
6897                         list_add_tail(&tmp->list, &good->dups);
6898                         remove_cache_extent(extent_cache, &tmp->cache);
6899                         continue;
6900                 }
6901
6902                 /*
6903                  * Ok we have another non extent item backed extent rec, so lets
6904                  * just add it to this extent and carry on like we did above.
6905                  */
6906                 good->refs += tmp->refs;
6907                 list_splice_init(&tmp->backrefs, &good->backrefs);
6908                 remove_cache_extent(extent_cache, &tmp->cache);
6909                 free(tmp);
6910         }
6911         ret = insert_cache_extent(extent_cache, &good->cache);
6912         BUG_ON(ret);
6913         free(rec);
6914         return good->num_duplicates ? 0 : 1;
6915 }
6916
6917 static int delete_duplicate_records(struct btrfs_root *root,
6918                                     struct extent_record *rec)
6919 {
6920         struct btrfs_trans_handle *trans;
6921         LIST_HEAD(delete_list);
6922         struct btrfs_path *path;
6923         struct extent_record *tmp, *good, *n;
6924         int nr_del = 0;
6925         int ret = 0, err;
6926         struct btrfs_key key;
6927
6928         path = btrfs_alloc_path();
6929         if (!path) {
6930                 ret = -ENOMEM;
6931                 goto out;
6932         }
6933
6934         good = rec;
6935         /* Find the record that covers all of the duplicates. */
6936         list_for_each_entry(tmp, &rec->dups, list) {
6937                 if (good->start < tmp->start)
6938                         continue;
6939                 if (good->nr > tmp->nr)
6940                         continue;
6941
6942                 if (tmp->start + tmp->nr < good->start + good->nr) {
6943                         fprintf(stderr, "Ok we have overlapping extents that "
6944                                 "aren't completely covered by eachother, this "
6945                                 "is going to require more careful thought.  "
6946                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
6947                                 tmp->start, tmp->nr, good->start, good->nr);
6948                         abort();
6949                 }
6950                 good = tmp;
6951         }
6952
6953         if (good != rec)
6954                 list_add_tail(&rec->list, &delete_list);
6955
6956         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
6957                 if (tmp == good)
6958                         continue;
6959                 list_move_tail(&tmp->list, &delete_list);
6960         }
6961
6962         root = root->fs_info->extent_root;
6963         trans = btrfs_start_transaction(root, 1);
6964         if (IS_ERR(trans)) {
6965                 ret = PTR_ERR(trans);
6966                 goto out;
6967         }
6968
6969         list_for_each_entry(tmp, &delete_list, list) {
6970                 if (tmp->found_rec == 0)
6971                         continue;
6972                 key.objectid = tmp->start;
6973                 key.type = BTRFS_EXTENT_ITEM_KEY;
6974                 key.offset = tmp->nr;
6975
6976                 /* Shouldn't happen but just in case */
6977                 if (tmp->metadata) {
6978                         fprintf(stderr, "Well this shouldn't happen, extent "
6979                                 "record overlaps but is metadata? "
6980                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
6981                         abort();
6982                 }
6983
6984                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6985                 if (ret) {
6986                         if (ret > 0)
6987                                 ret = -EINVAL;
6988                         break;
6989                 }
6990                 ret = btrfs_del_item(trans, root, path);
6991                 if (ret)
6992                         break;
6993                 btrfs_release_path(path);
6994                 nr_del++;
6995         }
6996         err = btrfs_commit_transaction(trans, root);
6997         if (err && !ret)
6998                 ret = err;
6999 out:
7000         while (!list_empty(&delete_list)) {
7001                 tmp = list_entry(delete_list.next, struct extent_record, list);
7002                 list_del_init(&tmp->list);
7003                 if (tmp == rec)
7004                         continue;
7005                 free(tmp);
7006         }
7007
7008         while (!list_empty(&rec->dups)) {
7009                 tmp = list_entry(rec->dups.next, struct extent_record, list);
7010                 list_del_init(&tmp->list);
7011                 free(tmp);
7012         }
7013
7014         btrfs_free_path(path);
7015
7016         if (!ret && !nr_del)
7017                 rec->num_duplicates = 0;
7018
7019         return ret ? ret : nr_del;
7020 }
7021
7022 static int find_possible_backrefs(struct btrfs_fs_info *info,
7023                                   struct btrfs_path *path,
7024                                   struct cache_tree *extent_cache,
7025                                   struct extent_record *rec)
7026 {
7027         struct btrfs_root *root;
7028         struct extent_backref *back;
7029         struct data_backref *dback;
7030         struct cache_extent *cache;
7031         struct btrfs_file_extent_item *fi;
7032         struct btrfs_key key;
7033         u64 bytenr, bytes;
7034         int ret;
7035
7036         list_for_each_entry(back, &rec->backrefs, list) {
7037                 /* Don't care about full backrefs (poor unloved backrefs) */
7038                 if (back->full_backref || !back->is_data)
7039                         continue;
7040
7041                 dback = (struct data_backref *)back;
7042
7043                 /* We found this one, we don't need to do a lookup */
7044                 if (dback->found_ref)
7045                         continue;
7046
7047                 key.objectid = dback->root;
7048                 key.type = BTRFS_ROOT_ITEM_KEY;
7049                 key.offset = (u64)-1;
7050
7051                 root = btrfs_read_fs_root(info, &key);
7052
7053                 /* No root, definitely a bad ref, skip */
7054                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7055                         continue;
7056                 /* Other err, exit */
7057                 if (IS_ERR(root))
7058                         return PTR_ERR(root);
7059
7060                 key.objectid = dback->owner;
7061                 key.type = BTRFS_EXTENT_DATA_KEY;
7062                 key.offset = dback->offset;
7063                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7064                 if (ret) {
7065                         btrfs_release_path(path);
7066                         if (ret < 0)
7067                                 return ret;
7068                         /* Didn't find it, we can carry on */
7069                         ret = 0;
7070                         continue;
7071                 }
7072
7073                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7074                                     struct btrfs_file_extent_item);
7075                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7076                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7077                 btrfs_release_path(path);
7078                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7079                 if (cache) {
7080                         struct extent_record *tmp;
7081                         tmp = container_of(cache, struct extent_record, cache);
7082
7083                         /*
7084                          * If we found an extent record for the bytenr for this
7085                          * particular backref then we can't add it to our
7086                          * current extent record.  We only want to add backrefs
7087                          * that don't have a corresponding extent item in the
7088                          * extent tree since they likely belong to this record
7089                          * and we need to fix it if it doesn't match bytenrs.
7090                          */
7091                         if  (tmp->found_rec)
7092                                 continue;
7093                 }
7094
7095                 dback->found_ref += 1;
7096                 dback->disk_bytenr = bytenr;
7097                 dback->bytes = bytes;
7098
7099                 /*
7100                  * Set this so the verify backref code knows not to trust the
7101                  * values in this backref.
7102                  */
7103                 back->broken = 1;
7104         }
7105
7106         return 0;
7107 }
7108
7109 /*
7110  * Record orphan data ref into corresponding root.
7111  *
7112  * Return 0 if the extent item contains data ref and recorded.
7113  * Return 1 if the extent item contains no useful data ref
7114  *   On that case, it may contains only shared_dataref or metadata backref
7115  *   or the file extent exists(this should be handled by the extent bytenr
7116  *   recovery routine)
7117  * Return <0 if something goes wrong.
7118  */
7119 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7120                                       struct extent_record *rec)
7121 {
7122         struct btrfs_key key;
7123         struct btrfs_root *dest_root;
7124         struct extent_backref *back;
7125         struct data_backref *dback;
7126         struct orphan_data_extent *orphan;
7127         struct btrfs_path *path;
7128         int recorded_data_ref = 0;
7129         int ret = 0;
7130
7131         if (rec->metadata)
7132                 return 1;
7133         path = btrfs_alloc_path();
7134         if (!path)
7135                 return -ENOMEM;
7136         list_for_each_entry(back, &rec->backrefs, list) {
7137                 if (back->full_backref || !back->is_data ||
7138                     !back->found_extent_tree)
7139                         continue;
7140                 dback = (struct data_backref *)back;
7141                 if (dback->found_ref)
7142                         continue;
7143                 key.objectid = dback->root;
7144                 key.type = BTRFS_ROOT_ITEM_KEY;
7145                 key.offset = (u64)-1;
7146
7147                 dest_root = btrfs_read_fs_root(fs_info, &key);
7148
7149                 /* For non-exist root we just skip it */
7150                 if (IS_ERR(dest_root) || !dest_root)
7151                         continue;
7152
7153                 key.objectid = dback->owner;
7154                 key.type = BTRFS_EXTENT_DATA_KEY;
7155                 key.offset = dback->offset;
7156
7157                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7158                 /*
7159                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7160                  * we need to record it for inode/file extent rebuild.
7161                  * For ret > 0, we record it only for file extent rebuild.
7162                  * For ret == 0, the file extent exists but only bytenr
7163                  * mismatch, let the original bytenr fix routine to handle,
7164                  * don't record it.
7165                  */
7166                 if (ret == 0)
7167                         continue;
7168                 ret = 0;
7169                 orphan = malloc(sizeof(*orphan));
7170                 if (!orphan) {
7171                         ret = -ENOMEM;
7172                         goto out;
7173                 }
7174                 INIT_LIST_HEAD(&orphan->list);
7175                 orphan->root = dback->root;
7176                 orphan->objectid = dback->owner;
7177                 orphan->offset = dback->offset;
7178                 orphan->disk_bytenr = rec->cache.start;
7179                 orphan->disk_len = rec->cache.size;
7180                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7181                 recorded_data_ref = 1;
7182         }
7183 out:
7184         btrfs_free_path(path);
7185         if (!ret)
7186                 return !recorded_data_ref;
7187         else
7188                 return ret;
7189 }
7190
7191 /*
7192  * when an incorrect extent item is found, this will delete
7193  * all of the existing entries for it and recreate them
7194  * based on what the tree scan found.
7195  */
7196 static int fixup_extent_refs(struct btrfs_fs_info *info,
7197                              struct cache_tree *extent_cache,
7198                              struct extent_record *rec)
7199 {
7200         struct btrfs_trans_handle *trans = NULL;
7201         int ret;
7202         struct btrfs_path *path;
7203         struct list_head *cur = rec->backrefs.next;
7204         struct cache_extent *cache;
7205         struct extent_backref *back;
7206         int allocated = 0;
7207         u64 flags = 0;
7208
7209         if (rec->flag_block_full_backref)
7210                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7211
7212         path = btrfs_alloc_path();
7213         if (!path)
7214                 return -ENOMEM;
7215
7216         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7217                 /*
7218                  * Sometimes the backrefs themselves are so broken they don't
7219                  * get attached to any meaningful rec, so first go back and
7220                  * check any of our backrefs that we couldn't find and throw
7221                  * them into the list if we find the backref so that
7222                  * verify_backrefs can figure out what to do.
7223                  */
7224                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7225                 if (ret < 0)
7226                         goto out;
7227         }
7228
7229         /* step one, make sure all of the backrefs agree */
7230         ret = verify_backrefs(info, path, rec);
7231         if (ret < 0)
7232                 goto out;
7233
7234         trans = btrfs_start_transaction(info->extent_root, 1);
7235         if (IS_ERR(trans)) {
7236                 ret = PTR_ERR(trans);
7237                 goto out;
7238         }
7239
7240         /* step two, delete all the existing records */
7241         ret = delete_extent_records(trans, info->extent_root, path,
7242                                     rec->start, rec->max_size);
7243
7244         if (ret < 0)
7245                 goto out;
7246
7247         /* was this block corrupt?  If so, don't add references to it */
7248         cache = lookup_cache_extent(info->corrupt_blocks,
7249                                     rec->start, rec->max_size);
7250         if (cache) {
7251                 ret = 0;
7252                 goto out;
7253         }
7254
7255         /* step three, recreate all the refs we did find */
7256         while(cur != &rec->backrefs) {
7257                 back = list_entry(cur, struct extent_backref, list);
7258                 cur = cur->next;
7259
7260                 /*
7261                  * if we didn't find any references, don't create a
7262                  * new extent record
7263                  */
7264                 if (!back->found_ref)
7265                         continue;
7266
7267                 rec->bad_full_backref = 0;
7268                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7269                 allocated = 1;
7270
7271                 if (ret)
7272                         goto out;
7273         }
7274 out:
7275         if (trans) {
7276                 int err = btrfs_commit_transaction(trans, info->extent_root);
7277                 if (!ret)
7278                         ret = err;
7279         }
7280
7281         btrfs_free_path(path);
7282         return ret;
7283 }
7284
7285 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7286                               struct extent_record *rec)
7287 {
7288         struct btrfs_trans_handle *trans;
7289         struct btrfs_root *root = fs_info->extent_root;
7290         struct btrfs_path *path;
7291         struct btrfs_extent_item *ei;
7292         struct btrfs_key key;
7293         u64 flags;
7294         int ret = 0;
7295
7296         key.objectid = rec->start;
7297         if (rec->metadata) {
7298                 key.type = BTRFS_METADATA_ITEM_KEY;
7299                 key.offset = rec->info_level;
7300         } else {
7301                 key.type = BTRFS_EXTENT_ITEM_KEY;
7302                 key.offset = rec->max_size;
7303         }
7304
7305         path = btrfs_alloc_path();
7306         if (!path)
7307                 return -ENOMEM;
7308
7309         trans = btrfs_start_transaction(root, 0);
7310         if (IS_ERR(trans)) {
7311                 btrfs_free_path(path);
7312                 return PTR_ERR(trans);
7313         }
7314
7315         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7316         if (ret < 0) {
7317                 btrfs_free_path(path);
7318                 btrfs_commit_transaction(trans, root);
7319                 return ret;
7320         } else if (ret) {
7321                 fprintf(stderr, "Didn't find extent for %llu\n",
7322                         (unsigned long long)rec->start);
7323                 btrfs_free_path(path);
7324                 btrfs_commit_transaction(trans, root);
7325                 return -ENOENT;
7326         }
7327
7328         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7329                             struct btrfs_extent_item);
7330         flags = btrfs_extent_flags(path->nodes[0], ei);
7331         if (rec->flag_block_full_backref) {
7332                 fprintf(stderr, "setting full backref on %llu\n",
7333                         (unsigned long long)key.objectid);
7334                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7335         } else {
7336                 fprintf(stderr, "clearing full backref on %llu\n",
7337                         (unsigned long long)key.objectid);
7338                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7339         }
7340         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7341         btrfs_mark_buffer_dirty(path->nodes[0]);
7342         btrfs_free_path(path);
7343         return btrfs_commit_transaction(trans, root);
7344 }
7345
7346 /* right now we only prune from the extent allocation tree */
7347 static int prune_one_block(struct btrfs_trans_handle *trans,
7348                            struct btrfs_fs_info *info,
7349                            struct btrfs_corrupt_block *corrupt)
7350 {
7351         int ret;
7352         struct btrfs_path path;
7353         struct extent_buffer *eb;
7354         u64 found;
7355         int slot;
7356         int nritems;
7357         int level = corrupt->level + 1;
7358
7359         btrfs_init_path(&path);
7360 again:
7361         /* we want to stop at the parent to our busted block */
7362         path.lowest_level = level;
7363
7364         ret = btrfs_search_slot(trans, info->extent_root,
7365                                 &corrupt->key, &path, -1, 1);
7366
7367         if (ret < 0)
7368                 goto out;
7369
7370         eb = path.nodes[level];
7371         if (!eb) {
7372                 ret = -ENOENT;
7373                 goto out;
7374         }
7375
7376         /*
7377          * hopefully the search gave us the block we want to prune,
7378          * lets try that first
7379          */
7380         slot = path.slots[level];
7381         found =  btrfs_node_blockptr(eb, slot);
7382         if (found == corrupt->cache.start)
7383                 goto del_ptr;
7384
7385         nritems = btrfs_header_nritems(eb);
7386
7387         /* the search failed, lets scan this node and hope we find it */
7388         for (slot = 0; slot < nritems; slot++) {
7389                 found =  btrfs_node_blockptr(eb, slot);
7390                 if (found == corrupt->cache.start)
7391                         goto del_ptr;
7392         }
7393         /*
7394          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7395          * to this block
7396          */
7397         if (eb == info->extent_root->node) {
7398                 ret = -ENOENT;
7399                 goto out;
7400         } else {
7401                 level++;
7402                 btrfs_release_path(&path);
7403                 goto again;
7404         }
7405
7406 del_ptr:
7407         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7408         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7409
7410 out:
7411         btrfs_release_path(&path);
7412         return ret;
7413 }
7414
7415 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7416 {
7417         struct btrfs_trans_handle *trans = NULL;
7418         struct cache_extent *cache;
7419         struct btrfs_corrupt_block *corrupt;
7420
7421         while (1) {
7422                 cache = search_cache_extent(info->corrupt_blocks, 0);
7423                 if (!cache)
7424                         break;
7425                 if (!trans) {
7426                         trans = btrfs_start_transaction(info->extent_root, 1);
7427                         if (IS_ERR(trans))
7428                                 return PTR_ERR(trans);
7429                 }
7430                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7431                 prune_one_block(trans, info, corrupt);
7432                 remove_cache_extent(info->corrupt_blocks, cache);
7433         }
7434         if (trans)
7435                 return btrfs_commit_transaction(trans, info->extent_root);
7436         return 0;
7437 }
7438
7439 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7440 {
7441         struct btrfs_block_group_cache *cache;
7442         u64 start, end;
7443         int ret;
7444
7445         while (1) {
7446                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7447                                             &start, &end, EXTENT_DIRTY);
7448                 if (ret)
7449                         break;
7450                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7451                                    GFP_NOFS);
7452         }
7453
7454         start = 0;
7455         while (1) {
7456                 cache = btrfs_lookup_first_block_group(fs_info, start);
7457                 if (!cache)
7458                         break;
7459                 if (cache->cached)
7460                         cache->cached = 0;
7461                 start = cache->key.objectid + cache->key.offset;
7462         }
7463 }
7464
7465 static int check_extent_refs(struct btrfs_root *root,
7466                              struct cache_tree *extent_cache)
7467 {
7468         struct extent_record *rec;
7469         struct cache_extent *cache;
7470         int err = 0;
7471         int ret = 0;
7472         int fixed = 0;
7473         int had_dups = 0;
7474         int recorded = 0;
7475
7476         if (repair) {
7477                 /*
7478                  * if we're doing a repair, we have to make sure
7479                  * we don't allocate from the problem extents.
7480                  * In the worst case, this will be all the
7481                  * extents in the FS
7482                  */
7483                 cache = search_cache_extent(extent_cache, 0);
7484                 while(cache) {
7485                         rec = container_of(cache, struct extent_record, cache);
7486                         set_extent_dirty(root->fs_info->excluded_extents,
7487                                          rec->start,
7488                                          rec->start + rec->max_size - 1,
7489                                          GFP_NOFS);
7490                         cache = next_cache_extent(cache);
7491                 }
7492
7493                 /* pin down all the corrupted blocks too */
7494                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7495                 while(cache) {
7496                         set_extent_dirty(root->fs_info->excluded_extents,
7497                                          cache->start,
7498                                          cache->start + cache->size - 1,
7499                                          GFP_NOFS);
7500                         cache = next_cache_extent(cache);
7501                 }
7502                 prune_corrupt_blocks(root->fs_info);
7503                 reset_cached_block_groups(root->fs_info);
7504         }
7505
7506         reset_cached_block_groups(root->fs_info);
7507
7508         /*
7509          * We need to delete any duplicate entries we find first otherwise we
7510          * could mess up the extent tree when we have backrefs that actually
7511          * belong to a different extent item and not the weird duplicate one.
7512          */
7513         while (repair && !list_empty(&duplicate_extents)) {
7514                 rec = list_entry(duplicate_extents.next, struct extent_record,
7515                                  list);
7516                 list_del_init(&rec->list);
7517
7518                 /* Sometimes we can find a backref before we find an actual
7519                  * extent, so we need to process it a little bit to see if there
7520                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7521                  * if this is a backref screwup.  If we need to delete stuff
7522                  * process_duplicates() will return 0, otherwise it will return
7523                  * 1 and we move on to the next duplicate record.
7524                  */
7525                 if (process_duplicates(root, extent_cache, rec))
7526                         continue;
7527                 ret = delete_duplicate_records(root, rec);
7528                 if (ret < 0)
7529                         return ret;
7530                 /*
7531                  * delete_duplicate_records will return the number of entries
7532                  * deleted, so if it's greater than 0 then we know we actually
7533                  * did something and the check must be restarted (-EAGAIN below).
7534                  */
7535                 if (ret)
7536                         had_dups = 1;
7537         }
7538
7539         if (had_dups)
7540                 return -EAGAIN;
7541
7542         while(1) {
7543                 int cur_err = 0;
7544
7545                 fixed = 0;
7546                 recorded = 0;
7547                 cache = search_cache_extent(extent_cache, 0);
7548                 if (!cache)
7549                         break;
7550                 rec = container_of(cache, struct extent_record, cache);
7551                 if (rec->num_duplicates) {
7552                         fprintf(stderr, "extent item %llu has multiple extent "
7553                                 "items\n", (unsigned long long)rec->start);
7554                         err = 1;
7555                         cur_err = 1;
7556                 }
7557
7558                 if (rec->refs != rec->extent_item_refs) {
7559                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7560                                 (unsigned long long)rec->start,
7561                                 (unsigned long long)rec->nr);
7562                         fprintf(stderr, "extent item %llu, found %llu\n",
7563                                 (unsigned long long)rec->extent_item_refs,
7564                                 (unsigned long long)rec->refs);
7565                         ret = record_orphan_data_extents(root->fs_info, rec);
7566                         if (ret < 0)
7567                                 goto repair_abort;
7568                         if (ret == 0) {
7569                                 recorded = 1;
7570                         } else {
7571                                 /*
7572                                  * we can't use the extent to repair file
7573                                  * extent, let the fallback method handle it.
7574                                  */
7575                                 if (!fixed && repair) {
7576                                         ret = fixup_extent_refs(
7577                                                         root->fs_info,
7578                                                         extent_cache, rec);
7579                                         if (ret)
7580                                                 goto repair_abort;
7581                                         fixed = 1;
7582                                 }
7583                         }
7584                         err = 1;
7585                         cur_err = 1;
7586                 }
7587                 if (all_backpointers_checked(rec, 1)) {
7588                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7589                                 (unsigned long long)rec->start,
7590                                 (unsigned long long)rec->nr);
7591
7592                         if (!fixed && !recorded && repair) {
7593                                 ret = fixup_extent_refs(root->fs_info,
7594                                                         extent_cache, rec);
7595                                 if (ret)
7596                                         goto repair_abort;
7597                                 fixed = 1;
7598                         }
7599                         cur_err = 1;
7600                         err = 1;
7601                 }
7602                 if (!rec->owner_ref_checked) {
7603                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7604                                 (unsigned long long)rec->start,
7605                                 (unsigned long long)rec->nr);
7606                         if (!fixed && !recorded && repair) {
7607                                 ret = fixup_extent_refs(root->fs_info,
7608                                                         extent_cache, rec);
7609                                 if (ret)
7610                                         goto repair_abort;
7611                                 fixed = 1;
7612                         }
7613                         err = 1;
7614                         cur_err = 1;
7615                 }
7616                 if (rec->bad_full_backref) {
7617                         fprintf(stderr, "bad full backref, on [%llu]\n",
7618                                 (unsigned long long)rec->start);
7619                         if (repair) {
7620                                 ret = fixup_extent_flags(root->fs_info, rec);
7621                                 if (ret)
7622                                         goto repair_abort;
7623                                 fixed = 1;
7624                         }
7625                         err = 1;
7626                         cur_err = 1;
7627                 }
7628                 /*
7629                  * Although it's not a extent ref's problem, we reuse this
7630                  * routine for error reporting.
7631                  * No repair function yet.
7632                  */
7633                 if (rec->crossing_stripes) {
7634                         fprintf(stderr,
7635                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7636                                 rec->start, rec->start + rec->max_size);
7637                         err = 1;
7638                         cur_err = 1;
7639                 }
7640
7641                 if (rec->wrong_chunk_type) {
7642                         fprintf(stderr,
7643                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7644                                 rec->start, rec->start + rec->max_size);
7645                         err = 1;
7646                         cur_err = 1;
7647                 }
7648
7649                 remove_cache_extent(extent_cache, cache);
7650                 free_all_extent_backrefs(rec);
7651                 if (!init_extent_tree && repair && (!cur_err || fixed))
7652                         clear_extent_dirty(root->fs_info->excluded_extents,
7653                                            rec->start,
7654                                            rec->start + rec->max_size - 1,
7655                                            GFP_NOFS);
7656                 free(rec);
7657         }
7658 repair_abort:
7659         if (repair) {
7660                 if (ret && ret != -EAGAIN) {
7661                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7662                         exit(1);
7663                 } else if (!ret) {
7664                         struct btrfs_trans_handle *trans;
7665
7666                         root = root->fs_info->extent_root;
7667                         trans = btrfs_start_transaction(root, 1);
7668                         if (IS_ERR(trans)) {
7669                                 ret = PTR_ERR(trans);
7670                                 goto repair_abort;
7671                         }
7672
7673                         btrfs_fix_block_accounting(trans, root);
7674                         ret = btrfs_commit_transaction(trans, root);
7675                         if (ret)
7676                                 goto repair_abort;
7677                 }
7678                 if (err)
7679                         fprintf(stderr, "repaired damaged extent references\n");
7680                 return ret;
7681         }
7682         return err;
7683 }
7684
7685 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7686 {
7687         u64 stripe_size;
7688
7689         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7690                 stripe_size = length;
7691                 stripe_size /= num_stripes;
7692         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7693                 stripe_size = length * 2;
7694                 stripe_size /= num_stripes;
7695         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7696                 stripe_size = length;
7697                 stripe_size /= (num_stripes - 1);
7698         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7699                 stripe_size = length;
7700                 stripe_size /= (num_stripes - 2);
7701         } else {
7702                 stripe_size = length;
7703         }
7704         return stripe_size;
7705 }
7706
7707 /*
7708  * Check the chunk with its block group/dev list ref:
7709  * Return 0 if all refs seems valid.
7710  * Return 1 if part of refs seems valid, need later check for rebuild ref
7711  * like missing block group and needs to search extent tree to rebuild them.
7712  * Return -1 if essential refs are missing and unable to rebuild.
7713  */
7714 static int check_chunk_refs(struct chunk_record *chunk_rec,
7715                             struct block_group_tree *block_group_cache,
7716                             struct device_extent_tree *dev_extent_cache,
7717                             int silent)
7718 {
7719         struct cache_extent *block_group_item;
7720         struct block_group_record *block_group_rec;
7721         struct cache_extent *dev_extent_item;
7722         struct device_extent_record *dev_extent_rec;
7723         u64 devid;
7724         u64 offset;
7725         u64 length;
7726         int metadump_v2 = 0;
7727         int i;
7728         int ret = 0;
7729
7730         block_group_item = lookup_cache_extent(&block_group_cache->tree,
7731                                                chunk_rec->offset,
7732                                                chunk_rec->length);
7733         if (block_group_item) {
7734                 block_group_rec = container_of(block_group_item,
7735                                                struct block_group_record,
7736                                                cache);
7737                 if (chunk_rec->length != block_group_rec->offset ||
7738                     chunk_rec->offset != block_group_rec->objectid ||
7739                     (!metadump_v2 &&
7740                      chunk_rec->type_flags != block_group_rec->flags)) {
7741                         if (!silent)
7742                                 fprintf(stderr,
7743                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7744                                         chunk_rec->objectid,
7745                                         chunk_rec->type,
7746                                         chunk_rec->offset,
7747                                         chunk_rec->length,
7748                                         chunk_rec->offset,
7749                                         chunk_rec->type_flags,
7750                                         block_group_rec->objectid,
7751                                         block_group_rec->type,
7752                                         block_group_rec->offset,
7753                                         block_group_rec->offset,
7754                                         block_group_rec->objectid,
7755                                         block_group_rec->flags);
7756                         ret = -1;
7757                 } else {
7758                         list_del_init(&block_group_rec->list);
7759                         chunk_rec->bg_rec = block_group_rec;
7760                 }
7761         } else {
7762                 if (!silent)
7763                         fprintf(stderr,
7764                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7765                                 chunk_rec->objectid,
7766                                 chunk_rec->type,
7767                                 chunk_rec->offset,
7768                                 chunk_rec->length,
7769                                 chunk_rec->offset,
7770                                 chunk_rec->type_flags);
7771                 ret = 1;
7772         }
7773
7774         if (metadump_v2)
7775                 return ret;
7776
7777         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7778                                     chunk_rec->num_stripes);
7779         for (i = 0; i < chunk_rec->num_stripes; ++i) {
7780                 devid = chunk_rec->stripes[i].devid;
7781                 offset = chunk_rec->stripes[i].offset;
7782                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7783                                                        devid, offset, length);
7784                 if (dev_extent_item) {
7785                         dev_extent_rec = container_of(dev_extent_item,
7786                                                 struct device_extent_record,
7787                                                 cache);
7788                         if (dev_extent_rec->objectid != devid ||
7789                             dev_extent_rec->offset != offset ||
7790                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
7791                             dev_extent_rec->length != length) {
7792                                 if (!silent)
7793                                         fprintf(stderr,
7794                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7795                                                 chunk_rec->objectid,
7796                                                 chunk_rec->type,
7797                                                 chunk_rec->offset,
7798                                                 chunk_rec->stripes[i].devid,
7799                                                 chunk_rec->stripes[i].offset,
7800                                                 dev_extent_rec->objectid,
7801                                                 dev_extent_rec->offset,
7802                                                 dev_extent_rec->length);
7803                                 ret = -1;
7804                         } else {
7805                                 list_move(&dev_extent_rec->chunk_list,
7806                                           &chunk_rec->dextents);
7807                         }
7808                 } else {
7809                         if (!silent)
7810                                 fprintf(stderr,
7811                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7812                                         chunk_rec->objectid,
7813                                         chunk_rec->type,
7814                                         chunk_rec->offset,
7815                                         chunk_rec->stripes[i].devid,
7816                                         chunk_rec->stripes[i].offset);
7817                         ret = -1;
7818                 }
7819         }
7820         return ret;
7821 }
7822
7823 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7824 int check_chunks(struct cache_tree *chunk_cache,
7825                  struct block_group_tree *block_group_cache,
7826                  struct device_extent_tree *dev_extent_cache,
7827                  struct list_head *good, struct list_head *bad,
7828                  struct list_head *rebuild, int silent)
7829 {
7830         struct cache_extent *chunk_item;
7831         struct chunk_record *chunk_rec;
7832         struct block_group_record *bg_rec;
7833         struct device_extent_record *dext_rec;
7834         int err;
7835         int ret = 0;
7836
7837         chunk_item = first_cache_extent(chunk_cache);
7838         while (chunk_item) {
7839                 chunk_rec = container_of(chunk_item, struct chunk_record,
7840                                          cache);
7841                 err = check_chunk_refs(chunk_rec, block_group_cache,
7842                                        dev_extent_cache, silent);
7843                 if (err < 0)
7844                         ret = err;
7845                 if (err == 0 && good)
7846                         list_add_tail(&chunk_rec->list, good);
7847                 if (err > 0 && rebuild)
7848                         list_add_tail(&chunk_rec->list, rebuild);
7849                 if (err < 0 && bad)
7850                         list_add_tail(&chunk_rec->list, bad);
7851                 chunk_item = next_cache_extent(chunk_item);
7852         }
7853
7854         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7855                 if (!silent)
7856                         fprintf(stderr,
7857                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7858                                 bg_rec->objectid,
7859                                 bg_rec->offset,
7860                                 bg_rec->flags);
7861                 if (!ret)
7862                         ret = 1;
7863         }
7864
7865         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7866                             chunk_list) {
7867                 if (!silent)
7868                         fprintf(stderr,
7869                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7870                                 dext_rec->objectid,
7871                                 dext_rec->offset,
7872                                 dext_rec->length);
7873                 if (!ret)
7874                         ret = 1;
7875         }
7876         return ret;
7877 }
7878
7879
7880 static int check_device_used(struct device_record *dev_rec,
7881                              struct device_extent_tree *dext_cache)
7882 {
7883         struct cache_extent *cache;
7884         struct device_extent_record *dev_extent_rec;
7885         u64 total_byte = 0;
7886
7887         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7888         while (cache) {
7889                 dev_extent_rec = container_of(cache,
7890                                               struct device_extent_record,
7891                                               cache);
7892                 if (dev_extent_rec->objectid != dev_rec->devid)
7893                         break;
7894
7895                 list_del_init(&dev_extent_rec->device_list);
7896                 total_byte += dev_extent_rec->length;
7897                 cache = next_cache_extent(cache);
7898         }
7899
7900         if (total_byte != dev_rec->byte_used) {
7901                 fprintf(stderr,
7902                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7903                         total_byte, dev_rec->byte_used, dev_rec->objectid,
7904                         dev_rec->type, dev_rec->offset);
7905                 return -1;
7906         } else {
7907                 return 0;
7908         }
7909 }
7910
7911 /* check btrfs_dev_item -> btrfs_dev_extent */
7912 static int check_devices(struct rb_root *dev_cache,
7913                          struct device_extent_tree *dev_extent_cache)
7914 {
7915         struct rb_node *dev_node;
7916         struct device_record *dev_rec;
7917         struct device_extent_record *dext_rec;
7918         int err;
7919         int ret = 0;
7920
7921         dev_node = rb_first(dev_cache);
7922         while (dev_node) {
7923                 dev_rec = container_of(dev_node, struct device_record, node);
7924                 err = check_device_used(dev_rec, dev_extent_cache);
7925                 if (err)
7926                         ret = err;
7927
7928                 dev_node = rb_next(dev_node);
7929         }
7930         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7931                             device_list) {
7932                 fprintf(stderr,
7933                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7934                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
7935                 if (!ret)
7936                         ret = 1;
7937         }
7938         return ret;
7939 }
7940
7941 static int add_root_item_to_list(struct list_head *head,
7942                                   u64 objectid, u64 bytenr, u64 last_snapshot,
7943                                   u8 level, u8 drop_level,
7944                                   int level_size, struct btrfs_key *drop_key)
7945 {
7946
7947         struct root_item_record *ri_rec;
7948         ri_rec = malloc(sizeof(*ri_rec));
7949         if (!ri_rec)
7950                 return -ENOMEM;
7951         ri_rec->bytenr = bytenr;
7952         ri_rec->objectid = objectid;
7953         ri_rec->level = level;
7954         ri_rec->level_size = level_size;
7955         ri_rec->drop_level = drop_level;
7956         ri_rec->last_snapshot = last_snapshot;
7957         if (drop_key)
7958                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
7959         list_add_tail(&ri_rec->list, head);
7960
7961         return 0;
7962 }
7963
7964 static void free_root_item_list(struct list_head *list)
7965 {
7966         struct root_item_record *ri_rec;
7967
7968         while (!list_empty(list)) {
7969                 ri_rec = list_first_entry(list, struct root_item_record,
7970                                           list);
7971                 list_del_init(&ri_rec->list);
7972                 free(ri_rec);
7973         }
7974 }
7975
7976 static int deal_root_from_list(struct list_head *list,
7977                                struct btrfs_root *root,
7978                                struct block_info *bits,
7979                                int bits_nr,
7980                                struct cache_tree *pending,
7981                                struct cache_tree *seen,
7982                                struct cache_tree *reada,
7983                                struct cache_tree *nodes,
7984                                struct cache_tree *extent_cache,
7985                                struct cache_tree *chunk_cache,
7986                                struct rb_root *dev_cache,
7987                                struct block_group_tree *block_group_cache,
7988                                struct device_extent_tree *dev_extent_cache)
7989 {
7990         int ret = 0;
7991         u64 last;
7992
7993         while (!list_empty(list)) {
7994                 struct root_item_record *rec;
7995                 struct extent_buffer *buf;
7996                 rec = list_entry(list->next,
7997                                  struct root_item_record, list);
7998                 last = 0;
7999                 buf = read_tree_block(root->fs_info->tree_root,
8000                                       rec->bytenr, rec->level_size, 0);
8001                 if (!extent_buffer_uptodate(buf)) {
8002                         free_extent_buffer(buf);
8003                         ret = -EIO;
8004                         break;
8005                 }
8006                 add_root_to_pending(buf, extent_cache, pending,
8007                                     seen, nodes, rec->objectid);
8008                 /*
8009                  * To rebuild extent tree, we need deal with snapshot
8010                  * one by one, otherwise we deal with node firstly which
8011                  * can maximize readahead.
8012                  */
8013                 while (1) {
8014                         ret = run_next_block(root, bits, bits_nr, &last,
8015                                              pending, seen, reada, nodes,
8016                                              extent_cache, chunk_cache,
8017                                              dev_cache, block_group_cache,
8018                                              dev_extent_cache, rec);
8019                         if (ret != 0)
8020                                 break;
8021                 }
8022                 free_extent_buffer(buf);
8023                 list_del(&rec->list);
8024                 free(rec);
8025                 if (ret < 0)
8026                         break;
8027         }
8028         while (ret >= 0) {
8029                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8030                                      reada, nodes, extent_cache, chunk_cache,
8031                                      dev_cache, block_group_cache,
8032                                      dev_extent_cache, NULL);
8033                 if (ret != 0) {
8034                         if (ret > 0)
8035                                 ret = 0;
8036                         break;
8037                 }
8038         }
8039         return ret;
8040 }
8041
8042 static int check_chunks_and_extents(struct btrfs_root *root)
8043 {
8044         struct rb_root dev_cache;
8045         struct cache_tree chunk_cache;
8046         struct block_group_tree block_group_cache;
8047         struct device_extent_tree dev_extent_cache;
8048         struct cache_tree extent_cache;
8049         struct cache_tree seen;
8050         struct cache_tree pending;
8051         struct cache_tree reada;
8052         struct cache_tree nodes;
8053         struct extent_io_tree excluded_extents;
8054         struct cache_tree corrupt_blocks;
8055         struct btrfs_path path;
8056         struct btrfs_key key;
8057         struct btrfs_key found_key;
8058         int ret, err = 0;
8059         struct block_info *bits;
8060         int bits_nr;
8061         struct extent_buffer *leaf;
8062         int slot;
8063         struct btrfs_root_item ri;
8064         struct list_head dropping_trees;
8065         struct list_head normal_trees;
8066         struct btrfs_root *root1;
8067         u64 objectid;
8068         u32 level_size;
8069         u8 level;
8070
8071         dev_cache = RB_ROOT;
8072         cache_tree_init(&chunk_cache);
8073         block_group_tree_init(&block_group_cache);
8074         device_extent_tree_init(&dev_extent_cache);
8075
8076         cache_tree_init(&extent_cache);
8077         cache_tree_init(&seen);
8078         cache_tree_init(&pending);
8079         cache_tree_init(&nodes);
8080         cache_tree_init(&reada);
8081         cache_tree_init(&corrupt_blocks);
8082         extent_io_tree_init(&excluded_extents);
8083         INIT_LIST_HEAD(&dropping_trees);
8084         INIT_LIST_HEAD(&normal_trees);
8085
8086         if (repair) {
8087                 root->fs_info->excluded_extents = &excluded_extents;
8088                 root->fs_info->fsck_extent_cache = &extent_cache;
8089                 root->fs_info->free_extent_hook = free_extent_hook;
8090                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8091         }
8092
8093         bits_nr = 1024;
8094         bits = malloc(bits_nr * sizeof(struct block_info));
8095         if (!bits) {
8096                 perror("malloc");
8097                 exit(1);
8098         }
8099
8100         if (ctx.progress_enabled) {
8101                 ctx.tp = TASK_EXTENTS;
8102                 task_start(ctx.info);
8103         }
8104
8105 again:
8106         root1 = root->fs_info->tree_root;
8107         level = btrfs_header_level(root1->node);
8108         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8109                                     root1->node->start, 0, level, 0,
8110                                     btrfs_level_size(root1, level), NULL);
8111         if (ret < 0)
8112                 goto out;
8113         root1 = root->fs_info->chunk_root;
8114         level = btrfs_header_level(root1->node);
8115         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8116                                     root1->node->start, 0, level, 0,
8117                                     btrfs_level_size(root1, level), NULL);
8118         if (ret < 0)
8119                 goto out;
8120         btrfs_init_path(&path);
8121         key.offset = 0;
8122         key.objectid = 0;
8123         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8124         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8125                                         &key, &path, 0, 0);
8126         if (ret < 0)
8127                 goto out;
8128         while(1) {
8129                 leaf = path.nodes[0];
8130                 slot = path.slots[0];
8131                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8132                         ret = btrfs_next_leaf(root, &path);
8133                         if (ret != 0)
8134                                 break;
8135                         leaf = path.nodes[0];
8136                         slot = path.slots[0];
8137                 }
8138                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8139                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8140                         unsigned long offset;
8141                         u64 last_snapshot;
8142
8143                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8144                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8145                         last_snapshot = btrfs_root_last_snapshot(&ri);
8146                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8147                                 level = btrfs_root_level(&ri);
8148                                 level_size = btrfs_level_size(root, level);
8149                                 ret = add_root_item_to_list(&normal_trees,
8150                                                 found_key.objectid,
8151                                                 btrfs_root_bytenr(&ri),
8152                                                 last_snapshot, level,
8153                                                 0, level_size, NULL);
8154                                 if (ret < 0)
8155                                         goto out;
8156                         } else {
8157                                 level = btrfs_root_level(&ri);
8158                                 level_size = btrfs_level_size(root, level);
8159                                 objectid = found_key.objectid;
8160                                 btrfs_disk_key_to_cpu(&found_key,
8161                                                       &ri.drop_progress);
8162                                 ret = add_root_item_to_list(&dropping_trees,
8163                                                 objectid,
8164                                                 btrfs_root_bytenr(&ri),
8165                                                 last_snapshot, level,
8166                                                 ri.drop_level,
8167                                                 level_size, &found_key);
8168                                 if (ret < 0)
8169                                         goto out;
8170                         }
8171                 }
8172                 path.slots[0]++;
8173         }
8174         btrfs_release_path(&path);
8175
8176         /*
8177          * check_block can return -EAGAIN if it fixes something, please keep
8178          * this in mind when dealing with return values from these functions, if
8179          * we get -EAGAIN we want to fall through and restart the loop.
8180          */
8181         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8182                                   &seen, &reada, &nodes, &extent_cache,
8183                                   &chunk_cache, &dev_cache, &block_group_cache,
8184                                   &dev_extent_cache);
8185         if (ret < 0) {
8186                 if (ret == -EAGAIN)
8187                         goto loop;
8188                 goto out;
8189         }
8190         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8191                                   &pending, &seen, &reada, &nodes,
8192                                   &extent_cache, &chunk_cache, &dev_cache,
8193                                   &block_group_cache, &dev_extent_cache);
8194         if (ret < 0) {
8195                 if (ret == -EAGAIN)
8196                         goto loop;
8197                 goto out;
8198         }
8199
8200         ret = check_chunks(&chunk_cache, &block_group_cache,
8201                            &dev_extent_cache, NULL, NULL, NULL, 0);
8202         if (ret) {
8203                 if (ret == -EAGAIN)
8204                         goto loop;
8205                 err = ret;
8206         }
8207
8208         ret = check_extent_refs(root, &extent_cache);
8209         if (ret < 0) {
8210                 if (ret == -EAGAIN)
8211                         goto loop;
8212                 goto out;
8213         }
8214
8215         ret = check_devices(&dev_cache, &dev_extent_cache);
8216         if (ret && err)
8217                 ret = err;
8218
8219 out:
8220         task_stop(ctx.info);
8221         if (repair) {
8222                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8223                 extent_io_tree_cleanup(&excluded_extents);
8224                 root->fs_info->fsck_extent_cache = NULL;
8225                 root->fs_info->free_extent_hook = NULL;
8226                 root->fs_info->corrupt_blocks = NULL;
8227                 root->fs_info->excluded_extents = NULL;
8228         }
8229         free(bits);
8230         free_chunk_cache_tree(&chunk_cache);
8231         free_device_cache_tree(&dev_cache);
8232         free_block_group_tree(&block_group_cache);
8233         free_device_extent_tree(&dev_extent_cache);
8234         free_extent_cache_tree(&seen);
8235         free_extent_cache_tree(&pending);
8236         free_extent_cache_tree(&reada);
8237         free_extent_cache_tree(&nodes);
8238         return ret;
8239 loop:
8240         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8241         free_extent_cache_tree(&seen);
8242         free_extent_cache_tree(&pending);
8243         free_extent_cache_tree(&reada);
8244         free_extent_cache_tree(&nodes);
8245         free_chunk_cache_tree(&chunk_cache);
8246         free_block_group_tree(&block_group_cache);
8247         free_device_cache_tree(&dev_cache);
8248         free_device_extent_tree(&dev_extent_cache);
8249         free_extent_record_cache(root->fs_info, &extent_cache);
8250         free_root_item_list(&normal_trees);
8251         free_root_item_list(&dropping_trees);
8252         extent_io_tree_cleanup(&excluded_extents);
8253         goto again;
8254 }
8255
8256 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8257                            struct btrfs_root *root, int overwrite)
8258 {
8259         struct extent_buffer *c;
8260         struct extent_buffer *old = root->node;
8261         int level;
8262         int ret;
8263         struct btrfs_disk_key disk_key = {0,0,0};
8264
8265         level = 0;
8266
8267         if (overwrite) {
8268                 c = old;
8269                 extent_buffer_get(c);
8270                 goto init;
8271         }
8272         c = btrfs_alloc_free_block(trans, root,
8273                                    btrfs_level_size(root, 0),
8274                                    root->root_key.objectid,
8275                                    &disk_key, level, 0, 0);
8276         if (IS_ERR(c)) {
8277                 c = old;
8278                 extent_buffer_get(c);
8279                 overwrite = 1;
8280         }
8281 init:
8282         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8283         btrfs_set_header_level(c, level);
8284         btrfs_set_header_bytenr(c, c->start);
8285         btrfs_set_header_generation(c, trans->transid);
8286         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8287         btrfs_set_header_owner(c, root->root_key.objectid);
8288
8289         write_extent_buffer(c, root->fs_info->fsid,
8290                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8291
8292         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8293                             btrfs_header_chunk_tree_uuid(c),
8294                             BTRFS_UUID_SIZE);
8295
8296         btrfs_mark_buffer_dirty(c);
8297         /*
8298          * this case can happen in the following case:
8299          *
8300          * 1.overwrite previous root.
8301          *
8302          * 2.reinit reloc data root, this is because we skip pin
8303          * down reloc data tree before which means we can allocate
8304          * same block bytenr here.
8305          */
8306         if (old->start == c->start) {
8307                 btrfs_set_root_generation(&root->root_item,
8308                                           trans->transid);
8309                 root->root_item.level = btrfs_header_level(root->node);
8310                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8311                                         &root->root_key, &root->root_item);
8312                 if (ret) {
8313                         free_extent_buffer(c);
8314                         return ret;
8315                 }
8316         }
8317         free_extent_buffer(old);
8318         root->node = c;
8319         add_root_to_dirty_list(root);
8320         return 0;
8321 }
8322
8323 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8324                                 struct extent_buffer *eb, int tree_root)
8325 {
8326         struct extent_buffer *tmp;
8327         struct btrfs_root_item *ri;
8328         struct btrfs_key key;
8329         u64 bytenr;
8330         u32 leafsize;
8331         int level = btrfs_header_level(eb);
8332         int nritems;
8333         int ret;
8334         int i;
8335
8336         /*
8337          * If we have pinned this block before, don't pin it again.
8338          * This can not only avoid forever loop with broken filesystem
8339          * but also give us some speedups.
8340          */
8341         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8342                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8343                 return 0;
8344
8345         btrfs_pin_extent(fs_info, eb->start, eb->len);
8346
8347         leafsize = btrfs_super_leafsize(fs_info->super_copy);
8348         nritems = btrfs_header_nritems(eb);
8349         for (i = 0; i < nritems; i++) {
8350                 if (level == 0) {
8351                         btrfs_item_key_to_cpu(eb, &key, i);
8352                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8353                                 continue;
8354                         /* Skip the extent root and reloc roots */
8355                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8356                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8357                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8358                                 continue;
8359                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8360                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8361
8362                         /*
8363                          * If at any point we start needing the real root we
8364                          * will have to build a stump root for the root we are
8365                          * in, but for now this doesn't actually use the root so
8366                          * just pass in extent_root.
8367                          */
8368                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8369                                               leafsize, 0);
8370                         if (!extent_buffer_uptodate(tmp)) {
8371                                 fprintf(stderr, "Error reading root block\n");
8372                                 return -EIO;
8373                         }
8374                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8375                         free_extent_buffer(tmp);
8376                         if (ret)
8377                                 return ret;
8378                 } else {
8379                         bytenr = btrfs_node_blockptr(eb, i);
8380
8381                         /* If we aren't the tree root don't read the block */
8382                         if (level == 1 && !tree_root) {
8383                                 btrfs_pin_extent(fs_info, bytenr, leafsize);
8384                                 continue;
8385                         }
8386
8387                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8388                                               leafsize, 0);
8389                         if (!extent_buffer_uptodate(tmp)) {
8390                                 fprintf(stderr, "Error reading tree block\n");
8391                                 return -EIO;
8392                         }
8393                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8394                         free_extent_buffer(tmp);
8395                         if (ret)
8396                                 return ret;
8397                 }
8398         }
8399
8400         return 0;
8401 }
8402
8403 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8404 {
8405         int ret;
8406
8407         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8408         if (ret)
8409                 return ret;
8410
8411         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8412 }
8413
8414 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8415 {
8416         struct btrfs_block_group_cache *cache;
8417         struct btrfs_path *path;
8418         struct extent_buffer *leaf;
8419         struct btrfs_chunk *chunk;
8420         struct btrfs_key key;
8421         int ret;
8422         u64 start;
8423
8424         path = btrfs_alloc_path();
8425         if (!path)
8426                 return -ENOMEM;
8427
8428         key.objectid = 0;
8429         key.type = BTRFS_CHUNK_ITEM_KEY;
8430         key.offset = 0;
8431
8432         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8433         if (ret < 0) {
8434                 btrfs_free_path(path);
8435                 return ret;
8436         }
8437
8438         /*
8439          * We do this in case the block groups were screwed up and had alloc
8440          * bits that aren't actually set on the chunks.  This happens with
8441          * restored images every time and could happen in real life I guess.
8442          */
8443         fs_info->avail_data_alloc_bits = 0;
8444         fs_info->avail_metadata_alloc_bits = 0;
8445         fs_info->avail_system_alloc_bits = 0;
8446
8447         /* First we need to create the in-memory block groups */
8448         while (1) {
8449                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8450                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
8451                         if (ret < 0) {
8452                                 btrfs_free_path(path);
8453                                 return ret;
8454                         }
8455                         if (ret) {
8456                                 ret = 0;
8457                                 break;
8458                         }
8459                 }
8460                 leaf = path->nodes[0];
8461                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8462                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8463                         path->slots[0]++;
8464                         continue;
8465                 }
8466
8467                 chunk = btrfs_item_ptr(leaf, path->slots[0],
8468                                        struct btrfs_chunk);
8469                 btrfs_add_block_group(fs_info, 0,
8470                                       btrfs_chunk_type(leaf, chunk),
8471                                       key.objectid, key.offset,
8472                                       btrfs_chunk_length(leaf, chunk));
8473                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8474                                  key.offset + btrfs_chunk_length(leaf, chunk),
8475                                  GFP_NOFS);
8476                 path->slots[0]++;
8477         }
8478         start = 0;
8479         while (1) {
8480                 cache = btrfs_lookup_first_block_group(fs_info, start);
8481                 if (!cache)
8482                         break;
8483                 cache->cached = 1;
8484                 start = cache->key.objectid + cache->key.offset;
8485         }
8486
8487         btrfs_free_path(path);
8488         return 0;
8489 }
8490
8491 static int reset_balance(struct btrfs_trans_handle *trans,
8492                          struct btrfs_fs_info *fs_info)
8493 {
8494         struct btrfs_root *root = fs_info->tree_root;
8495         struct btrfs_path *path;
8496         struct extent_buffer *leaf;
8497         struct btrfs_key key;
8498         int del_slot, del_nr = 0;
8499         int ret;
8500         int found = 0;
8501
8502         path = btrfs_alloc_path();
8503         if (!path)
8504                 return -ENOMEM;
8505
8506         key.objectid = BTRFS_BALANCE_OBJECTID;
8507         key.type = BTRFS_BALANCE_ITEM_KEY;
8508         key.offset = 0;
8509
8510         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8511         if (ret) {
8512                 if (ret > 0)
8513                         ret = 0;
8514                 if (!ret)
8515                         goto reinit_data_reloc;
8516                 else
8517                         goto out;
8518         }
8519
8520         ret = btrfs_del_item(trans, root, path);
8521         if (ret)
8522                 goto out;
8523         btrfs_release_path(path);
8524
8525         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8526         key.type = BTRFS_ROOT_ITEM_KEY;
8527         key.offset = 0;
8528
8529         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8530         if (ret < 0)
8531                 goto out;
8532         while (1) {
8533                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8534                         if (!found)
8535                                 break;
8536
8537                         if (del_nr) {
8538                                 ret = btrfs_del_items(trans, root, path,
8539                                                       del_slot, del_nr);
8540                                 del_nr = 0;
8541                                 if (ret)
8542                                         goto out;
8543                         }
8544                         key.offset++;
8545                         btrfs_release_path(path);
8546
8547                         found = 0;
8548                         ret = btrfs_search_slot(trans, root, &key, path,
8549                                                 -1, 1);
8550                         if (ret < 0)
8551                                 goto out;
8552                         continue;
8553                 }
8554                 found = 1;
8555                 leaf = path->nodes[0];
8556                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8557                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8558                         break;
8559                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8560                         path->slots[0]++;
8561                         continue;
8562                 }
8563                 if (!del_nr) {
8564                         del_slot = path->slots[0];
8565                         del_nr = 1;
8566                 } else {
8567                         del_nr++;
8568                 }
8569                 path->slots[0]++;
8570         }
8571
8572         if (del_nr) {
8573                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8574                 if (ret)
8575                         goto out;
8576         }
8577         btrfs_release_path(path);
8578
8579 reinit_data_reloc:
8580         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8581         key.type = BTRFS_ROOT_ITEM_KEY;
8582         key.offset = (u64)-1;
8583         root = btrfs_read_fs_root(fs_info, &key);
8584         if (IS_ERR(root)) {
8585                 fprintf(stderr, "Error reading data reloc tree\n");
8586                 ret = PTR_ERR(root);
8587                 goto out;
8588         }
8589         record_root_in_trans(trans, root);
8590         ret = btrfs_fsck_reinit_root(trans, root, 0);
8591         if (ret)
8592                 goto out;
8593         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8594 out:
8595         btrfs_free_path(path);
8596         return ret;
8597 }
8598
8599 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8600                               struct btrfs_fs_info *fs_info)
8601 {
8602         u64 start = 0;
8603         int ret;
8604
8605         /*
8606          * The only reason we don't do this is because right now we're just
8607          * walking the trees we find and pinning down their bytes, we don't look
8608          * at any of the leaves.  In order to do mixed groups we'd have to check
8609          * the leaves of any fs roots and pin down the bytes for any file
8610          * extents we find.  Not hard but why do it if we don't have to?
8611          */
8612         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8613                 fprintf(stderr, "We don't support re-initing the extent tree "
8614                         "for mixed block groups yet, please notify a btrfs "
8615                         "developer you want to do this so they can add this "
8616                         "functionality.\n");
8617                 return -EINVAL;
8618         }
8619
8620         /*
8621          * first we need to walk all of the trees except the extent tree and pin
8622          * down the bytes that are in use so we don't overwrite any existing
8623          * metadata.
8624          */
8625         ret = pin_metadata_blocks(fs_info);
8626         if (ret) {
8627                 fprintf(stderr, "error pinning down used bytes\n");
8628                 return ret;
8629         }
8630
8631         /*
8632          * Need to drop all the block groups since we're going to recreate all
8633          * of them again.
8634          */
8635         btrfs_free_block_groups(fs_info);
8636         ret = reset_block_groups(fs_info);
8637         if (ret) {
8638                 fprintf(stderr, "error resetting the block groups\n");
8639                 return ret;
8640         }
8641
8642         /* Ok we can allocate now, reinit the extent root */
8643         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8644         if (ret) {
8645                 fprintf(stderr, "extent root initialization failed\n");
8646                 /*
8647                  * When the transaction code is updated we should end the
8648                  * transaction, but for now progs only knows about commit so
8649                  * just return an error.
8650                  */
8651                 return ret;
8652         }
8653
8654         /*
8655          * Now we have all the in-memory block groups setup so we can make
8656          * allocations properly, and the metadata we care about is safe since we
8657          * pinned all of it above.
8658          */
8659         while (1) {
8660                 struct btrfs_block_group_cache *cache;
8661
8662                 cache = btrfs_lookup_first_block_group(fs_info, start);
8663                 if (!cache)
8664                         break;
8665                 start = cache->key.objectid + cache->key.offset;
8666                 ret = btrfs_insert_item(trans, fs_info->extent_root,
8667                                         &cache->key, &cache->item,
8668                                         sizeof(cache->item));
8669                 if (ret) {
8670                         fprintf(stderr, "Error adding block group\n");
8671                         return ret;
8672                 }
8673                 btrfs_extent_post_op(trans, fs_info->extent_root);
8674         }
8675
8676         ret = reset_balance(trans, fs_info);
8677         if (ret)
8678                 fprintf(stderr, "error reseting the pending balance\n");
8679
8680         return ret;
8681 }
8682
8683 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8684 {
8685         struct btrfs_path *path;
8686         struct btrfs_trans_handle *trans;
8687         struct btrfs_key key;
8688         int ret;
8689
8690         printf("Recowing metadata block %llu\n", eb->start);
8691         key.objectid = btrfs_header_owner(eb);
8692         key.type = BTRFS_ROOT_ITEM_KEY;
8693         key.offset = (u64)-1;
8694
8695         root = btrfs_read_fs_root(root->fs_info, &key);
8696         if (IS_ERR(root)) {
8697                 fprintf(stderr, "Couldn't find owner root %llu\n",
8698                         key.objectid);
8699                 return PTR_ERR(root);
8700         }
8701
8702         path = btrfs_alloc_path();
8703         if (!path)
8704                 return -ENOMEM;
8705
8706         trans = btrfs_start_transaction(root, 1);
8707         if (IS_ERR(trans)) {
8708                 btrfs_free_path(path);
8709                 return PTR_ERR(trans);
8710         }
8711
8712         path->lowest_level = btrfs_header_level(eb);
8713         if (path->lowest_level)
8714                 btrfs_node_key_to_cpu(eb, &key, 0);
8715         else
8716                 btrfs_item_key_to_cpu(eb, &key, 0);
8717
8718         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8719         btrfs_commit_transaction(trans, root);
8720         btrfs_free_path(path);
8721         return ret;
8722 }
8723
8724 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8725 {
8726         struct btrfs_path *path;
8727         struct btrfs_trans_handle *trans;
8728         struct btrfs_key key;
8729         int ret;
8730
8731         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8732                bad->key.type, bad->key.offset);
8733         key.objectid = bad->root_id;
8734         key.type = BTRFS_ROOT_ITEM_KEY;
8735         key.offset = (u64)-1;
8736
8737         root = btrfs_read_fs_root(root->fs_info, &key);
8738         if (IS_ERR(root)) {
8739                 fprintf(stderr, "Couldn't find owner root %llu\n",
8740                         key.objectid);
8741                 return PTR_ERR(root);
8742         }
8743
8744         path = btrfs_alloc_path();
8745         if (!path)
8746                 return -ENOMEM;
8747
8748         trans = btrfs_start_transaction(root, 1);
8749         if (IS_ERR(trans)) {
8750                 btrfs_free_path(path);
8751                 return PTR_ERR(trans);
8752         }
8753
8754         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8755         if (ret) {
8756                 if (ret > 0)
8757                         ret = 0;
8758                 goto out;
8759         }
8760         ret = btrfs_del_item(trans, root, path);
8761 out:
8762         btrfs_commit_transaction(trans, root);
8763         btrfs_free_path(path);
8764         return ret;
8765 }
8766
8767 static int zero_log_tree(struct btrfs_root *root)
8768 {
8769         struct btrfs_trans_handle *trans;
8770         int ret;
8771
8772         trans = btrfs_start_transaction(root, 1);
8773         if (IS_ERR(trans)) {
8774                 ret = PTR_ERR(trans);
8775                 return ret;
8776         }
8777         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8778         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8779         ret = btrfs_commit_transaction(trans, root);
8780         return ret;
8781 }
8782
8783 static int populate_csum(struct btrfs_trans_handle *trans,
8784                          struct btrfs_root *csum_root, char *buf, u64 start,
8785                          u64 len)
8786 {
8787         u64 offset = 0;
8788         u64 sectorsize;
8789         int ret = 0;
8790
8791         while (offset < len) {
8792                 sectorsize = csum_root->sectorsize;
8793                 ret = read_extent_data(csum_root, buf, start + offset,
8794                                        &sectorsize, 0);
8795                 if (ret)
8796                         break;
8797                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8798                                             start + offset, buf, sectorsize);
8799                 if (ret)
8800                         break;
8801                 offset += sectorsize;
8802         }
8803         return ret;
8804 }
8805
8806 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8807                                       struct btrfs_root *csum_root,
8808                                       struct btrfs_root *cur_root)
8809 {
8810         struct btrfs_path *path;
8811         struct btrfs_key key;
8812         struct extent_buffer *node;
8813         struct btrfs_file_extent_item *fi;
8814         char *buf = NULL;
8815         u64 start = 0;
8816         u64 len = 0;
8817         int slot = 0;
8818         int ret = 0;
8819
8820         path = btrfs_alloc_path();
8821         if (!path)
8822                 return -ENOMEM;
8823         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8824         if (!buf) {
8825                 ret = -ENOMEM;
8826                 goto out;
8827         }
8828
8829         key.objectid = 0;
8830         key.offset = 0;
8831         key.type = 0;
8832
8833         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8834         if (ret < 0)
8835                 goto out;
8836         /* Iterate all regular file extents and fill its csum */
8837         while (1) {
8838                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8839
8840                 if (key.type != BTRFS_EXTENT_DATA_KEY)
8841                         goto next;
8842                 node = path->nodes[0];
8843                 slot = path->slots[0];
8844                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8845                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8846                         goto next;
8847                 start = btrfs_file_extent_disk_bytenr(node, fi);
8848                 len = btrfs_file_extent_disk_num_bytes(node, fi);
8849
8850                 ret = populate_csum(trans, csum_root, buf, start, len);
8851                 if (ret == -EEXIST)
8852                         ret = 0;
8853                 if (ret < 0)
8854                         goto out;
8855 next:
8856                 /*
8857                  * TODO: if next leaf is corrupted, jump to nearest next valid
8858                  * leaf.
8859                  */
8860                 ret = btrfs_next_item(cur_root, path);
8861                 if (ret < 0)
8862                         goto out;
8863                 if (ret > 0) {
8864                         ret = 0;
8865                         goto out;
8866                 }
8867         }
8868
8869 out:
8870         btrfs_free_path(path);
8871         free(buf);
8872         return ret;
8873 }
8874
8875 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8876                                   struct btrfs_root *csum_root)
8877 {
8878         struct btrfs_fs_info *fs_info = csum_root->fs_info;
8879         struct btrfs_path *path;
8880         struct btrfs_root *tree_root = fs_info->tree_root;
8881         struct btrfs_root *cur_root;
8882         struct extent_buffer *node;
8883         struct btrfs_key key;
8884         int slot = 0;
8885         int ret = 0;
8886
8887         path = btrfs_alloc_path();
8888         if (!path)
8889                 return -ENOMEM;
8890
8891         key.objectid = BTRFS_FS_TREE_OBJECTID;
8892         key.offset = 0;
8893         key.type = BTRFS_ROOT_ITEM_KEY;
8894
8895         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
8896         if (ret < 0)
8897                 goto out;
8898         if (ret > 0) {
8899                 ret = -ENOENT;
8900                 goto out;
8901         }
8902
8903         while (1) {
8904                 node = path->nodes[0];
8905                 slot = path->slots[0];
8906                 btrfs_item_key_to_cpu(node, &key, slot);
8907                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8908                         goto out;
8909                 if (key.type != BTRFS_ROOT_ITEM_KEY)
8910                         goto next;
8911                 if (!is_fstree(key.objectid))
8912                         goto next;
8913                 key.offset = (u64)-1;
8914
8915                 cur_root = btrfs_read_fs_root(fs_info, &key);
8916                 if (IS_ERR(cur_root) || !cur_root) {
8917                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8918                                 key.objectid);
8919                         goto out;
8920                 }
8921                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8922                                 cur_root);
8923                 if (ret < 0)
8924                         goto out;
8925 next:
8926                 ret = btrfs_next_item(tree_root, path);
8927                 if (ret > 0) {
8928                         ret = 0;
8929                         goto out;
8930                 }
8931                 if (ret < 0)
8932                         goto out;
8933         }
8934
8935 out:
8936         btrfs_free_path(path);
8937         return ret;
8938 }
8939
8940 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
8941                                       struct btrfs_root *csum_root)
8942 {
8943         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
8944         struct btrfs_path *path;
8945         struct btrfs_extent_item *ei;
8946         struct extent_buffer *leaf;
8947         char *buf;
8948         struct btrfs_key key;
8949         int ret;
8950
8951         path = btrfs_alloc_path();
8952         if (!path)
8953                 return -ENOMEM;
8954
8955         key.objectid = 0;
8956         key.type = BTRFS_EXTENT_ITEM_KEY;
8957         key.offset = 0;
8958
8959         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
8960         if (ret < 0) {
8961                 btrfs_free_path(path);
8962                 return ret;
8963         }
8964
8965         buf = malloc(csum_root->sectorsize);
8966         if (!buf) {
8967                 btrfs_free_path(path);
8968                 return -ENOMEM;
8969         }
8970
8971         while (1) {
8972                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8973                         ret = btrfs_next_leaf(extent_root, path);
8974                         if (ret < 0)
8975                                 break;
8976                         if (ret) {
8977                                 ret = 0;
8978                                 break;
8979                         }
8980                 }
8981                 leaf = path->nodes[0];
8982
8983                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8984                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8985                         path->slots[0]++;
8986                         continue;
8987                 }
8988
8989                 ei = btrfs_item_ptr(leaf, path->slots[0],
8990                                     struct btrfs_extent_item);
8991                 if (!(btrfs_extent_flags(leaf, ei) &
8992                       BTRFS_EXTENT_FLAG_DATA)) {
8993                         path->slots[0]++;
8994                         continue;
8995                 }
8996
8997                 ret = populate_csum(trans, csum_root, buf, key.objectid,
8998                                     key.offset);
8999                 if (ret)
9000                         break;
9001                 path->slots[0]++;
9002         }
9003
9004         btrfs_free_path(path);
9005         free(buf);
9006         return ret;
9007 }
9008
/*
 * Recompute all data checksums and store them in the csum tree.
 *
 * After an extent tree init the extent info has been wiped, so the extent
 * tree cannot be used as the source of data extents.  In that case the
 * caller sets @search_fs_tree and the fs/subvolume trees are scanned
 * instead; otherwise the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        if (!search_fs_tree)
                return fill_csum_tree_from_extent(trans, csum_root);

        return fill_csum_tree_from_fs(trans, csum_root);
}
9025
/*
 * Per-root summary collected from the extent tree by
 * build_roots_info_cache(): the highest tree block found for a root id and
 * how many blocks were seen at that level.  Entries live in
 * roots_info_cache, indexed by root id.
 */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        /* key objectid of the extent item describing the highest block */
        u64 bytenr;
        /* generation read from that extent item */
        u64 gen;
        /* cache linkage: start is the root id, size is always 1 */
        struct cache_extent cache_extent;
};

/*
 * Allocated lazily by build_roots_info_cache() and released, then reset to
 * NULL, by free_roots_info_cache().
 */
static struct cache_tree *roots_info_cache = NULL;
9037
9038 static void free_roots_info_cache(void)
9039 {
9040         if (!roots_info_cache)
9041                 return;
9042
9043         while (!cache_tree_empty(roots_info_cache)) {
9044                 struct cache_extent *entry;
9045                 struct root_item_info *rii;
9046
9047                 entry = first_cache_extent(roots_info_cache);
9048                 if (!entry)
9049                         break;
9050                 remove_cache_extent(roots_info_cache, entry);
9051                 rii = container_of(entry, struct root_item_info, cache_extent);
9052                 free(rii);
9053         }
9054
9055         free(roots_info_cache);
9056         roots_info_cache = NULL;
9057 }
9058
9059 static int build_roots_info_cache(struct btrfs_fs_info *info)
9060 {
9061         int ret = 0;
9062         struct btrfs_key key;
9063         struct extent_buffer *leaf;
9064         struct btrfs_path *path;
9065
9066         if (!roots_info_cache) {
9067                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9068                 if (!roots_info_cache)
9069                         return -ENOMEM;
9070                 cache_tree_init(roots_info_cache);
9071         }
9072
9073         path = btrfs_alloc_path();
9074         if (!path)
9075                 return -ENOMEM;
9076
9077         key.objectid = 0;
9078         key.type = BTRFS_EXTENT_ITEM_KEY;
9079         key.offset = 0;
9080
9081         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9082         if (ret < 0)
9083                 goto out;
9084         leaf = path->nodes[0];
9085
9086         while (1) {
9087                 struct btrfs_key found_key;
9088                 struct btrfs_extent_item *ei;
9089                 struct btrfs_extent_inline_ref *iref;
9090                 int slot = path->slots[0];
9091                 int type;
9092                 u64 flags;
9093                 u64 root_id;
9094                 u8 level;
9095                 struct cache_extent *entry;
9096                 struct root_item_info *rii;
9097
9098                 if (slot >= btrfs_header_nritems(leaf)) {
9099                         ret = btrfs_next_leaf(info->extent_root, path);
9100                         if (ret < 0) {
9101                                 break;
9102                         } else if (ret) {
9103                                 ret = 0;
9104                                 break;
9105                         }
9106                         leaf = path->nodes[0];
9107                         slot = path->slots[0];
9108                 }
9109
9110                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9111
9112                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9113                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9114                         goto next;
9115
9116                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9117                 flags = btrfs_extent_flags(leaf, ei);
9118
9119                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9120                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9121                         goto next;
9122
9123                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9124                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9125                         level = found_key.offset;
9126                 } else {
9127                         struct btrfs_tree_block_info *info;
9128
9129                         info = (struct btrfs_tree_block_info *)(ei + 1);
9130                         iref = (struct btrfs_extent_inline_ref *)(info + 1);
9131                         level = btrfs_tree_block_level(leaf, info);
9132                 }
9133
9134                 /*
9135                  * For a root extent, it must be of the following type and the
9136                  * first (and only one) iref in the item.
9137                  */
9138                 type = btrfs_extent_inline_ref_type(leaf, iref);
9139                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9140                         goto next;
9141
9142                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9143                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9144                 if (!entry) {
9145                         rii = malloc(sizeof(struct root_item_info));
9146                         if (!rii) {
9147                                 ret = -ENOMEM;
9148                                 goto out;
9149                         }
9150                         rii->cache_extent.start = root_id;
9151                         rii->cache_extent.size = 1;
9152                         rii->level = (u8)-1;
9153                         entry = &rii->cache_extent;
9154                         ret = insert_cache_extent(roots_info_cache, entry);
9155                         ASSERT(ret == 0);
9156                 } else {
9157                         rii = container_of(entry, struct root_item_info,
9158                                            cache_extent);
9159                 }
9160
9161                 ASSERT(rii->cache_extent.start == root_id);
9162                 ASSERT(rii->cache_extent.size == 1);
9163
9164                 if (level > rii->level || rii->level == (u8)-1) {
9165                         rii->level = level;
9166                         rii->bytenr = found_key.objectid;
9167                         rii->gen = btrfs_extent_generation(leaf, ei);
9168                         rii->node_count = 1;
9169                 } else if (level == rii->level) {
9170                         rii->node_count++;
9171                 }
9172 next:
9173                 path->slots[0]++;
9174         }
9175
9176 out:
9177         btrfs_free_path(path);
9178
9179         return ret;
9180 }
9181
9182 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9183                                   struct btrfs_path *path,
9184                                   const struct btrfs_key *root_key,
9185                                   const int read_only_mode)
9186 {
9187         const u64 root_id = root_key->objectid;
9188         struct cache_extent *entry;
9189         struct root_item_info *rii;
9190         struct btrfs_root_item ri;
9191         unsigned long offset;
9192
9193         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9194         if (!entry) {
9195                 fprintf(stderr,
9196                         "Error: could not find extent items for root %llu\n",
9197                         root_key->objectid);
9198                 return -ENOENT;
9199         }
9200
9201         rii = container_of(entry, struct root_item_info, cache_extent);
9202         ASSERT(rii->cache_extent.start == root_id);
9203         ASSERT(rii->cache_extent.size == 1);
9204
9205         if (rii->node_count != 1) {
9206                 fprintf(stderr,
9207                         "Error: could not find btree root extent for root %llu\n",
9208                         root_id);
9209                 return -ENOENT;
9210         }
9211
9212         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9213         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9214
9215         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9216             btrfs_root_level(&ri) != rii->level ||
9217             btrfs_root_generation(&ri) != rii->gen) {
9218
9219                 /*
9220                  * If we're in repair mode but our caller told us to not update
9221                  * the root item, i.e. just check if it needs to be updated, don't
9222                  * print this message, since the caller will call us again shortly
9223                  * for the same root item without read only mode (the caller will
9224                  * open a transaction first).
9225                  */
9226                 if (!(read_only_mode && repair))
9227                         fprintf(stderr,
9228                                 "%sroot item for root %llu,"
9229                                 " current bytenr %llu, current gen %llu, current level %u,"
9230                                 " new bytenr %llu, new gen %llu, new level %u\n",
9231                                 (read_only_mode ? "" : "fixing "),
9232                                 root_id,
9233                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9234                                 btrfs_root_level(&ri),
9235                                 rii->bytenr, rii->gen, rii->level);
9236
9237                 if (btrfs_root_generation(&ri) > rii->gen) {
9238                         fprintf(stderr,
9239                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9240                                 root_id, btrfs_root_generation(&ri), rii->gen);
9241                         return -EINVAL;
9242                 }
9243
9244                 if (!read_only_mode) {
9245                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9246                         btrfs_set_root_level(&ri, rii->level);
9247                         btrfs_set_root_generation(&ri, rii->gen);
9248                         write_extent_buffer(path->nodes[0], &ri,
9249                                             offset, sizeof(ri));
9250                 }
9251
9252                 return 1;
9253         }
9254
9255         return 0;
9256 }
9257
/*
 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
 * caused read-only snapshots to be corrupted if they were created at a moment
 * when the source subvolume/snapshot had orphan items. The issue was that the
 * on-disk root items became incorrect, referring to the pre orphan cleanup root
 * node instead of the post orphan cleanup root node.
 * So this function, and its callees, just detect and fix those cases. Even
 * though the regression was for read-only snapshots, this function applies to
 * any snapshot/subvolume root.
 * This must be run before any other repair code - not doing so makes other
 * repair code delete or modify backrefs in the extent tree for example, which
 * will result in an inconsistent fs after repairing the root items.
 *
 * Returns a negative value on error, otherwise the number of root items that
 * were found outdated (and, in repair mode, rewritten).
 */
static int repair_root_items(struct btrfs_fs_info *info)
{
        struct btrfs_path *path = NULL;
        struct btrfs_key key;
        struct extent_buffer *leaf;
        struct btrfs_trans_handle *trans = NULL;
        int ret = 0;
        int bad_roots = 0;
        int need_trans = 0;

        /* Collect the expected bytenr/level/gen of every root first. */
        ret = build_roots_info_cache(info);
        if (ret)
                goto out;

        path = btrfs_alloc_path();
        if (!path) {
                ret = -ENOMEM;
                goto out;
        }

        key.objectid = BTRFS_FIRST_FREE_OBJECTID;
        key.type = BTRFS_ROOT_ITEM_KEY;
        key.offset = 0;

again:
        /*
         * Avoid opening and committing transactions if a leaf doesn't have
         * any root items that need to be fixed, so that we avoid rotating
         * backup roots unnecessarily.
         */
        if (need_trans) {
                trans = btrfs_start_transaction(info->tree_root, 1);
                if (IS_ERR(trans)) {
                        ret = PTR_ERR(trans);
                        goto out;
                }
        }

        /* COW the path only when a transaction is open, i.e. when fixing. */
        ret = btrfs_search_slot(trans, info->tree_root, &key, path,
                                0, trans ? 1 : 0);
        if (ret < 0)
                goto out;
        leaf = path->nodes[0];

        while (1) {
                struct btrfs_key found_key;

                if (path->slots[0] >= btrfs_header_nritems(leaf)) {
                        /*
                         * Done with this leaf: remember where the next one
                         * starts, commit any open transaction and restart
                         * the search from that key without a transaction.
                         */
                        int no_more_keys = find_next_key(path, &key);

                        btrfs_release_path(path);
                        if (trans) {
                                ret = btrfs_commit_transaction(trans,
                                                               info->tree_root);
                                trans = NULL;
                                if (ret < 0)
                                        goto out;
                        }
                        need_trans = 0;
                        if (no_more_keys)
                                break;
                        goto again;
                }

                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

                if (found_key.type != BTRFS_ROOT_ITEM_KEY)
                        goto next;
                /* Root items of the tree reloc root are not checked. */
                if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
                        goto next;

                /* Read-only probe unless a transaction is already open. */
                ret = maybe_repair_root_item(info, path, &found_key,
                                             trans ? 0 : 1);
                if (ret < 0)
                        goto out;
                if (ret) {
                        if (!trans && repair) {
                                /*
                                 * Outdated item found by the probe: restart
                                 * at this very key with a transaction so the
                                 * next call can actually rewrite it.
                                 */
                                need_trans = 1;
                                key = found_key;
                                btrfs_release_path(path);
                                goto again;
                        }
                        bad_roots++;
                }
next:
                path->slots[0]++;
        }
        ret = 0;
out:
        free_roots_info_cache();
        btrfs_free_path(path);
        /* Only reachable with a transaction still open on an error path. */
        if (trans)
                btrfs_commit_transaction(trans, info->tree_root);
        if (ret < 0)
                return ret;

        return bad_roots;
}
9369
/*
 * Help text for "btrfs check".  The array is NULL terminated; the first
 * entry is the synopsis and the second the one-line summary.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found.",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report           print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "-p|--progress               indicate progress",
        NULL
};
9392
9393 int cmd_check(int argc, char **argv)
9394 {
9395         struct cache_tree root_cache;
9396         struct btrfs_root *root;
9397         struct btrfs_fs_info *info;
9398         u64 bytenr = 0;
9399         u64 subvolid = 0;
9400         u64 tree_root_bytenr = 0;
9401         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9402         int ret;
9403         u64 num;
9404         int init_csum_tree = 0;
9405         int readonly = 0;
9406         int qgroup_report = 0;
9407         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9408
9409         while(1) {
9410                 int c;
9411                 enum { OPT_REPAIR = 257, OPT_INIT_CSUM, OPT_INIT_EXTENT,
9412                         OPT_CHECK_CSUM, OPT_READONLY };
9413                 static const struct option long_options[] = {
9414                         { "super", required_argument, NULL, 's' },
9415                         { "repair", no_argument, NULL, OPT_REPAIR },
9416                         { "readonly", no_argument, NULL, OPT_READONLY },
9417                         { "init-csum-tree", no_argument, NULL, OPT_INIT_CSUM },
9418                         { "init-extent-tree", no_argument, NULL, OPT_INIT_EXTENT },
9419                         { "check-data-csum", no_argument, NULL, OPT_CHECK_CSUM },
9420                         { "backup", no_argument, NULL, 'b' },
9421                         { "subvol-extents", required_argument, NULL, 'E' },
9422                         { "qgroup-report", no_argument, NULL, 'Q' },
9423                         { "tree-root", required_argument, NULL, 'r' },
9424                         { "progress", no_argument, NULL, 'p' },
9425                         { NULL, 0, NULL, 0}
9426                 };
9427
9428                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9429                 if (c < 0)
9430                         break;
9431                 switch(c) {
9432                         case 'a': /* ignored */ break;
9433                         case 'b':
9434                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9435                                 break;
9436                         case 's':
9437                                 num = arg_strtou64(optarg);
9438                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9439                                         fprintf(stderr,
9440                                                 "ERROR: super mirror should be less than: %d\n",
9441                                                 BTRFS_SUPER_MIRROR_MAX);
9442                                         exit(1);
9443                                 }
9444                                 bytenr = btrfs_sb_offset(((int)num));
9445                                 printf("using SB copy %llu, bytenr %llu\n", num,
9446                                        (unsigned long long)bytenr);
9447                                 break;
9448                         case 'Q':
9449                                 qgroup_report = 1;
9450                                 break;
9451                         case 'E':
9452                                 subvolid = arg_strtou64(optarg);
9453                                 break;
9454                         case 'r':
9455                                 tree_root_bytenr = arg_strtou64(optarg);
9456                                 break;
9457                         case 'p':
9458                                 ctx.progress_enabled = true;
9459                                 break;
9460                         case '?':
9461                         case 'h':
9462                                 usage(cmd_check_usage);
9463                         case OPT_REPAIR:
9464                                 printf("enabling repair mode\n");
9465                                 repair = 1;
9466                                 ctree_flags |= OPEN_CTREE_WRITES;
9467                                 break;
9468                         case OPT_READONLY:
9469                                 readonly = 1;
9470                                 break;
9471                         case OPT_INIT_CSUM:
9472                                 printf("Creating a new CRC tree\n");
9473                                 init_csum_tree = 1;
9474                                 repair = 1;
9475                                 ctree_flags |= OPEN_CTREE_WRITES;
9476                                 break;
9477                         case OPT_INIT_EXTENT:
9478                                 init_extent_tree = 1;
9479                                 ctree_flags |= (OPEN_CTREE_WRITES |
9480                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9481                                 repair = 1;
9482                                 break;
9483                         case OPT_CHECK_CSUM:
9484                                 check_data_csum = 1;
9485                                 break;
9486                 }
9487         }
9488         argc = argc - optind;
9489
9490         if (check_argc_exact(argc, 1))
9491                 usage(cmd_check_usage);
9492
9493         if (ctx.progress_enabled) {
9494                 ctx.tp = TASK_NOTHING;
9495                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9496         }
9497
9498         /* This check is the only reason for --readonly to exist */
9499         if (readonly && repair) {
9500                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9501                 exit(1);
9502         }
9503
9504         radix_tree_init();
9505         cache_tree_init(&root_cache);
9506
9507         if((ret = check_mounted(argv[optind])) < 0) {
9508                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9509                 goto err_out;
9510         } else if(ret) {
9511                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9512                 ret = -EBUSY;
9513                 goto err_out;
9514         }
9515
9516         /* only allow partial opening under repair mode */
9517         if (repair)
9518                 ctree_flags |= OPEN_CTREE_PARTIAL;
9519
9520         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9521                                   ctree_flags);
9522         if (!info) {
9523                 fprintf(stderr, "Couldn't open file system\n");
9524                 ret = -EIO;
9525                 goto err_out;
9526         }
9527
9528         global_info = info;
9529         root = info->fs_root;
9530
9531         /*
9532          * repair mode will force us to commit transaction which
9533          * will make us fail to load log tree when mounting.
9534          */
9535         if (repair && btrfs_super_log_root(info->super_copy)) {
9536                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9537                 if (!ret) {
9538                         ret = 1;
9539                         goto close_out;
9540                 }
9541                 ret = zero_log_tree(root);
9542                 if (ret) {
9543                         fprintf(stderr, "fail to zero log tree\n");
9544                         goto close_out;
9545                 }
9546         }
9547
9548         uuid_unparse(info->super_copy->fsid, uuidbuf);
9549         if (qgroup_report) {
9550                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9551                        uuidbuf);
9552                 ret = qgroup_verify_all(info);
9553                 if (ret == 0)
9554                         print_qgroup_report(1);
9555                 goto close_out;
9556         }
9557         if (subvolid) {
9558                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9559                        subvolid, argv[optind], uuidbuf);
9560                 ret = print_extent_state(info, subvolid);
9561                 goto close_out;
9562         }
9563         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9564
9565         if (!extent_buffer_uptodate(info->tree_root->node) ||
9566             !extent_buffer_uptodate(info->dev_root->node) ||
9567             !extent_buffer_uptodate(info->chunk_root->node)) {
9568                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9569                 ret = -EIO;
9570                 goto close_out;
9571         }
9572
9573         if (init_extent_tree || init_csum_tree) {
9574                 struct btrfs_trans_handle *trans;
9575
9576                 trans = btrfs_start_transaction(info->extent_root, 0);
9577                 if (IS_ERR(trans)) {
9578                         fprintf(stderr, "Error starting transaction\n");
9579                         ret = PTR_ERR(trans);
9580                         goto close_out;
9581                 }
9582
9583                 if (init_extent_tree) {
9584                         printf("Creating a new extent tree\n");
9585                         ret = reinit_extent_tree(trans, info);
9586                         if (ret)
9587                                 goto close_out;
9588                 }
9589
9590                 if (init_csum_tree) {
9591                         fprintf(stderr, "Reinit crc root\n");
9592                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9593                         if (ret) {
9594                                 fprintf(stderr, "crc root initialization failed\n");
9595                                 ret = -EIO;
9596                                 goto close_out;
9597                         }
9598
9599                         ret = fill_csum_tree(trans, info->csum_root,
9600                                              init_extent_tree);
9601                         if (ret) {
9602                                 fprintf(stderr, "crc refilling failed\n");
9603                                 return -EIO;
9604                         }
9605                 }
9606                 /*
9607                  * Ok now we commit and run the normal fsck, which will add
9608                  * extent entries for all of the items it finds.
9609                  */
9610                 ret = btrfs_commit_transaction(trans, info->extent_root);
9611                 if (ret)
9612                         goto close_out;
9613         }
9614         if (!extent_buffer_uptodate(info->extent_root->node)) {
9615                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9616                 ret = -EIO;
9617                 goto close_out;
9618         }
9619         if (!extent_buffer_uptodate(info->csum_root->node)) {
9620                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9621                 ret = -EIO;
9622                 goto close_out;
9623         }
9624
9625         if (!ctx.progress_enabled)
9626                 fprintf(stderr, "checking extents\n");
9627         ret = check_chunks_and_extents(root);
9628         if (ret)
9629                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9630
9631         ret = repair_root_items(info);
9632         if (ret < 0)
9633                 goto close_out;
9634         if (repair) {
9635                 fprintf(stderr, "Fixed %d roots.\n", ret);
9636                 ret = 0;
9637         } else if (ret > 0) {
9638                 fprintf(stderr,
9639                        "Found %d roots with an outdated root item.\n",
9640                        ret);
9641                 fprintf(stderr,
9642                         "Please run a filesystem check with the option --repair to fix them.\n");
9643                 ret = 1;
9644                 goto close_out;
9645         }
9646
9647         if (!ctx.progress_enabled)
9648                 fprintf(stderr, "checking free space cache\n");
9649         ret = check_space_cache(root);
9650         if (ret)
9651                 goto out;
9652
9653         /*
9654          * We used to have to have these hole extents in between our real
9655          * extents so if we don't have this flag set we need to make sure there
9656          * are no gaps in the file extents for inodes, otherwise we can just
9657          * ignore it when this happens.
9658          */
9659         no_holes = btrfs_fs_incompat(root->fs_info,
9660                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9661         if (!ctx.progress_enabled)
9662                 fprintf(stderr, "checking fs roots\n");
9663         ret = check_fs_roots(root, &root_cache);
9664         if (ret)
9665                 goto out;
9666
9667         fprintf(stderr, "checking csums\n");
9668         ret = check_csums(root);
9669         if (ret)
9670                 goto out;
9671
9672         fprintf(stderr, "checking root refs\n");
9673         ret = check_root_refs(root, &root_cache);
9674         if (ret)
9675                 goto out;
9676
9677         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9678                 struct extent_buffer *eb;
9679
9680                 eb = list_first_entry(&root->fs_info->recow_ebs,
9681                                       struct extent_buffer, recow);
9682                 list_del_init(&eb->recow);
9683                 ret = recow_extent_buffer(root, eb);
9684                 if (ret)
9685                         break;
9686         }
9687
9688         while (!list_empty(&delete_items)) {
9689                 struct bad_item *bad;
9690
9691                 bad = list_first_entry(&delete_items, struct bad_item, list);
9692                 list_del_init(&bad->list);
9693                 if (repair)
9694                         ret = delete_bad_item(root, bad);
9695                 free(bad);
9696         }
9697
9698         if (info->quota_enabled) {
9699                 int err;
9700                 fprintf(stderr, "checking quota groups\n");
9701                 err = qgroup_verify_all(info);
9702                 if (err)
9703                         goto out;
9704         }
9705
9706         if (!list_empty(&root->fs_info->recow_ebs)) {
9707                 fprintf(stderr, "Transid errors in file system\n");
9708                 ret = 1;
9709         }
9710 out:
9711         print_qgroup_report(0);
9712         if (found_old_backref) { /*
9713                  * there was a disk format change when mixed
9714                  * backref was in testing tree. The old format
9715                  * existed about one week.
9716                  */
9717                 printf("\n * Found old mixed backref format. "
9718                        "The old format is not supported! *"
9719                        "\n * Please mount the FS in readonly mode, "
9720                        "backup data and re-format the FS. *\n\n");
9721                 ret = 1;
9722         }
9723         printf("found %llu bytes used err is %d\n",
9724                (unsigned long long)bytes_used, ret);
9725         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9726         printf("total tree bytes: %llu\n",
9727                (unsigned long long)total_btree_bytes);
9728         printf("total fs tree bytes: %llu\n",
9729                (unsigned long long)total_fs_tree_bytes);
9730         printf("total extent tree bytes: %llu\n",
9731                (unsigned long long)total_extent_tree_bytes);
9732         printf("btree space waste bytes: %llu\n",
9733                (unsigned long long)btree_space_waste);
9734         printf("file data blocks allocated: %llu\n referenced %llu\n",
9735                 (unsigned long long)data_bytes_allocated,
9736                 (unsigned long long)data_bytes_referenced);
9737
9738         free_root_recs_tree(&root_cache);
9739 close_out:
9740         close_ctree(root);
9741 err_out:
9742         if (ctx.progress_enabled)
9743                 task_deinit(ctx.info);
9744
9745         return ret;
9746 }