btrfs-progs: docs: enhance manual page for balance
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "btrfsck.h"
39 #include "qgroup-verify.h"
40 #include "rbtree-utils.h"
41 #include "backref.h"
42 #include "ulist.h"
43
/*
 * Phases reported by the progress indicator.
 *
 * Every enumerator except TASK_NOTHING is used as an index into
 * task_position_string[] in print_status_check(), so the order here has
 * to match the order of that table.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        /* No phase is active; this has to stay the last enumerator. */
        TASK_NOTHING,
};
50
51 struct task_ctx {
52         int progress_enabled;
53         enum task_position tp;
54
55         struct task_info *info;
56 };
57
58 static u64 bytes_used = 0;
59 static u64 total_csum_bytes = 0;
60 static u64 total_btree_bytes = 0;
61 static u64 total_fs_tree_bytes = 0;
62 static u64 total_extent_tree_bytes = 0;
63 static u64 btree_space_waste = 0;
64 static u64 data_bytes_allocated = 0;
65 static u64 data_bytes_referenced = 0;
66 static int found_old_backref = 0;
67 static LIST_HEAD(duplicate_extents);
68 static LIST_HEAD(delete_items);
69 static int repair = 0;
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75
76 static void *print_status_check(void *p)
77 {
78         struct task_ctx *priv = p;
79         const char work_indicator[] = { '.', 'o', 'O', 'o' };
80         uint32_t count = 0;
81         static char *task_position_string[] = {
82                 "checking extents",
83                 "checking free space cache",
84                 "checking fs roots",
85         };
86
87         task_period_start(priv->info, 1000 /* 1s */);
88
89         if (priv->tp == TASK_NOTHING)
90                 return NULL;
91
92         while (1) {
93                 printf("%s [%c]\r", task_position_string[priv->tp],
94                                 work_indicator[count % 4]);
95                 count++;
96                 fflush(stdout);
97                 task_period_wait(priv->info);
98         }
99         return NULL;
100 }
101
/*
 * Finish the progress line: emit a newline on stdout and flush it.
 *
 * @p:  unused
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
        (void)p;

        fputs("\n", stdout);
        fflush(stdout);
        return 0;
}
109
110 struct extent_backref {
111         struct list_head list;
112         unsigned int is_data:1;
113         unsigned int found_extent_tree:1;
114         unsigned int full_backref:1;
115         unsigned int found_ref:1;
116         unsigned int broken:1;
117 };
118
119 struct data_backref {
120         struct extent_backref node;
121         union {
122                 u64 parent;
123                 u64 root;
124         };
125         u64 owner;
126         u64 offset;
127         u64 disk_bytenr;
128         u64 bytes;
129         u64 ram_bytes;
130         u32 num_refs;
131         u32 found_ref;
132 };
133
134 /*
135  * Much like data_backref, just removed the undetermined members
136  * and change it to use list_head.
137  * During extent scan, it is stored in root->orphan_data_extent.
138  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
139  */
140 struct orphan_data_extent {
141         struct list_head list;
142         u64 root;
143         u64 objectid;
144         u64 offset;
145         u64 disk_bytenr;
146         u64 disk_len;
147 };
148
149 struct tree_backref {
150         struct extent_backref node;
151         union {
152                 u64 parent;
153                 u64 root;
154         };
155 };
156
157 struct extent_record {
158         struct list_head backrefs;
159         struct list_head dups;
160         struct list_head list;
161         struct cache_extent cache;
162         struct btrfs_disk_key parent_key;
163         u64 start;
164         u64 max_size;
165         u64 nr;
166         u64 refs;
167         u64 extent_item_refs;
168         u64 generation;
169         u64 parent_generation;
170         u64 info_objectid;
171         u32 num_duplicates;
172         u8 info_level;
173         int flag_block_full_backref;
174         unsigned int found_rec:1;
175         unsigned int content_checked:1;
176         unsigned int owner_ref_checked:1;
177         unsigned int is_root:1;
178         unsigned int metadata:1;
179         unsigned int bad_full_backref:1;
180         unsigned int crossing_stripes:1;
181         unsigned int wrong_chunk_type:1;
182 };
183
184 struct inode_backref {
185         struct list_head list;
186         unsigned int found_dir_item:1;
187         unsigned int found_dir_index:1;
188         unsigned int found_inode_ref:1;
189         unsigned int filetype:8;
190         int errors;
191         unsigned int ref_type;
192         u64 dir;
193         u64 index;
194         u16 namelen;
195         char name[0];
196 };
197
198 struct root_item_record {
199         struct list_head list;
200         u64 objectid;
201         u64 bytenr;
202         u64 last_snapshot;
203         u8 level;
204         u8 drop_level;
205         int level_size;
206         struct btrfs_key drop_key;
207 };
208
/*
 * Error bits for backref "errors" fields; decoded by print_ref_error().
 * The trailing comment on each line is the resulting hexadecimal value.
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)        /* 0x0001 */
#define REF_ERR_NO_DIR_INDEX            (1 << 1)        /* 0x0002 */
#define REF_ERR_NO_INODE_REF            (1 << 2)        /* 0x0004 */
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)        /* 0x0008 */
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)        /* 0x0010 */
#define REF_ERR_DUP_INODE_REF           (1 << 5)        /* 0x0020 */
#define REF_ERR_INDEX_UNMATCH           (1 << 6)        /* 0x0040 */
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)        /* 0x0080 */
#define REF_ERR_NAME_TOO_LONG           (1 << 8)        /* 0x0100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)        /* 0x0200 */
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)       /* 0x0400 */
#define REF_ERR_DUP_ROOT_REF            (1 << 11)       /* 0x0800 */
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)       /* 0x1000 */
222
223 struct file_extent_hole {
224         struct rb_node node;
225         u64 start;
226         u64 len;
227 };
228
229 /* Compatible function to allow reuse of old codes */
230 static u64 first_extent_gap(struct rb_root *holes)
231 {
232         struct file_extent_hole *hole;
233
234         if (RB_EMPTY_ROOT(holes))
235                 return (u64)-1;
236
237         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
238         return hole->start;
239 }
240
241 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
242 {
243         struct file_extent_hole *hole1;
244         struct file_extent_hole *hole2;
245
246         hole1 = rb_entry(node1, struct file_extent_hole, node);
247         hole2 = rb_entry(node2, struct file_extent_hole, node);
248
249         if (hole1->start > hole2->start)
250                 return -1;
251         if (hole1->start < hole2->start)
252                 return 1;
253         /* Now hole1->start == hole2->start */
254         if (hole1->len >= hole2->len)
255                 /*
256                  * Hole 1 will be merge center
257                  * Same hole will be merged later
258                  */
259                 return -1;
260         /* Hole 2 will be merge center */
261         return 1;
262 }
263
264 /*
265  * Add a hole to the record
266  *
267  * This will do hole merge for copy_file_extent_holes(),
268  * which will ensure there won't be continuous holes.
269  */
270 static int add_file_extent_hole(struct rb_root *holes,
271                                 u64 start, u64 len)
272 {
273         struct file_extent_hole *hole;
274         struct file_extent_hole *prev = NULL;
275         struct file_extent_hole *next = NULL;
276
277         hole = malloc(sizeof(*hole));
278         if (!hole)
279                 return -ENOMEM;
280         hole->start = start;
281         hole->len = len;
282         /* Since compare will not return 0, no -EEXIST will happen */
283         rb_insert(holes, &hole->node, compare_hole);
284
285         /* simple merge with previous hole */
286         if (rb_prev(&hole->node))
287                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
288                                 node);
289         if (prev && prev->start + prev->len >= hole->start) {
290                 hole->len = hole->start + hole->len - prev->start;
291                 hole->start = prev->start;
292                 rb_erase(&prev->node, holes);
293                 free(prev);
294                 prev = NULL;
295         }
296
297         /* iterate merge with next holes */
298         while (1) {
299                 if (!rb_next(&hole->node))
300                         break;
301                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
302                                         node);
303                 if (hole->start + hole->len >= next->start) {
304                         if (hole->start + hole->len <= next->start + next->len)
305                                 hole->len = next->start + next->len -
306                                             hole->start;
307                         rb_erase(&next->node, holes);
308                         free(next);
309                         next = NULL;
310                 } else
311                         break;
312         }
313         return 0;
314 }
315
316 static int compare_hole_range(struct rb_node *node, void *data)
317 {
318         struct file_extent_hole *hole;
319         u64 start;
320
321         hole = (struct file_extent_hole *)data;
322         start = hole->start;
323
324         hole = rb_entry(node, struct file_extent_hole, node);
325         if (start < hole->start)
326                 return -1;
327         if (start >= hole->start && start < hole->start + hole->len)
328                 return 0;
329         return 1;
330 }
331
332 /*
333  * Delete a hole in the record
334  *
335  * This will do the hole split and is much restrict than add.
336  */
337 static int del_file_extent_hole(struct rb_root *holes,
338                                 u64 start, u64 len)
339 {
340         struct file_extent_hole *hole;
341         struct file_extent_hole tmp;
342         u64 prev_start = 0;
343         u64 prev_len = 0;
344         u64 next_start = 0;
345         u64 next_len = 0;
346         struct rb_node *node;
347         int have_prev = 0;
348         int have_next = 0;
349         int ret = 0;
350
351         tmp.start = start;
352         tmp.len = len;
353         node = rb_search(holes, &tmp, compare_hole_range, NULL);
354         if (!node)
355                 return -EEXIST;
356         hole = rb_entry(node, struct file_extent_hole, node);
357         if (start + len > hole->start + hole->len)
358                 return -EEXIST;
359
360         /*
361          * Now there will be no overflap, delete the hole and re-add the
362          * split(s) if they exists.
363          */
364         if (start > hole->start) {
365                 prev_start = hole->start;
366                 prev_len = start - hole->start;
367                 have_prev = 1;
368         }
369         if (hole->start + hole->len > start + len) {
370                 next_start = start + len;
371                 next_len = hole->start + hole->len - start - len;
372                 have_next = 1;
373         }
374         rb_erase(node, holes);
375         free(hole);
376         if (have_prev) {
377                 ret = add_file_extent_hole(holes, prev_start, prev_len);
378                 if (ret < 0)
379                         return ret;
380         }
381         if (have_next) {
382                 ret = add_file_extent_hole(holes, next_start, next_len);
383                 if (ret < 0)
384                         return ret;
385         }
386         return 0;
387 }
388
389 static int copy_file_extent_holes(struct rb_root *dst,
390                                   struct rb_root *src)
391 {
392         struct file_extent_hole *hole;
393         struct rb_node *node;
394         int ret = 0;
395
396         node = rb_first(src);
397         while (node) {
398                 hole = rb_entry(node, struct file_extent_hole, node);
399                 ret = add_file_extent_hole(dst, hole->start, hole->len);
400                 if (ret)
401                         break;
402                 node = rb_next(node);
403         }
404         return ret;
405 }
406
407 static void free_file_extent_holes(struct rb_root *holes)
408 {
409         struct rb_node *node;
410         struct file_extent_hole *hole;
411
412         node = rb_first(holes);
413         while (node) {
414                 hole = rb_entry(node, struct file_extent_hole, node);
415                 rb_erase(node, holes);
416                 free(hole);
417                 node = rb_first(holes);
418         }
419 }
420
421 struct inode_record {
422         struct list_head backrefs;
423         unsigned int checked:1;
424         unsigned int merging:1;
425         unsigned int found_inode_item:1;
426         unsigned int found_dir_item:1;
427         unsigned int found_file_extent:1;
428         unsigned int found_csum_item:1;
429         unsigned int some_csum_missing:1;
430         unsigned int nodatasum:1;
431         int errors;
432
433         u64 ino;
434         u32 nlink;
435         u32 imode;
436         u64 isize;
437         u64 nbytes;
438
439         u32 found_link;
440         u64 found_size;
441         u64 extent_start;
442         u64 extent_end;
443         struct rb_root holes;
444         struct list_head orphan_extents;
445
446         u32 refs;
447 };
448
/*
 * Error bits for inode_record.errors; decoded by print_inode_error().
 * The trailing comment on each line is the resulting hexadecimal value.
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)        /* 0x0001 */
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)        /* 0x0002 */
#define I_ERR_DUP_INODE_ITEM            (1 << 2)        /* 0x0004 */
#define I_ERR_DUP_DIR_INDEX             (1 << 3)        /* 0x0008 */
#define I_ERR_ODD_DIR_ITEM              (1 << 4)        /* 0x0010 */
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)        /* 0x0020 */
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)        /* 0x0040 */
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)        /* 0x0080 */
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8)        /* 0x0100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)        /* 0x0200 */
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10)       /* 0x0400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)       /* 0x0800 */
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)       /* 0x1000 */
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)       /* 0x2000 */
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)       /* 0x4000 */
464
465 struct root_backref {
466         struct list_head list;
467         unsigned int found_dir_item:1;
468         unsigned int found_dir_index:1;
469         unsigned int found_back_ref:1;
470         unsigned int found_forward_ref:1;
471         unsigned int reachable:1;
472         int errors;
473         u64 ref_root;
474         u64 dir;
475         u64 index;
476         u16 namelen;
477         char name[0];
478 };
479
480 struct root_record {
481         struct list_head backrefs;
482         struct cache_extent cache;
483         unsigned int found_root_item:1;
484         u64 objectid;
485         u32 found_ref;
486 };
487
488 struct ptr_node {
489         struct cache_extent cache;
490         void *data;
491 };
492
493 struct shared_node {
494         struct cache_extent cache;
495         struct cache_tree root_cache;
496         struct cache_tree inode_cache;
497         struct inode_record *current;
498         u32 refs;
499 };
500
501 struct block_info {
502         u64 start;
503         u32 size;
504 };
505
506 struct walk_control {
507         struct cache_tree shared;
508         struct shared_node *nodes[BTRFS_MAX_LEVEL];
509         int active_node;
510         int root_level;
511 };
512
513 struct bad_item {
514         struct btrfs_key key;
515         u64 root_id;
516         struct list_head list;
517 };
518
/* Forward declaration, so the function can be called before its definition. */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
520
521 static void record_root_in_trans(struct btrfs_trans_handle *trans,
522                                  struct btrfs_root *root)
523 {
524         if (root->last_trans != trans->transid) {
525                 root->track_dirty = 1;
526                 root->last_trans = trans->transid;
527                 root->commit_root = root->node;
528                 extent_buffer_get(root->node);
529         }
530 }
531
532 static u8 imode_to_type(u32 imode)
533 {
534 #define S_SHIFT 12
535         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
536                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
537                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
538                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
539                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
540                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
541                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
542                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
543         };
544
545         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
546 #undef S_SHIFT
547 }
548
549 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
550 {
551         struct device_record *rec1;
552         struct device_record *rec2;
553
554         rec1 = rb_entry(node1, struct device_record, node);
555         rec2 = rb_entry(node2, struct device_record, node);
556         if (rec1->devid > rec2->devid)
557                 return -1;
558         else if (rec1->devid < rec2->devid)
559                 return 1;
560         else
561                 return 0;
562 }
563
564 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
565 {
566         struct inode_record *rec;
567         struct inode_backref *backref;
568         struct inode_backref *orig;
569         struct orphan_data_extent *src_orphan;
570         struct orphan_data_extent *dst_orphan;
571         size_t size;
572         int ret;
573
574         rec = malloc(sizeof(*rec));
575         memcpy(rec, orig_rec, sizeof(*rec));
576         rec->refs = 1;
577         INIT_LIST_HEAD(&rec->backrefs);
578         INIT_LIST_HEAD(&rec->orphan_extents);
579         rec->holes = RB_ROOT;
580
581         list_for_each_entry(orig, &orig_rec->backrefs, list) {
582                 size = sizeof(*orig) + orig->namelen + 1;
583                 backref = malloc(size);
584                 memcpy(backref, orig, size);
585                 list_add_tail(&backref->list, &rec->backrefs);
586         }
587         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
588                 dst_orphan = malloc(sizeof(*dst_orphan));
589                 /* TODO: Fix all the HELL of un-catched -ENOMEM case */
590                 BUG_ON(!dst_orphan);
591                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
592                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
593         }
594         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
595         BUG_ON(ret < 0);
596
597         return rec;
598 }
599
600 static void print_orphan_data_extents(struct list_head *orphan_extents,
601                                       u64 objectid)
602 {
603         struct orphan_data_extent *orphan;
604
605         if (list_empty(orphan_extents))
606                 return;
607         printf("The following data extent is lost in tree %llu:\n",
608                objectid);
609         list_for_each_entry(orphan, orphan_extents, list) {
610                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
611                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
612                        orphan->disk_len);
613         }
614 }
615
616 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
617 {
618         u64 root_objectid = root->root_key.objectid;
619         int errors = rec->errors;
620
621         if (!errors)
622                 return;
623         /* reloc root errors, we print its corresponding fs root objectid*/
624         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
625                 root_objectid = root->root_key.offset;
626                 fprintf(stderr, "reloc");
627         }
628         fprintf(stderr, "root %llu inode %llu errors %x",
629                 (unsigned long long) root_objectid,
630                 (unsigned long long) rec->ino, rec->errors);
631
632         if (errors & I_ERR_NO_INODE_ITEM)
633                 fprintf(stderr, ", no inode item");
634         if (errors & I_ERR_NO_ORPHAN_ITEM)
635                 fprintf(stderr, ", no orphan item");
636         if (errors & I_ERR_DUP_INODE_ITEM)
637                 fprintf(stderr, ", dup inode item");
638         if (errors & I_ERR_DUP_DIR_INDEX)
639                 fprintf(stderr, ", dup dir index");
640         if (errors & I_ERR_ODD_DIR_ITEM)
641                 fprintf(stderr, ", odd dir item");
642         if (errors & I_ERR_ODD_FILE_EXTENT)
643                 fprintf(stderr, ", odd file extent");
644         if (errors & I_ERR_BAD_FILE_EXTENT)
645                 fprintf(stderr, ", bad file extent");
646         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
647                 fprintf(stderr, ", file extent overlap");
648         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
649                 fprintf(stderr, ", file extent discount");
650         if (errors & I_ERR_DIR_ISIZE_WRONG)
651                 fprintf(stderr, ", dir isize wrong");
652         if (errors & I_ERR_FILE_NBYTES_WRONG)
653                 fprintf(stderr, ", nbytes wrong");
654         if (errors & I_ERR_ODD_CSUM_ITEM)
655                 fprintf(stderr, ", odd csum item");
656         if (errors & I_ERR_SOME_CSUM_MISSING)
657                 fprintf(stderr, ", some csum missing");
658         if (errors & I_ERR_LINK_COUNT_WRONG)
659                 fprintf(stderr, ", link count wrong");
660         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
661                 fprintf(stderr, ", orphan file extent");
662         fprintf(stderr, "\n");
663         /* Print the orphan extents if needed */
664         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
665                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
666
667         /* Print the holes if needed */
668         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
669                 struct file_extent_hole *hole;
670                 struct rb_node *node;
671                 int found = 0;
672
673                 node = rb_first(&rec->holes);
674                 fprintf(stderr, "Found file extent holes:\n");
675                 while (node) {
676                         found = 1;
677                         hole = rb_entry(node, struct file_extent_hole, node);
678                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
679                                 hole->start, hole->len);
680                         node = rb_next(node);
681                 }
682                 if (!found)
683                         fprintf(stderr, "\tstart: 0, len: %llu\n",
684                                 round_up(rec->isize, root->sectorsize));
685         }
686 }
687
688 static void print_ref_error(int errors)
689 {
690         if (errors & REF_ERR_NO_DIR_ITEM)
691                 fprintf(stderr, ", no dir item");
692         if (errors & REF_ERR_NO_DIR_INDEX)
693                 fprintf(stderr, ", no dir index");
694         if (errors & REF_ERR_NO_INODE_REF)
695                 fprintf(stderr, ", no inode ref");
696         if (errors & REF_ERR_DUP_DIR_ITEM)
697                 fprintf(stderr, ", dup dir item");
698         if (errors & REF_ERR_DUP_DIR_INDEX)
699                 fprintf(stderr, ", dup dir index");
700         if (errors & REF_ERR_DUP_INODE_REF)
701                 fprintf(stderr, ", dup inode ref");
702         if (errors & REF_ERR_INDEX_UNMATCH)
703                 fprintf(stderr, ", index unmatch");
704         if (errors & REF_ERR_FILETYPE_UNMATCH)
705                 fprintf(stderr, ", filetype unmatch");
706         if (errors & REF_ERR_NAME_TOO_LONG)
707                 fprintf(stderr, ", name too long");
708         if (errors & REF_ERR_NO_ROOT_REF)
709                 fprintf(stderr, ", no root ref");
710         if (errors & REF_ERR_NO_ROOT_BACKREF)
711                 fprintf(stderr, ", no root backref");
712         if (errors & REF_ERR_DUP_ROOT_REF)
713                 fprintf(stderr, ", dup root ref");
714         if (errors & REF_ERR_DUP_ROOT_BACKREF)
715                 fprintf(stderr, ", dup root backref");
716         fprintf(stderr, "\n");
717 }
718
719 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
720                                           u64 ino, int mod)
721 {
722         struct ptr_node *node;
723         struct cache_extent *cache;
724         struct inode_record *rec = NULL;
725         int ret;
726
727         cache = lookup_cache_extent(inode_cache, ino, 1);
728         if (cache) {
729                 node = container_of(cache, struct ptr_node, cache);
730                 rec = node->data;
731                 if (mod && rec->refs > 1) {
732                         node->data = clone_inode_rec(rec);
733                         rec->refs--;
734                         rec = node->data;
735                 }
736         } else if (mod) {
737                 rec = calloc(1, sizeof(*rec));
738                 rec->ino = ino;
739                 rec->extent_start = (u64)-1;
740                 rec->refs = 1;
741                 INIT_LIST_HEAD(&rec->backrefs);
742                 INIT_LIST_HEAD(&rec->orphan_extents);
743                 rec->holes = RB_ROOT;
744
745                 node = malloc(sizeof(*node));
746                 node->cache.start = ino;
747                 node->cache.size = 1;
748                 node->data = rec;
749
750                 if (ino == BTRFS_FREE_INO_OBJECTID)
751                         rec->found_link = 1;
752
753                 ret = insert_cache_extent(inode_cache, &node->cache);
754                 BUG_ON(ret);
755         }
756         return rec;
757 }
758
759 static void free_orphan_data_extents(struct list_head *orphan_extents)
760 {
761         struct orphan_data_extent *orphan;
762
763         while (!list_empty(orphan_extents)) {
764                 orphan = list_entry(orphan_extents->next,
765                                     struct orphan_data_extent, list);
766                 list_del(&orphan->list);
767                 free(orphan);
768         }
769 }
770
771 static void free_inode_rec(struct inode_record *rec)
772 {
773         struct inode_backref *backref;
774
775         if (--rec->refs > 0)
776                 return;
777
778         while (!list_empty(&rec->backrefs)) {
779                 backref = list_entry(rec->backrefs.next,
780                                      struct inode_backref, list);
781                 list_del(&backref->list);
782                 free(backref);
783         }
784         free_orphan_data_extents(&rec->orphan_extents);
785         free_file_extent_holes(&rec->holes);
786         free(rec);
787 }
788
789 static int can_free_inode_rec(struct inode_record *rec)
790 {
791         if (!rec->errors && rec->checked && rec->found_inode_item &&
792             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
793                 return 1;
794         return 0;
795 }
796
797 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
798                                  struct inode_record *rec)
799 {
800         struct cache_extent *cache;
801         struct inode_backref *tmp, *backref;
802         struct ptr_node *node;
803         unsigned char filetype;
804
805         if (!rec->found_inode_item)
806                 return;
807
808         filetype = imode_to_type(rec->imode);
809         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
810                 if (backref->found_dir_item && backref->found_dir_index) {
811                         if (backref->filetype != filetype)
812                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
813                         if (!backref->errors && backref->found_inode_ref) {
814                                 list_del(&backref->list);
815                                 free(backref);
816                         }
817                 }
818         }
819
820         if (!rec->checked || rec->merging)
821                 return;
822
823         if (S_ISDIR(rec->imode)) {
824                 if (rec->found_size != rec->isize)
825                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
826                 if (rec->found_file_extent)
827                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
828         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
829                 if (rec->found_dir_item)
830                         rec->errors |= I_ERR_ODD_DIR_ITEM;
831                 if (rec->found_size != rec->nbytes)
832                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
833                 if (rec->nlink > 0 && !no_holes &&
834                     (rec->extent_end < rec->isize ||
835                      first_extent_gap(&rec->holes) < rec->isize))
836                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
837         }
838
839         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
840                 if (rec->found_csum_item && rec->nodatasum)
841                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
842                 if (rec->some_csum_missing && !rec->nodatasum)
843                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
844         }
845
846         BUG_ON(rec->refs != 1);
847         if (can_free_inode_rec(rec)) {
848                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
849                 node = container_of(cache, struct ptr_node, cache);
850                 BUG_ON(node->data != rec);
851                 remove_cache_extent(inode_cache, &node->cache);
852                 free(node);
853                 free_inode_rec(rec);
854         }
855 }
856
857 static int check_orphan_item(struct btrfs_root *root, u64 ino)
858 {
859         struct btrfs_path path;
860         struct btrfs_key key;
861         int ret;
862
863         key.objectid = BTRFS_ORPHAN_OBJECTID;
864         key.type = BTRFS_ORPHAN_ITEM_KEY;
865         key.offset = ino;
866
867         btrfs_init_path(&path);
868         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
869         btrfs_release_path(&path);
870         if (ret > 0)
871                 ret = -ENOENT;
872         return ret;
873 }
874
875 static int process_inode_item(struct extent_buffer *eb,
876                               int slot, struct btrfs_key *key,
877                               struct shared_node *active_node)
878 {
879         struct inode_record *rec;
880         struct btrfs_inode_item *item;
881
882         rec = active_node->current;
883         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
884         if (rec->found_inode_item) {
885                 rec->errors |= I_ERR_DUP_INODE_ITEM;
886                 return 1;
887         }
888         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
889         rec->nlink = btrfs_inode_nlink(eb, item);
890         rec->isize = btrfs_inode_size(eb, item);
891         rec->nbytes = btrfs_inode_nbytes(eb, item);
892         rec->imode = btrfs_inode_mode(eb, item);
893         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
894                 rec->nodatasum = 1;
895         rec->found_inode_item = 1;
896         if (rec->nlink == 0)
897                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
898         maybe_free_inode_rec(&active_node->inode_cache, rec);
899         return 0;
900 }
901
902 static struct inode_backref *get_inode_backref(struct inode_record *rec,
903                                                 const char *name,
904                                                 int namelen, u64 dir)
905 {
906         struct inode_backref *backref;
907
908         list_for_each_entry(backref, &rec->backrefs, list) {
909                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
910                         break;
911                 if (backref->dir != dir || backref->namelen != namelen)
912                         continue;
913                 if (memcmp(name, backref->name, namelen))
914                         continue;
915                 return backref;
916         }
917
918         backref = malloc(sizeof(*backref) + namelen + 1);
919         memset(backref, 0, sizeof(*backref));
920         backref->dir = dir;
921         backref->namelen = namelen;
922         memcpy(backref->name, name, namelen);
923         backref->name[namelen] = '\0';
924         list_add_tail(&backref->list, &rec->backrefs);
925         return backref;
926 }
927
928 static int add_inode_backref(struct cache_tree *inode_cache,
929                              u64 ino, u64 dir, u64 index,
930                              const char *name, int namelen,
931                              int filetype, int itemtype, int errors)
932 {
933         struct inode_record *rec;
934         struct inode_backref *backref;
935
936         rec = get_inode_rec(inode_cache, ino, 1);
937         backref = get_inode_backref(rec, name, namelen, dir);
938         if (errors)
939                 backref->errors |= errors;
940         if (itemtype == BTRFS_DIR_INDEX_KEY) {
941                 if (backref->found_dir_index)
942                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
943                 if (backref->found_inode_ref && backref->index != index)
944                         backref->errors |= REF_ERR_INDEX_UNMATCH;
945                 if (backref->found_dir_item && backref->filetype != filetype)
946                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
947
948                 backref->index = index;
949                 backref->filetype = filetype;
950                 backref->found_dir_index = 1;
951         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
952                 rec->found_link++;
953                 if (backref->found_dir_item)
954                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
955                 if (backref->found_dir_index && backref->filetype != filetype)
956                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
957
958                 backref->filetype = filetype;
959                 backref->found_dir_item = 1;
960         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
961                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
962                 if (backref->found_inode_ref)
963                         backref->errors |= REF_ERR_DUP_INODE_REF;
964                 if (backref->found_dir_index && backref->index != index)
965                         backref->errors |= REF_ERR_INDEX_UNMATCH;
966                 else
967                         backref->index = index;
968
969                 backref->ref_type = itemtype;
970                 backref->found_inode_ref = 1;
971         } else {
972                 BUG_ON(1);
973         }
974
975         maybe_free_inode_rec(inode_cache, rec);
976         return 0;
977 }
978
979 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
980                             struct cache_tree *dst_cache)
981 {
982         struct inode_backref *backref;
983         u32 dir_count = 0;
984         int ret = 0;
985
986         dst->merging = 1;
987         list_for_each_entry(backref, &src->backrefs, list) {
988                 if (backref->found_dir_index) {
989                         add_inode_backref(dst_cache, dst->ino, backref->dir,
990                                         backref->index, backref->name,
991                                         backref->namelen, backref->filetype,
992                                         BTRFS_DIR_INDEX_KEY, backref->errors);
993                 }
994                 if (backref->found_dir_item) {
995                         dir_count++;
996                         add_inode_backref(dst_cache, dst->ino,
997                                         backref->dir, 0, backref->name,
998                                         backref->namelen, backref->filetype,
999                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1000                 }
1001                 if (backref->found_inode_ref) {
1002                         add_inode_backref(dst_cache, dst->ino,
1003                                         backref->dir, backref->index,
1004                                         backref->name, backref->namelen, 0,
1005                                         backref->ref_type, backref->errors);
1006                 }
1007         }
1008
1009         if (src->found_dir_item)
1010                 dst->found_dir_item = 1;
1011         if (src->found_file_extent)
1012                 dst->found_file_extent = 1;
1013         if (src->found_csum_item)
1014                 dst->found_csum_item = 1;
1015         if (src->some_csum_missing)
1016                 dst->some_csum_missing = 1;
1017         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1018                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1019                 if (ret < 0)
1020                         return ret;
1021         }
1022
1023         BUG_ON(src->found_link < dir_count);
1024         dst->found_link += src->found_link - dir_count;
1025         dst->found_size += src->found_size;
1026         if (src->extent_start != (u64)-1) {
1027                 if (dst->extent_start == (u64)-1) {
1028                         dst->extent_start = src->extent_start;
1029                         dst->extent_end = src->extent_end;
1030                 } else {
1031                         if (dst->extent_end > src->extent_start)
1032                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1033                         else if (dst->extent_end < src->extent_start) {
1034                                 ret = add_file_extent_hole(&dst->holes,
1035                                         dst->extent_end,
1036                                         src->extent_start - dst->extent_end);
1037                         }
1038                         if (dst->extent_end < src->extent_end)
1039                                 dst->extent_end = src->extent_end;
1040                 }
1041         }
1042
1043         dst->errors |= src->errors;
1044         if (src->found_inode_item) {
1045                 if (!dst->found_inode_item) {
1046                         dst->nlink = src->nlink;
1047                         dst->isize = src->isize;
1048                         dst->nbytes = src->nbytes;
1049                         dst->imode = src->imode;
1050                         dst->nodatasum = src->nodatasum;
1051                         dst->found_inode_item = 1;
1052                 } else {
1053                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1054                 }
1055         }
1056         dst->merging = 0;
1057
1058         return 0;
1059 }
1060
1061 static int splice_shared_node(struct shared_node *src_node,
1062                               struct shared_node *dst_node)
1063 {
1064         struct cache_extent *cache;
1065         struct ptr_node *node, *ins;
1066         struct cache_tree *src, *dst;
1067         struct inode_record *rec, *conflict;
1068         u64 current_ino = 0;
1069         int splice = 0;
1070         int ret;
1071
1072         if (--src_node->refs == 0)
1073                 splice = 1;
1074         if (src_node->current)
1075                 current_ino = src_node->current->ino;
1076
1077         src = &src_node->root_cache;
1078         dst = &dst_node->root_cache;
1079 again:
1080         cache = search_cache_extent(src, 0);
1081         while (cache) {
1082                 node = container_of(cache, struct ptr_node, cache);
1083                 rec = node->data;
1084                 cache = next_cache_extent(cache);
1085
1086                 if (splice) {
1087                         remove_cache_extent(src, &node->cache);
1088                         ins = node;
1089                 } else {
1090                         ins = malloc(sizeof(*ins));
1091                         ins->cache.start = node->cache.start;
1092                         ins->cache.size = node->cache.size;
1093                         ins->data = rec;
1094                         rec->refs++;
1095                 }
1096                 ret = insert_cache_extent(dst, &ins->cache);
1097                 if (ret == -EEXIST) {
1098                         conflict = get_inode_rec(dst, rec->ino, 1);
1099                         merge_inode_recs(rec, conflict, dst);
1100                         if (rec->checked) {
1101                                 conflict->checked = 1;
1102                                 if (dst_node->current == conflict)
1103                                         dst_node->current = NULL;
1104                         }
1105                         maybe_free_inode_rec(dst, conflict);
1106                         free_inode_rec(rec);
1107                         free(ins);
1108                 } else {
1109                         BUG_ON(ret);
1110                 }
1111         }
1112
1113         if (src == &src_node->root_cache) {
1114                 src = &src_node->inode_cache;
1115                 dst = &dst_node->inode_cache;
1116                 goto again;
1117         }
1118
1119         if (current_ino > 0 && (!dst_node->current ||
1120             current_ino > dst_node->current->ino)) {
1121                 if (dst_node->current) {
1122                         dst_node->current->checked = 1;
1123                         maybe_free_inode_rec(dst, dst_node->current);
1124                 }
1125                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1126         }
1127         return 0;
1128 }
1129
1130 static void free_inode_ptr(struct cache_extent *cache)
1131 {
1132         struct ptr_node *node;
1133         struct inode_record *rec;
1134
1135         node = container_of(cache, struct ptr_node, cache);
1136         rec = node->data;
1137         free_inode_rec(rec);
1138         free(node);
1139 }
1140
1141 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1142
1143 static struct shared_node *find_shared_node(struct cache_tree *shared,
1144                                             u64 bytenr)
1145 {
1146         struct cache_extent *cache;
1147         struct shared_node *node;
1148
1149         cache = lookup_cache_extent(shared, bytenr, 1);
1150         if (cache) {
1151                 node = container_of(cache, struct shared_node, cache);
1152                 return node;
1153         }
1154         return NULL;
1155 }
1156
1157 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1158 {
1159         int ret;
1160         struct shared_node *node;
1161
1162         node = calloc(1, sizeof(*node));
1163         node->cache.start = bytenr;
1164         node->cache.size = 1;
1165         cache_tree_init(&node->root_cache);
1166         cache_tree_init(&node->inode_cache);
1167         node->refs = refs;
1168
1169         ret = insert_cache_extent(shared, &node->cache);
1170         BUG_ON(ret);
1171         return 0;
1172 }
1173
1174 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1175                              struct walk_control *wc, int level)
1176 {
1177         struct shared_node *node;
1178         struct shared_node *dest;
1179
1180         if (level == wc->active_node)
1181                 return 0;
1182
1183         BUG_ON(wc->active_node <= level);
1184         node = find_shared_node(&wc->shared, bytenr);
1185         if (!node) {
1186                 add_shared_node(&wc->shared, bytenr, refs);
1187                 node = find_shared_node(&wc->shared, bytenr);
1188                 wc->nodes[level] = node;
1189                 wc->active_node = level;
1190                 return 0;
1191         }
1192
1193         if (wc->root_level == wc->active_node &&
1194             btrfs_root_refs(&root->root_item) == 0) {
1195                 if (--node->refs == 0) {
1196                         free_inode_recs_tree(&node->root_cache);
1197                         free_inode_recs_tree(&node->inode_cache);
1198                         remove_cache_extent(&wc->shared, &node->cache);
1199                         free(node);
1200                 }
1201                 return 1;
1202         }
1203
1204         dest = wc->nodes[wc->active_node];
1205         splice_shared_node(node, dest);
1206         if (node->refs == 0) {
1207                 remove_cache_extent(&wc->shared, &node->cache);
1208                 free(node);
1209         }
1210         return 1;
1211 }
1212
1213 static int leave_shared_node(struct btrfs_root *root,
1214                              struct walk_control *wc, int level)
1215 {
1216         struct shared_node *node;
1217         struct shared_node *dest;
1218         int i;
1219
1220         if (level == wc->root_level)
1221                 return 0;
1222
1223         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1224                 if (wc->nodes[i])
1225                         break;
1226         }
1227         BUG_ON(i >= BTRFS_MAX_LEVEL);
1228
1229         node = wc->nodes[wc->active_node];
1230         wc->nodes[wc->active_node] = NULL;
1231         wc->active_node = i;
1232
1233         dest = wc->nodes[wc->active_node];
1234         if (wc->active_node < wc->root_level ||
1235             btrfs_root_refs(&root->root_item) > 0) {
1236                 BUG_ON(node->refs <= 1);
1237                 splice_shared_node(node, dest);
1238         } else {
1239                 BUG_ON(node->refs < 2);
1240                 node->refs--;
1241         }
1242         return 0;
1243 }
1244
1245 /*
1246  * Returns:
1247  * < 0 - on error
1248  * 1   - if the root with id child_root_id is a child of root parent_root_id
1249  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1250  *       has other root(s) as parent(s)
1251  * 2   - if the root child_root_id doesn't have any parent roots
1252  */
1253 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1254                          u64 child_root_id)
1255 {
1256         struct btrfs_path path;
1257         struct btrfs_key key;
1258         struct extent_buffer *leaf;
1259         int has_parent = 0;
1260         int ret;
1261
1262         btrfs_init_path(&path);
1263
1264         key.objectid = parent_root_id;
1265         key.type = BTRFS_ROOT_REF_KEY;
1266         key.offset = child_root_id;
1267         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1268                                 0, 0);
1269         if (ret < 0)
1270                 return ret;
1271         btrfs_release_path(&path);
1272         if (!ret)
1273                 return 1;
1274
1275         key.objectid = child_root_id;
1276         key.type = BTRFS_ROOT_BACKREF_KEY;
1277         key.offset = 0;
1278         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1279                                 0, 0);
1280         if (ret < 0)
1281                 goto out;
1282
1283         while (1) {
1284                 leaf = path.nodes[0];
1285                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1286                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1287                         if (ret)
1288                                 break;
1289                         leaf = path.nodes[0];
1290                 }
1291
1292                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1293                 if (key.objectid != child_root_id ||
1294                     key.type != BTRFS_ROOT_BACKREF_KEY)
1295                         break;
1296
1297                 has_parent = 1;
1298
1299                 if (key.offset == parent_root_id) {
1300                         btrfs_release_path(&path);
1301                         return 1;
1302                 }
1303
1304                 path.slots[0]++;
1305         }
1306 out:
1307         btrfs_release_path(&path);
1308         if (ret < 0)
1309                 return ret;
1310         return has_parent ? 0 : 2;
1311 }
1312
1313 static int process_dir_item(struct btrfs_root *root,
1314                             struct extent_buffer *eb,
1315                             int slot, struct btrfs_key *key,
1316                             struct shared_node *active_node)
1317 {
1318         u32 total;
1319         u32 cur = 0;
1320         u32 len;
1321         u32 name_len;
1322         u32 data_len;
1323         int error;
1324         int nritems = 0;
1325         int filetype;
1326         struct btrfs_dir_item *di;
1327         struct inode_record *rec;
1328         struct cache_tree *root_cache;
1329         struct cache_tree *inode_cache;
1330         struct btrfs_key location;
1331         char namebuf[BTRFS_NAME_LEN];
1332
1333         root_cache = &active_node->root_cache;
1334         inode_cache = &active_node->inode_cache;
1335         rec = active_node->current;
1336         rec->found_dir_item = 1;
1337
1338         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1339         total = btrfs_item_size_nr(eb, slot);
1340         while (cur < total) {
1341                 nritems++;
1342                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1343                 name_len = btrfs_dir_name_len(eb, di);
1344                 data_len = btrfs_dir_data_len(eb, di);
1345                 filetype = btrfs_dir_type(eb, di);
1346
1347                 rec->found_size += name_len;
1348                 if (name_len <= BTRFS_NAME_LEN) {
1349                         len = name_len;
1350                         error = 0;
1351                 } else {
1352                         len = BTRFS_NAME_LEN;
1353                         error = REF_ERR_NAME_TOO_LONG;
1354                 }
1355                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1356
1357                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1358                         add_inode_backref(inode_cache, location.objectid,
1359                                           key->objectid, key->offset, namebuf,
1360                                           len, filetype, key->type, error);
1361                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1362                         add_inode_backref(root_cache, location.objectid,
1363                                           key->objectid, key->offset,
1364                                           namebuf, len, filetype,
1365                                           key->type, error);
1366                 } else {
1367                         fprintf(stderr, "invalid location in dir item %u\n",
1368                                 location.type);
1369                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1370                                           key->objectid, key->offset, namebuf,
1371                                           len, filetype, key->type, error);
1372                 }
1373
1374                 len = sizeof(*di) + name_len + data_len;
1375                 di = (struct btrfs_dir_item *)((char *)di + len);
1376                 cur += len;
1377         }
1378         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1379                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1380
1381         return 0;
1382 }
1383
1384 static int process_inode_ref(struct extent_buffer *eb,
1385                              int slot, struct btrfs_key *key,
1386                              struct shared_node *active_node)
1387 {
1388         u32 total;
1389         u32 cur = 0;
1390         u32 len;
1391         u32 name_len;
1392         u64 index;
1393         int error;
1394         struct cache_tree *inode_cache;
1395         struct btrfs_inode_ref *ref;
1396         char namebuf[BTRFS_NAME_LEN];
1397
1398         inode_cache = &active_node->inode_cache;
1399
1400         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1401         total = btrfs_item_size_nr(eb, slot);
1402         while (cur < total) {
1403                 name_len = btrfs_inode_ref_name_len(eb, ref);
1404                 index = btrfs_inode_ref_index(eb, ref);
1405                 if (name_len <= BTRFS_NAME_LEN) {
1406                         len = name_len;
1407                         error = 0;
1408                 } else {
1409                         len = BTRFS_NAME_LEN;
1410                         error = REF_ERR_NAME_TOO_LONG;
1411                 }
1412                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1413                 add_inode_backref(inode_cache, key->objectid, key->offset,
1414                                   index, namebuf, len, 0, key->type, error);
1415
1416                 len = sizeof(*ref) + name_len;
1417                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1418                 cur += len;
1419         }
1420         return 0;
1421 }
1422
1423 static int process_inode_extref(struct extent_buffer *eb,
1424                                 int slot, struct btrfs_key *key,
1425                                 struct shared_node *active_node)
1426 {
1427         u32 total;
1428         u32 cur = 0;
1429         u32 len;
1430         u32 name_len;
1431         u64 index;
1432         u64 parent;
1433         int error;
1434         struct cache_tree *inode_cache;
1435         struct btrfs_inode_extref *extref;
1436         char namebuf[BTRFS_NAME_LEN];
1437
1438         inode_cache = &active_node->inode_cache;
1439
1440         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1441         total = btrfs_item_size_nr(eb, slot);
1442         while (cur < total) {
1443                 name_len = btrfs_inode_extref_name_len(eb, extref);
1444                 index = btrfs_inode_extref_index(eb, extref);
1445                 parent = btrfs_inode_extref_parent(eb, extref);
1446                 if (name_len <= BTRFS_NAME_LEN) {
1447                         len = name_len;
1448                         error = 0;
1449                 } else {
1450                         len = BTRFS_NAME_LEN;
1451                         error = REF_ERR_NAME_TOO_LONG;
1452                 }
1453                 read_extent_buffer(eb, namebuf,
1454                                    (unsigned long)(extref + 1), len);
1455                 add_inode_backref(inode_cache, key->objectid, parent,
1456                                   index, namebuf, len, 0, key->type, error);
1457
1458                 len = sizeof(*extref) + name_len;
1459                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1460                 cur += len;
1461         }
1462         return 0;
1463
1464 }
1465
1466 static int count_csum_range(struct btrfs_root *root, u64 start,
1467                             u64 len, u64 *found)
1468 {
1469         struct btrfs_key key;
1470         struct btrfs_path path;
1471         struct extent_buffer *leaf;
1472         int ret;
1473         size_t size;
1474         *found = 0;
1475         u64 csum_end;
1476         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1477
1478         btrfs_init_path(&path);
1479
1480         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1481         key.offset = start;
1482         key.type = BTRFS_EXTENT_CSUM_KEY;
1483
1484         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1485                                 &key, &path, 0, 0);
1486         if (ret < 0)
1487                 goto out;
1488         if (ret > 0 && path.slots[0] > 0) {
1489                 leaf = path.nodes[0];
1490                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1491                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1492                     key.type == BTRFS_EXTENT_CSUM_KEY)
1493                         path.slots[0]--;
1494         }
1495
1496         while (len > 0) {
1497                 leaf = path.nodes[0];
1498                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1499                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1500                         if (ret > 0)
1501                                 break;
1502                         else if (ret < 0)
1503                                 goto out;
1504                         leaf = path.nodes[0];
1505                 }
1506
1507                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1508                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1509                     key.type != BTRFS_EXTENT_CSUM_KEY)
1510                         break;
1511
1512                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1513                 if (key.offset >= start + len)
1514                         break;
1515
1516                 if (key.offset > start)
1517                         start = key.offset;
1518
1519                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1520                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1521                 if (csum_end > start) {
1522                         size = min(csum_end - start, len);
1523                         len -= size;
1524                         start += size;
1525                         *found += size;
1526                 }
1527
1528                 path.slots[0]++;
1529         }
1530 out:
1531         btrfs_release_path(&path);
1532         if (ret < 0)
1533                 return ret;
1534         return 0;
1535 }
1536
1537 static int process_file_extent(struct btrfs_root *root,
1538                                 struct extent_buffer *eb,
1539                                 int slot, struct btrfs_key *key,
1540                                 struct shared_node *active_node)
1541 {
1542         struct inode_record *rec;
1543         struct btrfs_file_extent_item *fi;
1544         u64 num_bytes = 0;
1545         u64 disk_bytenr = 0;
1546         u64 extent_offset = 0;
1547         u64 mask = root->sectorsize - 1;
1548         int extent_type;
1549         int ret;
1550
1551         rec = active_node->current;
1552         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1553         rec->found_file_extent = 1;
1554
1555         if (rec->extent_start == (u64)-1) {
1556                 rec->extent_start = key->offset;
1557                 rec->extent_end = key->offset;
1558         }
1559
1560         if (rec->extent_end > key->offset)
1561                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1562         else if (rec->extent_end < key->offset) {
1563                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1564                                            key->offset - rec->extent_end);
1565                 if (ret < 0)
1566                         return ret;
1567         }
1568
1569         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1570         extent_type = btrfs_file_extent_type(eb, fi);
1571
1572         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1573                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1574                 if (num_bytes == 0)
1575                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1576                 rec->found_size += num_bytes;
1577                 num_bytes = (num_bytes + mask) & ~mask;
1578         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1579                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1580                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1581                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1582                 extent_offset = btrfs_file_extent_offset(eb, fi);
1583                 if (num_bytes == 0 || (num_bytes & mask))
1584                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1585                 if (num_bytes + extent_offset >
1586                     btrfs_file_extent_ram_bytes(eb, fi))
1587                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1588                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1589                     (btrfs_file_extent_compression(eb, fi) ||
1590                      btrfs_file_extent_encryption(eb, fi) ||
1591                      btrfs_file_extent_other_encoding(eb, fi)))
1592                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1593                 if (disk_bytenr > 0)
1594                         rec->found_size += num_bytes;
1595         } else {
1596                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1597         }
1598         rec->extent_end = key->offset + num_bytes;
1599
1600         /*
1601          * The data reloc tree will copy full extents into its inode and then
1602          * copy the corresponding csums.  Because the extent it copied could be
1603          * a preallocated extent that hasn't been written to yet there may be no
1604          * csums to copy, ergo we won't have csums for our file extent.  This is
1605          * ok so just don't bother checking csums if the inode belongs to the
1606          * data reloc tree.
1607          */
1608         if (disk_bytenr > 0 &&
1609             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1610                 u64 found;
1611                 if (btrfs_file_extent_compression(eb, fi))
1612                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1613                 else
1614                         disk_bytenr += extent_offset;
1615
1616                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1617                 if (ret < 0)
1618                         return ret;
1619                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1620                         if (found > 0)
1621                                 rec->found_csum_item = 1;
1622                         if (found < num_bytes)
1623                                 rec->some_csum_missing = 1;
1624                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1625                         if (found > 0)
1626                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1627                 }
1628         }
1629         return 0;
1630 }
1631
1632 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1633                             struct walk_control *wc)
1634 {
1635         struct btrfs_key key;
1636         u32 nritems;
1637         int i;
1638         int ret = 0;
1639         struct cache_tree *inode_cache;
1640         struct shared_node *active_node;
1641
1642         if (wc->root_level == wc->active_node &&
1643             btrfs_root_refs(&root->root_item) == 0)
1644                 return 0;
1645
1646         active_node = wc->nodes[wc->active_node];
1647         inode_cache = &active_node->inode_cache;
1648         nritems = btrfs_header_nritems(eb);
1649         for (i = 0; i < nritems; i++) {
1650                 btrfs_item_key_to_cpu(eb, &key, i);
1651
1652                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1653                         continue;
1654                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1655                         continue;
1656
1657                 if (active_node->current == NULL ||
1658                     active_node->current->ino < key.objectid) {
1659                         if (active_node->current) {
1660                                 active_node->current->checked = 1;
1661                                 maybe_free_inode_rec(inode_cache,
1662                                                      active_node->current);
1663                         }
1664                         active_node->current = get_inode_rec(inode_cache,
1665                                                              key.objectid, 1);
1666                 }
1667                 switch (key.type) {
1668                 case BTRFS_DIR_ITEM_KEY:
1669                 case BTRFS_DIR_INDEX_KEY:
1670                         ret = process_dir_item(root, eb, i, &key, active_node);
1671                         break;
1672                 case BTRFS_INODE_REF_KEY:
1673                         ret = process_inode_ref(eb, i, &key, active_node);
1674                         break;
1675                 case BTRFS_INODE_EXTREF_KEY:
1676                         ret = process_inode_extref(eb, i, &key, active_node);
1677                         break;
1678                 case BTRFS_INODE_ITEM_KEY:
1679                         ret = process_inode_item(eb, i, &key, active_node);
1680                         break;
1681                 case BTRFS_EXTENT_DATA_KEY:
1682                         ret = process_file_extent(root, eb, i, &key,
1683                                                   active_node);
1684                         break;
1685                 default:
1686                         break;
1687                 };
1688         }
1689         return ret;
1690 }
1691
1692 static void reada_walk_down(struct btrfs_root *root,
1693                             struct extent_buffer *node, int slot)
1694 {
1695         u64 bytenr;
1696         u64 ptr_gen;
1697         u32 nritems;
1698         u32 blocksize;
1699         int i;
1700         int level;
1701
1702         level = btrfs_header_level(node);
1703         if (level != 1)
1704                 return;
1705
1706         nritems = btrfs_header_nritems(node);
1707         blocksize = btrfs_level_size(root, level - 1);
1708         for (i = slot; i < nritems; i++) {
1709                 bytenr = btrfs_node_blockptr(node, i);
1710                 ptr_gen = btrfs_node_ptr_generation(node, i);
1711                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1712         }
1713 }
1714
1715 /*
1716  * Check the child node/leaf by the following condition:
1717  * 1. the first item key of the node/leaf should be the same with the one
1718  *    in parent.
1719  * 2. block in parent node should match the child node/leaf.
1720  * 3. generation of parent node and child's header should be consistent.
1721  *
1722  * Or the child node/leaf pointed by the key in parent is not valid.
1723  *
1724  * We hope to check leaf owner too, but since subvol may share leaves,
1725  * which makes leaf owner check not so strong, key check should be
1726  * sufficient enough for that case.
1727  */
1728 static int check_child_node(struct btrfs_root *root,
1729                             struct extent_buffer *parent, int slot,
1730                             struct extent_buffer *child)
1731 {
1732         struct btrfs_key parent_key;
1733         struct btrfs_key child_key;
1734         int ret = 0;
1735
1736         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1737         if (btrfs_header_level(child) == 0)
1738                 btrfs_item_key_to_cpu(child, &child_key, 0);
1739         else
1740                 btrfs_node_key_to_cpu(child, &child_key, 0);
1741
1742         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1743                 ret = -EINVAL;
1744                 fprintf(stderr,
1745                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1746                         parent_key.objectid, parent_key.type, parent_key.offset,
1747                         child_key.objectid, child_key.type, child_key.offset);
1748         }
1749         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1750                 ret = -EINVAL;
1751                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1752                         btrfs_node_blockptr(parent, slot),
1753                         btrfs_header_bytenr(child));
1754         }
1755         if (btrfs_node_ptr_generation(parent, slot) !=
1756             btrfs_header_generation(child)) {
1757                 ret = -EINVAL;
1758                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1759                         btrfs_header_generation(child),
1760                         btrfs_node_ptr_generation(parent, slot));
1761         }
1762         return ret;
1763 }
1764
1765 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1766                           struct walk_control *wc, int *level)
1767 {
1768         enum btrfs_tree_block_status status;
1769         u64 bytenr;
1770         u64 ptr_gen;
1771         struct extent_buffer *next;
1772         struct extent_buffer *cur;
1773         u32 blocksize;
1774         int ret, err = 0;
1775         u64 refs;
1776
1777         WARN_ON(*level < 0);
1778         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1779         ret = btrfs_lookup_extent_info(NULL, root,
1780                                        path->nodes[*level]->start,
1781                                        *level, 1, &refs, NULL);
1782         if (ret < 0) {
1783                 err = ret;
1784                 goto out;
1785         }
1786
1787         if (refs > 1) {
1788                 ret = enter_shared_node(root, path->nodes[*level]->start,
1789                                         refs, wc, *level);
1790                 if (ret > 0) {
1791                         err = ret;
1792                         goto out;
1793                 }
1794         }
1795
1796         while (*level >= 0) {
1797                 WARN_ON(*level < 0);
1798                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1799                 cur = path->nodes[*level];
1800
1801                 if (btrfs_header_level(cur) != *level)
1802                         WARN_ON(1);
1803
1804                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1805                         break;
1806                 if (*level == 0) {
1807                         ret = process_one_leaf(root, cur, wc);
1808                         if (ret < 0)
1809                                 err = ret;
1810                         break;
1811                 }
1812                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1813                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1814                 blocksize = btrfs_level_size(root, *level - 1);
1815                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1816                                                1, &refs, NULL);
1817                 if (ret < 0)
1818                         refs = 0;
1819
1820                 if (refs > 1) {
1821                         ret = enter_shared_node(root, bytenr, refs,
1822                                                 wc, *level - 1);
1823                         if (ret > 0) {
1824                                 path->slots[*level]++;
1825                                 continue;
1826                         }
1827                 }
1828
1829                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1830                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1831                         free_extent_buffer(next);
1832                         reada_walk_down(root, cur, path->slots[*level]);
1833                         next = read_tree_block(root, bytenr, blocksize,
1834                                                ptr_gen);
1835                         if (!extent_buffer_uptodate(next)) {
1836                                 struct btrfs_key node_key;
1837
1838                                 btrfs_node_key_to_cpu(path->nodes[*level],
1839                                                       &node_key,
1840                                                       path->slots[*level]);
1841                                 btrfs_add_corrupt_extent_record(root->fs_info,
1842                                                 &node_key,
1843                                                 path->nodes[*level]->start,
1844                                                 root->leafsize, *level);
1845                                 err = -EIO;
1846                                 goto out;
1847                         }
1848                 }
1849
1850                 ret = check_child_node(root, cur, path->slots[*level], next);
1851                 if (ret) {
1852                         err = ret;
1853                         goto out;
1854                 }
1855
1856                 if (btrfs_is_leaf(next))
1857                         status = btrfs_check_leaf(root, NULL, next);
1858                 else
1859                         status = btrfs_check_node(root, NULL, next);
1860                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1861                         free_extent_buffer(next);
1862                         err = -EIO;
1863                         goto out;
1864                 }
1865
1866                 *level = *level - 1;
1867                 free_extent_buffer(path->nodes[*level]);
1868                 path->nodes[*level] = next;
1869                 path->slots[*level] = 0;
1870         }
1871 out:
1872         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1873         return err;
1874 }
1875
1876 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1877                         struct walk_control *wc, int *level)
1878 {
1879         int i;
1880         struct extent_buffer *leaf;
1881
1882         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1883                 leaf = path->nodes[i];
1884                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1885                         path->slots[i]++;
1886                         *level = i;
1887                         return 0;
1888                 } else {
1889                         free_extent_buffer(path->nodes[*level]);
1890                         path->nodes[*level] = NULL;
1891                         BUG_ON(*level > wc->active_node);
1892                         if (*level == wc->active_node)
1893                                 leave_shared_node(root, wc, *level);
1894                         *level = i + 1;
1895                 }
1896         }
1897         return 1;
1898 }
1899
1900 static int check_root_dir(struct inode_record *rec)
1901 {
1902         struct inode_backref *backref;
1903         int ret = -1;
1904
1905         if (!rec->found_inode_item || rec->errors)
1906                 goto out;
1907         if (rec->nlink != 1 || rec->found_link != 0)
1908                 goto out;
1909         if (list_empty(&rec->backrefs))
1910                 goto out;
1911         backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1912         if (!backref->found_inode_ref)
1913                 goto out;
1914         if (backref->index != 0 || backref->namelen != 2 ||
1915             memcmp(backref->name, "..", 2))
1916                 goto out;
1917         if (backref->found_dir_index || backref->found_dir_item)
1918                 goto out;
1919         ret = 0;
1920 out:
1921         return ret;
1922 }
1923
1924 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1925                               struct btrfs_root *root, struct btrfs_path *path,
1926                               struct inode_record *rec)
1927 {
1928         struct btrfs_inode_item *ei;
1929         struct btrfs_key key;
1930         int ret;
1931
1932         key.objectid = rec->ino;
1933         key.type = BTRFS_INODE_ITEM_KEY;
1934         key.offset = (u64)-1;
1935
1936         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1937         if (ret < 0)
1938                 goto out;
1939         if (ret) {
1940                 if (!path->slots[0]) {
1941                         ret = -ENOENT;
1942                         goto out;
1943                 }
1944                 path->slots[0]--;
1945                 ret = 0;
1946         }
1947         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1948         if (key.objectid != rec->ino) {
1949                 ret = -ENOENT;
1950                 goto out;
1951         }
1952
1953         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1954                             struct btrfs_inode_item);
1955         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1956         btrfs_mark_buffer_dirty(path->nodes[0]);
1957         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1958         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
1959                root->root_key.objectid);
1960 out:
1961         btrfs_release_path(path);
1962         return ret;
1963 }
1964
1965 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1966                                     struct btrfs_root *root,
1967                                     struct btrfs_path *path,
1968                                     struct inode_record *rec)
1969 {
1970         int ret;
1971
1972         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
1973         btrfs_release_path(path);
1974         if (!ret)
1975                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1976         return ret;
1977 }
1978
1979 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
1980                                struct btrfs_root *root,
1981                                struct btrfs_path *path,
1982                                struct inode_record *rec)
1983 {
1984         struct btrfs_inode_item *ei;
1985         struct btrfs_key key;
1986         int ret = 0;
1987
1988         key.objectid = rec->ino;
1989         key.type = BTRFS_INODE_ITEM_KEY;
1990         key.offset = 0;
1991
1992         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1993         if (ret) {
1994                 if (ret > 0)
1995                         ret = -ENOENT;
1996                 goto out;
1997         }
1998
1999         /* Since ret == 0, no need to check anything */
2000         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2001                             struct btrfs_inode_item);
2002         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2003         btrfs_mark_buffer_dirty(path->nodes[0]);
2004         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2005         printf("reset nbytes for ino %llu root %llu\n",
2006                rec->ino, root->root_key.objectid);
2007 out:
2008         btrfs_release_path(path);
2009         return ret;
2010 }
2011
2012 static int add_missing_dir_index(struct btrfs_root *root,
2013                                  struct cache_tree *inode_cache,
2014                                  struct inode_record *rec,
2015                                  struct inode_backref *backref)
2016 {
2017         struct btrfs_path *path;
2018         struct btrfs_trans_handle *trans;
2019         struct btrfs_dir_item *dir_item;
2020         struct extent_buffer *leaf;
2021         struct btrfs_key key;
2022         struct btrfs_disk_key disk_key;
2023         struct inode_record *dir_rec;
2024         unsigned long name_ptr;
2025         u32 data_size = sizeof(*dir_item) + backref->namelen;
2026         int ret;
2027
2028         path = btrfs_alloc_path();
2029         if (!path)
2030                 return -ENOMEM;
2031
2032         trans = btrfs_start_transaction(root, 1);
2033         if (IS_ERR(trans)) {
2034                 btrfs_free_path(path);
2035                 return PTR_ERR(trans);
2036         }
2037
2038         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2039                 (unsigned long long)rec->ino);
2040         key.objectid = backref->dir;
2041         key.type = BTRFS_DIR_INDEX_KEY;
2042         key.offset = backref->index;
2043
2044         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2045         BUG_ON(ret);
2046
2047         leaf = path->nodes[0];
2048         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2049
2050         disk_key.objectid = cpu_to_le64(rec->ino);
2051         disk_key.type = BTRFS_INODE_ITEM_KEY;
2052         disk_key.offset = 0;
2053
2054         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2055         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2056         btrfs_set_dir_data_len(leaf, dir_item, 0);
2057         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2058         name_ptr = (unsigned long)(dir_item + 1);
2059         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2060         btrfs_mark_buffer_dirty(leaf);
2061         btrfs_free_path(path);
2062         btrfs_commit_transaction(trans, root);
2063
2064         backref->found_dir_index = 1;
2065         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2066         if (!dir_rec)
2067                 return 0;
2068         dir_rec->found_size += backref->namelen;
2069         if (dir_rec->found_size == dir_rec->isize &&
2070             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2071                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2072         if (dir_rec->found_size != dir_rec->isize)
2073                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2074
2075         return 0;
2076 }
2077
2078 static int delete_dir_index(struct btrfs_root *root,
2079                             struct cache_tree *inode_cache,
2080                             struct inode_record *rec,
2081                             struct inode_backref *backref)
2082 {
2083         struct btrfs_trans_handle *trans;
2084         struct btrfs_dir_item *di;
2085         struct btrfs_path *path;
2086         int ret = 0;
2087
2088         path = btrfs_alloc_path();
2089         if (!path)
2090                 return -ENOMEM;
2091
2092         trans = btrfs_start_transaction(root, 1);
2093         if (IS_ERR(trans)) {
2094                 btrfs_free_path(path);
2095                 return PTR_ERR(trans);
2096         }
2097
2098
2099         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2100                 (unsigned long long)backref->dir,
2101                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2102                 (unsigned long long)root->objectid);
2103
2104         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2105                                     backref->name, backref->namelen,
2106                                     backref->index, -1);
2107         if (IS_ERR(di)) {
2108                 ret = PTR_ERR(di);
2109                 btrfs_free_path(path);
2110                 btrfs_commit_transaction(trans, root);
2111                 if (ret == -ENOENT)
2112                         return 0;
2113                 return ret;
2114         }
2115
2116         if (!di)
2117                 ret = btrfs_del_item(trans, root, path);
2118         else
2119                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2120         BUG_ON(ret);
2121         btrfs_free_path(path);
2122         btrfs_commit_transaction(trans, root);
2123         return ret;
2124 }
2125
2126 static int create_inode_item(struct btrfs_root *root,
2127                              struct inode_record *rec,
2128                              struct inode_backref *backref, int root_dir)
2129 {
2130         struct btrfs_trans_handle *trans;
2131         struct btrfs_inode_item inode_item;
2132         time_t now = time(NULL);
2133         int ret;
2134
2135         trans = btrfs_start_transaction(root, 1);
2136         if (IS_ERR(trans)) {
2137                 ret = PTR_ERR(trans);
2138                 return ret;
2139         }
2140
2141         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2142                 "be incomplete, please check permissions and content after "
2143                 "the fsck completes.\n", (unsigned long long)root->objectid,
2144                 (unsigned long long)rec->ino);
2145
2146         memset(&inode_item, 0, sizeof(inode_item));
2147         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2148         if (root_dir)
2149                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2150         else
2151                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2152         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2153         if (rec->found_dir_item) {
2154                 if (rec->found_file_extent)
2155                         fprintf(stderr, "root %llu inode %llu has both a dir "
2156                                 "item and extents, unsure if it is a dir or a "
2157                                 "regular file so setting it as a directory\n",
2158                                 (unsigned long long)root->objectid,
2159                                 (unsigned long long)rec->ino);
2160                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2161                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2162         } else if (!rec->found_dir_item) {
2163                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2164                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2165         }
2166         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2167         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2168         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2169         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2170         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2171         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2172         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2173         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2174
2175         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2176         BUG_ON(ret);
2177         btrfs_commit_transaction(trans, root);
2178         return 0;
2179 }
2180
2181 static int repair_inode_backrefs(struct btrfs_root *root,
2182                                  struct inode_record *rec,
2183                                  struct cache_tree *inode_cache,
2184                                  int delete)
2185 {
2186         struct inode_backref *tmp, *backref;
2187         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2188         int ret = 0;
2189         int repaired = 0;
2190
2191         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2192                 if (!delete && rec->ino == root_dirid) {
2193                         if (!rec->found_inode_item) {
2194                                 ret = create_inode_item(root, rec, backref, 1);
2195                                 if (ret)
2196                                         break;
2197                                 repaired++;
2198                         }
2199                 }
2200
2201                 /* Index 0 for root dir's are special, don't mess with it */
2202                 if (rec->ino == root_dirid && backref->index == 0)
2203                         continue;
2204
2205                 if (delete &&
2206                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2207                      (backref->found_dir_index && backref->found_inode_ref &&
2208                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2209                         ret = delete_dir_index(root, inode_cache, rec, backref);
2210                         if (ret)
2211                                 break;
2212                         repaired++;
2213                         list_del(&backref->list);
2214                         free(backref);
2215                 }
2216
2217                 if (!delete && !backref->found_dir_index &&
2218                     backref->found_dir_item && backref->found_inode_ref) {
2219                         ret = add_missing_dir_index(root, inode_cache, rec,
2220                                                     backref);
2221                         if (ret)
2222                                 break;
2223                         repaired++;
2224                         if (backref->found_dir_item &&
2225                             backref->found_dir_index &&
2226                             backref->found_dir_index) {
2227                                 if (!backref->errors &&
2228                                     backref->found_inode_ref) {
2229                                         list_del(&backref->list);
2230                                         free(backref);
2231                                 }
2232                         }
2233                 }
2234
2235                 if (!delete && (!backref->found_dir_index &&
2236                                 !backref->found_dir_item &&
2237                                 backref->found_inode_ref)) {
2238                         struct btrfs_trans_handle *trans;
2239                         struct btrfs_key location;
2240
2241                         ret = check_dir_conflict(root, backref->name,
2242                                                  backref->namelen,
2243                                                  backref->dir,
2244                                                  backref->index);
2245                         if (ret) {
2246                                 /*
2247                                  * let nlink fixing routine to handle it,
2248                                  * which can do it better.
2249                                  */
2250                                 ret = 0;
2251                                 break;
2252                         }
2253                         location.objectid = rec->ino;
2254                         location.type = BTRFS_INODE_ITEM_KEY;
2255                         location.offset = 0;
2256
2257                         trans = btrfs_start_transaction(root, 1);
2258                         if (IS_ERR(trans)) {
2259                                 ret = PTR_ERR(trans);
2260                                 break;
2261                         }
2262                         fprintf(stderr, "adding missing dir index/item pair "
2263                                 "for inode %llu\n",
2264                                 (unsigned long long)rec->ino);
2265                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2266                                                     backref->namelen,
2267                                                     backref->dir, &location,
2268                                                     imode_to_type(rec->imode),
2269                                                     backref->index);
2270                         BUG_ON(ret);
2271                         btrfs_commit_transaction(trans, root);
2272                         repaired++;
2273                 }
2274
2275                 if (!delete && (backref->found_inode_ref &&
2276                                 backref->found_dir_index &&
2277                                 backref->found_dir_item &&
2278                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2279                                 !rec->found_inode_item)) {
2280                         ret = create_inode_item(root, rec, backref, 0);
2281                         if (ret)
2282                                 break;
2283                         repaired++;
2284                 }
2285
2286         }
2287         return ret ? ret : repaired;
2288 }
2289
2290 /*
2291  * To determine the file type for nlink/inode_item repair
2292  *
2293  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2294  * Return -ENOENT if file type is not found.
2295  */
2296 static int find_file_type(struct inode_record *rec, u8 *type)
2297 {
2298         struct inode_backref *backref;
2299
2300         /* For inode item recovered case */
2301         if (rec->found_inode_item) {
2302                 *type = imode_to_type(rec->imode);
2303                 return 0;
2304         }
2305
2306         list_for_each_entry(backref, &rec->backrefs, list) {
2307                 if (backref->found_dir_index || backref->found_dir_item) {
2308                         *type = backref->filetype;
2309                         return 0;
2310                 }
2311         }
2312         return -ENOENT;
2313 }
2314
2315 /*
2316  * To determine the file name for nlink repair
2317  *
2318  * Return 0 if file name is found, set name and namelen.
2319  * Return -ENOENT if file name is not found.
2320  */
2321 static int find_file_name(struct inode_record *rec,
2322                           char *name, int *namelen)
2323 {
2324         struct inode_backref *backref;
2325
2326         list_for_each_entry(backref, &rec->backrefs, list) {
2327                 if (backref->found_dir_index || backref->found_dir_item ||
2328                     backref->found_inode_ref) {
2329                         memcpy(name, backref->name, backref->namelen);
2330                         *namelen = backref->namelen;
2331                         return 0;
2332                 }
2333         }
2334         return -ENOENT;
2335 }
2336
2337 /* Reset the nlink of the inode to the correct one */
2338 static int reset_nlink(struct btrfs_trans_handle *trans,
2339                        struct btrfs_root *root,
2340                        struct btrfs_path *path,
2341                        struct inode_record *rec)
2342 {
2343         struct inode_backref *backref;
2344         struct inode_backref *tmp;
2345         struct btrfs_key key;
2346         struct btrfs_inode_item *inode_item;
2347         int ret = 0;
2348
2349         /* We don't believe this either, reset it and iterate backref */
2350         rec->found_link = 0;
2351
2352         /* Remove all backref including the valid ones */
2353         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2354                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2355                                    backref->index, backref->name,
2356                                    backref->namelen, 0);
2357                 if (ret < 0)
2358                         goto out;
2359
2360                 /* remove invalid backref, so it won't be added back */
2361                 if (!(backref->found_dir_index &&
2362                       backref->found_dir_item &&
2363                       backref->found_inode_ref)) {
2364                         list_del(&backref->list);
2365                         free(backref);
2366                 } else {
2367                         rec->found_link++;
2368                 }
2369         }
2370
2371         /* Set nlink to 0 */
2372         key.objectid = rec->ino;
2373         key.type = BTRFS_INODE_ITEM_KEY;
2374         key.offset = 0;
2375         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2376         if (ret < 0)
2377                 goto out;
2378         if (ret > 0) {
2379                 ret = -ENOENT;
2380                 goto out;
2381         }
2382         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2383                                     struct btrfs_inode_item);
2384         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2385         btrfs_mark_buffer_dirty(path->nodes[0]);
2386         btrfs_release_path(path);
2387
2388         /*
2389          * Add back valid inode_ref/dir_item/dir_index,
2390          * add_link() will handle the nlink inc, so new nlink must be correct
2391          */
2392         list_for_each_entry(backref, &rec->backrefs, list) {
2393                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2394                                      backref->name, backref->namelen,
2395                                      backref->ref_type, &backref->index, 1);
2396                 if (ret < 0)
2397                         goto out;
2398         }
2399 out:
2400         btrfs_release_path(path);
2401         return ret;
2402 }
2403
2404 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2405                                struct btrfs_root *root,
2406                                struct btrfs_path *path,
2407                                struct inode_record *rec)
2408 {
2409         char *dir_name = "lost+found";
2410         char namebuf[BTRFS_NAME_LEN] = {0};
2411         u64 lost_found_ino;
2412         u32 mode = 0700;
2413         u8 type = 0;
2414         int namelen = 0;
2415         int name_recovered = 0;
2416         int type_recovered = 0;
2417         int ret = 0;
2418
2419         /*
2420          * Get file name and type first before these invalid inode ref
2421          * are deleted by remove_all_invalid_backref()
2422          */
2423         name_recovered = !find_file_name(rec, namebuf, &namelen);
2424         type_recovered = !find_file_type(rec, &type);
2425
2426         if (!name_recovered) {
2427                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2428                        rec->ino, rec->ino);
2429                 namelen = count_digits(rec->ino);
2430                 sprintf(namebuf, "%llu", rec->ino);
2431                 name_recovered = 1;
2432         }
2433         if (!type_recovered) {
2434                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2435                        rec->ino);
2436                 type = BTRFS_FT_REG_FILE;
2437                 type_recovered = 1;
2438         }
2439
2440         ret = reset_nlink(trans, root, path, rec);
2441         if (ret < 0) {
2442                 fprintf(stderr,
2443                         "Failed to reset nlink for inode %llu: %s\n",
2444                         rec->ino, strerror(-ret));
2445                 goto out;
2446         }
2447
2448         if (rec->found_link == 0) {
2449                 lost_found_ino = root->highest_inode;
2450                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2451                         ret = -EOVERFLOW;
2452                         goto out;
2453                 }
2454                 lost_found_ino++;
2455                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2456                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2457                                   mode);
2458                 if (ret < 0) {
2459                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2460                                 dir_name, strerror(-ret));
2461                         goto out;
2462                 }
2463                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2464                                      namebuf, namelen, type, NULL, 1);
2465                 /*
2466                  * Add ".INO" suffix several times to handle case where
2467                  * "FILENAME.INO" is already taken by another file.
2468                  */
2469                 while (ret == -EEXIST) {
2470                         /*
2471                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2472                          */
2473                         if (namelen + count_digits(rec->ino) + 1 >
2474                             BTRFS_NAME_LEN) {
2475                                 ret = -EFBIG;
2476                                 goto out;
2477                         }
2478                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2479                                  ".%llu", rec->ino);
2480                         namelen += count_digits(rec->ino) + 1;
2481                         ret = btrfs_add_link(trans, root, rec->ino,
2482                                              lost_found_ino, namebuf,
2483                                              namelen, type, NULL, 1);
2484                 }
2485                 if (ret < 0) {
2486                         fprintf(stderr,
2487                                 "Failed to link the inode %llu to %s dir: %s\n",
2488                                 rec->ino, dir_name, strerror(-ret));
2489                         goto out;
2490                 }
2491                 /*
2492                  * Just increase the found_link, don't actually add the
2493                  * backref. This will make things easier and this inode
2494                  * record will be freed after the repair is done.
2495                  * So fsck will not report problem about this inode.
2496                  */
2497                 rec->found_link++;
2498                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2499                        namelen, namebuf, dir_name);
2500         }
2501         printf("Fixed the nlink of inode %llu\n", rec->ino);
2502 out:
2503         /*
2504          * Clear the flag anyway, or we will loop forever for the same inode
2505          * as it will not be removed from the bad inode list and the dead loop
2506          * happens.
2507          */
2508         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2509         btrfs_release_path(path);
2510         return ret;
2511 }
2512
2513 /*
2514  * Check if there is any normal(reg or prealloc) file extent for given
2515  * ino.
2516  * This is used to determine the file type when neither its dir_index/item or
2517  * inode_item exists.
2518  *
2519  * This will *NOT* report error, if any error happens, just consider it does
2520  * not have any normal file extent.
2521  */
2522 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2523 {
2524         struct btrfs_path *path;
2525         struct btrfs_key key;
2526         struct btrfs_key found_key;
2527         struct btrfs_file_extent_item *fi;
2528         u8 type;
2529         int ret = 0;
2530
2531         path = btrfs_alloc_path();
2532         if (!path)
2533                 goto out;
2534         key.objectid = ino;
2535         key.type = BTRFS_EXTENT_DATA_KEY;
2536         key.offset = 0;
2537
2538         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2539         if (ret < 0) {
2540                 ret = 0;
2541                 goto out;
2542         }
2543         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2544                 ret = btrfs_next_leaf(root, path);
2545                 if (ret) {
2546                         ret = 0;
2547                         goto out;
2548                 }
2549         }
2550         while (1) {
2551                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2552                                       path->slots[0]);
2553                 if (found_key.objectid != ino ||
2554                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2555                         break;
2556                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2557                                     struct btrfs_file_extent_item);
2558                 type = btrfs_file_extent_type(path->nodes[0], fi);
2559                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2560                         ret = 1;
2561                         goto out;
2562                 }
2563         }
2564 out:
2565         btrfs_free_path(path);
2566         return ret;
2567 }
2568
2569 static u32 btrfs_type_to_imode(u8 type)
2570 {
2571         static u32 imode_by_btrfs_type[] = {
2572                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2573                 [BTRFS_FT_DIR]          = S_IFDIR,
2574                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2575                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2576                 [BTRFS_FT_FIFO]         = S_IFIFO,
2577                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2578                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2579         };
2580
2581         return imode_by_btrfs_type[(type)];
2582 }
2583
2584 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2585                                 struct btrfs_root *root,
2586                                 struct btrfs_path *path,
2587                                 struct inode_record *rec)
2588 {
2589         u8 filetype;
2590         u32 mode = 0700;
2591         int type_recovered = 0;
2592         int ret = 0;
2593
2594         printf("Trying to rebuild inode:%llu\n", rec->ino);
2595
2596         type_recovered = !find_file_type(rec, &filetype);
2597
2598         /*
2599          * Try to determine inode type if type not found.
2600          *
2601          * For found regular file extent, it must be FILE.
2602          * For found dir_item/index, it must be DIR.
2603          *
2604          * For undetermined one, use FILE as fallback.
2605          *
2606          * TODO:
2607          * 1. If found backref(inode_index/item is already handled) to it,
2608          *    it must be DIR.
2609          *    Need new inode-inode ref structure to allow search for that.
2610          */
2611         if (!type_recovered) {
2612                 if (rec->found_file_extent &&
2613                     find_normal_file_extent(root, rec->ino)) {
2614                         type_recovered = 1;
2615                         filetype = BTRFS_FT_REG_FILE;
2616                 } else if (rec->found_dir_item) {
2617                         type_recovered = 1;
2618                         filetype = BTRFS_FT_DIR;
2619                 } else if (!list_empty(&rec->orphan_extents)) {
2620                         type_recovered = 1;
2621                         filetype = BTRFS_FT_REG_FILE;
2622                 } else{
2623                         printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2624                                rec->ino);
2625                         type_recovered = 1;
2626                         filetype = BTRFS_FT_REG_FILE;
2627                 }
2628         }
2629
2630         ret = btrfs_new_inode(trans, root, rec->ino,
2631                               mode | btrfs_type_to_imode(filetype));
2632         if (ret < 0)
2633                 goto out;
2634
2635         /*
2636          * Here inode rebuild is done, we only rebuild the inode item,
2637          * don't repair the nlink(like move to lost+found).
2638          * That is the job of nlink repair.
2639          *
2640          * We just fill the record and return
2641          */
2642         rec->found_dir_item = 1;
2643         rec->imode = mode | btrfs_type_to_imode(filetype);
2644         rec->nlink = 0;
2645         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2646         /* Ensure the inode_nlinks repair function will be called */
2647         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2648 out:
2649         return ret;
2650 }
2651
2652 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2653                                       struct btrfs_root *root,
2654                                       struct btrfs_path *path,
2655                                       struct inode_record *rec)
2656 {
2657         struct orphan_data_extent *orphan;
2658         struct orphan_data_extent *tmp;
2659         int ret = 0;
2660
2661         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2662                 /*
2663                  * Check for conflicting file extents
2664                  *
2665                  * Here we don't know whether the extents is compressed or not,
2666                  * so we can only assume it not compressed nor data offset,
2667                  * and use its disk_len as extent length.
2668                  */
2669                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2670                                        orphan->offset, orphan->disk_len, 0);
2671                 btrfs_release_path(path);
2672                 if (ret < 0)
2673                         goto out;
2674                 if (!ret) {
2675                         fprintf(stderr,
2676                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2677                                 orphan->disk_bytenr, orphan->disk_len);
2678                         ret = btrfs_free_extent(trans,
2679                                         root->fs_info->extent_root,
2680                                         orphan->disk_bytenr, orphan->disk_len,
2681                                         0, root->objectid, orphan->objectid,
2682                                         orphan->offset);
2683                         if (ret < 0)
2684                                 goto out;
2685                 }
2686                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2687                                 orphan->offset, orphan->disk_bytenr,
2688                                 orphan->disk_len, orphan->disk_len);
2689                 if (ret < 0)
2690                         goto out;
2691
2692                 /* Update file size info */
2693                 rec->found_size += orphan->disk_len;
2694                 if (rec->found_size == rec->nbytes)
2695                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2696
2697                 /* Update the file extent hole info too */
2698                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2699                                            orphan->disk_len);
2700                 if (ret < 0)
2701                         goto out;
2702                 if (RB_EMPTY_ROOT(&rec->holes))
2703                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2704
2705                 list_del(&orphan->list);
2706                 free(orphan);
2707         }
2708         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2709 out:
2710         return ret;
2711 }
2712
2713 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2714                                         struct btrfs_root *root,
2715                                         struct btrfs_path *path,
2716                                         struct inode_record *rec)
2717 {
2718         struct rb_node *node;
2719         struct file_extent_hole *hole;
2720         int found = 0;
2721         int ret = 0;
2722
2723         node = rb_first(&rec->holes);
2724
2725         while (node) {
2726                 found = 1;
2727                 hole = rb_entry(node, struct file_extent_hole, node);
2728                 ret = btrfs_punch_hole(trans, root, rec->ino,
2729                                        hole->start, hole->len);
2730                 if (ret < 0)
2731                         goto out;
2732                 ret = del_file_extent_hole(&rec->holes, hole->start,
2733                                            hole->len);
2734                 if (ret < 0)
2735                         goto out;
2736                 if (RB_EMPTY_ROOT(&rec->holes))
2737                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2738                 node = rb_first(&rec->holes);
2739         }
2740         /* special case for a file losing all its file extent */
2741         if (!found) {
2742                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2743                                        round_up(rec->isize, root->sectorsize));
2744                 if (ret < 0)
2745                         goto out;
2746         }
2747         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2748                rec->ino, root->objectid);
2749 out:
2750         return ret;
2751 }
2752
2753 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2754 {
2755         struct btrfs_trans_handle *trans;
2756         struct btrfs_path *path;
2757         int ret = 0;
2758
2759         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2760                              I_ERR_NO_ORPHAN_ITEM |
2761                              I_ERR_LINK_COUNT_WRONG |
2762                              I_ERR_NO_INODE_ITEM |
2763                              I_ERR_FILE_EXTENT_ORPHAN |
2764                              I_ERR_FILE_EXTENT_DISCOUNT|
2765                              I_ERR_FILE_NBYTES_WRONG)))
2766                 return rec->errors;
2767
2768         path = btrfs_alloc_path();
2769         if (!path)
2770                 return -ENOMEM;
2771
2772         /*
2773          * For nlink repair, it may create a dir and add link, so
2774          * 2 for parent(256)'s dir_index and dir_item
2775          * 2 for lost+found dir's inode_item and inode_ref
2776          * 1 for the new inode_ref of the file
2777          * 2 for lost+found dir's dir_index and dir_item for the file
2778          */
2779         trans = btrfs_start_transaction(root, 7);
2780         if (IS_ERR(trans)) {
2781                 btrfs_free_path(path);
2782                 return PTR_ERR(trans);
2783         }
2784
2785         if (rec->errors & I_ERR_NO_INODE_ITEM)
2786                 ret = repair_inode_no_item(trans, root, path, rec);
2787         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2788                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2789         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2790                 ret = repair_inode_discount_extent(trans, root, path, rec);
2791         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2792                 ret = repair_inode_isize(trans, root, path, rec);
2793         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2794                 ret = repair_inode_orphan_item(trans, root, path, rec);
2795         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2796                 ret = repair_inode_nlinks(trans, root, path, rec);
2797         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2798                 ret = repair_inode_nbytes(trans, root, path, rec);
2799         btrfs_commit_transaction(trans, root);
2800         btrfs_free_path(path);
2801         return ret;
2802 }
2803
2804 static int check_inode_recs(struct btrfs_root *root,
2805                             struct cache_tree *inode_cache)
2806 {
2807         struct cache_extent *cache;
2808         struct ptr_node *node;
2809         struct inode_record *rec;
2810         struct inode_backref *backref;
2811         int stage = 0;
2812         int ret = 0;
2813         int err = 0;
2814         u64 error = 0;
2815         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2816
2817         if (btrfs_root_refs(&root->root_item) == 0) {
2818                 if (!cache_tree_empty(inode_cache))
2819                         fprintf(stderr, "warning line %d\n", __LINE__);
2820                 return 0;
2821         }
2822
2823         /*
2824          * We need to record the highest inode number for later 'lost+found'
2825          * dir creation.
2826          * We must select a ino not used/refered by any existing inode, or
2827          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2828          * this may cause 'lost+found' dir has wrong nlinks.
2829          */
2830         cache = last_cache_extent(inode_cache);
2831         if (cache) {
2832                 node = container_of(cache, struct ptr_node, cache);
2833                 rec = node->data;
2834                 if (rec->ino > root->highest_inode)
2835                         root->highest_inode = rec->ino;
2836         }
2837
2838         /*
2839          * We need to repair backrefs first because we could change some of the
2840          * errors in the inode recs.
2841          *
2842          * We also need to go through and delete invalid backrefs first and then
2843          * add the correct ones second.  We do this because we may get EEXIST
2844          * when adding back the correct index because we hadn't yet deleted the
2845          * invalid index.
2846          *
2847          * For example, if we were missing a dir index then the directories
2848          * isize would be wrong, so if we fixed the isize to what we thought it
2849          * would be and then fixed the backref we'd still have a invalid fs, so
2850          * we need to add back the dir index and then check to see if the isize
2851          * is still wrong.
2852          */
2853         while (stage < 3) {
2854                 stage++;
2855                 if (stage == 3 && !err)
2856                         break;
2857
2858                 cache = search_cache_extent(inode_cache, 0);
2859                 while (repair && cache) {
2860                         node = container_of(cache, struct ptr_node, cache);
2861                         rec = node->data;
2862                         cache = next_cache_extent(cache);
2863
2864                         /* Need to free everything up and rescan */
2865                         if (stage == 3) {
2866                                 remove_cache_extent(inode_cache, &node->cache);
2867                                 free(node);
2868                                 free_inode_rec(rec);
2869                                 continue;
2870                         }
2871
2872                         if (list_empty(&rec->backrefs))
2873                                 continue;
2874
2875                         ret = repair_inode_backrefs(root, rec, inode_cache,
2876                                                     stage == 1);
2877                         if (ret < 0) {
2878                                 err = ret;
2879                                 stage = 2;
2880                                 break;
2881                         } if (ret > 0) {
2882                                 err = -EAGAIN;
2883                         }
2884                 }
2885         }
2886         if (err)
2887                 return err;
2888
2889         rec = get_inode_rec(inode_cache, root_dirid, 0);
2890         if (rec) {
2891                 ret = check_root_dir(rec);
2892                 if (ret) {
2893                         fprintf(stderr, "root %llu root dir %llu error\n",
2894                                 (unsigned long long)root->root_key.objectid,
2895                                 (unsigned long long)root_dirid);
2896                         print_inode_error(root, rec);
2897                         error++;
2898                 }
2899         } else {
2900                 if (repair) {
2901                         struct btrfs_trans_handle *trans;
2902
2903                         trans = btrfs_start_transaction(root, 1);
2904                         if (IS_ERR(trans)) {
2905                                 err = PTR_ERR(trans);
2906                                 return err;
2907                         }
2908
2909                         fprintf(stderr,
2910                                 "root %llu missing its root dir, recreating\n",
2911                                 (unsigned long long)root->objectid);
2912
2913                         ret = btrfs_make_root_dir(trans, root, root_dirid);
2914                         BUG_ON(ret);
2915
2916                         btrfs_commit_transaction(trans, root);
2917                         return -EAGAIN;
2918                 }
2919
2920                 fprintf(stderr, "root %llu root dir %llu not found\n",
2921                         (unsigned long long)root->root_key.objectid,
2922                         (unsigned long long)root_dirid);
2923         }
2924
2925         while (1) {
2926                 cache = search_cache_extent(inode_cache, 0);
2927                 if (!cache)
2928                         break;
2929                 node = container_of(cache, struct ptr_node, cache);
2930                 rec = node->data;
2931                 remove_cache_extent(inode_cache, &node->cache);
2932                 free(node);
2933                 if (rec->ino == root_dirid ||
2934                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
2935                         free_inode_rec(rec);
2936                         continue;
2937                 }
2938
2939                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2940                         ret = check_orphan_item(root, rec->ino);
2941                         if (ret == 0)
2942                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2943                         if (can_free_inode_rec(rec)) {
2944                                 free_inode_rec(rec);
2945                                 continue;
2946                         }
2947                 }
2948
2949                 if (!rec->found_inode_item)
2950                         rec->errors |= I_ERR_NO_INODE_ITEM;
2951                 if (rec->found_link != rec->nlink)
2952                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2953                 if (repair) {
2954                         ret = try_repair_inode(root, rec);
2955                         if (ret == 0 && can_free_inode_rec(rec)) {
2956                                 free_inode_rec(rec);
2957                                 continue;
2958                         }
2959                         ret = 0;
2960                 }
2961
2962                 if (!(repair && ret == 0))
2963                         error++;
2964                 print_inode_error(root, rec);
2965                 list_for_each_entry(backref, &rec->backrefs, list) {
2966                         if (!backref->found_dir_item)
2967                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
2968                         if (!backref->found_dir_index)
2969                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
2970                         if (!backref->found_inode_ref)
2971                                 backref->errors |= REF_ERR_NO_INODE_REF;
2972                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
2973                                 " namelen %u name %s filetype %d errors %x",
2974                                 (unsigned long long)backref->dir,
2975                                 (unsigned long long)backref->index,
2976                                 backref->namelen, backref->name,
2977                                 backref->filetype, backref->errors);
2978                         print_ref_error(backref->errors);
2979                 }
2980                 free_inode_rec(rec);
2981         }
2982         return (error > 0) ? -1 : 0;
2983 }
2984
2985 static struct root_record *get_root_rec(struct cache_tree *root_cache,
2986                                         u64 objectid)
2987 {
2988         struct cache_extent *cache;
2989         struct root_record *rec = NULL;
2990         int ret;
2991
2992         cache = lookup_cache_extent(root_cache, objectid, 1);
2993         if (cache) {
2994                 rec = container_of(cache, struct root_record, cache);
2995         } else {
2996                 rec = calloc(1, sizeof(*rec));
2997                 rec->objectid = objectid;
2998                 INIT_LIST_HEAD(&rec->backrefs);
2999                 rec->cache.start = objectid;
3000                 rec->cache.size = 1;
3001
3002                 ret = insert_cache_extent(root_cache, &rec->cache);
3003                 BUG_ON(ret);
3004         }
3005         return rec;
3006 }
3007
3008 static struct root_backref *get_root_backref(struct root_record *rec,
3009                                              u64 ref_root, u64 dir, u64 index,
3010                                              const char *name, int namelen)
3011 {
3012         struct root_backref *backref;
3013
3014         list_for_each_entry(backref, &rec->backrefs, list) {
3015                 if (backref->ref_root != ref_root || backref->dir != dir ||
3016                     backref->namelen != namelen)
3017                         continue;
3018                 if (memcmp(name, backref->name, namelen))
3019                         continue;
3020                 return backref;
3021         }
3022
3023         backref = calloc(1, sizeof(*backref) + namelen + 1);
3024         backref->ref_root = ref_root;
3025         backref->dir = dir;
3026         backref->index = index;
3027         backref->namelen = namelen;
3028         memcpy(backref->name, name, namelen);
3029         backref->name[namelen] = '\0';
3030         list_add_tail(&backref->list, &rec->backrefs);
3031         return backref;
3032 }
3033
3034 static void free_root_record(struct cache_extent *cache)
3035 {
3036         struct root_record *rec;
3037         struct root_backref *backref;
3038
3039         rec = container_of(cache, struct root_record, cache);
3040         while (!list_empty(&rec->backrefs)) {
3041                 backref = list_entry(rec->backrefs.next,
3042                                      struct root_backref, list);
3043                 list_del(&backref->list);
3044                 free(backref);
3045         }
3046
3047         kfree(rec);
3048 }
3049
3050 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3051
3052 static int add_root_backref(struct cache_tree *root_cache,
3053                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3054                             const char *name, int namelen,
3055                             int item_type, int errors)
3056 {
3057         struct root_record *rec;
3058         struct root_backref *backref;
3059
3060         rec = get_root_rec(root_cache, root_id);
3061         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3062
3063         backref->errors |= errors;
3064
3065         if (item_type != BTRFS_DIR_ITEM_KEY) {
3066                 if (backref->found_dir_index || backref->found_back_ref ||
3067                     backref->found_forward_ref) {
3068                         if (backref->index != index)
3069                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3070                 } else {
3071                         backref->index = index;
3072                 }
3073         }
3074
3075         if (item_type == BTRFS_DIR_ITEM_KEY) {
3076                 if (backref->found_forward_ref)
3077                         rec->found_ref++;
3078                 backref->found_dir_item = 1;
3079         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3080                 backref->found_dir_index = 1;
3081         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3082                 if (backref->found_forward_ref)
3083                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3084                 else if (backref->found_dir_item)
3085                         rec->found_ref++;
3086                 backref->found_forward_ref = 1;
3087         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3088                 if (backref->found_back_ref)
3089                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3090                 backref->found_back_ref = 1;
3091         } else {
3092                 BUG_ON(1);
3093         }
3094
3095         if (backref->found_forward_ref && backref->found_dir_item)
3096                 backref->reachable = 1;
3097         return 0;
3098 }
3099
3100 static int merge_root_recs(struct btrfs_root *root,
3101                            struct cache_tree *src_cache,
3102                            struct cache_tree *dst_cache)
3103 {
3104         struct cache_extent *cache;
3105         struct ptr_node *node;
3106         struct inode_record *rec;
3107         struct inode_backref *backref;
3108         int ret = 0;
3109
3110         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3111                 free_inode_recs_tree(src_cache);
3112                 return 0;
3113         }
3114
3115         while (1) {
3116                 cache = search_cache_extent(src_cache, 0);
3117                 if (!cache)
3118                         break;
3119                 node = container_of(cache, struct ptr_node, cache);
3120                 rec = node->data;
3121                 remove_cache_extent(src_cache, &node->cache);
3122                 free(node);
3123
3124                 ret = is_child_root(root, root->objectid, rec->ino);
3125                 if (ret < 0)
3126                         break;
3127                 else if (ret == 0)
3128                         goto skip;
3129
3130                 list_for_each_entry(backref, &rec->backrefs, list) {
3131                         BUG_ON(backref->found_inode_ref);
3132                         if (backref->found_dir_item)
3133                                 add_root_backref(dst_cache, rec->ino,
3134                                         root->root_key.objectid, backref->dir,
3135                                         backref->index, backref->name,
3136                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3137                                         backref->errors);
3138                         if (backref->found_dir_index)
3139                                 add_root_backref(dst_cache, rec->ino,
3140                                         root->root_key.objectid, backref->dir,
3141                                         backref->index, backref->name,
3142                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3143                                         backref->errors);
3144                 }
3145 skip:
3146                 free_inode_rec(rec);
3147         }
3148         if (ret < 0)
3149                 return ret;
3150         return 0;
3151 }
3152
3153 static int check_root_refs(struct btrfs_root *root,
3154                            struct cache_tree *root_cache)
3155 {
3156         struct root_record *rec;
3157         struct root_record *ref_root;
3158         struct root_backref *backref;
3159         struct cache_extent *cache;
3160         int loop = 1;
3161         int ret;
3162         int error;
3163         int errors = 0;
3164
3165         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3166         rec->found_ref = 1;
3167
3168         /* fixme: this can not detect circular references */
3169         while (loop) {
3170                 loop = 0;
3171                 cache = search_cache_extent(root_cache, 0);
3172                 while (1) {
3173                         if (!cache)
3174                                 break;
3175                         rec = container_of(cache, struct root_record, cache);
3176                         cache = next_cache_extent(cache);
3177
3178                         if (rec->found_ref == 0)
3179                                 continue;
3180
3181                         list_for_each_entry(backref, &rec->backrefs, list) {
3182                                 if (!backref->reachable)
3183                                         continue;
3184
3185                                 ref_root = get_root_rec(root_cache,
3186                                                         backref->ref_root);
3187                                 if (ref_root->found_ref > 0)
3188                                         continue;
3189
3190                                 backref->reachable = 0;
3191                                 rec->found_ref--;
3192                                 if (rec->found_ref == 0)
3193                                         loop = 1;
3194                         }
3195                 }
3196         }
3197
3198         cache = search_cache_extent(root_cache, 0);
3199         while (1) {
3200                 if (!cache)
3201                         break;
3202                 rec = container_of(cache, struct root_record, cache);
3203                 cache = next_cache_extent(cache);
3204
3205                 if (rec->found_ref == 0 &&
3206                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3207                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3208                         ret = check_orphan_item(root->fs_info->tree_root,
3209                                                 rec->objectid);
3210                         if (ret == 0)
3211                                 continue;
3212
3213                         /*
3214                          * If we don't have a root item then we likely just have
3215                          * a dir item in a snapshot for this root but no actual
3216                          * ref key or anything so it's meaningless.
3217                          */
3218                         if (!rec->found_root_item)
3219                                 continue;
3220                         errors++;
3221                         fprintf(stderr, "fs tree %llu not referenced\n",
3222                                 (unsigned long long)rec->objectid);
3223                 }
3224
3225                 error = 0;
3226                 if (rec->found_ref > 0 && !rec->found_root_item)
3227                         error = 1;
3228                 list_for_each_entry(backref, &rec->backrefs, list) {
3229                         if (!backref->found_dir_item)
3230                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3231                         if (!backref->found_dir_index)
3232                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3233                         if (!backref->found_back_ref)
3234                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3235                         if (!backref->found_forward_ref)
3236                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3237                         if (backref->reachable && backref->errors)
3238                                 error = 1;
3239                 }
3240                 if (!error)
3241                         continue;
3242
3243                 errors++;
3244                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3245                         (unsigned long long)rec->objectid, rec->found_ref,
3246                          rec->found_root_item ? "" : "not found");
3247
3248                 list_for_each_entry(backref, &rec->backrefs, list) {
3249                         if (!backref->reachable)
3250                                 continue;
3251                         if (!backref->errors && rec->found_root_item)
3252                                 continue;
3253                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3254                                 " index %llu namelen %u name %s errors %x\n",
3255                                 (unsigned long long)backref->ref_root,
3256                                 (unsigned long long)backref->dir,
3257                                 (unsigned long long)backref->index,
3258                                 backref->namelen, backref->name,
3259                                 backref->errors);
3260                         print_ref_error(backref->errors);
3261                 }
3262         }
3263         return errors > 0 ? 1 : 0;
3264 }
3265
3266 static int process_root_ref(struct extent_buffer *eb, int slot,
3267                             struct btrfs_key *key,
3268                             struct cache_tree *root_cache)
3269 {
3270         u64 dirid;
3271         u64 index;
3272         u32 len;
3273         u32 name_len;
3274         struct btrfs_root_ref *ref;
3275         char namebuf[BTRFS_NAME_LEN];
3276         int error;
3277
3278         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3279
3280         dirid = btrfs_root_ref_dirid(eb, ref);
3281         index = btrfs_root_ref_sequence(eb, ref);
3282         name_len = btrfs_root_ref_name_len(eb, ref);
3283
3284         if (name_len <= BTRFS_NAME_LEN) {
3285                 len = name_len;
3286                 error = 0;
3287         } else {
3288                 len = BTRFS_NAME_LEN;
3289                 error = REF_ERR_NAME_TOO_LONG;
3290         }
3291         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3292
3293         if (key->type == BTRFS_ROOT_REF_KEY) {
3294                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3295                                  index, namebuf, len, key->type, error);
3296         } else {
3297                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3298                                  index, namebuf, len, key->type, error);
3299         }
3300         return 0;
3301 }
3302
3303 static void free_corrupt_block(struct cache_extent *cache)
3304 {
3305         struct btrfs_corrupt_block *corrupt;
3306
3307         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3308         free(corrupt);
3309 }
3310
3311 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3312
3313 /*
3314  * Repair the btree of the given root.
3315  *
3316  * The fix is to remove the node key in corrupt_blocks cache_tree.
3317  * and rebalance the tree.
3318  * After the fix, the btree should be writeable.
3319  */
3320 static int repair_btree(struct btrfs_root *root,
3321                         struct cache_tree *corrupt_blocks)
3322 {
3323         struct btrfs_trans_handle *trans;
3324         struct btrfs_path *path;
3325         struct btrfs_corrupt_block *corrupt;
3326         struct cache_extent *cache;
3327         struct btrfs_key key;
3328         u64 offset;
3329         int level;
3330         int ret = 0;
3331
3332         if (cache_tree_empty(corrupt_blocks))
3333                 return 0;
3334
3335         path = btrfs_alloc_path();
3336         if (!path)
3337                 return -ENOMEM;
3338
3339         trans = btrfs_start_transaction(root, 1);
3340         if (IS_ERR(trans)) {
3341                 ret = PTR_ERR(trans);
3342                 fprintf(stderr, "Error starting transaction: %s\n",
3343                         strerror(-ret));
3344                 goto out_free_path;
3345         }
3346         cache = first_cache_extent(corrupt_blocks);
3347         while (cache) {
3348                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3349                                        cache);
3350                 level = corrupt->level;
3351                 path->lowest_level = level;
3352                 key.objectid = corrupt->key.objectid;
3353                 key.type = corrupt->key.type;
3354                 key.offset = corrupt->key.offset;
3355
3356                 /*
3357                  * Here we don't want to do any tree balance, since it may
3358                  * cause a balance with corrupted brother leaf/node,
3359                  * so ins_len set to 0 here.
3360                  * Balance will be done after all corrupt node/leaf is deleted.
3361                  */
3362                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3363                 if (ret < 0)
3364                         goto out;
3365                 offset = btrfs_node_blockptr(path->nodes[level],
3366                                              path->slots[level]);
3367
3368                 /* Remove the ptr */
3369                 ret = btrfs_del_ptr(trans, root, path, level,
3370                                     path->slots[level]);
3371                 if (ret < 0)
3372                         goto out;
3373                 /*
3374                  * Remove the corresponding extent
3375                  * return value is not concerned.
3376                  */
3377                 btrfs_release_path(path);
3378                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3379                                         0, root->root_key.objectid,
3380                                         level - 1, 0);
3381                 cache = next_cache_extent(cache);
3382         }
3383
3384         /* Balance the btree using btrfs_search_slot() */
3385         cache = first_cache_extent(corrupt_blocks);
3386         while (cache) {
3387                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3388                                        cache);
3389                 memcpy(&key, &corrupt->key, sizeof(key));
3390                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3391                 if (ret < 0)
3392                         goto out;
3393                 /* return will always >0 since it won't find the item */
3394                 ret = 0;
3395                 btrfs_release_path(path);
3396                 cache = next_cache_extent(cache);
3397         }
3398 out:
3399         btrfs_commit_transaction(trans, root);
3400 out_free_path:
3401         btrfs_free_path(path);
3402         return ret;
3403 }
3404
3405 static int check_fs_root(struct btrfs_root *root,
3406                          struct cache_tree *root_cache,
3407                          struct walk_control *wc)
3408 {
3409         int ret = 0;
3410         int err = 0;
3411         int wret;
3412         int level;
3413         struct btrfs_path path;
3414         struct shared_node root_node;
3415         struct root_record *rec;
3416         struct btrfs_root_item *root_item = &root->root_item;
3417         struct cache_tree corrupt_blocks;
3418         struct orphan_data_extent *orphan;
3419         struct orphan_data_extent *tmp;
3420         enum btrfs_tree_block_status status;
3421
3422         /*
3423          * Reuse the corrupt_block cache tree to record corrupted tree block
3424          *
3425          * Unlike the usage in extent tree check, here we do it in a per
3426          * fs/subvol tree base.
3427          */
3428         cache_tree_init(&corrupt_blocks);
3429         root->fs_info->corrupt_blocks = &corrupt_blocks;
3430
3431         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3432                 rec = get_root_rec(root_cache, root->root_key.objectid);
3433                 if (btrfs_root_refs(root_item) > 0)
3434                         rec->found_root_item = 1;
3435         }
3436
3437         btrfs_init_path(&path);
3438         memset(&root_node, 0, sizeof(root_node));
3439         cache_tree_init(&root_node.root_cache);
3440         cache_tree_init(&root_node.inode_cache);
3441
3442         /* Move the orphan extent record to corresponding inode_record */
3443         list_for_each_entry_safe(orphan, tmp,
3444                                  &root->orphan_data_extents, list) {
3445                 struct inode_record *inode;
3446
3447                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3448                                       1);
3449                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3450                 list_move(&orphan->list, &inode->orphan_extents);
3451         }
3452
3453         level = btrfs_header_level(root->node);
3454         memset(wc->nodes, 0, sizeof(wc->nodes));
3455         wc->nodes[level] = &root_node;
3456         wc->active_node = level;
3457         wc->root_level = level;
3458
3459         /* We may not have checked the root block, lets do that now */
3460         if (btrfs_is_leaf(root->node))
3461                 status = btrfs_check_leaf(root, NULL, root->node);
3462         else
3463                 status = btrfs_check_node(root, NULL, root->node);
3464         if (status != BTRFS_TREE_BLOCK_CLEAN)
3465                 return -EIO;
3466
3467         if (btrfs_root_refs(root_item) > 0 ||
3468             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3469                 path.nodes[level] = root->node;
3470                 extent_buffer_get(root->node);
3471                 path.slots[level] = 0;
3472         } else {
3473                 struct btrfs_key key;
3474                 struct btrfs_disk_key found_key;
3475
3476                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3477                 level = root_item->drop_level;
3478                 path.lowest_level = level;
3479                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3480                 if (wret < 0)
3481                         goto skip_walking;
3482                 btrfs_node_key(path.nodes[level], &found_key,
3483                                 path.slots[level]);
3484                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3485                                         sizeof(found_key)));
3486         }
3487
3488         while (1) {
3489                 wret = walk_down_tree(root, &path, wc, &level);
3490                 if (wret < 0)
3491                         ret = wret;
3492                 if (wret != 0)
3493                         break;
3494
3495                 wret = walk_up_tree(root, &path, wc, &level);
3496                 if (wret < 0)
3497                         ret = wret;
3498                 if (wret != 0)
3499                         break;
3500         }
3501 skip_walking:
3502         btrfs_release_path(&path);
3503
3504         if (!cache_tree_empty(&corrupt_blocks)) {
3505                 struct cache_extent *cache;
3506                 struct btrfs_corrupt_block *corrupt;
3507
3508                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3509                        root->root_key.objectid);
3510                 cache = first_cache_extent(&corrupt_blocks);
3511                 while (cache) {
3512                         corrupt = container_of(cache,
3513                                                struct btrfs_corrupt_block,
3514                                                cache);
3515                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3516                                cache->start, corrupt->level,
3517                                corrupt->key.objectid, corrupt->key.type,
3518                                corrupt->key.offset);
3519                         cache = next_cache_extent(cache);
3520                 }
3521                 if (repair) {
3522                         printf("Try to repair the btree for root %llu\n",
3523                                root->root_key.objectid);
3524                         ret = repair_btree(root, &corrupt_blocks);
3525                         if (ret < 0)
3526                                 fprintf(stderr, "Failed to repair btree: %s\n",
3527                                         strerror(-ret));
3528                         if (!ret)
3529                                 printf("Btree for root %llu is fixed\n",
3530                                        root->root_key.objectid);
3531                 }
3532         }
3533
3534         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3535         if (err < 0)
3536                 ret = err;
3537
3538         if (root_node.current) {
3539                 root_node.current->checked = 1;
3540                 maybe_free_inode_rec(&root_node.inode_cache,
3541                                 root_node.current);
3542         }
3543
3544         err = check_inode_recs(root, &root_node.inode_cache);
3545         if (!ret)
3546                 ret = err;
3547
3548         free_corrupt_blocks_tree(&corrupt_blocks);
3549         root->fs_info->corrupt_blocks = NULL;
3550         free_orphan_data_extents(&root->orphan_data_extents);
3551         return ret;
3552 }
3553
3554 static int fs_root_objectid(u64 objectid)
3555 {
3556         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3557             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3558                 return 1;
3559         return is_fstree(objectid);
3560 }
3561
3562 static int check_fs_roots(struct btrfs_root *root,
3563                           struct cache_tree *root_cache)
3564 {
3565         struct btrfs_path path;
3566         struct btrfs_key key;
3567         struct walk_control wc;
3568         struct extent_buffer *leaf, *tree_node;
3569         struct btrfs_root *tmp_root;
3570         struct btrfs_root *tree_root = root->fs_info->tree_root;
3571         int ret;
3572         int err = 0;
3573
3574         if (ctx.progress_enabled) {
3575                 ctx.tp = TASK_FS_ROOTS;
3576                 task_start(ctx.info);
3577         }
3578
3579         /*
3580          * Just in case we made any changes to the extent tree that weren't
3581          * reflected into the free space cache yet.
3582          */
3583         if (repair)
3584                 reset_cached_block_groups(root->fs_info);
3585         memset(&wc, 0, sizeof(wc));
3586         cache_tree_init(&wc.shared);
3587         btrfs_init_path(&path);
3588
3589 again:
3590         key.offset = 0;
3591         key.objectid = 0;
3592         key.type = BTRFS_ROOT_ITEM_KEY;
3593         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3594         if (ret < 0) {
3595                 err = 1;
3596                 goto out;
3597         }
3598         tree_node = tree_root->node;
3599         while (1) {
3600                 if (tree_node != tree_root->node) {
3601                         free_root_recs_tree(root_cache);
3602                         btrfs_release_path(&path);
3603                         goto again;
3604                 }
3605                 leaf = path.nodes[0];
3606                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3607                         ret = btrfs_next_leaf(tree_root, &path);
3608                         if (ret) {
3609                                 if (ret < 0)
3610                                         err = 1;
3611                                 break;
3612                         }
3613                         leaf = path.nodes[0];
3614                 }
3615                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3616                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3617                     fs_root_objectid(key.objectid)) {
3618                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3619                                 tmp_root = btrfs_read_fs_root_no_cache(
3620                                                 root->fs_info, &key);
3621                         } else {
3622                                 key.offset = (u64)-1;
3623                                 tmp_root = btrfs_read_fs_root(
3624                                                 root->fs_info, &key);
3625                         }
3626                         if (IS_ERR(tmp_root)) {
3627                                 err = 1;
3628                                 goto next;
3629                         }
3630                         ret = check_fs_root(tmp_root, root_cache, &wc);
3631                         if (ret == -EAGAIN) {
3632                                 free_root_recs_tree(root_cache);
3633                                 btrfs_release_path(&path);
3634                                 goto again;
3635                         }
3636                         if (ret)
3637                                 err = 1;
3638                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3639                                 btrfs_free_fs_root(tmp_root);
3640                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3641                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3642                         process_root_ref(leaf, path.slots[0], &key,
3643                                          root_cache);
3644                 }
3645 next:
3646                 path.slots[0]++;
3647         }
3648 out:
3649         btrfs_release_path(&path);
3650         if (err)
3651                 free_extent_cache_tree(&wc.shared);
3652         if (!cache_tree_empty(&wc.shared))
3653                 fprintf(stderr, "warning line %d\n", __LINE__);
3654
3655         task_stop(ctx.info);
3656
3657         return err;
3658 }
3659
3660 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3661 {
3662         struct list_head *cur = rec->backrefs.next;
3663         struct extent_backref *back;
3664         struct tree_backref *tback;
3665         struct data_backref *dback;
3666         u64 found = 0;
3667         int err = 0;
3668
3669         while(cur != &rec->backrefs) {
3670                 back = list_entry(cur, struct extent_backref, list);
3671                 cur = cur->next;
3672                 if (!back->found_extent_tree) {
3673                         err = 1;
3674                         if (!print_errs)
3675                                 goto out;
3676                         if (back->is_data) {
3677                                 dback = (struct data_backref *)back;
3678                                 fprintf(stderr, "Backref %llu %s %llu"
3679                                         " owner %llu offset %llu num_refs %lu"
3680                                         " not found in extent tree\n",
3681                                         (unsigned long long)rec->start,
3682                                         back->full_backref ?
3683                                         "parent" : "root",
3684                                         back->full_backref ?
3685                                         (unsigned long long)dback->parent:
3686                                         (unsigned long long)dback->root,
3687                                         (unsigned long long)dback->owner,
3688                                         (unsigned long long)dback->offset,
3689                                         (unsigned long)dback->num_refs);
3690                         } else {
3691                                 tback = (struct tree_backref *)back;
3692                                 fprintf(stderr, "Backref %llu parent %llu"
3693                                         " root %llu not found in extent tree\n",
3694                                         (unsigned long long)rec->start,
3695                                         (unsigned long long)tback->parent,
3696                                         (unsigned long long)tback->root);
3697                         }
3698                 }
3699                 if (!back->is_data && !back->found_ref) {
3700                         err = 1;
3701                         if (!print_errs)
3702                                 goto out;
3703                         tback = (struct tree_backref *)back;
3704                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3705                                 (unsigned long long)rec->start,
3706                                 back->full_backref ? "parent" : "root",
3707                                 back->full_backref ?
3708                                 (unsigned long long)tback->parent :
3709                                 (unsigned long long)tback->root, back);
3710                 }
3711                 if (back->is_data) {
3712                         dback = (struct data_backref *)back;
3713                         if (dback->found_ref != dback->num_refs) {
3714                                 err = 1;
3715                                 if (!print_errs)
3716                                         goto out;
3717                                 fprintf(stderr, "Incorrect local backref count"
3718                                         " on %llu %s %llu owner %llu"
3719                                         " offset %llu found %u wanted %u back %p\n",
3720                                         (unsigned long long)rec->start,
3721                                         back->full_backref ?
3722                                         "parent" : "root",
3723                                         back->full_backref ?
3724                                         (unsigned long long)dback->parent:
3725                                         (unsigned long long)dback->root,
3726                                         (unsigned long long)dback->owner,
3727                                         (unsigned long long)dback->offset,
3728                                         dback->found_ref, dback->num_refs, back);
3729                         }
3730                         if (dback->disk_bytenr != rec->start) {
3731                                 err = 1;
3732                                 if (!print_errs)
3733                                         goto out;
3734                                 fprintf(stderr, "Backref disk bytenr does not"
3735                                         " match extent record, bytenr=%llu, "
3736                                         "ref bytenr=%llu\n",
3737                                         (unsigned long long)rec->start,
3738                                         (unsigned long long)dback->disk_bytenr);
3739                         }
3740
3741                         if (dback->bytes != rec->nr) {
3742                                 err = 1;
3743                                 if (!print_errs)
3744                                         goto out;
3745                                 fprintf(stderr, "Backref bytes do not match "
3746                                         "extent backref, bytenr=%llu, ref "
3747                                         "bytes=%llu, backref bytes=%llu\n",
3748                                         (unsigned long long)rec->start,
3749                                         (unsigned long long)rec->nr,
3750                                         (unsigned long long)dback->bytes);
3751                         }
3752                 }
3753                 if (!back->is_data) {
3754                         found += 1;
3755                 } else {
3756                         dback = (struct data_backref *)back;
3757                         found += dback->found_ref;
3758                 }
3759         }
3760         if (found != rec->refs) {
3761                 err = 1;
3762                 if (!print_errs)
3763                         goto out;
3764                 fprintf(stderr, "Incorrect global backref count "
3765                         "on %llu found %llu wanted %llu\n",
3766                         (unsigned long long)rec->start,
3767                         (unsigned long long)found,
3768                         (unsigned long long)rec->refs);
3769         }
3770 out:
3771         return err;
3772 }
3773
3774 static int free_all_extent_backrefs(struct extent_record *rec)
3775 {
3776         struct extent_backref *back;
3777         struct list_head *cur;
3778         while (!list_empty(&rec->backrefs)) {
3779                 cur = rec->backrefs.next;
3780                 back = list_entry(cur, struct extent_backref, list);
3781                 list_del(cur);
3782                 free(back);
3783         }
3784         return 0;
3785 }
3786
3787 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3788                                      struct cache_tree *extent_cache)
3789 {
3790         struct cache_extent *cache;
3791         struct extent_record *rec;
3792
3793         while (1) {
3794                 cache = first_cache_extent(extent_cache);
3795                 if (!cache)
3796                         break;
3797                 rec = container_of(cache, struct extent_record, cache);
3798                 remove_cache_extent(extent_cache, cache);
3799                 free_all_extent_backrefs(rec);
3800                 free(rec);
3801         }
3802 }
3803
3804 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3805                                  struct extent_record *rec)
3806 {
3807         if (rec->content_checked && rec->owner_ref_checked &&
3808             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3809             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3810             !rec->bad_full_backref && !rec->crossing_stripes &&
3811             !rec->wrong_chunk_type) {
3812                 remove_cache_extent(extent_cache, &rec->cache);
3813                 free_all_extent_backrefs(rec);
3814                 list_del_init(&rec->list);
3815                 free(rec);
3816         }
3817         return 0;
3818 }
3819
3820 static int check_owner_ref(struct btrfs_root *root,
3821                             struct extent_record *rec,
3822                             struct extent_buffer *buf)
3823 {
3824         struct extent_backref *node;
3825         struct tree_backref *back;
3826         struct btrfs_root *ref_root;
3827         struct btrfs_key key;
3828         struct btrfs_path path;
3829         struct extent_buffer *parent;
3830         int level;
3831         int found = 0;
3832         int ret;
3833
3834         list_for_each_entry(node, &rec->backrefs, list) {
3835                 if (node->is_data)
3836                         continue;
3837                 if (!node->found_ref)
3838                         continue;
3839                 if (node->full_backref)
3840                         continue;
3841                 back = (struct tree_backref *)node;
3842                 if (btrfs_header_owner(buf) == back->root)
3843                         return 0;
3844         }
3845         BUG_ON(rec->is_root);
3846
3847         /* try to find the block by search corresponding fs tree */
3848         key.objectid = btrfs_header_owner(buf);
3849         key.type = BTRFS_ROOT_ITEM_KEY;
3850         key.offset = (u64)-1;
3851
3852         ref_root = btrfs_read_fs_root(root->fs_info, &key);
3853         if (IS_ERR(ref_root))
3854                 return 1;
3855
3856         level = btrfs_header_level(buf);
3857         if (level == 0)
3858                 btrfs_item_key_to_cpu(buf, &key, 0);
3859         else
3860                 btrfs_node_key_to_cpu(buf, &key, 0);
3861
3862         btrfs_init_path(&path);
3863         path.lowest_level = level + 1;
3864         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3865         if (ret < 0)
3866                 return 0;
3867
3868         parent = path.nodes[level + 1];
3869         if (parent && buf->start == btrfs_node_blockptr(parent,
3870                                                         path.slots[level + 1]))
3871                 found = 1;
3872
3873         btrfs_release_path(&path);
3874         return found ? 0 : 1;
3875 }
3876
3877 static int is_extent_tree_record(struct extent_record *rec)
3878 {
3879         struct list_head *cur = rec->backrefs.next;
3880         struct extent_backref *node;
3881         struct tree_backref *back;
3882         int is_extent = 0;
3883
3884         while(cur != &rec->backrefs) {
3885                 node = list_entry(cur, struct extent_backref, list);
3886                 cur = cur->next;
3887                 if (node->is_data)
3888                         return 0;
3889                 back = (struct tree_backref *)node;
3890                 if (node->full_backref)
3891                         return 0;
3892                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3893                         is_extent = 1;
3894         }
3895         return is_extent;
3896 }
3897
3898
3899 static int record_bad_block_io(struct btrfs_fs_info *info,
3900                                struct cache_tree *extent_cache,
3901                                u64 start, u64 len)
3902 {
3903         struct extent_record *rec;
3904         struct cache_extent *cache;
3905         struct btrfs_key key;
3906
3907         cache = lookup_cache_extent(extent_cache, start, len);
3908         if (!cache)
3909                 return 0;
3910
3911         rec = container_of(cache, struct extent_record, cache);
3912         if (!is_extent_tree_record(rec))
3913                 return 0;
3914
3915         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3916         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
3917 }
3918
3919 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3920                        struct extent_buffer *buf, int slot)
3921 {
3922         if (btrfs_header_level(buf)) {
3923                 struct btrfs_key_ptr ptr1, ptr2;
3924
3925                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3926                                    sizeof(struct btrfs_key_ptr));
3927                 read_extent_buffer(buf, &ptr2,
3928                                    btrfs_node_key_ptr_offset(slot + 1),
3929                                    sizeof(struct btrfs_key_ptr));
3930                 write_extent_buffer(buf, &ptr1,
3931                                     btrfs_node_key_ptr_offset(slot + 1),
3932                                     sizeof(struct btrfs_key_ptr));
3933                 write_extent_buffer(buf, &ptr2,
3934                                     btrfs_node_key_ptr_offset(slot),
3935                                     sizeof(struct btrfs_key_ptr));
3936                 if (slot == 0) {
3937                         struct btrfs_disk_key key;
3938                         btrfs_node_key(buf, &key, 0);
3939                         btrfs_fixup_low_keys(root, path, &key,
3940                                              btrfs_header_level(buf) + 1);
3941                 }
3942         } else {
3943                 struct btrfs_item *item1, *item2;
3944                 struct btrfs_key k1, k2;
3945                 char *item1_data, *item2_data;
3946                 u32 item1_offset, item2_offset, item1_size, item2_size;
3947
3948                 item1 = btrfs_item_nr(slot);
3949                 item2 = btrfs_item_nr(slot + 1);
3950                 btrfs_item_key_to_cpu(buf, &k1, slot);
3951                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
3952                 item1_offset = btrfs_item_offset(buf, item1);
3953                 item2_offset = btrfs_item_offset(buf, item2);
3954                 item1_size = btrfs_item_size(buf, item1);
3955                 item2_size = btrfs_item_size(buf, item2);
3956
3957                 item1_data = malloc(item1_size);
3958                 if (!item1_data)
3959                         return -ENOMEM;
3960                 item2_data = malloc(item2_size);
3961                 if (!item2_data) {
3962                         free(item1_data);
3963                         return -ENOMEM;
3964                 }
3965
3966                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
3967                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
3968
3969                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
3970                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
3971                 free(item1_data);
3972                 free(item2_data);
3973
3974                 btrfs_set_item_offset(buf, item1, item2_offset);
3975                 btrfs_set_item_offset(buf, item2, item1_offset);
3976                 btrfs_set_item_size(buf, item1, item2_size);
3977                 btrfs_set_item_size(buf, item2, item1_size);
3978
3979                 path->slots[0] = slot;
3980                 btrfs_set_item_key_unsafe(root, path, &k2);
3981                 path->slots[0] = slot + 1;
3982                 btrfs_set_item_key_unsafe(root, path, &k1);
3983         }
3984         return 0;
3985 }
3986
3987 static int fix_key_order(struct btrfs_trans_handle *trans,
3988                          struct btrfs_root *root,
3989                          struct btrfs_path *path)
3990 {
3991         struct extent_buffer *buf;
3992         struct btrfs_key k1, k2;
3993         int i;
3994         int level = path->lowest_level;
3995         int ret = -EIO;
3996
3997         buf = path->nodes[level];
3998         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
3999                 if (level) {
4000                         btrfs_node_key_to_cpu(buf, &k1, i);
4001                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4002                 } else {
4003                         btrfs_item_key_to_cpu(buf, &k1, i);
4004                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4005                 }
4006                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4007                         continue;
4008                 ret = swap_values(root, path, buf, i);
4009                 if (ret)
4010                         break;
4011                 btrfs_mark_buffer_dirty(buf);
4012                 i = 0;
4013         }
4014         return ret;
4015 }
4016
4017 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4018                              struct btrfs_root *root,
4019                              struct btrfs_path *path,
4020                              struct extent_buffer *buf, int slot)
4021 {
4022         struct btrfs_key key;
4023         int nritems = btrfs_header_nritems(buf);
4024
4025         btrfs_item_key_to_cpu(buf, &key, slot);
4026
4027         /* These are all the keys we can deal with missing. */
4028         if (key.type != BTRFS_DIR_INDEX_KEY &&
4029             key.type != BTRFS_EXTENT_ITEM_KEY &&
4030             key.type != BTRFS_METADATA_ITEM_KEY &&
4031             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4032             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4033                 return -1;
4034
4035         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4036                (unsigned long long)key.objectid, key.type,
4037                (unsigned long long)key.offset, slot, buf->start);
4038         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4039                               btrfs_item_nr_offset(slot + 1),
4040                               sizeof(struct btrfs_item) *
4041                               (nritems - slot - 1));
4042         btrfs_set_header_nritems(buf, nritems - 1);
4043         if (slot == 0) {
4044                 struct btrfs_disk_key disk_key;
4045
4046                 btrfs_item_key(buf, &disk_key, 0);
4047                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4048         }
4049         btrfs_mark_buffer_dirty(buf);
4050         return 0;
4051 }
4052
4053 static int fix_item_offset(struct btrfs_trans_handle *trans,
4054                            struct btrfs_root *root,
4055                            struct btrfs_path *path)
4056 {
4057         struct extent_buffer *buf;
4058         int i;
4059         int ret = 0;
4060
4061         /* We should only get this for leaves */
4062         BUG_ON(path->lowest_level);
4063         buf = path->nodes[0];
4064 again:
4065         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4066                 unsigned int shift = 0, offset;
4067
4068                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4069                     BTRFS_LEAF_DATA_SIZE(root)) {
4070                         if (btrfs_item_end_nr(buf, i) >
4071                             BTRFS_LEAF_DATA_SIZE(root)) {
4072                                 ret = delete_bogus_item(trans, root, path,
4073                                                         buf, i);
4074                                 if (!ret)
4075                                         goto again;
4076                                 fprintf(stderr, "item is off the end of the "
4077                                         "leaf, can't fix\n");
4078                                 ret = -EIO;
4079                                 break;
4080                         }
4081                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4082                                 btrfs_item_end_nr(buf, i);
4083                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4084                            btrfs_item_offset_nr(buf, i - 1)) {
4085                         if (btrfs_item_end_nr(buf, i) >
4086                             btrfs_item_offset_nr(buf, i - 1)) {
4087                                 ret = delete_bogus_item(trans, root, path,
4088                                                         buf, i);
4089                                 if (!ret)
4090                                         goto again;
4091                                 fprintf(stderr, "items overlap, can't fix\n");
4092                                 ret = -EIO;
4093                                 break;
4094                         }
4095                         shift = btrfs_item_offset_nr(buf, i - 1) -
4096                                 btrfs_item_end_nr(buf, i);
4097                 }
4098                 if (!shift)
4099                         continue;
4100
4101                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4102                        i, shift, (unsigned long long)buf->start);
4103                 offset = btrfs_item_offset_nr(buf, i);
4104                 memmove_extent_buffer(buf,
4105                                       btrfs_leaf_data(buf) + offset + shift,
4106                                       btrfs_leaf_data(buf) + offset,
4107                                       btrfs_item_size_nr(buf, i));
4108                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4109                                       offset + shift);
4110                 btrfs_mark_buffer_dirty(buf);
4111         }
4112
4113         /*
4114          * We may have moved things, in which case we want to exit so we don't
4115          * write those changes out.  Once we have proper abort functionality in
4116          * progs this can be changed to something nicer.
4117          */
4118         BUG_ON(ret);
4119         return ret;
4120 }
4121
4122 /*
4123  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4124  * then just return -EIO.
4125  */
4126 static int try_to_fix_bad_block(struct btrfs_root *root,
4127                                 struct extent_buffer *buf,
4128                                 enum btrfs_tree_block_status status)
4129 {
4130         struct btrfs_trans_handle *trans;
4131         struct ulist *roots;
4132         struct ulist_node *node;
4133         struct btrfs_root *search_root;
4134         struct btrfs_path *path;
4135         struct ulist_iterator iter;
4136         struct btrfs_key root_key, key;
4137         int ret;
4138
4139         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4140             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4141                 return -EIO;
4142
4143         path = btrfs_alloc_path();
4144         if (!path)
4145                 return -EIO;
4146
4147         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4148                                    0, &roots);
4149         if (ret) {
4150                 btrfs_free_path(path);
4151                 return -EIO;
4152         }
4153
4154         ULIST_ITER_INIT(&iter);
4155         while ((node = ulist_next(roots, &iter))) {
4156                 root_key.objectid = node->val;
4157                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4158                 root_key.offset = (u64)-1;
4159
4160                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4161                 if (IS_ERR(root)) {
4162                         ret = -EIO;
4163                         break;
4164                 }
4165
4166
4167                 trans = btrfs_start_transaction(search_root, 0);
4168                 if (IS_ERR(trans)) {
4169                         ret = PTR_ERR(trans);
4170                         break;
4171                 }
4172
4173                 path->lowest_level = btrfs_header_level(buf);
4174                 path->skip_check_block = 1;
4175                 if (path->lowest_level)
4176                         btrfs_node_key_to_cpu(buf, &key, 0);
4177                 else
4178                         btrfs_item_key_to_cpu(buf, &key, 0);
4179                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4180                 if (ret) {
4181                         ret = -EIO;
4182                         btrfs_commit_transaction(trans, search_root);
4183                         break;
4184                 }
4185                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4186                         ret = fix_key_order(trans, search_root, path);
4187                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4188                         ret = fix_item_offset(trans, search_root, path);
4189                 if (ret) {
4190                         btrfs_commit_transaction(trans, search_root);
4191                         break;
4192                 }
4193                 btrfs_release_path(path);
4194                 btrfs_commit_transaction(trans, search_root);
4195         }
4196         ulist_free(roots);
4197         btrfs_free_path(path);
4198         return ret;
4199 }
4200
4201 static int check_block(struct btrfs_root *root,
4202                        struct cache_tree *extent_cache,
4203                        struct extent_buffer *buf, u64 flags)
4204 {
4205         struct extent_record *rec;
4206         struct cache_extent *cache;
4207         struct btrfs_key key;
4208         enum btrfs_tree_block_status status;
4209         int ret = 0;
4210         int level;
4211
4212         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4213         if (!cache)
4214                 return 1;
4215         rec = container_of(cache, struct extent_record, cache);
4216         rec->generation = btrfs_header_generation(buf);
4217
4218         level = btrfs_header_level(buf);
4219         if (btrfs_header_nritems(buf) > 0) {
4220
4221                 if (level == 0)
4222                         btrfs_item_key_to_cpu(buf, &key, 0);
4223                 else
4224                         btrfs_node_key_to_cpu(buf, &key, 0);
4225
4226                 rec->info_objectid = key.objectid;
4227         }
4228         rec->info_level = level;
4229
4230         if (btrfs_is_leaf(buf))
4231                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4232         else
4233                 status = btrfs_check_node(root, &rec->parent_key, buf);
4234
4235         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4236                 if (repair)
4237                         status = try_to_fix_bad_block(root, buf, status);
4238                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4239                         ret = -EIO;
4240                         fprintf(stderr, "bad block %llu\n",
4241                                 (unsigned long long)buf->start);
4242                 } else {
4243                         /*
4244                          * Signal to callers we need to start the scan over
4245                          * again since we'll have cow'ed blocks.
4246                          */
4247                         ret = -EAGAIN;
4248                 }
4249         } else {
4250                 rec->content_checked = 1;
4251                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4252                         rec->owner_ref_checked = 1;
4253                 else {
4254                         ret = check_owner_ref(root, rec, buf);
4255                         if (!ret)
4256                                 rec->owner_ref_checked = 1;
4257                 }
4258         }
4259         if (!ret)
4260                 maybe_free_extent_rec(extent_cache, rec);
4261         return ret;
4262 }
4263
4264 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4265                                                 u64 parent, u64 root)
4266 {
4267         struct list_head *cur = rec->backrefs.next;
4268         struct extent_backref *node;
4269         struct tree_backref *back;
4270
4271         while(cur != &rec->backrefs) {
4272                 node = list_entry(cur, struct extent_backref, list);
4273                 cur = cur->next;
4274                 if (node->is_data)
4275                         continue;
4276                 back = (struct tree_backref *)node;
4277                 if (parent > 0) {
4278                         if (!node->full_backref)
4279                                 continue;
4280                         if (parent == back->parent)
4281                                 return back;
4282                 } else {
4283                         if (node->full_backref)
4284                                 continue;
4285                         if (back->root == root)
4286                                 return back;
4287                 }
4288         }
4289         return NULL;
4290 }
4291
4292 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4293                                                 u64 parent, u64 root)
4294 {
4295         struct tree_backref *ref = malloc(sizeof(*ref));
4296         memset(&ref->node, 0, sizeof(ref->node));
4297         if (parent > 0) {
4298                 ref->parent = parent;
4299                 ref->node.full_backref = 1;
4300         } else {
4301                 ref->root = root;
4302                 ref->node.full_backref = 0;
4303         }
4304         list_add_tail(&ref->node.list, &rec->backrefs);
4305
4306         return ref;
4307 }
4308
4309 static struct data_backref *find_data_backref(struct extent_record *rec,
4310                                                 u64 parent, u64 root,
4311                                                 u64 owner, u64 offset,
4312                                                 int found_ref,
4313                                                 u64 disk_bytenr, u64 bytes)
4314 {
4315         struct list_head *cur = rec->backrefs.next;
4316         struct extent_backref *node;
4317         struct data_backref *back;
4318
4319         while(cur != &rec->backrefs) {
4320                 node = list_entry(cur, struct extent_backref, list);
4321                 cur = cur->next;
4322                 if (!node->is_data)
4323                         continue;
4324                 back = (struct data_backref *)node;
4325                 if (parent > 0) {
4326                         if (!node->full_backref)
4327                                 continue;
4328                         if (parent == back->parent)
4329                                 return back;
4330                 } else {
4331                         if (node->full_backref)
4332                                 continue;
4333                         if (back->root == root && back->owner == owner &&
4334                             back->offset == offset) {
4335                                 if (found_ref && node->found_ref &&
4336                                     (back->bytes != bytes ||
4337                                     back->disk_bytenr != disk_bytenr))
4338                                         continue;
4339                                 return back;
4340                         }
4341                 }
4342         }
4343         return NULL;
4344 }
4345
4346 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4347                                                 u64 parent, u64 root,
4348                                                 u64 owner, u64 offset,
4349                                                 u64 max_size)
4350 {
4351         struct data_backref *ref = malloc(sizeof(*ref));
4352         memset(&ref->node, 0, sizeof(ref->node));
4353         ref->node.is_data = 1;
4354
4355         if (parent > 0) {
4356                 ref->parent = parent;
4357                 ref->owner = 0;
4358                 ref->offset = 0;
4359                 ref->node.full_backref = 1;
4360         } else {
4361                 ref->root = root;
4362                 ref->owner = owner;
4363                 ref->offset = offset;
4364                 ref->node.full_backref = 0;
4365         }
4366         ref->bytes = max_size;
4367         ref->found_ref = 0;
4368         ref->num_refs = 0;
4369         list_add_tail(&ref->node.list, &rec->backrefs);
4370         if (max_size > rec->max_size)
4371                 rec->max_size = max_size;
4372         return ref;
4373 }
4374
4375 /* Check if the type of extent matches with its chunk */
4376 static void check_extent_type(struct extent_record *rec)
4377 {
4378         struct btrfs_block_group_cache *bg_cache;
4379
4380         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4381         if (!bg_cache)
4382                 return;
4383
4384         /* data extent, check chunk directly*/
4385         if (!rec->metadata) {
4386                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4387                         rec->wrong_chunk_type = 1;
4388                 return;
4389         }
4390
4391         /* metadata extent, check the obvious case first */
4392         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4393                                  BTRFS_BLOCK_GROUP_METADATA))) {
4394                 rec->wrong_chunk_type = 1;
4395                 return;
4396         }
4397
4398         /*
4399          * Check SYSTEM extent, as it's also marked as metadata, we can only
4400          * make sure it's a SYSTEM extent by its backref
4401          */
4402         if (!list_empty(&rec->backrefs)) {
4403                 struct extent_backref *node;
4404                 struct tree_backref *tback;
4405                 u64 bg_type;
4406
4407                 node = list_entry(rec->backrefs.next, struct extent_backref,
4408                                   list);
4409                 if (node->is_data) {
4410                         /* tree block shouldn't have data backref */
4411                         rec->wrong_chunk_type = 1;
4412                         return;
4413                 }
4414                 tback = container_of(node, struct tree_backref, node);
4415
4416                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4417                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4418                 else
4419                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4420                 if (!(bg_cache->flags & bg_type))
4421                         rec->wrong_chunk_type = 1;
4422         }
4423 }
4424
4425 static int add_extent_rec(struct cache_tree *extent_cache,
4426                           struct btrfs_key *parent_key, u64 parent_gen,
4427                           u64 start, u64 nr, u64 extent_item_refs,
4428                           int is_root, int inc_ref, int set_checked,
4429                           int metadata, int extent_rec, u64 max_size)
4430 {
4431         struct extent_record *rec;
4432         struct cache_extent *cache;
4433         int ret = 0;
4434         int dup = 0;
4435
4436         cache = lookup_cache_extent(extent_cache, start, nr);
4437         if (cache) {
4438                 rec = container_of(cache, struct extent_record, cache);
4439                 if (inc_ref)
4440                         rec->refs++;
4441                 if (rec->nr == 1)
4442                         rec->nr = max(nr, max_size);
4443
4444                 /*
4445                  * We need to make sure to reset nr to whatever the extent
4446                  * record says was the real size, this way we can compare it to
4447                  * the backrefs.
4448                  */
4449                 if (extent_rec) {
4450                         if (start != rec->start || rec->found_rec) {
4451                                 struct extent_record *tmp;
4452
4453                                 dup = 1;
4454                                 if (list_empty(&rec->list))
4455                                         list_add_tail(&rec->list,
4456                                                       &duplicate_extents);
4457
4458                                 /*
4459                                  * We have to do this song and dance in case we
4460                                  * find an extent record that falls inside of
4461                                  * our current extent record but does not have
4462                                  * the same objectid.
4463                                  */
4464                                 tmp = malloc(sizeof(*tmp));
4465                                 if (!tmp)
4466                                         return -ENOMEM;
4467                                 tmp->start = start;
4468                                 tmp->max_size = max_size;
4469                                 tmp->nr = nr;
4470                                 tmp->found_rec = 1;
4471                                 tmp->metadata = metadata;
4472                                 tmp->extent_item_refs = extent_item_refs;
4473                                 INIT_LIST_HEAD(&tmp->list);
4474                                 list_add_tail(&tmp->list, &rec->dups);
4475                                 rec->num_duplicates++;
4476                         } else {
4477                                 rec->nr = nr;
4478                                 rec->found_rec = 1;
4479                         }
4480                 }
4481
4482                 if (extent_item_refs && !dup) {
4483                         if (rec->extent_item_refs) {
4484                                 fprintf(stderr, "block %llu rec "
4485                                         "extent_item_refs %llu, passed %llu\n",
4486                                         (unsigned long long)start,
4487                                         (unsigned long long)
4488                                                         rec->extent_item_refs,
4489                                         (unsigned long long)extent_item_refs);
4490                         }
4491                         rec->extent_item_refs = extent_item_refs;
4492                 }
4493                 if (is_root)
4494                         rec->is_root = 1;
4495                 if (set_checked) {
4496                         rec->content_checked = 1;
4497                         rec->owner_ref_checked = 1;
4498                 }
4499
4500                 if (parent_key)
4501                         btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4502                 if (parent_gen)
4503                         rec->parent_generation = parent_gen;
4504
4505                 if (rec->max_size < max_size)
4506                         rec->max_size = max_size;
4507
4508                 /*
4509                  * A metadata extent can't cross stripe_len boundary, otherwise
4510                  * kernel scrub won't be able to handle it.
4511                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4512                  * it.
4513                  */
4514                 if (metadata && check_crossing_stripes(rec->start,
4515                                                        rec->max_size))
4516                                 rec->crossing_stripes = 1;
4517                 check_extent_type(rec);
4518                 maybe_free_extent_rec(extent_cache, rec);
4519                 return ret;
4520         }
4521         rec = malloc(sizeof(*rec));
4522         rec->start = start;
4523         rec->max_size = max_size;
4524         rec->nr = max(nr, max_size);
4525         rec->found_rec = !!extent_rec;
4526         rec->content_checked = 0;
4527         rec->owner_ref_checked = 0;
4528         rec->num_duplicates = 0;
4529         rec->metadata = metadata;
4530         rec->flag_block_full_backref = -1;
4531         rec->bad_full_backref = 0;
4532         rec->crossing_stripes = 0;
4533         rec->wrong_chunk_type = 0;
4534         INIT_LIST_HEAD(&rec->backrefs);
4535         INIT_LIST_HEAD(&rec->dups);
4536         INIT_LIST_HEAD(&rec->list);
4537
4538         if (is_root)
4539                 rec->is_root = 1;
4540         else
4541                 rec->is_root = 0;
4542
4543         if (inc_ref)
4544                 rec->refs = 1;
4545         else
4546                 rec->refs = 0;
4547
4548         if (extent_item_refs)
4549                 rec->extent_item_refs = extent_item_refs;
4550         else
4551                 rec->extent_item_refs = 0;
4552
4553         if (parent_key)
4554                 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4555         else
4556                 memset(&rec->parent_key, 0, sizeof(*parent_key));
4557
4558         if (parent_gen)
4559                 rec->parent_generation = parent_gen;
4560         else
4561                 rec->parent_generation = 0;
4562
4563         rec->cache.start = start;
4564         rec->cache.size = nr;
4565         ret = insert_cache_extent(extent_cache, &rec->cache);
4566         BUG_ON(ret);
4567         bytes_used += nr;
4568         if (set_checked) {
4569                 rec->content_checked = 1;
4570                 rec->owner_ref_checked = 1;
4571         }
4572
4573         if (metadata)
4574                 if (check_crossing_stripes(rec->start, rec->max_size))
4575                         rec->crossing_stripes = 1;
4576         check_extent_type(rec);
4577         return ret;
4578 }
4579
4580 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4581                             u64 parent, u64 root, int found_ref)
4582 {
4583         struct extent_record *rec;
4584         struct tree_backref *back;
4585         struct cache_extent *cache;
4586
4587         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4588         if (!cache) {
4589                 add_extent_rec(extent_cache, NULL, 0, bytenr,
4590                                1, 0, 0, 0, 0, 1, 0, 0);
4591                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4592                 if (!cache)
4593                         abort();
4594         }
4595
4596         rec = container_of(cache, struct extent_record, cache);
4597         if (rec->start != bytenr) {
4598                 abort();
4599         }
4600
4601         back = find_tree_backref(rec, parent, root);
4602         if (!back)
4603                 back = alloc_tree_backref(rec, parent, root);
4604
4605         if (found_ref) {
4606                 if (back->node.found_ref) {
4607                         fprintf(stderr, "Extent back ref already exists "
4608                                 "for %llu parent %llu root %llu \n",
4609                                 (unsigned long long)bytenr,
4610                                 (unsigned long long)parent,
4611                                 (unsigned long long)root);
4612                 }
4613                 back->node.found_ref = 1;
4614         } else {
4615                 if (back->node.found_extent_tree) {
4616                         fprintf(stderr, "Extent back ref already exists "
4617                                 "for %llu parent %llu root %llu \n",
4618                                 (unsigned long long)bytenr,
4619                                 (unsigned long long)parent,
4620                                 (unsigned long long)root);
4621                 }
4622                 back->node.found_extent_tree = 1;
4623         }
4624         check_extent_type(rec);
4625         maybe_free_extent_rec(extent_cache, rec);
4626         return 0;
4627 }
4628
4629 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4630                             u64 parent, u64 root, u64 owner, u64 offset,
4631                             u32 num_refs, int found_ref, u64 max_size)
4632 {
4633         struct extent_record *rec;
4634         struct data_backref *back;
4635         struct cache_extent *cache;
4636
4637         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4638         if (!cache) {
4639                 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
4640                                0, 0, max_size);
4641                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4642                 if (!cache)
4643                         abort();
4644         }
4645
4646         rec = container_of(cache, struct extent_record, cache);
4647         if (rec->max_size < max_size)
4648                 rec->max_size = max_size;
4649
4650         /*
4651          * If found_ref is set then max_size is the real size and must match the
4652          * existing refs.  So if we have already found a ref then we need to
4653          * make sure that this ref matches the existing one, otherwise we need
4654          * to add a new backref so we can notice that the backrefs don't match
4655          * and we need to figure out who is telling the truth.  This is to
4656          * account for that awful fsync bug I introduced where we'd end up with
4657          * a btrfs_file_extent_item that would have its length include multiple
4658          * prealloc extents or point inside of a prealloc extent.
4659          */
4660         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4661                                  bytenr, max_size);
4662         if (!back)
4663                 back = alloc_data_backref(rec, parent, root, owner, offset,
4664                                           max_size);
4665
4666         if (found_ref) {
4667                 BUG_ON(num_refs != 1);
4668                 if (back->node.found_ref)
4669                         BUG_ON(back->bytes != max_size);
4670                 back->node.found_ref = 1;
4671                 back->found_ref += 1;
4672                 back->bytes = max_size;
4673                 back->disk_bytenr = bytenr;
4674                 rec->refs += 1;
4675                 rec->content_checked = 1;
4676                 rec->owner_ref_checked = 1;
4677         } else {
4678                 if (back->node.found_extent_tree) {
4679                         fprintf(stderr, "Extent back ref already exists "
4680                                 "for %llu parent %llu root %llu "
4681                                 "owner %llu offset %llu num_refs %lu\n",
4682                                 (unsigned long long)bytenr,
4683                                 (unsigned long long)parent,
4684                                 (unsigned long long)root,
4685                                 (unsigned long long)owner,
4686                                 (unsigned long long)offset,
4687                                 (unsigned long)num_refs);
4688                 }
4689                 back->num_refs = num_refs;
4690                 back->node.found_extent_tree = 1;
4691         }
4692         maybe_free_extent_rec(extent_cache, rec);
4693         return 0;
4694 }
4695
4696 static int add_pending(struct cache_tree *pending,
4697                        struct cache_tree *seen, u64 bytenr, u32 size)
4698 {
4699         int ret;
4700         ret = add_cache_extent(seen, bytenr, size);
4701         if (ret)
4702                 return ret;
4703         add_cache_extent(pending, bytenr, size);
4704         return 0;
4705 }
4706
4707 static int pick_next_pending(struct cache_tree *pending,
4708                         struct cache_tree *reada,
4709                         struct cache_tree *nodes,
4710                         u64 last, struct block_info *bits, int bits_nr,
4711                         int *reada_bits)
4712 {
4713         unsigned long node_start = last;
4714         struct cache_extent *cache;
4715         int ret;
4716
4717         cache = search_cache_extent(reada, 0);
4718         if (cache) {
4719                 bits[0].start = cache->start;
4720                 bits[0].size = cache->size;
4721                 *reada_bits = 1;
4722                 return 1;
4723         }
4724         *reada_bits = 0;
4725         if (node_start > 32768)
4726                 node_start -= 32768;
4727
4728         cache = search_cache_extent(nodes, node_start);
4729         if (!cache)
4730                 cache = search_cache_extent(nodes, 0);
4731
4732         if (!cache) {
4733                  cache = search_cache_extent(pending, 0);
4734                  if (!cache)
4735                          return 0;
4736                  ret = 0;
4737                  do {
4738                          bits[ret].start = cache->start;
4739                          bits[ret].size = cache->size;
4740                          cache = next_cache_extent(cache);
4741                          ret++;
4742                  } while (cache && ret < bits_nr);
4743                  return ret;
4744         }
4745
4746         ret = 0;
4747         do {
4748                 bits[ret].start = cache->start;
4749                 bits[ret].size = cache->size;
4750                 cache = next_cache_extent(cache);
4751                 ret++;
4752         } while (cache && ret < bits_nr);
4753
4754         if (bits_nr - ret > 8) {
4755                 u64 lookup = bits[0].start + bits[0].size;
4756                 struct cache_extent *next;
4757                 next = search_cache_extent(pending, lookup);
4758                 while(next) {
4759                         if (next->start - lookup > 32768)
4760                                 break;
4761                         bits[ret].start = next->start;
4762                         bits[ret].size = next->size;
4763                         lookup = next->start + next->size;
4764                         ret++;
4765                         if (ret == bits_nr)
4766                                 break;
4767                         next = next_cache_extent(next);
4768                         if (!next)
4769                                 break;
4770                 }
4771         }
4772         return ret;
4773 }
4774
4775 static void free_chunk_record(struct cache_extent *cache)
4776 {
4777         struct chunk_record *rec;
4778
4779         rec = container_of(cache, struct chunk_record, cache);
4780         list_del_init(&rec->list);
4781         list_del_init(&rec->dextents);
4782         free(rec);
4783 }
4784
4785 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4786 {
4787         cache_tree_free_extents(chunk_cache, free_chunk_record);
4788 }
4789
4790 static void free_device_record(struct rb_node *node)
4791 {
4792         struct device_record *rec;
4793
4794         rec = container_of(node, struct device_record, node);
4795         free(rec);
4796 }
4797
4798 FREE_RB_BASED_TREE(device_cache, free_device_record);
4799
4800 int insert_block_group_record(struct block_group_tree *tree,
4801                               struct block_group_record *bg_rec)
4802 {
4803         int ret;
4804
4805         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4806         if (ret)
4807                 return ret;
4808
4809         list_add_tail(&bg_rec->list, &tree->block_groups);
4810         return 0;
4811 }
4812
4813 static void free_block_group_record(struct cache_extent *cache)
4814 {
4815         struct block_group_record *rec;
4816
4817         rec = container_of(cache, struct block_group_record, cache);
4818         list_del_init(&rec->list);
4819         free(rec);
4820 }
4821
4822 void free_block_group_tree(struct block_group_tree *tree)
4823 {
4824         cache_tree_free_extents(&tree->tree, free_block_group_record);
4825 }
4826
4827 int insert_device_extent_record(struct device_extent_tree *tree,
4828                                 struct device_extent_record *de_rec)
4829 {
4830         int ret;
4831
4832         /*
4833          * Device extent is a bit different from the other extents, because
4834          * the extents which belong to the different devices may have the
4835          * same start and size, so we need use the special extent cache
4836          * search/insert functions.
4837          */
4838         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4839         if (ret)
4840                 return ret;
4841
4842         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4843         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4844         return 0;
4845 }
4846
4847 static void free_device_extent_record(struct cache_extent *cache)
4848 {
4849         struct device_extent_record *rec;
4850
4851         rec = container_of(cache, struct device_extent_record, cache);
4852         if (!list_empty(&rec->chunk_list))
4853                 list_del_init(&rec->chunk_list);
4854         if (!list_empty(&rec->device_list))
4855                 list_del_init(&rec->device_list);
4856         free(rec);
4857 }
4858
4859 void free_device_extent_tree(struct device_extent_tree *tree)
4860 {
4861         cache_tree_free_extents(&tree->tree, free_device_extent_record);
4862 }
4863
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref on the extent
 * record for key.objectid, with key.offset passed as the parent.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref; any other ref is recorded as a data backref carrying the
 * ref count stored in the item.  The results of the add_*_backref() calls
 * are not checked.  Always returns 0.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID) {
                add_data_backref(extent_cache, key.objectid, key.offset, 0,
                                 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
                return 0;
        }

        add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
        return 0;
}
#endif
4882
4883 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4884                                             struct btrfs_key *key,
4885                                             int slot)
4886 {
4887         struct btrfs_chunk *ptr;
4888         struct chunk_record *rec;
4889         int num_stripes, i;
4890
4891         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4892         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4893
4894         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4895         if (!rec) {
4896                 fprintf(stderr, "memory allocation failed\n");
4897                 exit(-1);
4898         }
4899
4900         INIT_LIST_HEAD(&rec->list);
4901         INIT_LIST_HEAD(&rec->dextents);
4902         rec->bg_rec = NULL;
4903
4904         rec->cache.start = key->offset;
4905         rec->cache.size = btrfs_chunk_length(leaf, ptr);
4906
4907         rec->generation = btrfs_header_generation(leaf);
4908
4909         rec->objectid = key->objectid;
4910         rec->type = key->type;
4911         rec->offset = key->offset;
4912
4913         rec->length = rec->cache.size;
4914         rec->owner = btrfs_chunk_owner(leaf, ptr);
4915         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4916         rec->type_flags = btrfs_chunk_type(leaf, ptr);
4917         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4918         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4919         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4920         rec->num_stripes = num_stripes;
4921         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4922
4923         for (i = 0; i < rec->num_stripes; ++i) {
4924                 rec->stripes[i].devid =
4925                         btrfs_stripe_devid_nr(leaf, ptr, i);
4926                 rec->stripes[i].offset =
4927                         btrfs_stripe_offset_nr(leaf, ptr, i);
4928                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4929                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
4930                                 BTRFS_UUID_SIZE);
4931         }
4932
4933         return rec;
4934 }
4935
4936 static int process_chunk_item(struct cache_tree *chunk_cache,
4937                               struct btrfs_key *key, struct extent_buffer *eb,
4938                               int slot)
4939 {
4940         struct chunk_record *rec;
4941         int ret = 0;
4942
4943         rec = btrfs_new_chunk_record(eb, key, slot);
4944         ret = insert_cache_extent(chunk_cache, &rec->cache);
4945         if (ret) {
4946                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
4947                         rec->offset, rec->length);
4948                 free(rec);
4949         }
4950
4951         return ret;
4952 }
4953
4954 static int process_device_item(struct rb_root *dev_cache,
4955                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
4956 {
4957         struct btrfs_dev_item *ptr;
4958         struct device_record *rec;
4959         int ret = 0;
4960
4961         ptr = btrfs_item_ptr(eb,
4962                 slot, struct btrfs_dev_item);
4963
4964         rec = malloc(sizeof(*rec));
4965         if (!rec) {
4966                 fprintf(stderr, "memory allocation failed\n");
4967                 return -ENOMEM;
4968         }
4969
4970         rec->devid = key->offset;
4971         rec->generation = btrfs_header_generation(eb);
4972
4973         rec->objectid = key->objectid;
4974         rec->type = key->type;
4975         rec->offset = key->offset;
4976
4977         rec->devid = btrfs_device_id(eb, ptr);
4978         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
4979         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
4980
4981         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
4982         if (ret) {
4983                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
4984                 free(rec);
4985         }
4986
4987         return ret;
4988 }
4989
4990 struct block_group_record *
4991 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
4992                              int slot)
4993 {
4994         struct btrfs_block_group_item *ptr;
4995         struct block_group_record *rec;
4996
4997         rec = calloc(1, sizeof(*rec));
4998         if (!rec) {
4999                 fprintf(stderr, "memory allocation failed\n");
5000                 exit(-1);
5001         }
5002
5003         rec->cache.start = key->objectid;
5004         rec->cache.size = key->offset;
5005
5006         rec->generation = btrfs_header_generation(leaf);
5007
5008         rec->objectid = key->objectid;
5009         rec->type = key->type;
5010         rec->offset = key->offset;
5011
5012         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5013         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5014
5015         INIT_LIST_HEAD(&rec->list);
5016
5017         return rec;
5018 }
5019
5020 static int process_block_group_item(struct block_group_tree *block_group_cache,
5021                                     struct btrfs_key *key,
5022                                     struct extent_buffer *eb, int slot)
5023 {
5024         struct block_group_record *rec;
5025         int ret = 0;
5026
5027         rec = btrfs_new_block_group_record(eb, key, slot);
5028         ret = insert_block_group_record(block_group_cache, rec);
5029         if (ret) {
5030                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5031                         rec->objectid, rec->offset);
5032                 free(rec);
5033         }
5034
5035         return ret;
5036 }
5037
5038 struct device_extent_record *
5039 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5040                                struct btrfs_key *key, int slot)
5041 {
5042         struct device_extent_record *rec;
5043         struct btrfs_dev_extent *ptr;
5044
5045         rec = calloc(1, sizeof(*rec));
5046         if (!rec) {
5047                 fprintf(stderr, "memory allocation failed\n");
5048                 exit(-1);
5049         }
5050
5051         rec->cache.objectid = key->objectid;
5052         rec->cache.start = key->offset;
5053
5054         rec->generation = btrfs_header_generation(leaf);
5055
5056         rec->objectid = key->objectid;
5057         rec->type = key->type;
5058         rec->offset = key->offset;
5059
5060         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5061         rec->chunk_objecteid =
5062                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5063         rec->chunk_offset =
5064                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5065         rec->length = btrfs_dev_extent_length(leaf, ptr);
5066         rec->cache.size = rec->length;
5067
5068         INIT_LIST_HEAD(&rec->chunk_list);
5069         INIT_LIST_HEAD(&rec->device_list);
5070
5071         return rec;
5072 }
5073
5074 static int
5075 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5076                            struct btrfs_key *key, struct extent_buffer *eb,
5077                            int slot)
5078 {
5079         struct device_extent_record *rec;
5080         int ret;
5081
5082         rec = btrfs_new_device_extent_record(eb, key, slot);
5083         ret = insert_device_extent_record(dev_extent_cache, rec);
5084         if (ret) {
5085                 fprintf(stderr,
5086                         "Device extent[%llu, %llu, %llu] existed.\n",
5087                         rec->objectid, rec->offset, rec->length);
5088                 free(rec);
5089         }
5090
5091         return ret;
5092 }
5093
5094 static int process_extent_item(struct btrfs_root *root,
5095                                struct cache_tree *extent_cache,
5096                                struct extent_buffer *eb, int slot)
5097 {
5098         struct btrfs_extent_item *ei;
5099         struct btrfs_extent_inline_ref *iref;
5100         struct btrfs_extent_data_ref *dref;
5101         struct btrfs_shared_data_ref *sref;
5102         struct btrfs_key key;
5103         unsigned long end;
5104         unsigned long ptr;
5105         int type;
5106         u32 item_size = btrfs_item_size_nr(eb, slot);
5107         u64 refs = 0;
5108         u64 offset;
5109         u64 num_bytes;
5110         int metadata = 0;
5111
5112         btrfs_item_key_to_cpu(eb, &key, slot);
5113
5114         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5115                 metadata = 1;
5116                 num_bytes = root->leafsize;
5117         } else {
5118                 num_bytes = key.offset;
5119         }
5120
5121         if (item_size < sizeof(*ei)) {
5122 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5123                 struct btrfs_extent_item_v0 *ei0;
5124                 BUG_ON(item_size != sizeof(*ei0));
5125                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5126                 refs = btrfs_extent_refs_v0(eb, ei0);
5127 #else
5128                 BUG();
5129 #endif
5130                 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5131                                       num_bytes, refs, 0, 0, 0, metadata, 1,
5132                                       num_bytes);
5133         }
5134
5135         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5136         refs = btrfs_extent_refs(eb, ei);
5137
5138         add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5139                        refs, 0, 0, 0, metadata, 1, num_bytes);
5140
5141         ptr = (unsigned long)(ei + 1);
5142         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5143             key.type == BTRFS_EXTENT_ITEM_KEY)
5144                 ptr += sizeof(struct btrfs_tree_block_info);
5145
5146         end = (unsigned long)ei + item_size;
5147         while (ptr < end) {
5148                 iref = (struct btrfs_extent_inline_ref *)ptr;
5149                 type = btrfs_extent_inline_ref_type(eb, iref);
5150                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5151                 switch (type) {
5152                 case BTRFS_TREE_BLOCK_REF_KEY:
5153                         add_tree_backref(extent_cache, key.objectid,
5154                                          0, offset, 0);
5155                         break;
5156                 case BTRFS_SHARED_BLOCK_REF_KEY:
5157                         add_tree_backref(extent_cache, key.objectid,
5158                                          offset, 0, 0);
5159                         break;
5160                 case BTRFS_EXTENT_DATA_REF_KEY:
5161                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5162                         add_data_backref(extent_cache, key.objectid, 0,
5163                                         btrfs_extent_data_ref_root(eb, dref),
5164                                         btrfs_extent_data_ref_objectid(eb,
5165                                                                        dref),
5166                                         btrfs_extent_data_ref_offset(eb, dref),
5167                                         btrfs_extent_data_ref_count(eb, dref),
5168                                         0, num_bytes);
5169                         break;
5170                 case BTRFS_SHARED_DATA_REF_KEY:
5171                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5172                         add_data_backref(extent_cache, key.objectid, offset,
5173                                         0, 0, 0,
5174                                         btrfs_shared_data_ref_count(eb, sref),
5175                                         0, num_bytes);
5176                         break;
5177                 default:
5178                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5179                                 key.objectid, key.type, num_bytes);
5180                         goto out;
5181                 }
5182                 ptr += btrfs_extent_inline_ref_size(type);
5183         }
5184         WARN_ON(ptr > end);
5185 out:
5186         return 0;
5187 }
5188
5189 static int check_cache_range(struct btrfs_root *root,
5190                              struct btrfs_block_group_cache *cache,
5191                              u64 offset, u64 bytes)
5192 {
5193         struct btrfs_free_space *entry;
5194         u64 *logical;
5195         u64 bytenr;
5196         int stripe_len;
5197         int i, nr, ret;
5198
5199         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5200                 bytenr = btrfs_sb_offset(i);
5201                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5202                                        cache->key.objectid, bytenr, 0,
5203                                        &logical, &nr, &stripe_len);
5204                 if (ret)
5205                         return ret;
5206
5207                 while (nr--) {
5208                         if (logical[nr] + stripe_len <= offset)
5209                                 continue;
5210                         if (offset + bytes <= logical[nr])
5211                                 continue;
5212                         if (logical[nr] == offset) {
5213                                 if (stripe_len >= bytes) {
5214                                         kfree(logical);
5215                                         return 0;
5216                                 }
5217                                 bytes -= stripe_len;
5218                                 offset += stripe_len;
5219                         } else if (logical[nr] < offset) {
5220                                 if (logical[nr] + stripe_len >=
5221                                     offset + bytes) {
5222                                         kfree(logical);
5223                                         return 0;
5224                                 }
5225                                 bytes = (offset + bytes) -
5226                                         (logical[nr] + stripe_len);
5227                                 offset = logical[nr] + stripe_len;
5228                         } else {
5229                                 /*
5230                                  * Could be tricky, the super may land in the
5231                                  * middle of the area we're checking.  First
5232                                  * check the easiest case, it's at the end.
5233                                  */
5234                                 if (logical[nr] + stripe_len >=
5235                                     bytes + offset) {
5236                                         bytes = logical[nr] - offset;
5237                                         continue;
5238                                 }
5239
5240                                 /* Check the left side */
5241                                 ret = check_cache_range(root, cache,
5242                                                         offset,
5243                                                         logical[nr] - offset);
5244                                 if (ret) {
5245                                         kfree(logical);
5246                                         return ret;
5247                                 }
5248
5249                                 /* Now we continue with the right side */
5250                                 bytes = (offset + bytes) -
5251                                         (logical[nr] + stripe_len);
5252                                 offset = logical[nr] + stripe_len;
5253                         }
5254                 }
5255
5256                 kfree(logical);
5257         }
5258
5259         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5260         if (!entry) {
5261                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5262                         offset, offset+bytes);
5263                 return -EINVAL;
5264         }
5265
5266         if (entry->offset != offset) {
5267                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5268                         entry->offset);
5269                 return -EINVAL;
5270         }
5271
5272         if (entry->bytes != bytes) {
5273                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5274                         bytes, entry->bytes, offset);
5275                 return -EINVAL;
5276         }
5277
5278         unlink_free_space(cache->free_space_ctl, entry);
5279         free(entry);
5280         return 0;
5281 }
5282
5283 static int verify_space_cache(struct btrfs_root *root,
5284                               struct btrfs_block_group_cache *cache)
5285 {
5286         struct btrfs_path *path;
5287         struct extent_buffer *leaf;
5288         struct btrfs_key key;
5289         u64 last;
5290         int ret = 0;
5291
5292         path = btrfs_alloc_path();
5293         if (!path)
5294                 return -ENOMEM;
5295
5296         root = root->fs_info->extent_root;
5297
5298         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5299
5300         key.objectid = last;
5301         key.offset = 0;
5302         key.type = BTRFS_EXTENT_ITEM_KEY;
5303
5304         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5305         if (ret < 0)
5306                 goto out;
5307         ret = 0;
5308         while (1) {
5309                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5310                         ret = btrfs_next_leaf(root, path);
5311                         if (ret < 0)
5312                                 goto out;
5313                         if (ret > 0) {
5314                                 ret = 0;
5315                                 break;
5316                         }
5317                 }
5318                 leaf = path->nodes[0];
5319                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5320                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5321                         break;
5322                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5323                     key.type != BTRFS_METADATA_ITEM_KEY) {
5324                         path->slots[0]++;
5325                         continue;
5326                 }
5327
5328                 if (last == key.objectid) {
5329                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5330                                 last = key.objectid + key.offset;
5331                         else
5332                                 last = key.objectid + root->leafsize;
5333                         path->slots[0]++;
5334                         continue;
5335                 }
5336
5337                 ret = check_cache_range(root, cache, last,
5338                                         key.objectid - last);
5339                 if (ret)
5340                         break;
5341                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5342                         last = key.objectid + key.offset;
5343                 else
5344                         last = key.objectid + root->leafsize;
5345                 path->slots[0]++;
5346         }
5347
5348         if (last < cache->key.objectid + cache->key.offset)
5349                 ret = check_cache_range(root, cache, last,
5350                                         cache->key.objectid +
5351                                         cache->key.offset - last);
5352
5353 out:
5354         btrfs_free_path(path);
5355
5356         if (!ret &&
5357             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5358                 fprintf(stderr, "There are still entries left in the space "
5359                         "cache\n");
5360                 ret = -EINVAL;
5361         }
5362
5363         return ret;
5364 }
5365
5366 static int check_space_cache(struct btrfs_root *root)
5367 {
5368         struct btrfs_block_group_cache *cache;
5369         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5370         int ret;
5371         int error = 0;
5372
5373         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5374             btrfs_super_generation(root->fs_info->super_copy) !=
5375             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5376                 printf("cache and super generation don't match, space cache "
5377                        "will be invalidated\n");
5378                 return 0;
5379         }
5380
5381         if (ctx.progress_enabled) {
5382                 ctx.tp = TASK_FREE_SPACE;
5383                 task_start(ctx.info);
5384         }
5385
5386         while (1) {
5387                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5388                 if (!cache)
5389                         break;
5390
5391                 start = cache->key.objectid + cache->key.offset;
5392                 if (!cache->free_space_ctl) {
5393                         if (btrfs_init_free_space_ctl(cache,
5394                                                       root->sectorsize)) {
5395                                 ret = -ENOMEM;
5396                                 break;
5397                         }
5398                 } else {
5399                         btrfs_remove_free_space_cache(cache);
5400                 }
5401
5402                 ret = load_free_space_cache(root->fs_info, cache);
5403                 if (!ret)
5404                         continue;
5405
5406                 ret = verify_space_cache(root, cache);
5407                 if (ret) {
5408                         fprintf(stderr, "cache appears valid but isnt %Lu\n",
5409                                 cache->key.objectid);
5410                         error++;
5411                 }
5412         }
5413
5414         task_stop(ctx.info);
5415
5416         return error ? -EINVAL : 0;
5417 }
5418
5419 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5420                         u64 num_bytes, unsigned long leaf_offset,
5421                         struct extent_buffer *eb) {
5422
5423         u64 offset = 0;
5424         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5425         char *data;
5426         unsigned long csum_offset;
5427         u32 csum;
5428         u32 csum_expected;
5429         u64 read_len;
5430         u64 data_checked = 0;
5431         u64 tmp;
5432         int ret = 0;
5433         int mirror;
5434         int num_copies;
5435
5436         if (num_bytes % root->sectorsize)
5437                 return -EINVAL;
5438
5439         data = malloc(num_bytes);
5440         if (!data)
5441                 return -ENOMEM;
5442
5443         while (offset < num_bytes) {
5444                 mirror = 0;
5445 again:
5446                 read_len = num_bytes - offset;
5447                 /* read as much space once a time */
5448                 ret = read_extent_data(root, data + offset,
5449                                 bytenr + offset, &read_len, mirror);
5450                 if (ret)
5451                         goto out;
5452                 data_checked = 0;
5453                 /* verify every 4k data's checksum */
5454                 while (data_checked < read_len) {
5455                         csum = ~(u32)0;
5456                         tmp = offset + data_checked;
5457
5458                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5459                                                csum, root->sectorsize);
5460                         btrfs_csum_final(csum, (char *)&csum);
5461
5462                         csum_offset = leaf_offset +
5463                                  tmp / root->sectorsize * csum_size;
5464                         read_extent_buffer(eb, (char *)&csum_expected,
5465                                            csum_offset, csum_size);
5466                         /* try another mirror */
5467                         if (csum != csum_expected) {
5468                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5469                                                 mirror, bytenr + tmp,
5470                                                 csum, csum_expected);
5471                                 num_copies = btrfs_num_copies(
5472                                                 &root->fs_info->mapping_tree,
5473                                                 bytenr, num_bytes);
5474                                 if (mirror < num_copies - 1) {
5475                                         mirror += 1;
5476                                         goto again;
5477                                 }
5478                         }
5479                         data_checked += root->sectorsize;
5480                 }
5481                 offset += read_len;
5482         }
5483 out:
5484         free(data);
5485         return ret;
5486 }
5487
5488 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5489                                u64 num_bytes)
5490 {
5491         struct btrfs_path *path;
5492         struct extent_buffer *leaf;
5493         struct btrfs_key key;
5494         int ret;
5495
5496         path = btrfs_alloc_path();
5497         if (!path) {
5498                 fprintf(stderr, "Error allocing path\n");
5499                 return -ENOMEM;
5500         }
5501
5502         key.objectid = bytenr;
5503         key.type = BTRFS_EXTENT_ITEM_KEY;
5504         key.offset = (u64)-1;
5505
5506 again:
5507         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5508                                 0, 0);
5509         if (ret < 0) {
5510                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5511                 btrfs_free_path(path);
5512                 return ret;
5513         } else if (ret) {
5514                 if (path->slots[0] > 0) {
5515                         path->slots[0]--;
5516                 } else {
5517                         ret = btrfs_prev_leaf(root, path);
5518                         if (ret < 0) {
5519                                 goto out;
5520                         } else if (ret > 0) {
5521                                 ret = 0;
5522                                 goto out;
5523                         }
5524                 }
5525         }
5526
5527         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5528
5529         /*
5530          * Block group items come before extent items if they have the same
5531          * bytenr, so walk back one more just in case.  Dear future traveler,
5532          * first congrats on mastering time travel.  Now if it's not too much
5533          * trouble could you go back to 2006 and tell Chris to make the
5534          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5535          * EXTENT_ITEM_KEY please?
5536          */
5537         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5538                 if (path->slots[0] > 0) {
5539                         path->slots[0]--;
5540                 } else {
5541                         ret = btrfs_prev_leaf(root, path);
5542                         if (ret < 0) {
5543                                 goto out;
5544                         } else if (ret > 0) {
5545                                 ret = 0;
5546                                 goto out;
5547                         }
5548                 }
5549                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5550         }
5551
5552         while (num_bytes) {
5553                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5554                         ret = btrfs_next_leaf(root, path);
5555                         if (ret < 0) {
5556                                 fprintf(stderr, "Error going to next leaf "
5557                                         "%d\n", ret);
5558                                 btrfs_free_path(path);
5559                                 return ret;
5560                         } else if (ret) {
5561                                 break;
5562                         }
5563                 }
5564                 leaf = path->nodes[0];
5565                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5566                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5567                         path->slots[0]++;
5568                         continue;
5569                 }
5570                 if (key.objectid + key.offset < bytenr) {
5571                         path->slots[0]++;
5572                         continue;
5573                 }
5574                 if (key.objectid > bytenr + num_bytes)
5575                         break;
5576
5577                 if (key.objectid == bytenr) {
5578                         if (key.offset >= num_bytes) {
5579                                 num_bytes = 0;
5580                                 break;
5581                         }
5582                         num_bytes -= key.offset;
5583                         bytenr += key.offset;
5584                 } else if (key.objectid < bytenr) {
5585                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5586                                 num_bytes = 0;
5587                                 break;
5588                         }
5589                         num_bytes = (bytenr + num_bytes) -
5590                                 (key.objectid + key.offset);
5591                         bytenr = key.objectid + key.offset;
5592                 } else {
5593                         if (key.objectid + key.offset < bytenr + num_bytes) {
5594                                 u64 new_start = key.objectid + key.offset;
5595                                 u64 new_bytes = bytenr + num_bytes - new_start;
5596
5597                                 /*
5598                                  * Weird case, the extent is in the middle of
5599                                  * our range, we'll have to search one side
5600                                  * and then the other.  Not sure if this happens
5601                                  * in real life, but no harm in coding it up
5602                                  * anyway just in case.
5603                                  */
5604                                 btrfs_release_path(path);
5605                                 ret = check_extent_exists(root, new_start,
5606                                                           new_bytes);
5607                                 if (ret) {
5608                                         fprintf(stderr, "Right section didn't "
5609                                                 "have a record\n");
5610                                         break;
5611                                 }
5612                                 num_bytes = key.objectid - bytenr;
5613                                 goto again;
5614                         }
5615                         num_bytes = key.objectid - bytenr;
5616                 }
5617                 path->slots[0]++;
5618         }
5619         ret = 0;
5620
5621 out:
5622         if (num_bytes && !ret) {
5623                 fprintf(stderr, "There are no extents for csum range "
5624                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5625                 ret = 1;
5626         }
5627
5628         btrfs_free_path(path);
5629         return ret;
5630 }
5631
5632 static int check_csums(struct btrfs_root *root)
5633 {
5634         struct btrfs_path *path;
5635         struct extent_buffer *leaf;
5636         struct btrfs_key key;
5637         u64 offset = 0, num_bytes = 0;
5638         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5639         int errors = 0;
5640         int ret;
5641         u64 data_len;
5642         unsigned long leaf_offset;
5643
5644         root = root->fs_info->csum_root;
5645         if (!extent_buffer_uptodate(root->node)) {
5646                 fprintf(stderr, "No valid csum tree found\n");
5647                 return -ENOENT;
5648         }
5649
5650         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5651         key.type = BTRFS_EXTENT_CSUM_KEY;
5652         key.offset = 0;
5653
5654         path = btrfs_alloc_path();
5655         if (!path)
5656                 return -ENOMEM;
5657
5658         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5659         if (ret < 0) {
5660                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5661                 btrfs_free_path(path);
5662                 return ret;
5663         }
5664
5665         if (ret > 0 && path->slots[0])
5666                 path->slots[0]--;
5667         ret = 0;
5668
5669         while (1) {
5670                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5671                         ret = btrfs_next_leaf(root, path);
5672                         if (ret < 0) {
5673                                 fprintf(stderr, "Error going to next leaf "
5674                                         "%d\n", ret);
5675                                 break;
5676                         }
5677                         if (ret)
5678                                 break;
5679                 }
5680                 leaf = path->nodes[0];
5681
5682                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5683                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5684                         path->slots[0]++;
5685                         continue;
5686                 }
5687
5688                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5689                               csum_size) * root->sectorsize;
5690                 if (!check_data_csum)
5691                         goto skip_csum_check;
5692                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5693                 ret = check_extent_csums(root, key.offset, data_len,
5694                                          leaf_offset, leaf);
5695                 if (ret)
5696                         break;
5697 skip_csum_check:
5698                 if (!num_bytes) {
5699                         offset = key.offset;
5700                 } else if (key.offset != offset + num_bytes) {
5701                         ret = check_extent_exists(root, offset, num_bytes);
5702                         if (ret) {
5703                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5704                                         "there is no extent record\n",
5705                                         offset, offset+num_bytes);
5706                                 errors++;
5707                         }
5708                         offset = key.offset;
5709                         num_bytes = 0;
5710                 }
5711                 num_bytes += data_len;
5712                 path->slots[0]++;
5713         }
5714
5715         btrfs_free_path(path);
5716         return errors;
5717 }
5718
5719 static int is_dropped_key(struct btrfs_key *key,
5720                           struct btrfs_key *drop_key) {
5721         if (key->objectid < drop_key->objectid)
5722                 return 1;
5723         else if (key->objectid == drop_key->objectid) {
5724                 if (key->type < drop_key->type)
5725                         return 1;
5726                 else if (key->type == drop_key->type) {
5727                         if (key->offset < drop_key->offset)
5728                                 return 1;
5729                 }
5730         }
5731         return 0;
5732 }
5733
5734 /*
5735  * Here are the rules for FULL_BACKREF.
5736  *
5737  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5738  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5739  *      FULL_BACKREF set.
5740  * 3) We cow'ed the block walking down a reloc tree.  This is impossible to tell
5741  *    if it happened after the relocation occurred since we'll have dropped the
5742  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5743  *    have no real way to know for sure.
5744  *
5745  * We process the blocks one root at a time, and we start from the lowest root
5746  * objectid and go to the highest.  So we can just lookup the owner backref for
5747  * the record and if we don't find it then we know it doesn't exist and we have
5748  * a FULL BACKREF.
5749  *
5750  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5751  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5752  * be set or not and then we can check later once we've gathered all the refs.
5753  */
5754 static int calc_extent_flag(struct btrfs_root *root,
5755                            struct cache_tree *extent_cache,
5756                            struct extent_buffer *buf,
5757                            struct root_item_record *ri,
5758                            u64 *flags)
5759 {
5760         struct extent_record *rec;
5761         struct cache_extent *cache;
5762         struct tree_backref *tback;
5763         u64 owner = 0;
5764
5765         cache = lookup_cache_extent(extent_cache, buf->start, 1);
5766         /* we have added this extent before */
5767         BUG_ON(!cache);
5768         rec = container_of(cache, struct extent_record, cache);
5769
5770         /*
5771          * Except file/reloc tree, we can not have
5772          * FULL BACKREF MODE
5773          */
5774         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5775                 goto normal;
5776         /*
5777          * root node
5778          */
5779         if (buf->start == ri->bytenr)
5780                 goto normal;
5781
5782         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5783                 goto full_backref;
5784
5785         owner = btrfs_header_owner(buf);
5786         if (owner == ri->objectid)
5787                 goto normal;
5788
5789         tback = find_tree_backref(rec, 0, owner);
5790         if (!tback)
5791                 goto full_backref;
5792 normal:
5793         *flags = 0;
5794         if (rec->flag_block_full_backref != -1 &&
5795             rec->flag_block_full_backref != 0)
5796                 rec->bad_full_backref = 1;
5797         return 0;
5798 full_backref:
5799         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5800         if (rec->flag_block_full_backref != -1 &&
5801             rec->flag_block_full_backref != 1)
5802                 rec->bad_full_backref = 1;
5803         return 0;
5804 }
5805
5806 static int run_next_block(struct btrfs_root *root,
5807                           struct block_info *bits,
5808                           int bits_nr,
5809                           u64 *last,
5810                           struct cache_tree *pending,
5811                           struct cache_tree *seen,
5812                           struct cache_tree *reada,
5813                           struct cache_tree *nodes,
5814                           struct cache_tree *extent_cache,
5815                           struct cache_tree *chunk_cache,
5816                           struct rb_root *dev_cache,
5817                           struct block_group_tree *block_group_cache,
5818                           struct device_extent_tree *dev_extent_cache,
5819                           struct root_item_record *ri)
5820 {
5821         struct extent_buffer *buf;
5822         struct extent_record *rec = NULL;
5823         u64 bytenr;
5824         u32 size;
5825         u64 parent;
5826         u64 owner;
5827         u64 flags;
5828         u64 ptr;
5829         u64 gen = 0;
5830         int ret = 0;
5831         int i;
5832         int nritems;
5833         struct btrfs_key key;
5834         struct cache_extent *cache;
5835         int reada_bits;
5836
5837         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5838                                     bits_nr, &reada_bits);
5839         if (nritems == 0)
5840                 return 1;
5841
5842         if (!reada_bits) {
5843                 for(i = 0; i < nritems; i++) {
5844                         ret = add_cache_extent(reada, bits[i].start,
5845                                                bits[i].size);
5846                         if (ret == -EEXIST)
5847                                 continue;
5848
5849                         /* fixme, get the parent transid */
5850                         readahead_tree_block(root, bits[i].start,
5851                                              bits[i].size, 0);
5852                 }
5853         }
5854         *last = bits[0].start;
5855         bytenr = bits[0].start;
5856         size = bits[0].size;
5857
5858         cache = lookup_cache_extent(pending, bytenr, size);
5859         if (cache) {
5860                 remove_cache_extent(pending, cache);
5861                 free(cache);
5862         }
5863         cache = lookup_cache_extent(reada, bytenr, size);
5864         if (cache) {
5865                 remove_cache_extent(reada, cache);
5866                 free(cache);
5867         }
5868         cache = lookup_cache_extent(nodes, bytenr, size);
5869         if (cache) {
5870                 remove_cache_extent(nodes, cache);
5871                 free(cache);
5872         }
5873         cache = lookup_cache_extent(extent_cache, bytenr, size);
5874         if (cache) {
5875                 rec = container_of(cache, struct extent_record, cache);
5876                 gen = rec->parent_generation;
5877         }
5878
5879         /* fixme, get the real parent transid */
5880         buf = read_tree_block(root, bytenr, size, gen);
5881         if (!extent_buffer_uptodate(buf)) {
5882                 record_bad_block_io(root->fs_info,
5883                                     extent_cache, bytenr, size);
5884                 goto out;
5885         }
5886
5887         nritems = btrfs_header_nritems(buf);
5888
5889         flags = 0;
5890         if (!init_extent_tree) {
5891                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5892                                        btrfs_header_level(buf), 1, NULL,
5893                                        &flags);
5894                 if (ret < 0) {
5895                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5896                         if (ret < 0) {
5897                                 fprintf(stderr, "Couldn't calc extent flags\n");
5898                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5899                         }
5900                 }
5901         } else {
5902                 flags = 0;
5903                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5904                 if (ret < 0) {
5905                         fprintf(stderr, "Couldn't calc extent flags\n");
5906                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5907                 }
5908         }
5909
5910         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5911                 if (ri != NULL &&
5912                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5913                     ri->objectid == btrfs_header_owner(buf)) {
5914                         /*
5915                          * Ok we got to this block from it's original owner and
5916                          * we have FULL_BACKREF set.  Relocation can leave
5917                          * converted blocks over so this is altogether possible,
5918                          * however it's not possible if the generation > the
5919                          * last snapshot, so check for this case.
5920                          */
5921                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5922                             btrfs_header_generation(buf) > ri->last_snapshot) {
5923                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5924                                 rec->bad_full_backref = 1;
5925                         }
5926                 }
5927         } else {
5928                 if (ri != NULL &&
5929                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
5930                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
5931                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5932                         rec->bad_full_backref = 1;
5933                 }
5934         }
5935
5936         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5937                 rec->flag_block_full_backref = 1;
5938                 parent = bytenr;
5939                 owner = 0;
5940         } else {
5941                 rec->flag_block_full_backref = 0;
5942                 parent = 0;
5943                 owner = btrfs_header_owner(buf);
5944         }
5945
5946         ret = check_block(root, extent_cache, buf, flags);
5947         if (ret)
5948                 goto out;
5949
5950         if (btrfs_is_leaf(buf)) {
5951                 btree_space_waste += btrfs_leaf_free_space(root, buf);
5952                 for (i = 0; i < nritems; i++) {
5953                         struct btrfs_file_extent_item *fi;
5954                         btrfs_item_key_to_cpu(buf, &key, i);
5955                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
5956                                 process_extent_item(root, extent_cache, buf,
5957                                                     i);
5958                                 continue;
5959                         }
5960                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5961                                 process_extent_item(root, extent_cache, buf,
5962                                                     i);
5963                                 continue;
5964                         }
5965                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
5966                                 total_csum_bytes +=
5967                                         btrfs_item_size_nr(buf, i);
5968                                 continue;
5969                         }
5970                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
5971                                 process_chunk_item(chunk_cache, &key, buf, i);
5972                                 continue;
5973                         }
5974                         if (key.type == BTRFS_DEV_ITEM_KEY) {
5975                                 process_device_item(dev_cache, &key, buf, i);
5976                                 continue;
5977                         }
5978                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
5979                                 process_block_group_item(block_group_cache,
5980                                         &key, buf, i);
5981                                 continue;
5982                         }
5983                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
5984                                 process_device_extent_item(dev_extent_cache,
5985                                         &key, buf, i);
5986                                 continue;
5987
5988                         }
5989                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
5990 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5991                                 process_extent_ref_v0(extent_cache, buf, i);
5992 #else
5993                                 BUG();
5994 #endif
5995                                 continue;
5996                         }
5997
5998                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
5999                                 add_tree_backref(extent_cache, key.objectid, 0,
6000                                                  key.offset, 0);
6001                                 continue;
6002                         }
6003                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6004                                 add_tree_backref(extent_cache, key.objectid,
6005                                                  key.offset, 0, 0);
6006                                 continue;
6007                         }
6008                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6009                                 struct btrfs_extent_data_ref *ref;
6010                                 ref = btrfs_item_ptr(buf, i,
6011                                                 struct btrfs_extent_data_ref);
6012                                 add_data_backref(extent_cache,
6013                                         key.objectid, 0,
6014                                         btrfs_extent_data_ref_root(buf, ref),
6015                                         btrfs_extent_data_ref_objectid(buf,
6016                                                                        ref),
6017                                         btrfs_extent_data_ref_offset(buf, ref),
6018                                         btrfs_extent_data_ref_count(buf, ref),
6019                                         0, root->sectorsize);
6020                                 continue;
6021                         }
6022                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6023                                 struct btrfs_shared_data_ref *ref;
6024                                 ref = btrfs_item_ptr(buf, i,
6025                                                 struct btrfs_shared_data_ref);
6026                                 add_data_backref(extent_cache,
6027                                         key.objectid, key.offset, 0, 0, 0,
6028                                         btrfs_shared_data_ref_count(buf, ref),
6029                                         0, root->sectorsize);
6030                                 continue;
6031                         }
6032                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6033                                 struct bad_item *bad;
6034
6035                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6036                                         continue;
6037                                 if (!owner)
6038                                         continue;
6039                                 bad = malloc(sizeof(struct bad_item));
6040                                 if (!bad)
6041                                         continue;
6042                                 INIT_LIST_HEAD(&bad->list);
6043                                 memcpy(&bad->key, &key,
6044                                        sizeof(struct btrfs_key));
6045                                 bad->root_id = owner;
6046                                 list_add_tail(&bad->list, &delete_items);
6047                                 continue;
6048                         }
6049                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6050                                 continue;
6051                         fi = btrfs_item_ptr(buf, i,
6052                                             struct btrfs_file_extent_item);
6053                         if (btrfs_file_extent_type(buf, fi) ==
6054                             BTRFS_FILE_EXTENT_INLINE)
6055                                 continue;
6056                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6057                                 continue;
6058
6059                         data_bytes_allocated +=
6060                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6061                         if (data_bytes_allocated < root->sectorsize) {
6062                                 abort();
6063                         }
6064                         data_bytes_referenced +=
6065                                 btrfs_file_extent_num_bytes(buf, fi);
6066                         add_data_backref(extent_cache,
6067                                 btrfs_file_extent_disk_bytenr(buf, fi),
6068                                 parent, owner, key.objectid, key.offset -
6069                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6070                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6071                 }
6072         } else {
6073                 int level;
6074                 struct btrfs_key first_key;
6075
6076                 first_key.objectid = 0;
6077
6078                 if (nritems > 0)
6079                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6080                 level = btrfs_header_level(buf);
6081                 for (i = 0; i < nritems; i++) {
6082                         ptr = btrfs_node_blockptr(buf, i);
6083                         size = btrfs_level_size(root, level - 1);
6084                         btrfs_node_key_to_cpu(buf, &key, i);
6085                         if (ri != NULL) {
6086                                 if ((level == ri->drop_level)
6087                                     && is_dropped_key(&key, &ri->drop_key)) {
6088                                         continue;
6089                                 }
6090                         }
6091                         ret = add_extent_rec(extent_cache, &key,
6092                                              btrfs_node_ptr_generation(buf, i),
6093                                              ptr, size, 0, 0, 1, 0, 1, 0,
6094                                              size);
6095                         BUG_ON(ret);
6096
6097                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6098
6099                         if (level > 1) {
6100                                 add_pending(nodes, seen, ptr, size);
6101                         } else {
6102                                 add_pending(pending, seen, ptr, size);
6103                         }
6104                 }
6105                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6106                                       nritems) * sizeof(struct btrfs_key_ptr);
6107         }
6108         total_btree_bytes += buf->len;
6109         if (fs_root_objectid(btrfs_header_owner(buf)))
6110                 total_fs_tree_bytes += buf->len;
6111         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6112                 total_extent_tree_bytes += buf->len;
6113         if (!found_old_backref &&
6114             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6115             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6116             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6117                 found_old_backref = 1;
6118 out:
6119         free_extent_buffer(buf);
6120         return ret;
6121 }
6122
6123 static int add_root_to_pending(struct extent_buffer *buf,
6124                                struct cache_tree *extent_cache,
6125                                struct cache_tree *pending,
6126                                struct cache_tree *seen,
6127                                struct cache_tree *nodes,
6128                                u64 objectid)
6129 {
6130         if (btrfs_header_level(buf) > 0)
6131                 add_pending(nodes, seen, buf->start, buf->len);
6132         else
6133                 add_pending(pending, seen, buf->start, buf->len);
6134         add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6135                        0, 1, 1, 0, 1, 0, buf->len);
6136
6137         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6138             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6139                 add_tree_backref(extent_cache, buf->start, buf->start,
6140                                  0, 1);
6141         else
6142                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6143         return 0;
6144 }
6145
6146 /* as we fix the tree, we might be deleting blocks that
6147  * we're tracking for repair.  This hook makes sure we
6148  * remove any backrefs for blocks as we are fixing them.
6149  */
6150 static int free_extent_hook(struct btrfs_trans_handle *trans,
6151                             struct btrfs_root *root,
6152                             u64 bytenr, u64 num_bytes, u64 parent,
6153                             u64 root_objectid, u64 owner, u64 offset,
6154                             int refs_to_drop)
6155 {
6156         struct extent_record *rec;
6157         struct cache_extent *cache;
6158         int is_data;
6159         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6160
6161         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6162         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6163         if (!cache)
6164                 return 0;
6165
6166         rec = container_of(cache, struct extent_record, cache);
6167         if (is_data) {
6168                 struct data_backref *back;
6169                 back = find_data_backref(rec, parent, root_objectid, owner,
6170                                          offset, 1, bytenr, num_bytes);
6171                 if (!back)
6172                         goto out;
6173                 if (back->node.found_ref) {
6174                         back->found_ref -= refs_to_drop;
6175                         if (rec->refs)
6176                                 rec->refs -= refs_to_drop;
6177                 }
6178                 if (back->node.found_extent_tree) {
6179                         back->num_refs -= refs_to_drop;
6180                         if (rec->extent_item_refs)
6181                                 rec->extent_item_refs -= refs_to_drop;
6182                 }
6183                 if (back->found_ref == 0)
6184                         back->node.found_ref = 0;
6185                 if (back->num_refs == 0)
6186                         back->node.found_extent_tree = 0;
6187
6188                 if (!back->node.found_extent_tree && back->node.found_ref) {
6189                         list_del(&back->node.list);
6190                         free(back);
6191                 }
6192         } else {
6193                 struct tree_backref *back;
6194                 back = find_tree_backref(rec, parent, root_objectid);
6195                 if (!back)
6196                         goto out;
6197                 if (back->node.found_ref) {
6198                         if (rec->refs)
6199                                 rec->refs--;
6200                         back->node.found_ref = 0;
6201                 }
6202                 if (back->node.found_extent_tree) {
6203                         if (rec->extent_item_refs)
6204                                 rec->extent_item_refs--;
6205                         back->node.found_extent_tree = 0;
6206                 }
6207                 if (!back->node.found_extent_tree && back->node.found_ref) {
6208                         list_del(&back->node.list);
6209                         free(back);
6210                 }
6211         }
6212         maybe_free_extent_rec(extent_cache, rec);
6213 out:
6214         return 0;
6215 }
6216
6217 static int delete_extent_records(struct btrfs_trans_handle *trans,
6218                                  struct btrfs_root *root,
6219                                  struct btrfs_path *path,
6220                                  u64 bytenr, u64 new_len)
6221 {
6222         struct btrfs_key key;
6223         struct btrfs_key found_key;
6224         struct extent_buffer *leaf;
6225         int ret;
6226         int slot;
6227
6228
6229         key.objectid = bytenr;
6230         key.type = (u8)-1;
6231         key.offset = (u64)-1;
6232
6233         while(1) {
6234                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6235                                         &key, path, 0, 1);
6236                 if (ret < 0)
6237                         break;
6238
6239                 if (ret > 0) {
6240                         ret = 0;
6241                         if (path->slots[0] == 0)
6242                                 break;
6243                         path->slots[0]--;
6244                 }
6245                 ret = 0;
6246
6247                 leaf = path->nodes[0];
6248                 slot = path->slots[0];
6249
6250                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6251                 if (found_key.objectid != bytenr)
6252                         break;
6253
6254                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6255                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6256                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6257                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6258                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6259                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6260                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6261                         btrfs_release_path(path);
6262                         if (found_key.type == 0) {
6263                                 if (found_key.offset == 0)
6264                                         break;
6265                                 key.offset = found_key.offset - 1;
6266                                 key.type = found_key.type;
6267                         }
6268                         key.type = found_key.type - 1;
6269                         key.offset = (u64)-1;
6270                         continue;
6271                 }
6272
6273                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6274                         found_key.objectid, found_key.type, found_key.offset);
6275
6276                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6277                 if (ret)
6278                         break;
6279                 btrfs_release_path(path);
6280
6281                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6282                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6283                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6284                                 found_key.offset : root->leafsize;
6285
6286                         ret = btrfs_update_block_group(trans, root, bytenr,
6287                                                        bytes, 0, 0);
6288                         if (ret)
6289                                 break;
6290                 }
6291         }
6292
6293         btrfs_release_path(path);
6294         return ret;
6295 }
6296
6297 /*
6298  * for a single backref, this will allocate a new extent
6299  * and add the backref to it.
6300  */
6301 static int record_extent(struct btrfs_trans_handle *trans,
6302                          struct btrfs_fs_info *info,
6303                          struct btrfs_path *path,
6304                          struct extent_record *rec,
6305                          struct extent_backref *back,
6306                          int allocated, u64 flags)
6307 {
6308         int ret;
6309         struct btrfs_root *extent_root = info->extent_root;
6310         struct extent_buffer *leaf;
6311         struct btrfs_key ins_key;
6312         struct btrfs_extent_item *ei;
6313         struct tree_backref *tback;
6314         struct data_backref *dback;
6315         struct btrfs_tree_block_info *bi;
6316
6317         if (!back->is_data)
6318                 rec->max_size = max_t(u64, rec->max_size,
6319                                     info->extent_root->leafsize);
6320
6321         if (!allocated) {
6322                 u32 item_size = sizeof(*ei);
6323
6324                 if (!back->is_data)
6325                         item_size += sizeof(*bi);
6326
6327                 ins_key.objectid = rec->start;
6328                 ins_key.offset = rec->max_size;
6329                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6330
6331                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6332                                         &ins_key, item_size);
6333                 if (ret)
6334                         goto fail;
6335
6336                 leaf = path->nodes[0];
6337                 ei = btrfs_item_ptr(leaf, path->slots[0],
6338                                     struct btrfs_extent_item);
6339
6340                 btrfs_set_extent_refs(leaf, ei, 0);
6341                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6342
6343                 if (back->is_data) {
6344                         btrfs_set_extent_flags(leaf, ei,
6345                                                BTRFS_EXTENT_FLAG_DATA);
6346                 } else {
6347                         struct btrfs_disk_key copy_key;;
6348
6349                         tback = (struct tree_backref *)back;
6350                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6351                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6352                                              sizeof(*bi));
6353
6354                         btrfs_set_disk_key_objectid(&copy_key,
6355                                                     rec->info_objectid);
6356                         btrfs_set_disk_key_type(&copy_key, 0);
6357                         btrfs_set_disk_key_offset(&copy_key, 0);
6358
6359                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6360                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6361
6362                         btrfs_set_extent_flags(leaf, ei,
6363                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6364                 }
6365
6366                 btrfs_mark_buffer_dirty(leaf);
6367                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6368                                                rec->max_size, 1, 0);
6369                 if (ret)
6370                         goto fail;
6371                 btrfs_release_path(path);
6372         }
6373
6374         if (back->is_data) {
6375                 u64 parent;
6376                 int i;
6377
6378                 dback = (struct data_backref *)back;
6379                 if (back->full_backref)
6380                         parent = dback->parent;
6381                 else
6382                         parent = 0;
6383
6384                 for (i = 0; i < dback->found_ref; i++) {
6385                         /* if parent != 0, we're doing a full backref
6386                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6387                          * just makes the backref allocator create a data
6388                          * backref
6389                          */
6390                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6391                                                    rec->start, rec->max_size,
6392                                                    parent,
6393                                                    dback->root,
6394                                                    parent ?
6395                                                    BTRFS_FIRST_FREE_OBJECTID :
6396                                                    dback->owner,
6397                                                    dback->offset);
6398                         if (ret)
6399                                 break;
6400                 }
6401                 fprintf(stderr, "adding new data backref"
6402                                 " on %llu %s %llu owner %llu"
6403                                 " offset %llu found %d\n",
6404                                 (unsigned long long)rec->start,
6405                                 back->full_backref ?
6406                                 "parent" : "root",
6407                                 back->full_backref ?
6408                                 (unsigned long long)parent :
6409                                 (unsigned long long)dback->root,
6410                                 (unsigned long long)dback->owner,
6411                                 (unsigned long long)dback->offset,
6412                                 dback->found_ref);
6413         } else {
6414                 u64 parent;
6415
6416                 tback = (struct tree_backref *)back;
6417                 if (back->full_backref)
6418                         parent = tback->parent;
6419                 else
6420                         parent = 0;
6421
6422                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6423                                            rec->start, rec->max_size,
6424                                            parent, tback->root, 0, 0);
6425                 fprintf(stderr, "adding new tree backref on "
6426                         "start %llu len %llu parent %llu root %llu\n",
6427                         rec->start, rec->max_size, parent, tback->root);
6428         }
6429 fail:
6430         btrfs_release_path(path);
6431         return ret;
6432 }
6433
6434 struct extent_entry {
6435         u64 bytenr;
6436         u64 bytes;
6437         int count;
6438         int broken;
6439         struct list_head list;
6440 };
6441
6442 static struct extent_entry *find_entry(struct list_head *entries,
6443                                        u64 bytenr, u64 bytes)
6444 {
6445         struct extent_entry *entry = NULL;
6446
6447         list_for_each_entry(entry, entries, list) {
6448                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6449                         return entry;
6450         }
6451
6452         return NULL;
6453 }
6454
6455 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6456 {
6457         struct extent_entry *entry, *best = NULL, *prev = NULL;
6458
6459         list_for_each_entry(entry, entries, list) {
6460                 if (!prev) {
6461                         prev = entry;
6462                         continue;
6463                 }
6464
6465                 /*
6466                  * If there are as many broken entries as entries then we know
6467                  * not to trust this particular entry.
6468                  */
6469                 if (entry->broken == entry->count)
6470                         continue;
6471
6472                 /*
6473                  * If our current entry == best then we can't be sure our best
6474                  * is really the best, so we need to keep searching.
6475                  */
6476                 if (best && best->count == entry->count) {
6477                         prev = entry;
6478                         best = NULL;
6479                         continue;
6480                 }
6481
6482                 /* Prev == entry, not good enough, have to keep searching */
6483                 if (!prev->broken && prev->count == entry->count)
6484                         continue;
6485
6486                 if (!best)
6487                         best = (prev->count > entry->count) ? prev : entry;
6488                 else if (best->count < entry->count)
6489                         best = entry;
6490                 prev = entry;
6491         }
6492
6493         return best;
6494 }
6495
6496 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6497                       struct data_backref *dback, struct extent_entry *entry)
6498 {
6499         struct btrfs_trans_handle *trans;
6500         struct btrfs_root *root;
6501         struct btrfs_file_extent_item *fi;
6502         struct extent_buffer *leaf;
6503         struct btrfs_key key;
6504         u64 bytenr, bytes;
6505         int ret, err;
6506
6507         key.objectid = dback->root;
6508         key.type = BTRFS_ROOT_ITEM_KEY;
6509         key.offset = (u64)-1;
6510         root = btrfs_read_fs_root(info, &key);
6511         if (IS_ERR(root)) {
6512                 fprintf(stderr, "Couldn't find root for our ref\n");
6513                 return -EINVAL;
6514         }
6515
6516         /*
6517          * The backref points to the original offset of the extent if it was
6518          * split, so we need to search down to the offset we have and then walk
6519          * forward until we find the backref we're looking for.
6520          */
6521         key.objectid = dback->owner;
6522         key.type = BTRFS_EXTENT_DATA_KEY;
6523         key.offset = dback->offset;
6524         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6525         if (ret < 0) {
6526                 fprintf(stderr, "Error looking up ref %d\n", ret);
6527                 return ret;
6528         }
6529
6530         while (1) {
6531                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6532                         ret = btrfs_next_leaf(root, path);
6533                         if (ret) {
6534                                 fprintf(stderr, "Couldn't find our ref, next\n");
6535                                 return -EINVAL;
6536                         }
6537                 }
6538                 leaf = path->nodes[0];
6539                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6540                 if (key.objectid != dback->owner ||
6541                     key.type != BTRFS_EXTENT_DATA_KEY) {
6542                         fprintf(stderr, "Couldn't find our ref, search\n");
6543                         return -EINVAL;
6544                 }
6545                 fi = btrfs_item_ptr(leaf, path->slots[0],
6546                                     struct btrfs_file_extent_item);
6547                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6548                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6549
6550                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6551                         break;
6552                 path->slots[0]++;
6553         }
6554
6555         btrfs_release_path(path);
6556
6557         trans = btrfs_start_transaction(root, 1);
6558         if (IS_ERR(trans))
6559                 return PTR_ERR(trans);
6560
6561         /*
6562          * Ok we have the key of the file extent we want to fix, now we can cow
6563          * down to the thing and fix it.
6564          */
6565         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6566         if (ret < 0) {
6567                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6568                         key.objectid, key.type, key.offset, ret);
6569                 goto out;
6570         }
6571         if (ret > 0) {
6572                 fprintf(stderr, "Well that's odd, we just found this key "
6573                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6574                         key.offset);
6575                 ret = -EINVAL;
6576                 goto out;
6577         }
6578         leaf = path->nodes[0];
6579         fi = btrfs_item_ptr(leaf, path->slots[0],
6580                             struct btrfs_file_extent_item);
6581
6582         if (btrfs_file_extent_compression(leaf, fi) &&
6583             dback->disk_bytenr != entry->bytenr) {
6584                 fprintf(stderr, "Ref doesn't match the record start and is "
6585                         "compressed, please take a btrfs-image of this file "
6586                         "system and send it to a btrfs developer so they can "
6587                         "complete this functionality for bytenr %Lu\n",
6588                         dback->disk_bytenr);
6589                 ret = -EINVAL;
6590                 goto out;
6591         }
6592
6593         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6594                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6595         } else if (dback->disk_bytenr > entry->bytenr) {
6596                 u64 off_diff, offset;
6597
6598                 off_diff = dback->disk_bytenr - entry->bytenr;
6599                 offset = btrfs_file_extent_offset(leaf, fi);
6600                 if (dback->disk_bytenr + offset +
6601                     btrfs_file_extent_num_bytes(leaf, fi) >
6602                     entry->bytenr + entry->bytes) {
6603                         fprintf(stderr, "Ref is past the entry end, please "
6604                                 "take a btrfs-image of this file system and "
6605                                 "send it to a btrfs developer, ref %Lu\n",
6606                                 dback->disk_bytenr);
6607                         ret = -EINVAL;
6608                         goto out;
6609                 }
6610                 offset += off_diff;
6611                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6612                 btrfs_set_file_extent_offset(leaf, fi, offset);
6613         } else if (dback->disk_bytenr < entry->bytenr) {
6614                 u64 offset;
6615
6616                 offset = btrfs_file_extent_offset(leaf, fi);
6617                 if (dback->disk_bytenr + offset < entry->bytenr) {
6618                         fprintf(stderr, "Ref is before the entry start, please"
6619                                 " take a btrfs-image of this file system and "
6620                                 "send it to a btrfs developer, ref %Lu\n",
6621                                 dback->disk_bytenr);
6622                         ret = -EINVAL;
6623                         goto out;
6624                 }
6625
6626                 offset += dback->disk_bytenr;
6627                 offset -= entry->bytenr;
6628                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6629                 btrfs_set_file_extent_offset(leaf, fi, offset);
6630         }
6631
6632         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6633
6634         /*
6635          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6636          * only do this if we aren't using compression, otherwise it's a
6637          * trickier case.
6638          */
6639         if (!btrfs_file_extent_compression(leaf, fi))
6640                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6641         else
6642                 printf("ram bytes may be wrong?\n");
6643         btrfs_mark_buffer_dirty(leaf);
6644 out:
6645         err = btrfs_commit_transaction(trans, root);
6646         btrfs_release_path(path);
6647         return ret ? ret : err;
6648 }
6649
6650 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6651                            struct extent_record *rec)
6652 {
6653         struct extent_backref *back;
6654         struct data_backref *dback;
6655         struct extent_entry *entry, *best = NULL;
6656         LIST_HEAD(entries);
6657         int nr_entries = 0;
6658         int broken_entries = 0;
6659         int ret = 0;
6660         short mismatch = 0;
6661
6662         /*
6663          * Metadata is easy and the backrefs should always agree on bytenr and
6664          * size, if not we've got bigger issues.
6665          */
6666         if (rec->metadata)
6667                 return 0;
6668
6669         list_for_each_entry(back, &rec->backrefs, list) {
6670                 if (back->full_backref || !back->is_data)
6671                         continue;
6672
6673                 dback = (struct data_backref *)back;
6674
6675                 /*
6676                  * We only pay attention to backrefs that we found a real
6677                  * backref for.
6678                  */
6679                 if (dback->found_ref == 0)
6680                         continue;
6681
6682                 /*
6683                  * For now we only catch when the bytes don't match, not the
6684                  * bytenr.  We can easily do this at the same time, but I want
6685                  * to have a fs image to test on before we just add repair
6686                  * functionality willy-nilly so we know we won't screw up the
6687                  * repair.
6688                  */
6689
6690                 entry = find_entry(&entries, dback->disk_bytenr,
6691                                    dback->bytes);
6692                 if (!entry) {
6693                         entry = malloc(sizeof(struct extent_entry));
6694                         if (!entry) {
6695                                 ret = -ENOMEM;
6696                                 goto out;
6697                         }
6698                         memset(entry, 0, sizeof(*entry));
6699                         entry->bytenr = dback->disk_bytenr;
6700                         entry->bytes = dback->bytes;
6701                         list_add_tail(&entry->list, &entries);
6702                         nr_entries++;
6703                 }
6704
6705                 /*
6706                  * If we only have one entry we may think the entries agree when
6707                  * in reality they don't so we have to do some extra checking.
6708                  */
6709                 if (dback->disk_bytenr != rec->start ||
6710                     dback->bytes != rec->nr || back->broken)
6711                         mismatch = 1;
6712
6713                 if (back->broken) {
6714                         entry->broken++;
6715                         broken_entries++;
6716                 }
6717
6718                 entry->count++;
6719         }
6720
6721         /* Yay all the backrefs agree, carry on good sir */
6722         if (nr_entries <= 1 && !mismatch)
6723                 goto out;
6724
6725         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6726                 "%Lu\n", rec->start);
6727
6728         /*
6729          * First we want to see if the backrefs can agree amongst themselves who
6730          * is right, so figure out which one of the entries has the highest
6731          * count.
6732          */
6733         best = find_most_right_entry(&entries);
6734
6735         /*
6736          * Ok so we may have an even split between what the backrefs think, so
6737          * this is where we use the extent ref to see what it thinks.
6738          */
6739         if (!best) {
6740                 entry = find_entry(&entries, rec->start, rec->nr);
6741                 if (!entry && (!broken_entries || !rec->found_rec)) {
6742                         fprintf(stderr, "Backrefs don't agree with each other "
6743                                 "and extent record doesn't agree with anybody,"
6744                                 " so we can't fix bytenr %Lu bytes %Lu\n",
6745                                 rec->start, rec->nr);
6746                         ret = -EINVAL;
6747                         goto out;
6748                 } else if (!entry) {
6749                         /*
6750                          * Ok our backrefs were broken, we'll assume this is the
6751                          * correct value and add an entry for this range.
6752                          */
6753                         entry = malloc(sizeof(struct extent_entry));
6754                         if (!entry) {
6755                                 ret = -ENOMEM;
6756                                 goto out;
6757                         }
6758                         memset(entry, 0, sizeof(*entry));
6759                         entry->bytenr = rec->start;
6760                         entry->bytes = rec->nr;
6761                         list_add_tail(&entry->list, &entries);
6762                         nr_entries++;
6763                 }
6764                 entry->count++;
6765                 best = find_most_right_entry(&entries);
6766                 if (!best) {
6767                         fprintf(stderr, "Backrefs and extent record evenly "
6768                                 "split on who is right, this is going to "
6769                                 "require user input to fix bytenr %Lu bytes "
6770                                 "%Lu\n", rec->start, rec->nr);
6771                         ret = -EINVAL;
6772                         goto out;
6773                 }
6774         }
6775
6776         /*
6777          * I don't think this can happen currently as we'll abort() if we catch
6778          * this case higher up, but in case somebody removes that we still can't
6779          * deal with it properly here yet, so just bail out if that's the case.
6780          */
6781         if (best->bytenr != rec->start) {
6782                 fprintf(stderr, "Extent start and backref starts don't match, "
6783                         "please use btrfs-image on this file system and send "
6784                         "it to a btrfs developer so they can make fsck fix "
6785                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
6786                         rec->start, rec->nr);
6787                 ret = -EINVAL;
6788                 goto out;
6789         }
6790
6791         /*
6792          * Ok great we all agreed on an extent record, let's go find the real
6793          * references and fix up the ones that don't match.
6794          */
6795         list_for_each_entry(back, &rec->backrefs, list) {
6796                 if (back->full_backref || !back->is_data)
6797                         continue;
6798
6799                 dback = (struct data_backref *)back;
6800
6801                 /*
6802                  * Still ignoring backrefs that don't have a real ref attached
6803                  * to them.
6804                  */
6805                 if (dback->found_ref == 0)
6806                         continue;
6807
6808                 if (dback->bytes == best->bytes &&
6809                     dback->disk_bytenr == best->bytenr)
6810                         continue;
6811
6812                 ret = repair_ref(info, path, dback, best);
6813                 if (ret)
6814                         goto out;
6815         }
6816
6817         /*
6818          * Ok we messed with the actual refs, which means we need to drop our
6819          * entire cache and go back and rescan.  I know this is a huge pain and
6820          * adds a lot of extra work, but it's the only way to be safe.  Once all
6821          * the backrefs agree we may not need to do anything to the extent
6822          * record itself.
6823          */
6824         ret = -EAGAIN;
6825 out:
6826         while (!list_empty(&entries)) {
6827                 entry = list_entry(entries.next, struct extent_entry, list);
6828                 list_del_init(&entry->list);
6829                 free(entry);
6830         }
6831         return ret;
6832 }
6833
6834 static int process_duplicates(struct btrfs_root *root,
6835                               struct cache_tree *extent_cache,
6836                               struct extent_record *rec)
6837 {
6838         struct extent_record *good, *tmp;
6839         struct cache_extent *cache;
6840         int ret;
6841
6842         /*
6843          * If we found a extent record for this extent then return, or if we
6844          * have more than one duplicate we are likely going to need to delete
6845          * something.
6846          */
6847         if (rec->found_rec || rec->num_duplicates > 1)
6848                 return 0;
6849
6850         /* Shouldn't happen but just in case */
6851         BUG_ON(!rec->num_duplicates);
6852
6853         /*
6854          * So this happens if we end up with a backref that doesn't match the
6855          * actual extent entry.  So either the backref is bad or the extent
6856          * entry is bad.  Either way we want to have the extent_record actually
6857          * reflect what we found in the extent_tree, so we need to take the
6858          * duplicate out and use that as the extent_record since the only way we
6859          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6860          */
6861         remove_cache_extent(extent_cache, &rec->cache);
6862
6863         good = list_entry(rec->dups.next, struct extent_record, list);
6864         list_del_init(&good->list);
6865         INIT_LIST_HEAD(&good->backrefs);
6866         INIT_LIST_HEAD(&good->dups);
6867         good->cache.start = good->start;
6868         good->cache.size = good->nr;
6869         good->content_checked = 0;
6870         good->owner_ref_checked = 0;
6871         good->num_duplicates = 0;
6872         good->refs = rec->refs;
6873         list_splice_init(&rec->backrefs, &good->backrefs);
6874         while (1) {
6875                 cache = lookup_cache_extent(extent_cache, good->start,
6876                                             good->nr);
6877                 if (!cache)
6878                         break;
6879                 tmp = container_of(cache, struct extent_record, cache);
6880
6881                 /*
6882                  * If we find another overlapping extent and it's found_rec is
6883                  * set then it's a duplicate and we need to try and delete
6884                  * something.
6885                  */
6886                 if (tmp->found_rec || tmp->num_duplicates > 0) {
6887                         if (list_empty(&good->list))
6888                                 list_add_tail(&good->list,
6889                                               &duplicate_extents);
6890                         good->num_duplicates += tmp->num_duplicates + 1;
6891                         list_splice_init(&tmp->dups, &good->dups);
6892                         list_del_init(&tmp->list);
6893                         list_add_tail(&tmp->list, &good->dups);
6894                         remove_cache_extent(extent_cache, &tmp->cache);
6895                         continue;
6896                 }
6897
6898                 /*
6899                  * Ok we have another non extent item backed extent rec, so lets
6900                  * just add it to this extent and carry on like we did above.
6901                  */
6902                 good->refs += tmp->refs;
6903                 list_splice_init(&tmp->backrefs, &good->backrefs);
6904                 remove_cache_extent(extent_cache, &tmp->cache);
6905                 free(tmp);
6906         }
6907         ret = insert_cache_extent(extent_cache, &good->cache);
6908         BUG_ON(ret);
6909         free(rec);
6910         return good->num_duplicates ? 0 : 1;
6911 }
6912
6913 static int delete_duplicate_records(struct btrfs_root *root,
6914                                     struct extent_record *rec)
6915 {
6916         struct btrfs_trans_handle *trans;
6917         LIST_HEAD(delete_list);
6918         struct btrfs_path *path;
6919         struct extent_record *tmp, *good, *n;
6920         int nr_del = 0;
6921         int ret = 0, err;
6922         struct btrfs_key key;
6923
6924         path = btrfs_alloc_path();
6925         if (!path) {
6926                 ret = -ENOMEM;
6927                 goto out;
6928         }
6929
6930         good = rec;
6931         /* Find the record that covers all of the duplicates. */
6932         list_for_each_entry(tmp, &rec->dups, list) {
6933                 if (good->start < tmp->start)
6934                         continue;
6935                 if (good->nr > tmp->nr)
6936                         continue;
6937
6938                 if (tmp->start + tmp->nr < good->start + good->nr) {
6939                         fprintf(stderr, "Ok we have overlapping extents that "
6940                                 "aren't completely covered by eachother, this "
6941                                 "is going to require more careful thought.  "
6942                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
6943                                 tmp->start, tmp->nr, good->start, good->nr);
6944                         abort();
6945                 }
6946                 good = tmp;
6947         }
6948
6949         if (good != rec)
6950                 list_add_tail(&rec->list, &delete_list);
6951
6952         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
6953                 if (tmp == good)
6954                         continue;
6955                 list_move_tail(&tmp->list, &delete_list);
6956         }
6957
6958         root = root->fs_info->extent_root;
6959         trans = btrfs_start_transaction(root, 1);
6960         if (IS_ERR(trans)) {
6961                 ret = PTR_ERR(trans);
6962                 goto out;
6963         }
6964
6965         list_for_each_entry(tmp, &delete_list, list) {
6966                 if (tmp->found_rec == 0)
6967                         continue;
6968                 key.objectid = tmp->start;
6969                 key.type = BTRFS_EXTENT_ITEM_KEY;
6970                 key.offset = tmp->nr;
6971
6972                 /* Shouldn't happen but just in case */
6973                 if (tmp->metadata) {
6974                         fprintf(stderr, "Well this shouldn't happen, extent "
6975                                 "record overlaps but is metadata? "
6976                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
6977                         abort();
6978                 }
6979
6980                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6981                 if (ret) {
6982                         if (ret > 0)
6983                                 ret = -EINVAL;
6984                         break;
6985                 }
6986                 ret = btrfs_del_item(trans, root, path);
6987                 if (ret)
6988                         break;
6989                 btrfs_release_path(path);
6990                 nr_del++;
6991         }
6992         err = btrfs_commit_transaction(trans, root);
6993         if (err && !ret)
6994                 ret = err;
6995 out:
6996         while (!list_empty(&delete_list)) {
6997                 tmp = list_entry(delete_list.next, struct extent_record, list);
6998                 list_del_init(&tmp->list);
6999                 if (tmp == rec)
7000                         continue;
7001                 free(tmp);
7002         }
7003
7004         while (!list_empty(&rec->dups)) {
7005                 tmp = list_entry(rec->dups.next, struct extent_record, list);
7006                 list_del_init(&tmp->list);
7007                 free(tmp);
7008         }
7009
7010         btrfs_free_path(path);
7011
7012         if (!ret && !nr_del)
7013                 rec->num_duplicates = 0;
7014
7015         return ret ? ret : nr_del;
7016 }
7017
7018 static int find_possible_backrefs(struct btrfs_fs_info *info,
7019                                   struct btrfs_path *path,
7020                                   struct cache_tree *extent_cache,
7021                                   struct extent_record *rec)
7022 {
7023         struct btrfs_root *root;
7024         struct extent_backref *back;
7025         struct data_backref *dback;
7026         struct cache_extent *cache;
7027         struct btrfs_file_extent_item *fi;
7028         struct btrfs_key key;
7029         u64 bytenr, bytes;
7030         int ret;
7031
7032         list_for_each_entry(back, &rec->backrefs, list) {
7033                 /* Don't care about full backrefs (poor unloved backrefs) */
7034                 if (back->full_backref || !back->is_data)
7035                         continue;
7036
7037                 dback = (struct data_backref *)back;
7038
7039                 /* We found this one, we don't need to do a lookup */
7040                 if (dback->found_ref)
7041                         continue;
7042
7043                 key.objectid = dback->root;
7044                 key.type = BTRFS_ROOT_ITEM_KEY;
7045                 key.offset = (u64)-1;
7046
7047                 root = btrfs_read_fs_root(info, &key);
7048
7049                 /* No root, definitely a bad ref, skip */
7050                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7051                         continue;
7052                 /* Other err, exit */
7053                 if (IS_ERR(root))
7054                         return PTR_ERR(root);
7055
7056                 key.objectid = dback->owner;
7057                 key.type = BTRFS_EXTENT_DATA_KEY;
7058                 key.offset = dback->offset;
7059                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7060                 if (ret) {
7061                         btrfs_release_path(path);
7062                         if (ret < 0)
7063                                 return ret;
7064                         /* Didn't find it, we can carry on */
7065                         ret = 0;
7066                         continue;
7067                 }
7068
7069                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7070                                     struct btrfs_file_extent_item);
7071                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7072                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7073                 btrfs_release_path(path);
7074                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7075                 if (cache) {
7076                         struct extent_record *tmp;
7077                         tmp = container_of(cache, struct extent_record, cache);
7078
7079                         /*
7080                          * If we found an extent record for the bytenr for this
7081                          * particular backref then we can't add it to our
7082                          * current extent record.  We only want to add backrefs
7083                          * that don't have a corresponding extent item in the
7084                          * extent tree since they likely belong to this record
7085                          * and we need to fix it if it doesn't match bytenrs.
7086                          */
7087                         if  (tmp->found_rec)
7088                                 continue;
7089                 }
7090
7091                 dback->found_ref += 1;
7092                 dback->disk_bytenr = bytenr;
7093                 dback->bytes = bytes;
7094
7095                 /*
7096                  * Set this so the verify backref code knows not to trust the
7097                  * values in this backref.
7098                  */
7099                 back->broken = 1;
7100         }
7101
7102         return 0;
7103 }
7104
7105 /*
7106  * Record orphan data ref into corresponding root.
7107  *
7108  * Return 0 if the extent item contains data ref and recorded.
7109  * Return 1 if the extent item contains no useful data ref
7110  *   On that case, it may contains only shared_dataref or metadata backref
7111  *   or the file extent exists(this should be handled by the extent bytenr
7112  *   recovery routine)
7113  * Return <0 if something goes wrong.
7114  */
7115 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7116                                       struct extent_record *rec)
7117 {
7118         struct btrfs_key key;
7119         struct btrfs_root *dest_root;
7120         struct extent_backref *back;
7121         struct data_backref *dback;
7122         struct orphan_data_extent *orphan;
7123         struct btrfs_path *path;
7124         int recorded_data_ref = 0;
7125         int ret = 0;
7126
7127         if (rec->metadata)
7128                 return 1;
7129         path = btrfs_alloc_path();
7130         if (!path)
7131                 return -ENOMEM;
7132         list_for_each_entry(back, &rec->backrefs, list) {
7133                 if (back->full_backref || !back->is_data ||
7134                     !back->found_extent_tree)
7135                         continue;
7136                 dback = (struct data_backref *)back;
7137                 if (dback->found_ref)
7138                         continue;
7139                 key.objectid = dback->root;
7140                 key.type = BTRFS_ROOT_ITEM_KEY;
7141                 key.offset = (u64)-1;
7142
7143                 dest_root = btrfs_read_fs_root(fs_info, &key);
7144
7145                 /* For non-exist root we just skip it */
7146                 if (IS_ERR(dest_root) || !dest_root)
7147                         continue;
7148
7149                 key.objectid = dback->owner;
7150                 key.type = BTRFS_EXTENT_DATA_KEY;
7151                 key.offset = dback->offset;
7152
7153                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7154                 /*
7155                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7156                  * we need to record it for inode/file extent rebuild.
7157                  * For ret > 0, we record it only for file extent rebuild.
7158                  * For ret == 0, the file extent exists but only bytenr
7159                  * mismatch, let the original bytenr fix routine to handle,
7160                  * don't record it.
7161                  */
7162                 if (ret == 0)
7163                         continue;
7164                 ret = 0;
7165                 orphan = malloc(sizeof(*orphan));
7166                 if (!orphan) {
7167                         ret = -ENOMEM;
7168                         goto out;
7169                 }
7170                 INIT_LIST_HEAD(&orphan->list);
7171                 orphan->root = dback->root;
7172                 orphan->objectid = dback->owner;
7173                 orphan->offset = dback->offset;
7174                 orphan->disk_bytenr = rec->cache.start;
7175                 orphan->disk_len = rec->cache.size;
7176                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7177                 recorded_data_ref = 1;
7178         }
7179 out:
7180         btrfs_free_path(path);
7181         if (!ret)
7182                 return !recorded_data_ref;
7183         else
7184                 return ret;
7185 }
7186
7187 /*
7188  * when an incorrect extent item is found, this will delete
7189  * all of the existing entries for it and recreate them
7190  * based on what the tree scan found.
7191  */
7192 static int fixup_extent_refs(struct btrfs_fs_info *info,
7193                              struct cache_tree *extent_cache,
7194                              struct extent_record *rec)
7195 {
7196         struct btrfs_trans_handle *trans = NULL;
7197         int ret;
7198         struct btrfs_path *path;
7199         struct list_head *cur = rec->backrefs.next;
7200         struct cache_extent *cache;
7201         struct extent_backref *back;
7202         int allocated = 0;
7203         u64 flags = 0;
7204
7205         if (rec->flag_block_full_backref)
7206                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7207
7208         path = btrfs_alloc_path();
7209         if (!path)
7210                 return -ENOMEM;
7211
7212         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7213                 /*
7214                  * Sometimes the backrefs themselves are so broken they don't
7215                  * get attached to any meaningful rec, so first go back and
7216                  * check any of our backrefs that we couldn't find and throw
7217                  * them into the list if we find the backref so that
7218                  * verify_backrefs can figure out what to do.
7219                  */
7220                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7221                 if (ret < 0)
7222                         goto out;
7223         }
7224
7225         /* step one, make sure all of the backrefs agree */
7226         ret = verify_backrefs(info, path, rec);
7227         if (ret < 0)
7228                 goto out;
7229
7230         trans = btrfs_start_transaction(info->extent_root, 1);
7231         if (IS_ERR(trans)) {
7232                 ret = PTR_ERR(trans);
7233                 goto out;
7234         }
7235
7236         /* step two, delete all the existing records */
7237         ret = delete_extent_records(trans, info->extent_root, path,
7238                                     rec->start, rec->max_size);
7239
7240         if (ret < 0)
7241                 goto out;
7242
7243         /* was this block corrupt?  If so, don't add references to it */
7244         cache = lookup_cache_extent(info->corrupt_blocks,
7245                                     rec->start, rec->max_size);
7246         if (cache) {
7247                 ret = 0;
7248                 goto out;
7249         }
7250
7251         /* step three, recreate all the refs we did find */
7252         while(cur != &rec->backrefs) {
7253                 back = list_entry(cur, struct extent_backref, list);
7254                 cur = cur->next;
7255
7256                 /*
7257                  * if we didn't find any references, don't create a
7258                  * new extent record
7259                  */
7260                 if (!back->found_ref)
7261                         continue;
7262
7263                 rec->bad_full_backref = 0;
7264                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7265                 allocated = 1;
7266
7267                 if (ret)
7268                         goto out;
7269         }
7270 out:
7271         if (trans) {
7272                 int err = btrfs_commit_transaction(trans, info->extent_root);
7273                 if (!ret)
7274                         ret = err;
7275         }
7276
7277         btrfs_free_path(path);
7278         return ret;
7279 }
7280
7281 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7282                               struct extent_record *rec)
7283 {
7284         struct btrfs_trans_handle *trans;
7285         struct btrfs_root *root = fs_info->extent_root;
7286         struct btrfs_path *path;
7287         struct btrfs_extent_item *ei;
7288         struct btrfs_key key;
7289         u64 flags;
7290         int ret = 0;
7291
7292         key.objectid = rec->start;
7293         if (rec->metadata) {
7294                 key.type = BTRFS_METADATA_ITEM_KEY;
7295                 key.offset = rec->info_level;
7296         } else {
7297                 key.type = BTRFS_EXTENT_ITEM_KEY;
7298                 key.offset = rec->max_size;
7299         }
7300
7301         path = btrfs_alloc_path();
7302         if (!path)
7303                 return -ENOMEM;
7304
7305         trans = btrfs_start_transaction(root, 0);
7306         if (IS_ERR(trans)) {
7307                 btrfs_free_path(path);
7308                 return PTR_ERR(trans);
7309         }
7310
7311         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7312         if (ret < 0) {
7313                 btrfs_free_path(path);
7314                 btrfs_commit_transaction(trans, root);
7315                 return ret;
7316         } else if (ret) {
7317                 fprintf(stderr, "Didn't find extent for %llu\n",
7318                         (unsigned long long)rec->start);
7319                 btrfs_free_path(path);
7320                 btrfs_commit_transaction(trans, root);
7321                 return -ENOENT;
7322         }
7323
7324         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7325                             struct btrfs_extent_item);
7326         flags = btrfs_extent_flags(path->nodes[0], ei);
7327         if (rec->flag_block_full_backref) {
7328                 fprintf(stderr, "setting full backref on %llu\n",
7329                         (unsigned long long)key.objectid);
7330                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7331         } else {
7332                 fprintf(stderr, "clearing full backref on %llu\n",
7333                         (unsigned long long)key.objectid);
7334                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7335         }
7336         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7337         btrfs_mark_buffer_dirty(path->nodes[0]);
7338         btrfs_free_path(path);
7339         return btrfs_commit_transaction(trans, root);
7340 }
7341
7342 /* right now we only prune from the extent allocation tree */
7343 static int prune_one_block(struct btrfs_trans_handle *trans,
7344                            struct btrfs_fs_info *info,
7345                            struct btrfs_corrupt_block *corrupt)
7346 {
7347         int ret;
7348         struct btrfs_path path;
7349         struct extent_buffer *eb;
7350         u64 found;
7351         int slot;
7352         int nritems;
7353         int level = corrupt->level + 1;
7354
7355         btrfs_init_path(&path);
7356 again:
7357         /* we want to stop at the parent to our busted block */
7358         path.lowest_level = level;
7359
7360         ret = btrfs_search_slot(trans, info->extent_root,
7361                                 &corrupt->key, &path, -1, 1);
7362
7363         if (ret < 0)
7364                 goto out;
7365
7366         eb = path.nodes[level];
7367         if (!eb) {
7368                 ret = -ENOENT;
7369                 goto out;
7370         }
7371
7372         /*
7373          * hopefully the search gave us the block we want to prune,
7374          * lets try that first
7375          */
7376         slot = path.slots[level];
7377         found =  btrfs_node_blockptr(eb, slot);
7378         if (found == corrupt->cache.start)
7379                 goto del_ptr;
7380
7381         nritems = btrfs_header_nritems(eb);
7382
7383         /* the search failed, lets scan this node and hope we find it */
7384         for (slot = 0; slot < nritems; slot++) {
7385                 found =  btrfs_node_blockptr(eb, slot);
7386                 if (found == corrupt->cache.start)
7387                         goto del_ptr;
7388         }
7389         /*
7390          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7391          * to this block
7392          */
7393         if (eb == info->extent_root->node) {
7394                 ret = -ENOENT;
7395                 goto out;
7396         } else {
7397                 level++;
7398                 btrfs_release_path(&path);
7399                 goto again;
7400         }
7401
7402 del_ptr:
7403         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7404         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7405
7406 out:
7407         btrfs_release_path(&path);
7408         return ret;
7409 }
7410
7411 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7412 {
7413         struct btrfs_trans_handle *trans = NULL;
7414         struct cache_extent *cache;
7415         struct btrfs_corrupt_block *corrupt;
7416
7417         while (1) {
7418                 cache = search_cache_extent(info->corrupt_blocks, 0);
7419                 if (!cache)
7420                         break;
7421                 if (!trans) {
7422                         trans = btrfs_start_transaction(info->extent_root, 1);
7423                         if (IS_ERR(trans))
7424                                 return PTR_ERR(trans);
7425                 }
7426                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7427                 prune_one_block(trans, info, corrupt);
7428                 remove_cache_extent(info->corrupt_blocks, cache);
7429         }
7430         if (trans)
7431                 return btrfs_commit_transaction(trans, info->extent_root);
7432         return 0;
7433 }
7434
7435 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7436 {
7437         struct btrfs_block_group_cache *cache;
7438         u64 start, end;
7439         int ret;
7440
7441         while (1) {
7442                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7443                                             &start, &end, EXTENT_DIRTY);
7444                 if (ret)
7445                         break;
7446                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7447                                    GFP_NOFS);
7448         }
7449
7450         start = 0;
7451         while (1) {
7452                 cache = btrfs_lookup_first_block_group(fs_info, start);
7453                 if (!cache)
7454                         break;
7455                 if (cache->cached)
7456                         cache->cached = 0;
7457                 start = cache->key.objectid + cache->key.offset;
7458         }
7459 }
7460
7461 static int check_extent_refs(struct btrfs_root *root,
7462                              struct cache_tree *extent_cache)
7463 {
7464         struct extent_record *rec;
7465         struct cache_extent *cache;
7466         int err = 0;
7467         int ret = 0;
7468         int fixed = 0;
7469         int had_dups = 0;
7470         int recorded = 0;
7471
7472         if (repair) {
7473                 /*
7474                  * if we're doing a repair, we have to make sure
7475                  * we don't allocate from the problem extents.
7476                  * In the worst case, this will be all the
7477                  * extents in the FS
7478                  */
7479                 cache = search_cache_extent(extent_cache, 0);
7480                 while(cache) {
7481                         rec = container_of(cache, struct extent_record, cache);
7482                         set_extent_dirty(root->fs_info->excluded_extents,
7483                                          rec->start,
7484                                          rec->start + rec->max_size - 1,
7485                                          GFP_NOFS);
7486                         cache = next_cache_extent(cache);
7487                 }
7488
7489                 /* pin down all the corrupted blocks too */
7490                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7491                 while(cache) {
7492                         set_extent_dirty(root->fs_info->excluded_extents,
7493                                          cache->start,
7494                                          cache->start + cache->size - 1,
7495                                          GFP_NOFS);
7496                         cache = next_cache_extent(cache);
7497                 }
7498                 prune_corrupt_blocks(root->fs_info);
7499                 reset_cached_block_groups(root->fs_info);
7500         }
7501
7502         reset_cached_block_groups(root->fs_info);
7503
7504         /*
7505          * We need to delete any duplicate entries we find first otherwise we
7506          * could mess up the extent tree when we have backrefs that actually
7507          * belong to a different extent item and not the weird duplicate one.
7508          */
7509         while (repair && !list_empty(&duplicate_extents)) {
7510                 rec = list_entry(duplicate_extents.next, struct extent_record,
7511                                  list);
7512                 list_del_init(&rec->list);
7513
7514                 /* Sometimes we can find a backref before we find an actual
7515                  * extent, so we need to process it a little bit to see if there
7516                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7517                  * if this is a backref screwup.  If we need to delete stuff
7518                  * process_duplicates() will return 0, otherwise it will return
7519                  * 1 and we can move on to the next record.
7520                  */
7521                 if (process_duplicates(root, extent_cache, rec))
7522                         continue;
7523                 ret = delete_duplicate_records(root, rec);
7524                 if (ret < 0)
7525                         return ret;
7526                 /*
7527                  * delete_duplicate_records will return the number of entries
7528                  * deleted, so if it's greater than 0 then we know we actually
7529                  * did something and we need to remove.
7530                  */
7531                 if (ret)
7532                         had_dups = 1;
7533         }
7534
7535         if (had_dups)
7536                 return -EAGAIN;
7537
7538         while(1) {
7539                 int cur_err = 0;
7540
7541                 fixed = 0;
7542                 recorded = 0;
7543                 cache = search_cache_extent(extent_cache, 0);
7544                 if (!cache)
7545                         break;
7546                 rec = container_of(cache, struct extent_record, cache);
7547                 if (rec->num_duplicates) {
7548                         fprintf(stderr, "extent item %llu has multiple extent "
7549                                 "items\n", (unsigned long long)rec->start);
7550                         err = 1;
7551                         cur_err = 1;
7552                 }
7553
7554                 if (rec->refs != rec->extent_item_refs) {
7555                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7556                                 (unsigned long long)rec->start,
7557                                 (unsigned long long)rec->nr);
7558                         fprintf(stderr, "extent item %llu, found %llu\n",
7559                                 (unsigned long long)rec->extent_item_refs,
7560                                 (unsigned long long)rec->refs);
7561                         ret = record_orphan_data_extents(root->fs_info, rec);
7562                         if (ret < 0)
7563                                 goto repair_abort;
7564                         if (ret == 0) {
7565                                 recorded = 1;
7566                         } else {
7567                                 /*
7568                                  * we can't use the extent to repair file
7569                                  * extent, let the fallback method handle it.
7570                                  */
7571                                 if (!fixed && repair) {
7572                                         ret = fixup_extent_refs(
7573                                                         root->fs_info,
7574                                                         extent_cache, rec);
7575                                         if (ret)
7576                                                 goto repair_abort;
7577                                         fixed = 1;
7578                                 }
7579                         }
7580                         err = 1;
7581                         cur_err = 1;
7582                 }
7583                 if (all_backpointers_checked(rec, 1)) {
7584                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7585                                 (unsigned long long)rec->start,
7586                                 (unsigned long long)rec->nr);
7587
7588                         if (!fixed && !recorded && repair) {
7589                                 ret = fixup_extent_refs(root->fs_info,
7590                                                         extent_cache, rec);
7591                                 if (ret)
7592                                         goto repair_abort;
7593                                 fixed = 1;
7594                         }
7595                         cur_err = 1;
7596                         err = 1;
7597                 }
7598                 if (!rec->owner_ref_checked) {
7599                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7600                                 (unsigned long long)rec->start,
7601                                 (unsigned long long)rec->nr);
7602                         if (!fixed && !recorded && repair) {
7603                                 ret = fixup_extent_refs(root->fs_info,
7604                                                         extent_cache, rec);
7605                                 if (ret)
7606                                         goto repair_abort;
7607                                 fixed = 1;
7608                         }
7609                         err = 1;
7610                         cur_err = 1;
7611                 }
7612                 if (rec->bad_full_backref) {
7613                         fprintf(stderr, "bad full backref, on [%llu]\n",
7614                                 (unsigned long long)rec->start);
7615                         if (repair) {
7616                                 ret = fixup_extent_flags(root->fs_info, rec);
7617                                 if (ret)
7618                                         goto repair_abort;
7619                                 fixed = 1;
7620                         }
7621                         err = 1;
7622                         cur_err = 1;
7623                 }
7624                 /*
7625                  * Although it's not a extent ref's problem, we reuse this
7626                  * routine for error reporting.
7627                  * No repair function yet.
7628                  */
7629                 if (rec->crossing_stripes) {
7630                         fprintf(stderr,
7631                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7632                                 rec->start, rec->start + rec->max_size);
7633                         err = 1;
7634                         cur_err = 1;
7635                 }
7636
7637                 if (rec->wrong_chunk_type) {
7638                         fprintf(stderr,
7639                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7640                                 rec->start, rec->start + rec->max_size);
7641                         err = 1;
7642                         cur_err = 1;
7643                 }
7644
7645                 remove_cache_extent(extent_cache, cache);
7646                 free_all_extent_backrefs(rec);
7647                 if (!init_extent_tree && repair && (!cur_err || fixed))
7648                         clear_extent_dirty(root->fs_info->excluded_extents,
7649                                            rec->start,
7650                                            rec->start + rec->max_size - 1,
7651                                            GFP_NOFS);
7652                 free(rec);
7653         }
7654 repair_abort:
7655         if (repair) {
7656                 if (ret && ret != -EAGAIN) {
7657                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7658                         exit(1);
7659                 } else if (!ret) {
7660                         struct btrfs_trans_handle *trans;
7661
7662                         root = root->fs_info->extent_root;
7663                         trans = btrfs_start_transaction(root, 1);
7664                         if (IS_ERR(trans)) {
7665                                 ret = PTR_ERR(trans);
7666                                 goto repair_abort;
7667                         }
7668
7669                         btrfs_fix_block_accounting(trans, root);
7670                         ret = btrfs_commit_transaction(trans, root);
7671                         if (ret)
7672                                 goto repair_abort;
7673                 }
7674                 if (err)
7675                         fprintf(stderr, "repaired damaged extent references\n");
7676                 return ret;
7677         }
7678         return err;
7679 }
7680
7681 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7682 {
7683         u64 stripe_size;
7684
7685         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7686                 stripe_size = length;
7687                 stripe_size /= num_stripes;
7688         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7689                 stripe_size = length * 2;
7690                 stripe_size /= num_stripes;
7691         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7692                 stripe_size = length;
7693                 stripe_size /= (num_stripes - 1);
7694         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7695                 stripe_size = length;
7696                 stripe_size /= (num_stripes - 2);
7697         } else {
7698                 stripe_size = length;
7699         }
7700         return stripe_size;
7701 }
7702
7703 /*
7704  * Check the chunk with its block group/dev list ref:
7705  * Return 0 if all refs seems valid.
7706  * Return 1 if part of refs seems valid, need later check for rebuild ref
7707  * like missing block group and needs to search extent tree to rebuild them.
7708  * Return -1 if essential refs are missing and unable to rebuild.
7709  */
7710 static int check_chunk_refs(struct chunk_record *chunk_rec,
7711                             struct block_group_tree *block_group_cache,
7712                             struct device_extent_tree *dev_extent_cache,
7713                             int silent)
7714 {
7715         struct cache_extent *block_group_item;
7716         struct block_group_record *block_group_rec;
7717         struct cache_extent *dev_extent_item;
7718         struct device_extent_record *dev_extent_rec;
7719         u64 devid;
7720         u64 offset;
7721         u64 length;
7722         int metadump_v2 = 0;
7723         int i;
7724         int ret = 0;
7725
7726         block_group_item = lookup_cache_extent(&block_group_cache->tree,
7727                                                chunk_rec->offset,
7728                                                chunk_rec->length);
7729         if (block_group_item) {
7730                 block_group_rec = container_of(block_group_item,
7731                                                struct block_group_record,
7732                                                cache);
7733                 if (chunk_rec->length != block_group_rec->offset ||
7734                     chunk_rec->offset != block_group_rec->objectid ||
7735                     (!metadump_v2 &&
7736                      chunk_rec->type_flags != block_group_rec->flags)) {
7737                         if (!silent)
7738                                 fprintf(stderr,
7739                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7740                                         chunk_rec->objectid,
7741                                         chunk_rec->type,
7742                                         chunk_rec->offset,
7743                                         chunk_rec->length,
7744                                         chunk_rec->offset,
7745                                         chunk_rec->type_flags,
7746                                         block_group_rec->objectid,
7747                                         block_group_rec->type,
7748                                         block_group_rec->offset,
7749                                         block_group_rec->offset,
7750                                         block_group_rec->objectid,
7751                                         block_group_rec->flags);
7752                         ret = -1;
7753                 } else {
7754                         list_del_init(&block_group_rec->list);
7755                         chunk_rec->bg_rec = block_group_rec;
7756                 }
7757         } else {
7758                 if (!silent)
7759                         fprintf(stderr,
7760                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7761                                 chunk_rec->objectid,
7762                                 chunk_rec->type,
7763                                 chunk_rec->offset,
7764                                 chunk_rec->length,
7765                                 chunk_rec->offset,
7766                                 chunk_rec->type_flags);
7767                 ret = 1;
7768         }
7769
7770         if (metadump_v2)
7771                 return ret;
7772
7773         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7774                                     chunk_rec->num_stripes);
7775         for (i = 0; i < chunk_rec->num_stripes; ++i) {
7776                 devid = chunk_rec->stripes[i].devid;
7777                 offset = chunk_rec->stripes[i].offset;
7778                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7779                                                        devid, offset, length);
7780                 if (dev_extent_item) {
7781                         dev_extent_rec = container_of(dev_extent_item,
7782                                                 struct device_extent_record,
7783                                                 cache);
7784                         if (dev_extent_rec->objectid != devid ||
7785                             dev_extent_rec->offset != offset ||
7786                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
7787                             dev_extent_rec->length != length) {
7788                                 if (!silent)
7789                                         fprintf(stderr,
7790                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7791                                                 chunk_rec->objectid,
7792                                                 chunk_rec->type,
7793                                                 chunk_rec->offset,
7794                                                 chunk_rec->stripes[i].devid,
7795                                                 chunk_rec->stripes[i].offset,
7796                                                 dev_extent_rec->objectid,
7797                                                 dev_extent_rec->offset,
7798                                                 dev_extent_rec->length);
7799                                 ret = -1;
7800                         } else {
7801                                 list_move(&dev_extent_rec->chunk_list,
7802                                           &chunk_rec->dextents);
7803                         }
7804                 } else {
7805                         if (!silent)
7806                                 fprintf(stderr,
7807                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7808                                         chunk_rec->objectid,
7809                                         chunk_rec->type,
7810                                         chunk_rec->offset,
7811                                         chunk_rec->stripes[i].devid,
7812                                         chunk_rec->stripes[i].offset);
7813                         ret = -1;
7814                 }
7815         }
7816         return ret;
7817 }
7818
7819 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7820 int check_chunks(struct cache_tree *chunk_cache,
7821                  struct block_group_tree *block_group_cache,
7822                  struct device_extent_tree *dev_extent_cache,
7823                  struct list_head *good, struct list_head *bad,
7824                  struct list_head *rebuild, int silent)
7825 {
7826         struct cache_extent *chunk_item;
7827         struct chunk_record *chunk_rec;
7828         struct block_group_record *bg_rec;
7829         struct device_extent_record *dext_rec;
7830         int err;
7831         int ret = 0;
7832
7833         chunk_item = first_cache_extent(chunk_cache);
7834         while (chunk_item) {
7835                 chunk_rec = container_of(chunk_item, struct chunk_record,
7836                                          cache);
7837                 err = check_chunk_refs(chunk_rec, block_group_cache,
7838                                        dev_extent_cache, silent);
7839                 if (err < 0)
7840                         ret = err;
7841                 if (err == 0 && good)
7842                         list_add_tail(&chunk_rec->list, good);
7843                 if (err > 0 && rebuild)
7844                         list_add_tail(&chunk_rec->list, rebuild);
7845                 if (err < 0 && bad)
7846                         list_add_tail(&chunk_rec->list, bad);
7847                 chunk_item = next_cache_extent(chunk_item);
7848         }
7849
7850         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7851                 if (!silent)
7852                         fprintf(stderr,
7853                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7854                                 bg_rec->objectid,
7855                                 bg_rec->offset,
7856                                 bg_rec->flags);
7857                 if (!ret)
7858                         ret = 1;
7859         }
7860
7861         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7862                             chunk_list) {
7863                 if (!silent)
7864                         fprintf(stderr,
7865                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7866                                 dext_rec->objectid,
7867                                 dext_rec->offset,
7868                                 dext_rec->length);
7869                 if (!ret)
7870                         ret = 1;
7871         }
7872         return ret;
7873 }
7874
7875
7876 static int check_device_used(struct device_record *dev_rec,
7877                              struct device_extent_tree *dext_cache)
7878 {
7879         struct cache_extent *cache;
7880         struct device_extent_record *dev_extent_rec;
7881         u64 total_byte = 0;
7882
7883         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7884         while (cache) {
7885                 dev_extent_rec = container_of(cache,
7886                                               struct device_extent_record,
7887                                               cache);
7888                 if (dev_extent_rec->objectid != dev_rec->devid)
7889                         break;
7890
7891                 list_del_init(&dev_extent_rec->device_list);
7892                 total_byte += dev_extent_rec->length;
7893                 cache = next_cache_extent(cache);
7894         }
7895
7896         if (total_byte != dev_rec->byte_used) {
7897                 fprintf(stderr,
7898                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7899                         total_byte, dev_rec->byte_used, dev_rec->objectid,
7900                         dev_rec->type, dev_rec->offset);
7901                 return -1;
7902         } else {
7903                 return 0;
7904         }
7905 }
7906
7907 /* check btrfs_dev_item -> btrfs_dev_extent */
7908 static int check_devices(struct rb_root *dev_cache,
7909                          struct device_extent_tree *dev_extent_cache)
7910 {
7911         struct rb_node *dev_node;
7912         struct device_record *dev_rec;
7913         struct device_extent_record *dext_rec;
7914         int err;
7915         int ret = 0;
7916
7917         dev_node = rb_first(dev_cache);
7918         while (dev_node) {
7919                 dev_rec = container_of(dev_node, struct device_record, node);
7920                 err = check_device_used(dev_rec, dev_extent_cache);
7921                 if (err)
7922                         ret = err;
7923
7924                 dev_node = rb_next(dev_node);
7925         }
7926         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7927                             device_list) {
7928                 fprintf(stderr,
7929                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7930                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
7931                 if (!ret)
7932                         ret = 1;
7933         }
7934         return ret;
7935 }
7936
7937 static int add_root_item_to_list(struct list_head *head,
7938                                   u64 objectid, u64 bytenr, u64 last_snapshot,
7939                                   u8 level, u8 drop_level,
7940                                   int level_size, struct btrfs_key *drop_key)
7941 {
7942
7943         struct root_item_record *ri_rec;
7944         ri_rec = malloc(sizeof(*ri_rec));
7945         if (!ri_rec)
7946                 return -ENOMEM;
7947         ri_rec->bytenr = bytenr;
7948         ri_rec->objectid = objectid;
7949         ri_rec->level = level;
7950         ri_rec->level_size = level_size;
7951         ri_rec->drop_level = drop_level;
7952         ri_rec->last_snapshot = last_snapshot;
7953         if (drop_key)
7954                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
7955         list_add_tail(&ri_rec->list, head);
7956
7957         return 0;
7958 }
7959
7960 static void free_root_item_list(struct list_head *list)
7961 {
7962         struct root_item_record *ri_rec;
7963
7964         while (!list_empty(list)) {
7965                 ri_rec = list_first_entry(list, struct root_item_record,
7966                                           list);
7967                 list_del_init(&ri_rec->list);
7968                 free(ri_rec);
7969         }
7970 }
7971
7972 static int deal_root_from_list(struct list_head *list,
7973                                struct btrfs_root *root,
7974                                struct block_info *bits,
7975                                int bits_nr,
7976                                struct cache_tree *pending,
7977                                struct cache_tree *seen,
7978                                struct cache_tree *reada,
7979                                struct cache_tree *nodes,
7980                                struct cache_tree *extent_cache,
7981                                struct cache_tree *chunk_cache,
7982                                struct rb_root *dev_cache,
7983                                struct block_group_tree *block_group_cache,
7984                                struct device_extent_tree *dev_extent_cache)
7985 {
7986         int ret = 0;
7987         u64 last;
7988
7989         while (!list_empty(list)) {
7990                 struct root_item_record *rec;
7991                 struct extent_buffer *buf;
7992                 rec = list_entry(list->next,
7993                                  struct root_item_record, list);
7994                 last = 0;
7995                 buf = read_tree_block(root->fs_info->tree_root,
7996                                       rec->bytenr, rec->level_size, 0);
7997                 if (!extent_buffer_uptodate(buf)) {
7998                         free_extent_buffer(buf);
7999                         ret = -EIO;
8000                         break;
8001                 }
8002                 add_root_to_pending(buf, extent_cache, pending,
8003                                     seen, nodes, rec->objectid);
8004                 /*
8005                  * To rebuild extent tree, we need deal with snapshot
8006                  * one by one, otherwise we deal with node firstly which
8007                  * can maximize readahead.
8008                  */
8009                 while (1) {
8010                         ret = run_next_block(root, bits, bits_nr, &last,
8011                                              pending, seen, reada, nodes,
8012                                              extent_cache, chunk_cache,
8013                                              dev_cache, block_group_cache,
8014                                              dev_extent_cache, rec);
8015                         if (ret != 0)
8016                                 break;
8017                 }
8018                 free_extent_buffer(buf);
8019                 list_del(&rec->list);
8020                 free(rec);
8021                 if (ret < 0)
8022                         break;
8023         }
8024         while (ret >= 0) {
8025                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8026                                      reada, nodes, extent_cache, chunk_cache,
8027                                      dev_cache, block_group_cache,
8028                                      dev_extent_cache, NULL);
8029                 if (ret != 0) {
8030                         if (ret > 0)
8031                                 ret = 0;
8032                         break;
8033                 }
8034         }
8035         return ret;
8036 }
8037
8038 static int check_chunks_and_extents(struct btrfs_root *root)
8039 {
8040         struct rb_root dev_cache;
8041         struct cache_tree chunk_cache;
8042         struct block_group_tree block_group_cache;
8043         struct device_extent_tree dev_extent_cache;
8044         struct cache_tree extent_cache;
8045         struct cache_tree seen;
8046         struct cache_tree pending;
8047         struct cache_tree reada;
8048         struct cache_tree nodes;
8049         struct extent_io_tree excluded_extents;
8050         struct cache_tree corrupt_blocks;
8051         struct btrfs_path path;
8052         struct btrfs_key key;
8053         struct btrfs_key found_key;
8054         int ret, err = 0;
8055         struct block_info *bits;
8056         int bits_nr;
8057         struct extent_buffer *leaf;
8058         int slot;
8059         struct btrfs_root_item ri;
8060         struct list_head dropping_trees;
8061         struct list_head normal_trees;
8062         struct btrfs_root *root1;
8063         u64 objectid;
8064         u32 level_size;
8065         u8 level;
8066
8067         dev_cache = RB_ROOT;
8068         cache_tree_init(&chunk_cache);
8069         block_group_tree_init(&block_group_cache);
8070         device_extent_tree_init(&dev_extent_cache);
8071
8072         cache_tree_init(&extent_cache);
8073         cache_tree_init(&seen);
8074         cache_tree_init(&pending);
8075         cache_tree_init(&nodes);
8076         cache_tree_init(&reada);
8077         cache_tree_init(&corrupt_blocks);
8078         extent_io_tree_init(&excluded_extents);
8079         INIT_LIST_HEAD(&dropping_trees);
8080         INIT_LIST_HEAD(&normal_trees);
8081
8082         if (repair) {
8083                 root->fs_info->excluded_extents = &excluded_extents;
8084                 root->fs_info->fsck_extent_cache = &extent_cache;
8085                 root->fs_info->free_extent_hook = free_extent_hook;
8086                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8087         }
8088
8089         bits_nr = 1024;
8090         bits = malloc(bits_nr * sizeof(struct block_info));
8091         if (!bits) {
8092                 perror("malloc");
8093                 exit(1);
8094         }
8095
8096         if (ctx.progress_enabled) {
8097                 ctx.tp = TASK_EXTENTS;
8098                 task_start(ctx.info);
8099         }
8100
8101 again:
8102         root1 = root->fs_info->tree_root;
8103         level = btrfs_header_level(root1->node);
8104         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8105                                     root1->node->start, 0, level, 0,
8106                                     btrfs_level_size(root1, level), NULL);
8107         if (ret < 0)
8108                 goto out;
8109         root1 = root->fs_info->chunk_root;
8110         level = btrfs_header_level(root1->node);
8111         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8112                                     root1->node->start, 0, level, 0,
8113                                     btrfs_level_size(root1, level), NULL);
8114         if (ret < 0)
8115                 goto out;
8116         btrfs_init_path(&path);
8117         key.offset = 0;
8118         key.objectid = 0;
8119         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8120         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8121                                         &key, &path, 0, 0);
8122         if (ret < 0)
8123                 goto out;
8124         while(1) {
8125                 leaf = path.nodes[0];
8126                 slot = path.slots[0];
8127                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8128                         ret = btrfs_next_leaf(root, &path);
8129                         if (ret != 0)
8130                                 break;
8131                         leaf = path.nodes[0];
8132                         slot = path.slots[0];
8133                 }
8134                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8135                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8136                         unsigned long offset;
8137                         u64 last_snapshot;
8138
8139                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8140                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8141                         last_snapshot = btrfs_root_last_snapshot(&ri);
8142                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8143                                 level = btrfs_root_level(&ri);
8144                                 level_size = btrfs_level_size(root, level);
8145                                 ret = add_root_item_to_list(&normal_trees,
8146                                                 found_key.objectid,
8147                                                 btrfs_root_bytenr(&ri),
8148                                                 last_snapshot, level,
8149                                                 0, level_size, NULL);
8150                                 if (ret < 0)
8151                                         goto out;
8152                         } else {
8153                                 level = btrfs_root_level(&ri);
8154                                 level_size = btrfs_level_size(root, level);
8155                                 objectid = found_key.objectid;
8156                                 btrfs_disk_key_to_cpu(&found_key,
8157                                                       &ri.drop_progress);
8158                                 ret = add_root_item_to_list(&dropping_trees,
8159                                                 objectid,
8160                                                 btrfs_root_bytenr(&ri),
8161                                                 last_snapshot, level,
8162                                                 ri.drop_level,
8163                                                 level_size, &found_key);
8164                                 if (ret < 0)
8165                                         goto out;
8166                         }
8167                 }
8168                 path.slots[0]++;
8169         }
8170         btrfs_release_path(&path);
8171
8172         /*
8173          * check_block can return -EAGAIN if it fixes something, please keep
8174          * this in mind when dealing with return values from these functions, if
8175          * we get -EAGAIN we want to fall through and restart the loop.
8176          */
8177         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8178                                   &seen, &reada, &nodes, &extent_cache,
8179                                   &chunk_cache, &dev_cache, &block_group_cache,
8180                                   &dev_extent_cache);
8181         if (ret < 0) {
8182                 if (ret == -EAGAIN)
8183                         goto loop;
8184                 goto out;
8185         }
8186         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8187                                   &pending, &seen, &reada, &nodes,
8188                                   &extent_cache, &chunk_cache, &dev_cache,
8189                                   &block_group_cache, &dev_extent_cache);
8190         if (ret < 0) {
8191                 if (ret == -EAGAIN)
8192                         goto loop;
8193                 goto out;
8194         }
8195
8196         ret = check_chunks(&chunk_cache, &block_group_cache,
8197                            &dev_extent_cache, NULL, NULL, NULL, 0);
8198         if (ret) {
8199                 if (ret == -EAGAIN)
8200                         goto loop;
8201                 err = ret;
8202         }
8203
8204         ret = check_extent_refs(root, &extent_cache);
8205         if (ret < 0) {
8206                 if (ret == -EAGAIN)
8207                         goto loop;
8208                 goto out;
8209         }
8210
8211         ret = check_devices(&dev_cache, &dev_extent_cache);
8212         if (ret && err)
8213                 ret = err;
8214
8215 out:
8216         task_stop(ctx.info);
8217         if (repair) {
8218                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8219                 extent_io_tree_cleanup(&excluded_extents);
8220                 root->fs_info->fsck_extent_cache = NULL;
8221                 root->fs_info->free_extent_hook = NULL;
8222                 root->fs_info->corrupt_blocks = NULL;
8223                 root->fs_info->excluded_extents = NULL;
8224         }
8225         free(bits);
8226         free_chunk_cache_tree(&chunk_cache);
8227         free_device_cache_tree(&dev_cache);
8228         free_block_group_tree(&block_group_cache);
8229         free_device_extent_tree(&dev_extent_cache);
8230         free_extent_cache_tree(&seen);
8231         free_extent_cache_tree(&pending);
8232         free_extent_cache_tree(&reada);
8233         free_extent_cache_tree(&nodes);
8234         return ret;
8235 loop:
8236         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8237         free_extent_cache_tree(&seen);
8238         free_extent_cache_tree(&pending);
8239         free_extent_cache_tree(&reada);
8240         free_extent_cache_tree(&nodes);
8241         free_chunk_cache_tree(&chunk_cache);
8242         free_block_group_tree(&block_group_cache);
8243         free_device_cache_tree(&dev_cache);
8244         free_device_extent_tree(&dev_extent_cache);
8245         free_extent_record_cache(root->fs_info, &extent_cache);
8246         free_root_item_list(&normal_trees);
8247         free_root_item_list(&dropping_trees);
8248         extent_io_tree_cleanup(&excluded_extents);
8249         goto again;
8250 }
8251
8252 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8253                            struct btrfs_root *root, int overwrite)
8254 {
8255         struct extent_buffer *c;
8256         struct extent_buffer *old = root->node;
8257         int level;
8258         int ret;
8259         struct btrfs_disk_key disk_key = {0,0,0};
8260
8261         level = 0;
8262
8263         if (overwrite) {
8264                 c = old;
8265                 extent_buffer_get(c);
8266                 goto init;
8267         }
8268         c = btrfs_alloc_free_block(trans, root,
8269                                    btrfs_level_size(root, 0),
8270                                    root->root_key.objectid,
8271                                    &disk_key, level, 0, 0);
8272         if (IS_ERR(c)) {
8273                 c = old;
8274                 extent_buffer_get(c);
8275                 overwrite = 1;
8276         }
8277 init:
8278         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8279         btrfs_set_header_level(c, level);
8280         btrfs_set_header_bytenr(c, c->start);
8281         btrfs_set_header_generation(c, trans->transid);
8282         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8283         btrfs_set_header_owner(c, root->root_key.objectid);
8284
8285         write_extent_buffer(c, root->fs_info->fsid,
8286                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8287
8288         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8289                             btrfs_header_chunk_tree_uuid(c),
8290                             BTRFS_UUID_SIZE);
8291
8292         btrfs_mark_buffer_dirty(c);
8293         /*
8294          * this case can happen in the following case:
8295          *
8296          * 1.overwrite previous root.
8297          *
8298          * 2.reinit reloc data root, this is because we skip pin
8299          * down reloc data tree before which means we can allocate
8300          * same block bytenr here.
8301          */
8302         if (old->start == c->start) {
8303                 btrfs_set_root_generation(&root->root_item,
8304                                           trans->transid);
8305                 root->root_item.level = btrfs_header_level(root->node);
8306                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8307                                         &root->root_key, &root->root_item);
8308                 if (ret) {
8309                         free_extent_buffer(c);
8310                         return ret;
8311                 }
8312         }
8313         free_extent_buffer(old);
8314         root->node = c;
8315         add_root_to_dirty_list(root);
8316         return 0;
8317 }
8318
8319 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8320                                 struct extent_buffer *eb, int tree_root)
8321 {
8322         struct extent_buffer *tmp;
8323         struct btrfs_root_item *ri;
8324         struct btrfs_key key;
8325         u64 bytenr;
8326         u32 leafsize;
8327         int level = btrfs_header_level(eb);
8328         int nritems;
8329         int ret;
8330         int i;
8331
8332         /*
8333          * If we have pinned this block before, don't pin it again.
8334          * This can not only avoid forever loop with broken filesystem
8335          * but also give us some speedups.
8336          */
8337         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8338                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8339                 return 0;
8340
8341         btrfs_pin_extent(fs_info, eb->start, eb->len);
8342
8343         leafsize = btrfs_super_leafsize(fs_info->super_copy);
8344         nritems = btrfs_header_nritems(eb);
8345         for (i = 0; i < nritems; i++) {
8346                 if (level == 0) {
8347                         btrfs_item_key_to_cpu(eb, &key, i);
8348                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8349                                 continue;
8350                         /* Skip the extent root and reloc roots */
8351                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8352                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8353                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8354                                 continue;
8355                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8356                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8357
8358                         /*
8359                          * If at any point we start needing the real root we
8360                          * will have to build a stump root for the root we are
8361                          * in, but for now this doesn't actually use the root so
8362                          * just pass in extent_root.
8363                          */
8364                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8365                                               leafsize, 0);
8366                         if (!extent_buffer_uptodate(tmp)) {
8367                                 fprintf(stderr, "Error reading root block\n");
8368                                 return -EIO;
8369                         }
8370                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8371                         free_extent_buffer(tmp);
8372                         if (ret)
8373                                 return ret;
8374                 } else {
8375                         bytenr = btrfs_node_blockptr(eb, i);
8376
8377                         /* If we aren't the tree root don't read the block */
8378                         if (level == 1 && !tree_root) {
8379                                 btrfs_pin_extent(fs_info, bytenr, leafsize);
8380                                 continue;
8381                         }
8382
8383                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8384                                               leafsize, 0);
8385                         if (!extent_buffer_uptodate(tmp)) {
8386                                 fprintf(stderr, "Error reading tree block\n");
8387                                 return -EIO;
8388                         }
8389                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8390                         free_extent_buffer(tmp);
8391                         if (ret)
8392                                 return ret;
8393                 }
8394         }
8395
8396         return 0;
8397 }
8398
8399 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8400 {
8401         int ret;
8402
8403         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8404         if (ret)
8405                 return ret;
8406
8407         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8408 }
8409
8410 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8411 {
8412         struct btrfs_block_group_cache *cache;
8413         struct btrfs_path *path;
8414         struct extent_buffer *leaf;
8415         struct btrfs_chunk *chunk;
8416         struct btrfs_key key;
8417         int ret;
8418         u64 start;
8419
8420         path = btrfs_alloc_path();
8421         if (!path)
8422                 return -ENOMEM;
8423
8424         key.objectid = 0;
8425         key.type = BTRFS_CHUNK_ITEM_KEY;
8426         key.offset = 0;
8427
8428         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8429         if (ret < 0) {
8430                 btrfs_free_path(path);
8431                 return ret;
8432         }
8433
8434         /*
8435          * We do this in case the block groups were screwed up and had alloc
8436          * bits that aren't actually set on the chunks.  This happens with
8437          * restored images every time and could happen in real life I guess.
8438          */
8439         fs_info->avail_data_alloc_bits = 0;
8440         fs_info->avail_metadata_alloc_bits = 0;
8441         fs_info->avail_system_alloc_bits = 0;
8442
8443         /* First we need to create the in-memory block groups */
8444         while (1) {
8445                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8446                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
8447                         if (ret < 0) {
8448                                 btrfs_free_path(path);
8449                                 return ret;
8450                         }
8451                         if (ret) {
8452                                 ret = 0;
8453                                 break;
8454                         }
8455                 }
8456                 leaf = path->nodes[0];
8457                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8458                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8459                         path->slots[0]++;
8460                         continue;
8461                 }
8462
8463                 chunk = btrfs_item_ptr(leaf, path->slots[0],
8464                                        struct btrfs_chunk);
8465                 btrfs_add_block_group(fs_info, 0,
8466                                       btrfs_chunk_type(leaf, chunk),
8467                                       key.objectid, key.offset,
8468                                       btrfs_chunk_length(leaf, chunk));
8469                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8470                                  key.offset + btrfs_chunk_length(leaf, chunk),
8471                                  GFP_NOFS);
8472                 path->slots[0]++;
8473         }
8474         start = 0;
8475         while (1) {
8476                 cache = btrfs_lookup_first_block_group(fs_info, start);
8477                 if (!cache)
8478                         break;
8479                 cache->cached = 1;
8480                 start = cache->key.objectid + cache->key.offset;
8481         }
8482
8483         btrfs_free_path(path);
8484         return 0;
8485 }
8486
8487 static int reset_balance(struct btrfs_trans_handle *trans,
8488                          struct btrfs_fs_info *fs_info)
8489 {
8490         struct btrfs_root *root = fs_info->tree_root;
8491         struct btrfs_path *path;
8492         struct extent_buffer *leaf;
8493         struct btrfs_key key;
8494         int del_slot, del_nr = 0;
8495         int ret;
8496         int found = 0;
8497
8498         path = btrfs_alloc_path();
8499         if (!path)
8500                 return -ENOMEM;
8501
8502         key.objectid = BTRFS_BALANCE_OBJECTID;
8503         key.type = BTRFS_BALANCE_ITEM_KEY;
8504         key.offset = 0;
8505
8506         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8507         if (ret) {
8508                 if (ret > 0)
8509                         ret = 0;
8510                 if (!ret)
8511                         goto reinit_data_reloc;
8512                 else
8513                         goto out;
8514         }
8515
8516         ret = btrfs_del_item(trans, root, path);
8517         if (ret)
8518                 goto out;
8519         btrfs_release_path(path);
8520
8521         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8522         key.type = BTRFS_ROOT_ITEM_KEY;
8523         key.offset = 0;
8524
8525         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8526         if (ret < 0)
8527                 goto out;
8528         while (1) {
8529                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8530                         if (!found)
8531                                 break;
8532
8533                         if (del_nr) {
8534                                 ret = btrfs_del_items(trans, root, path,
8535                                                       del_slot, del_nr);
8536                                 del_nr = 0;
8537                                 if (ret)
8538                                         goto out;
8539                         }
8540                         key.offset++;
8541                         btrfs_release_path(path);
8542
8543                         found = 0;
8544                         ret = btrfs_search_slot(trans, root, &key, path,
8545                                                 -1, 1);
8546                         if (ret < 0)
8547                                 goto out;
8548                         continue;
8549                 }
8550                 found = 1;
8551                 leaf = path->nodes[0];
8552                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8553                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8554                         break;
8555                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8556                         path->slots[0]++;
8557                         continue;
8558                 }
8559                 if (!del_nr) {
8560                         del_slot = path->slots[0];
8561                         del_nr = 1;
8562                 } else {
8563                         del_nr++;
8564                 }
8565                 path->slots[0]++;
8566         }
8567
8568         if (del_nr) {
8569                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8570                 if (ret)
8571                         goto out;
8572         }
8573         btrfs_release_path(path);
8574
8575 reinit_data_reloc:
8576         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8577         key.type = BTRFS_ROOT_ITEM_KEY;
8578         key.offset = (u64)-1;
8579         root = btrfs_read_fs_root(fs_info, &key);
8580         if (IS_ERR(root)) {
8581                 fprintf(stderr, "Error reading data reloc tree\n");
8582                 ret = PTR_ERR(root);
8583                 goto out;
8584         }
8585         record_root_in_trans(trans, root);
8586         ret = btrfs_fsck_reinit_root(trans, root, 0);
8587         if (ret)
8588                 goto out;
8589         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8590 out:
8591         btrfs_free_path(path);
8592         return ret;
8593 }
8594
8595 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8596                               struct btrfs_fs_info *fs_info)
8597 {
8598         u64 start = 0;
8599         int ret;
8600
8601         /*
8602          * The only reason we don't do this is because right now we're just
8603          * walking the trees we find and pinning down their bytes, we don't look
8604          * at any of the leaves.  In order to do mixed groups we'd have to check
8605          * the leaves of any fs roots and pin down the bytes for any file
8606          * extents we find.  Not hard but why do it if we don't have to?
8607          */
8608         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8609                 fprintf(stderr, "We don't support re-initing the extent tree "
8610                         "for mixed block groups yet, please notify a btrfs "
8611                         "developer you want to do this so they can add this "
8612                         "functionality.\n");
8613                 return -EINVAL;
8614         }
8615
8616         /*
8617          * first we need to walk all of the trees except the extent tree and pin
8618          * down the bytes that are in use so we don't overwrite any existing
8619          * metadata.
8620          */
8621         ret = pin_metadata_blocks(fs_info);
8622         if (ret) {
8623                 fprintf(stderr, "error pinning down used bytes\n");
8624                 return ret;
8625         }
8626
8627         /*
8628          * Need to drop all the block groups since we're going to recreate all
8629          * of them again.
8630          */
8631         btrfs_free_block_groups(fs_info);
8632         ret = reset_block_groups(fs_info);
8633         if (ret) {
8634                 fprintf(stderr, "error resetting the block groups\n");
8635                 return ret;
8636         }
8637
8638         /* Ok we can allocate now, reinit the extent root */
8639         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8640         if (ret) {
8641                 fprintf(stderr, "extent root initialization failed\n");
8642                 /*
8643                  * When the transaction code is updated we should end the
8644                  * transaction, but for now progs only knows about commit so
8645                  * just return an error.
8646                  */
8647                 return ret;
8648         }
8649
8650         /*
8651          * Now we have all the in-memory block groups setup so we can make
8652          * allocations properly, and the metadata we care about is safe since we
8653          * pinned all of it above.
8654          */
8655         while (1) {
8656                 struct btrfs_block_group_cache *cache;
8657
8658                 cache = btrfs_lookup_first_block_group(fs_info, start);
8659                 if (!cache)
8660                         break;
8661                 start = cache->key.objectid + cache->key.offset;
8662                 ret = btrfs_insert_item(trans, fs_info->extent_root,
8663                                         &cache->key, &cache->item,
8664                                         sizeof(cache->item));
8665                 if (ret) {
8666                         fprintf(stderr, "Error adding block group\n");
8667                         return ret;
8668                 }
8669                 btrfs_extent_post_op(trans, fs_info->extent_root);
8670         }
8671
8672         ret = reset_balance(trans, fs_info);
8673         if (ret)
8674                 fprintf(stderr, "error reseting the pending balance\n");
8675
8676         return ret;
8677 }
8678
8679 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8680 {
8681         struct btrfs_path *path;
8682         struct btrfs_trans_handle *trans;
8683         struct btrfs_key key;
8684         int ret;
8685
8686         printf("Recowing metadata block %llu\n", eb->start);
8687         key.objectid = btrfs_header_owner(eb);
8688         key.type = BTRFS_ROOT_ITEM_KEY;
8689         key.offset = (u64)-1;
8690
8691         root = btrfs_read_fs_root(root->fs_info, &key);
8692         if (IS_ERR(root)) {
8693                 fprintf(stderr, "Couldn't find owner root %llu\n",
8694                         key.objectid);
8695                 return PTR_ERR(root);
8696         }
8697
8698         path = btrfs_alloc_path();
8699         if (!path)
8700                 return -ENOMEM;
8701
8702         trans = btrfs_start_transaction(root, 1);
8703         if (IS_ERR(trans)) {
8704                 btrfs_free_path(path);
8705                 return PTR_ERR(trans);
8706         }
8707
8708         path->lowest_level = btrfs_header_level(eb);
8709         if (path->lowest_level)
8710                 btrfs_node_key_to_cpu(eb, &key, 0);
8711         else
8712                 btrfs_item_key_to_cpu(eb, &key, 0);
8713
8714         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8715         btrfs_commit_transaction(trans, root);
8716         btrfs_free_path(path);
8717         return ret;
8718 }
8719
8720 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8721 {
8722         struct btrfs_path *path;
8723         struct btrfs_trans_handle *trans;
8724         struct btrfs_key key;
8725         int ret;
8726
8727         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8728                bad->key.type, bad->key.offset);
8729         key.objectid = bad->root_id;
8730         key.type = BTRFS_ROOT_ITEM_KEY;
8731         key.offset = (u64)-1;
8732
8733         root = btrfs_read_fs_root(root->fs_info, &key);
8734         if (IS_ERR(root)) {
8735                 fprintf(stderr, "Couldn't find owner root %llu\n",
8736                         key.objectid);
8737                 return PTR_ERR(root);
8738         }
8739
8740         path = btrfs_alloc_path();
8741         if (!path)
8742                 return -ENOMEM;
8743
8744         trans = btrfs_start_transaction(root, 1);
8745         if (IS_ERR(trans)) {
8746                 btrfs_free_path(path);
8747                 return PTR_ERR(trans);
8748         }
8749
8750         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8751         if (ret) {
8752                 if (ret > 0)
8753                         ret = 0;
8754                 goto out;
8755         }
8756         ret = btrfs_del_item(trans, root, path);
8757 out:
8758         btrfs_commit_transaction(trans, root);
8759         btrfs_free_path(path);
8760         return ret;
8761 }
8762
8763 static int zero_log_tree(struct btrfs_root *root)
8764 {
8765         struct btrfs_trans_handle *trans;
8766         int ret;
8767
8768         trans = btrfs_start_transaction(root, 1);
8769         if (IS_ERR(trans)) {
8770                 ret = PTR_ERR(trans);
8771                 return ret;
8772         }
8773         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8774         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8775         ret = btrfs_commit_transaction(trans, root);
8776         return ret;
8777 }
8778
8779 static int populate_csum(struct btrfs_trans_handle *trans,
8780                          struct btrfs_root *csum_root, char *buf, u64 start,
8781                          u64 len)
8782 {
8783         u64 offset = 0;
8784         u64 sectorsize;
8785         int ret = 0;
8786
8787         while (offset < len) {
8788                 sectorsize = csum_root->sectorsize;
8789                 ret = read_extent_data(csum_root, buf, start + offset,
8790                                        &sectorsize, 0);
8791                 if (ret)
8792                         break;
8793                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8794                                             start + offset, buf, sectorsize);
8795                 if (ret)
8796                         break;
8797                 offset += sectorsize;
8798         }
8799         return ret;
8800 }
8801
8802 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8803                                       struct btrfs_root *csum_root,
8804                                       struct btrfs_root *cur_root)
8805 {
8806         struct btrfs_path *path;
8807         struct btrfs_key key;
8808         struct extent_buffer *node;
8809         struct btrfs_file_extent_item *fi;
8810         char *buf = NULL;
8811         u64 start = 0;
8812         u64 len = 0;
8813         int slot = 0;
8814         int ret = 0;
8815
8816         path = btrfs_alloc_path();
8817         if (!path)
8818                 return -ENOMEM;
8819         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8820         if (!buf) {
8821                 ret = -ENOMEM;
8822                 goto out;
8823         }
8824
8825         key.objectid = 0;
8826         key.offset = 0;
8827         key.type = 0;
8828
8829         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8830         if (ret < 0)
8831                 goto out;
8832         /* Iterate all regular file extents and fill its csum */
8833         while (1) {
8834                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8835
8836                 if (key.type != BTRFS_EXTENT_DATA_KEY)
8837                         goto next;
8838                 node = path->nodes[0];
8839                 slot = path->slots[0];
8840                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8841                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8842                         goto next;
8843                 start = btrfs_file_extent_disk_bytenr(node, fi);
8844                 len = btrfs_file_extent_disk_num_bytes(node, fi);
8845
8846                 ret = populate_csum(trans, csum_root, buf, start, len);
8847                 if (ret == -EEXIST)
8848                         ret = 0;
8849                 if (ret < 0)
8850                         goto out;
8851 next:
8852                 /*
8853                  * TODO: if next leaf is corrupted, jump to nearest next valid
8854                  * leaf.
8855                  */
8856                 ret = btrfs_next_item(cur_root, path);
8857                 if (ret < 0)
8858                         goto out;
8859                 if (ret > 0) {
8860                         ret = 0;
8861                         goto out;
8862                 }
8863         }
8864
8865 out:
8866         btrfs_free_path(path);
8867         free(buf);
8868         return ret;
8869 }
8870
8871 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8872                                   struct btrfs_root *csum_root)
8873 {
8874         struct btrfs_fs_info *fs_info = csum_root->fs_info;
8875         struct btrfs_path *path;
8876         struct btrfs_root *tree_root = fs_info->tree_root;
8877         struct btrfs_root *cur_root;
8878         struct extent_buffer *node;
8879         struct btrfs_key key;
8880         int slot = 0;
8881         int ret = 0;
8882
8883         path = btrfs_alloc_path();
8884         if (!path)
8885                 return -ENOMEM;
8886
8887         key.objectid = BTRFS_FS_TREE_OBJECTID;
8888         key.offset = 0;
8889         key.type = BTRFS_ROOT_ITEM_KEY;
8890
8891         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
8892         if (ret < 0)
8893                 goto out;
8894         if (ret > 0) {
8895                 ret = -ENOENT;
8896                 goto out;
8897         }
8898
8899         while (1) {
8900                 node = path->nodes[0];
8901                 slot = path->slots[0];
8902                 btrfs_item_key_to_cpu(node, &key, slot);
8903                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8904                         goto out;
8905                 if (key.type != BTRFS_ROOT_ITEM_KEY)
8906                         goto next;
8907                 if (!is_fstree(key.objectid))
8908                         goto next;
8909                 key.offset = (u64)-1;
8910
8911                 cur_root = btrfs_read_fs_root(fs_info, &key);
8912                 if (IS_ERR(cur_root) || !cur_root) {
8913                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8914                                 key.objectid);
8915                         goto out;
8916                 }
8917                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8918                                 cur_root);
8919                 if (ret < 0)
8920                         goto out;
8921 next:
8922                 ret = btrfs_next_item(tree_root, path);
8923                 if (ret > 0) {
8924                         ret = 0;
8925                         goto out;
8926                 }
8927                 if (ret < 0)
8928                         goto out;
8929         }
8930
8931 out:
8932         btrfs_free_path(path);
8933         return ret;
8934 }
8935
8936 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
8937                                       struct btrfs_root *csum_root)
8938 {
8939         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
8940         struct btrfs_path *path;
8941         struct btrfs_extent_item *ei;
8942         struct extent_buffer *leaf;
8943         char *buf;
8944         struct btrfs_key key;
8945         int ret;
8946
8947         path = btrfs_alloc_path();
8948         if (!path)
8949                 return -ENOMEM;
8950
8951         key.objectid = 0;
8952         key.type = BTRFS_EXTENT_ITEM_KEY;
8953         key.offset = 0;
8954
8955         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
8956         if (ret < 0) {
8957                 btrfs_free_path(path);
8958                 return ret;
8959         }
8960
8961         buf = malloc(csum_root->sectorsize);
8962         if (!buf) {
8963                 btrfs_free_path(path);
8964                 return -ENOMEM;
8965         }
8966
8967         while (1) {
8968                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8969                         ret = btrfs_next_leaf(extent_root, path);
8970                         if (ret < 0)
8971                                 break;
8972                         if (ret) {
8973                                 ret = 0;
8974                                 break;
8975                         }
8976                 }
8977                 leaf = path->nodes[0];
8978
8979                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8980                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8981                         path->slots[0]++;
8982                         continue;
8983                 }
8984
8985                 ei = btrfs_item_ptr(leaf, path->slots[0],
8986                                     struct btrfs_extent_item);
8987                 if (!(btrfs_extent_flags(leaf, ei) &
8988                       BTRFS_EXTENT_FLAG_DATA)) {
8989                         path->slots[0]++;
8990                         continue;
8991                 }
8992
8993                 ret = populate_csum(trans, csum_root, buf, key.objectid,
8994                                     key.offset);
8995                 if (ret)
8996                         break;
8997                 path->slots[0]++;
8998         }
8999
9000         btrfs_free_path(path);
9001         free(buf);
9002         return ret;
9003 }
9004
/*
 * Recompute the data checksums and insert them into the csum tree.
 *
 * An extent tree that has just been re-initialized no longer describes any
 * extent, so it cannot be used to find the data ranges.  In that case the
 * caller sets search_fs_tree and the fs/subvolume trees are walked instead;
 * otherwise the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (!search_fs_tree)
		return fill_csum_tree_from_extent(trans, csum_root);

	return fill_csum_tree_from_fs(trans, csum_root);
}
9021
9022 struct root_item_info {
9023         /* level of the root */
9024         u8 level;
9025         /* number of nodes at this level, must be 1 for a root */
9026         int node_count;
9027         u64 bytenr;
9028         u64 gen;
9029         struct cache_extent cache_extent;
9030 };
9031
/* Allocated by build_roots_info_cache(), released by free_roots_info_cache(). */
static struct cache_tree *roots_info_cache;
9033
9034 static void free_roots_info_cache(void)
9035 {
9036         if (!roots_info_cache)
9037                 return;
9038
9039         while (!cache_tree_empty(roots_info_cache)) {
9040                 struct cache_extent *entry;
9041                 struct root_item_info *rii;
9042
9043                 entry = first_cache_extent(roots_info_cache);
9044                 if (!entry)
9045                         break;
9046                 remove_cache_extent(roots_info_cache, entry);
9047                 rii = container_of(entry, struct root_item_info, cache_extent);
9048                 free(rii);
9049         }
9050
9051         free(roots_info_cache);
9052         roots_info_cache = NULL;
9053 }
9054
9055 static int build_roots_info_cache(struct btrfs_fs_info *info)
9056 {
9057         int ret = 0;
9058         struct btrfs_key key;
9059         struct extent_buffer *leaf;
9060         struct btrfs_path *path;
9061
9062         if (!roots_info_cache) {
9063                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9064                 if (!roots_info_cache)
9065                         return -ENOMEM;
9066                 cache_tree_init(roots_info_cache);
9067         }
9068
9069         path = btrfs_alloc_path();
9070         if (!path)
9071                 return -ENOMEM;
9072
9073         key.objectid = 0;
9074         key.type = BTRFS_EXTENT_ITEM_KEY;
9075         key.offset = 0;
9076
9077         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9078         if (ret < 0)
9079                 goto out;
9080         leaf = path->nodes[0];
9081
9082         while (1) {
9083                 struct btrfs_key found_key;
9084                 struct btrfs_extent_item *ei;
9085                 struct btrfs_extent_inline_ref *iref;
9086                 int slot = path->slots[0];
9087                 int type;
9088                 u64 flags;
9089                 u64 root_id;
9090                 u8 level;
9091                 struct cache_extent *entry;
9092                 struct root_item_info *rii;
9093
9094                 if (slot >= btrfs_header_nritems(leaf)) {
9095                         ret = btrfs_next_leaf(info->extent_root, path);
9096                         if (ret < 0) {
9097                                 break;
9098                         } else if (ret) {
9099                                 ret = 0;
9100                                 break;
9101                         }
9102                         leaf = path->nodes[0];
9103                         slot = path->slots[0];
9104                 }
9105
9106                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9107
9108                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9109                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9110                         goto next;
9111
9112                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9113                 flags = btrfs_extent_flags(leaf, ei);
9114
9115                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9116                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9117                         goto next;
9118
9119                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9120                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9121                         level = found_key.offset;
9122                 } else {
9123                         struct btrfs_tree_block_info *info;
9124
9125                         info = (struct btrfs_tree_block_info *)(ei + 1);
9126                         iref = (struct btrfs_extent_inline_ref *)(info + 1);
9127                         level = btrfs_tree_block_level(leaf, info);
9128                 }
9129
9130                 /*
9131                  * For a root extent, it must be of the following type and the
9132                  * first (and only one) iref in the item.
9133                  */
9134                 type = btrfs_extent_inline_ref_type(leaf, iref);
9135                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9136                         goto next;
9137
9138                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9139                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9140                 if (!entry) {
9141                         rii = malloc(sizeof(struct root_item_info));
9142                         if (!rii) {
9143                                 ret = -ENOMEM;
9144                                 goto out;
9145                         }
9146                         rii->cache_extent.start = root_id;
9147                         rii->cache_extent.size = 1;
9148                         rii->level = (u8)-1;
9149                         entry = &rii->cache_extent;
9150                         ret = insert_cache_extent(roots_info_cache, entry);
9151                         ASSERT(ret == 0);
9152                 } else {
9153                         rii = container_of(entry, struct root_item_info,
9154                                            cache_extent);
9155                 }
9156
9157                 ASSERT(rii->cache_extent.start == root_id);
9158                 ASSERT(rii->cache_extent.size == 1);
9159
9160                 if (level > rii->level || rii->level == (u8)-1) {
9161                         rii->level = level;
9162                         rii->bytenr = found_key.objectid;
9163                         rii->gen = btrfs_extent_generation(leaf, ei);
9164                         rii->node_count = 1;
9165                 } else if (level == rii->level) {
9166                         rii->node_count++;
9167                 }
9168 next:
9169                 path->slots[0]++;
9170         }
9171
9172 out:
9173         btrfs_free_path(path);
9174
9175         return ret;
9176 }
9177
9178 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9179                                   struct btrfs_path *path,
9180                                   const struct btrfs_key *root_key,
9181                                   const int read_only_mode)
9182 {
9183         const u64 root_id = root_key->objectid;
9184         struct cache_extent *entry;
9185         struct root_item_info *rii;
9186         struct btrfs_root_item ri;
9187         unsigned long offset;
9188
9189         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9190         if (!entry) {
9191                 fprintf(stderr,
9192                         "Error: could not find extent items for root %llu\n",
9193                         root_key->objectid);
9194                 return -ENOENT;
9195         }
9196
9197         rii = container_of(entry, struct root_item_info, cache_extent);
9198         ASSERT(rii->cache_extent.start == root_id);
9199         ASSERT(rii->cache_extent.size == 1);
9200
9201         if (rii->node_count != 1) {
9202                 fprintf(stderr,
9203                         "Error: could not find btree root extent for root %llu\n",
9204                         root_id);
9205                 return -ENOENT;
9206         }
9207
9208         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9209         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9210
9211         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9212             btrfs_root_level(&ri) != rii->level ||
9213             btrfs_root_generation(&ri) != rii->gen) {
9214
9215                 /*
9216                  * If we're in repair mode but our caller told us to not update
9217                  * the root item, i.e. just check if it needs to be updated, don't
9218                  * print this message, since the caller will call us again shortly
9219                  * for the same root item without read only mode (the caller will
9220                  * open a transaction first).
9221                  */
9222                 if (!(read_only_mode && repair))
9223                         fprintf(stderr,
9224                                 "%sroot item for root %llu,"
9225                                 " current bytenr %llu, current gen %llu, current level %u,"
9226                                 " new bytenr %llu, new gen %llu, new level %u\n",
9227                                 (read_only_mode ? "" : "fixing "),
9228                                 root_id,
9229                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9230                                 btrfs_root_level(&ri),
9231                                 rii->bytenr, rii->gen, rii->level);
9232
9233                 if (btrfs_root_generation(&ri) > rii->gen) {
9234                         fprintf(stderr,
9235                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9236                                 root_id, btrfs_root_generation(&ri), rii->gen);
9237                         return -EINVAL;
9238                 }
9239
9240                 if (!read_only_mode) {
9241                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9242                         btrfs_set_root_level(&ri, rii->level);
9243                         btrfs_set_root_generation(&ri, rii->gen);
9244                         write_extent_buffer(path->nodes[0], &ri,
9245                                             offset, sizeof(ri));
9246                 }
9247
9248                 return 1;
9249         }
9250
9251         return 0;
9252 }
9253
9254 /*
9255  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9256  * caused read-only snapshots to be corrupted if they were created at a moment
9257  * when the source subvolume/snapshot had orphan items. The issue was that the
9258  * on-disk root items became incorrect, referring to the pre orphan cleanup root
9259  * node instead of the post orphan cleanup root node.
9260  * So this function, and its callees, just detects and fixes those cases. Even
9261  * though the regression was for read-only snapshots, this function applies to
9262  * any snapshot/subvolume root.
9263  * This must be run before any other repair code - not doing it so, makes other
9264  * repair code delete or modify backrefs in the extent tree for example, which
9265  * will result in an inconsistent fs after repairing the root items.
9266  */
9267 static int repair_root_items(struct btrfs_fs_info *info)
9268 {
9269         struct btrfs_path *path = NULL;
9270         struct btrfs_key key;
9271         struct extent_buffer *leaf;
9272         struct btrfs_trans_handle *trans = NULL;
9273         int ret = 0;
9274         int bad_roots = 0;
9275         int need_trans = 0;
9276
9277         ret = build_roots_info_cache(info);
9278         if (ret)
9279                 goto out;
9280
9281         path = btrfs_alloc_path();
9282         if (!path) {
9283                 ret = -ENOMEM;
9284                 goto out;
9285         }
9286
9287         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9288         key.type = BTRFS_ROOT_ITEM_KEY;
9289         key.offset = 0;
9290
9291 again:
9292         /*
9293          * Avoid opening and committing transactions if a leaf doesn't have
9294          * any root items that need to be fixed, so that we avoid rotating
9295          * backup roots unnecessarily.
9296          */
9297         if (need_trans) {
9298                 trans = btrfs_start_transaction(info->tree_root, 1);
9299                 if (IS_ERR(trans)) {
9300                         ret = PTR_ERR(trans);
9301                         goto out;
9302                 }
9303         }
9304
9305         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9306                                 0, trans ? 1 : 0);
9307         if (ret < 0)
9308                 goto out;
9309         leaf = path->nodes[0];
9310
9311         while (1) {
9312                 struct btrfs_key found_key;
9313
9314                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9315                         int no_more_keys = find_next_key(path, &key);
9316
9317                         btrfs_release_path(path);
9318                         if (trans) {
9319                                 ret = btrfs_commit_transaction(trans,
9320                                                                info->tree_root);
9321                                 trans = NULL;
9322                                 if (ret < 0)
9323                                         goto out;
9324                         }
9325                         need_trans = 0;
9326                         if (no_more_keys)
9327                                 break;
9328                         goto again;
9329                 }
9330
9331                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9332
9333                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9334                         goto next;
9335                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9336                         goto next;
9337
9338                 ret = maybe_repair_root_item(info, path, &found_key,
9339                                              trans ? 0 : 1);
9340                 if (ret < 0)
9341                         goto out;
9342                 if (ret) {
9343                         if (!trans && repair) {
9344                                 need_trans = 1;
9345                                 key = found_key;
9346                                 btrfs_release_path(path);
9347                                 goto again;
9348                         }
9349                         bad_roots++;
9350                 }
9351 next:
9352                 path->slots[0]++;
9353         }
9354         ret = 0;
9355 out:
9356         free_roots_info_cache();
9357         btrfs_free_path(path);
9358         if (trans)
9359                 btrfs_commit_transaction(trans, info->tree_root);
9360         if (ret < 0)
9361                 return ret;
9362
9363         return bad_roots;
9364 }
9365
/*
 * Help text of "btrfs check": synopsis, one-line summary, long description,
 * then one entry per option.  The array is NULL terminated.
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found.",
	"WARNING: the repair mode is considered dangerous",
	"",
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"-p|--progress               indicate progress",
	NULL
};
9388
9389 int cmd_check(int argc, char **argv)
9390 {
9391         struct cache_tree root_cache;
9392         struct btrfs_root *root;
9393         struct btrfs_fs_info *info;
9394         u64 bytenr = 0;
9395         u64 subvolid = 0;
9396         u64 tree_root_bytenr = 0;
9397         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9398         int ret;
9399         u64 num;
9400         int init_csum_tree = 0;
9401         int readonly = 0;
9402         int qgroup_report = 0;
9403         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9404
9405         while(1) {
9406                 int c;
9407                 enum { OPT_REPAIR = 257, OPT_INIT_CSUM, OPT_INIT_EXTENT,
9408                         OPT_CHECK_CSUM, OPT_READONLY };
9409                 static const struct option long_options[] = {
9410                         { "super", required_argument, NULL, 's' },
9411                         { "repair", no_argument, NULL, OPT_REPAIR },
9412                         { "readonly", no_argument, NULL, OPT_READONLY },
9413                         { "init-csum-tree", no_argument, NULL, OPT_INIT_CSUM },
9414                         { "init-extent-tree", no_argument, NULL, OPT_INIT_EXTENT },
9415                         { "check-data-csum", no_argument, NULL, OPT_CHECK_CSUM },
9416                         { "backup", no_argument, NULL, 'b' },
9417                         { "subvol-extents", required_argument, NULL, 'E' },
9418                         { "qgroup-report", no_argument, NULL, 'Q' },
9419                         { "tree-root", required_argument, NULL, 'r' },
9420                         { "progress", no_argument, NULL, 'p' },
9421                         { NULL, 0, NULL, 0}
9422                 };
9423
9424                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9425                 if (c < 0)
9426                         break;
9427                 switch(c) {
9428                         case 'a': /* ignored */ break;
9429                         case 'b':
9430                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9431                                 break;
9432                         case 's':
9433                                 num = arg_strtou64(optarg);
9434                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9435                                         fprintf(stderr,
9436                                                 "ERROR: super mirror should be less than: %d\n",
9437                                                 BTRFS_SUPER_MIRROR_MAX);
9438                                         exit(1);
9439                                 }
9440                                 bytenr = btrfs_sb_offset(((int)num));
9441                                 printf("using SB copy %llu, bytenr %llu\n", num,
9442                                        (unsigned long long)bytenr);
9443                                 break;
9444                         case 'Q':
9445                                 qgroup_report = 1;
9446                                 break;
9447                         case 'E':
9448                                 subvolid = arg_strtou64(optarg);
9449                                 break;
9450                         case 'r':
9451                                 tree_root_bytenr = arg_strtou64(optarg);
9452                                 break;
9453                         case 'p':
9454                                 ctx.progress_enabled = true;
9455                                 break;
9456                         case '?':
9457                         case 'h':
9458                                 usage(cmd_check_usage);
9459                         case OPT_REPAIR:
9460                                 printf("enabling repair mode\n");
9461                                 repair = 1;
9462                                 ctree_flags |= OPEN_CTREE_WRITES;
9463                                 break;
9464                         case OPT_READONLY:
9465                                 readonly = 1;
9466                                 break;
9467                         case OPT_INIT_CSUM:
9468                                 printf("Creating a new CRC tree\n");
9469                                 init_csum_tree = 1;
9470                                 repair = 1;
9471                                 ctree_flags |= OPEN_CTREE_WRITES;
9472                                 break;
9473                         case OPT_INIT_EXTENT:
9474                                 init_extent_tree = 1;
9475                                 ctree_flags |= (OPEN_CTREE_WRITES |
9476                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9477                                 repair = 1;
9478                                 break;
9479                         case OPT_CHECK_CSUM:
9480                                 check_data_csum = 1;
9481                                 break;
9482                 }
9483         }
9484         argc = argc - optind;
9485
9486         if (check_argc_exact(argc, 1))
9487                 usage(cmd_check_usage);
9488
9489         if (ctx.progress_enabled) {
9490                 ctx.tp = TASK_NOTHING;
9491                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9492         }
9493
9494         /* This check is the only reason for --readonly to exist */
9495         if (readonly && repair) {
9496                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9497                 exit(1);
9498         }
9499
9500         radix_tree_init();
9501         cache_tree_init(&root_cache);
9502
9503         if((ret = check_mounted(argv[optind])) < 0) {
9504                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9505                 goto err_out;
9506         } else if(ret) {
9507                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9508                 ret = -EBUSY;
9509                 goto err_out;
9510         }
9511
9512         /* only allow partial opening under repair mode */
9513         if (repair)
9514                 ctree_flags |= OPEN_CTREE_PARTIAL;
9515
9516         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9517                                   ctree_flags);
9518         if (!info) {
9519                 fprintf(stderr, "Couldn't open file system\n");
9520                 ret = -EIO;
9521                 goto err_out;
9522         }
9523
9524         global_info = info;
9525         root = info->fs_root;
9526
9527         /*
9528          * repair mode will force us to commit transaction which
9529          * will make us fail to load log tree when mounting.
9530          */
9531         if (repair && btrfs_super_log_root(info->super_copy)) {
9532                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9533                 if (!ret) {
9534                         ret = 1;
9535                         goto close_out;
9536                 }
9537                 ret = zero_log_tree(root);
9538                 if (ret) {
9539                         fprintf(stderr, "fail to zero log tree\n");
9540                         goto close_out;
9541                 }
9542         }
9543
9544         uuid_unparse(info->super_copy->fsid, uuidbuf);
9545         if (qgroup_report) {
9546                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9547                        uuidbuf);
9548                 ret = qgroup_verify_all(info);
9549                 if (ret == 0)
9550                         print_qgroup_report(1);
9551                 goto close_out;
9552         }
9553         if (subvolid) {
9554                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9555                        subvolid, argv[optind], uuidbuf);
9556                 ret = print_extent_state(info, subvolid);
9557                 goto close_out;
9558         }
9559         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9560
9561         if (!extent_buffer_uptodate(info->tree_root->node) ||
9562             !extent_buffer_uptodate(info->dev_root->node) ||
9563             !extent_buffer_uptodate(info->chunk_root->node)) {
9564                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9565                 ret = -EIO;
9566                 goto close_out;
9567         }
9568
9569         if (init_extent_tree || init_csum_tree) {
9570                 struct btrfs_trans_handle *trans;
9571
9572                 trans = btrfs_start_transaction(info->extent_root, 0);
9573                 if (IS_ERR(trans)) {
9574                         fprintf(stderr, "Error starting transaction\n");
9575                         ret = PTR_ERR(trans);
9576                         goto close_out;
9577                 }
9578
9579                 if (init_extent_tree) {
9580                         printf("Creating a new extent tree\n");
9581                         ret = reinit_extent_tree(trans, info);
9582                         if (ret)
9583                                 goto close_out;
9584                 }
9585
9586                 if (init_csum_tree) {
9587                         fprintf(stderr, "Reinit crc root\n");
9588                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9589                         if (ret) {
9590                                 fprintf(stderr, "crc root initialization failed\n");
9591                                 ret = -EIO;
9592                                 goto close_out;
9593                         }
9594
9595                         ret = fill_csum_tree(trans, info->csum_root,
9596                                              init_extent_tree);
9597                         if (ret) {
9598                                 fprintf(stderr, "crc refilling failed\n");
9599                                 return -EIO;
9600                         }
9601                 }
9602                 /*
9603                  * Ok now we commit and run the normal fsck, which will add
9604                  * extent entries for all of the items it finds.
9605                  */
9606                 ret = btrfs_commit_transaction(trans, info->extent_root);
9607                 if (ret)
9608                         goto close_out;
9609         }
9610         if (!extent_buffer_uptodate(info->extent_root->node)) {
9611                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9612                 ret = -EIO;
9613                 goto close_out;
9614         }
9615         if (!extent_buffer_uptodate(info->csum_root->node)) {
9616                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9617                 ret = -EIO;
9618                 goto close_out;
9619         }
9620
9621         if (!ctx.progress_enabled)
9622                 fprintf(stderr, "checking extents\n");
9623         ret = check_chunks_and_extents(root);
9624         if (ret)
9625                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9626
9627         ret = repair_root_items(info);
9628         if (ret < 0)
9629                 goto close_out;
9630         if (repair) {
9631                 fprintf(stderr, "Fixed %d roots.\n", ret);
9632                 ret = 0;
9633         } else if (ret > 0) {
9634                 fprintf(stderr,
9635                        "Found %d roots with an outdated root item.\n",
9636                        ret);
9637                 fprintf(stderr,
9638                         "Please run a filesystem check with the option --repair to fix them.\n");
9639                 ret = 1;
9640                 goto close_out;
9641         }
9642
9643         if (!ctx.progress_enabled)
9644                 fprintf(stderr, "checking free space cache\n");
9645         ret = check_space_cache(root);
9646         if (ret)
9647                 goto out;
9648
9649         /*
9650          * We used to have to have these hole extents in between our real
9651          * extents so if we don't have this flag set we need to make sure there
9652          * are no gaps in the file extents for inodes, otherwise we can just
9653          * ignore it when this happens.
9654          */
9655         no_holes = btrfs_fs_incompat(root->fs_info,
9656                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9657         if (!ctx.progress_enabled)
9658                 fprintf(stderr, "checking fs roots\n");
9659         ret = check_fs_roots(root, &root_cache);
9660         if (ret)
9661                 goto out;
9662
9663         fprintf(stderr, "checking csums\n");
9664         ret = check_csums(root);
9665         if (ret)
9666                 goto out;
9667
9668         fprintf(stderr, "checking root refs\n");
9669         ret = check_root_refs(root, &root_cache);
9670         if (ret)
9671                 goto out;
9672
9673         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9674                 struct extent_buffer *eb;
9675
9676                 eb = list_first_entry(&root->fs_info->recow_ebs,
9677                                       struct extent_buffer, recow);
9678                 list_del_init(&eb->recow);
9679                 ret = recow_extent_buffer(root, eb);
9680                 if (ret)
9681                         break;
9682         }
9683
9684         while (!list_empty(&delete_items)) {
9685                 struct bad_item *bad;
9686
9687                 bad = list_first_entry(&delete_items, struct bad_item, list);
9688                 list_del_init(&bad->list);
9689                 if (repair)
9690                         ret = delete_bad_item(root, bad);
9691                 free(bad);
9692         }
9693
9694         if (info->quota_enabled) {
9695                 int err;
9696                 fprintf(stderr, "checking quota groups\n");
9697                 err = qgroup_verify_all(info);
9698                 if (err)
9699                         goto out;
9700         }
9701
9702         if (!list_empty(&root->fs_info->recow_ebs)) {
9703                 fprintf(stderr, "Transid errors in file system\n");
9704                 ret = 1;
9705         }
9706 out:
9707         print_qgroup_report(0);
9708         if (found_old_backref) { /*
9709                  * there was a disk format change when mixed
9710                  * backref was in testing tree. The old format
9711                  * existed about one week.
9712                  */
9713                 printf("\n * Found old mixed backref format. "
9714                        "The old format is not supported! *"
9715                        "\n * Please mount the FS in readonly mode, "
9716                        "backup data and re-format the FS. *\n\n");
9717                 ret = 1;
9718         }
9719         printf("found %llu bytes used err is %d\n",
9720                (unsigned long long)bytes_used, ret);
9721         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9722         printf("total tree bytes: %llu\n",
9723                (unsigned long long)total_btree_bytes);
9724         printf("total fs tree bytes: %llu\n",
9725                (unsigned long long)total_fs_tree_bytes);
9726         printf("total extent tree bytes: %llu\n",
9727                (unsigned long long)total_extent_tree_bytes);
9728         printf("btree space waste bytes: %llu\n",
9729                (unsigned long long)btree_space_waste);
9730         printf("file data blocks allocated: %llu\n referenced %llu\n",
9731                 (unsigned long long)data_bytes_allocated,
9732                 (unsigned long long)data_bytes_referenced);
9733         printf("%s\n", PACKAGE_STRING);
9734
9735         free_root_recs_tree(&root_cache);
9736 close_out:
9737         close_ctree(root);
9738 err_out:
9739         if (ctx.progress_enabled)
9740                 task_deinit(ctx.info);
9741
9742         return ret;
9743 }