btrfs-progs: check, properly reset nlink of multi-linked file
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "btrfsck.h"
39 #include "qgroup-verify.h"
40 #include "rbtree-utils.h"
41 #include "backref.h"
42 #include "ulist.h"
43
44 enum task_position {
45         TASK_EXTENTS,
46         TASK_FREE_SPACE,
47         TASK_FS_ROOTS,
48         TASK_NOTHING, /* have to be the last element */
49 };
50
51 struct task_ctx {
52         int progress_enabled;
53         enum task_position tp;
54
55         struct task_info *info;
56 };
57
58 static u64 bytes_used = 0;
59 static u64 total_csum_bytes = 0;
60 static u64 total_btree_bytes = 0;
61 static u64 total_fs_tree_bytes = 0;
62 static u64 total_extent_tree_bytes = 0;
63 static u64 btree_space_waste = 0;
64 static u64 data_bytes_allocated = 0;
65 static u64 data_bytes_referenced = 0;
66 static int found_old_backref = 0;
67 static LIST_HEAD(duplicate_extents);
68 static LIST_HEAD(delete_items);
69 static int repair = 0;
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75
76 static void *print_status_check(void *p)
77 {
78         struct task_ctx *priv = p;
79         const char work_indicator[] = { '.', 'o', 'O', 'o' };
80         uint32_t count = 0;
81         static char *task_position_string[] = {
82                 "checking extents",
83                 "checking free space cache",
84                 "checking fs roots",
85         };
86
87         task_period_start(priv->info, 1000 /* 1s */);
88
89         if (priv->tp == TASK_NOTHING)
90                 return NULL;
91
92         while (1) {
93                 printf("%s [%c]\r", task_position_string[priv->tp],
94                                 work_indicator[count % 4]);
95                 count++;
96                 fflush(stdout);
97                 task_period_wait(priv->info);
98         }
99         return NULL;
100 }
101
102 static int print_status_return(void *p)
103 {
104         printf("\n");
105         fflush(stdout);
106
107         return 0;
108 }
109
110 struct extent_backref {
111         struct list_head list;
112         unsigned int is_data:1;
113         unsigned int found_extent_tree:1;
114         unsigned int full_backref:1;
115         unsigned int found_ref:1;
116         unsigned int broken:1;
117 };
118
119 struct data_backref {
120         struct extent_backref node;
121         union {
122                 u64 parent;
123                 u64 root;
124         };
125         u64 owner;
126         u64 offset;
127         u64 disk_bytenr;
128         u64 bytes;
129         u64 ram_bytes;
130         u32 num_refs;
131         u32 found_ref;
132 };
133
134 /*
135  * Much like data_backref, just removed the undetermined members
136  * and change it to use list_head.
137  * During extent scan, it is stored in root->orphan_data_extent.
138  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
139  */
140 struct orphan_data_extent {
141         struct list_head list;
142         u64 root;
143         u64 objectid;
144         u64 offset;
145         u64 disk_bytenr;
146         u64 disk_len;
147 };
148
149 struct tree_backref {
150         struct extent_backref node;
151         union {
152                 u64 parent;
153                 u64 root;
154         };
155 };
156
157 struct extent_record {
158         struct list_head backrefs;
159         struct list_head dups;
160         struct list_head list;
161         struct cache_extent cache;
162         struct btrfs_disk_key parent_key;
163         u64 start;
164         u64 max_size;
165         u64 nr;
166         u64 refs;
167         u64 extent_item_refs;
168         u64 generation;
169         u64 parent_generation;
170         u64 info_objectid;
171         u32 num_duplicates;
172         u8 info_level;
173         int flag_block_full_backref;
174         unsigned int found_rec:1;
175         unsigned int content_checked:1;
176         unsigned int owner_ref_checked:1;
177         unsigned int is_root:1;
178         unsigned int metadata:1;
179         unsigned int bad_full_backref:1;
180         unsigned int crossing_stripes:1;
181         unsigned int wrong_chunk_type:1;
182 };
183
184 struct inode_backref {
185         struct list_head list;
186         unsigned int found_dir_item:1;
187         unsigned int found_dir_index:1;
188         unsigned int found_inode_ref:1;
189         unsigned int filetype:8;
190         int errors;
191         unsigned int ref_type;
192         u64 dir;
193         u64 index;
194         u16 namelen;
195         char name[0];
196 };
197
198 struct root_item_record {
199         struct list_head list;
200         u64 objectid;
201         u64 bytenr;
202         u64 last_snapshot;
203         u8 level;
204         u8 drop_level;
205         int level_size;
206         struct btrfs_key drop_key;
207 };
208
209 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
210 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
211 #define REF_ERR_NO_INODE_REF            (1 << 2)
212 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
213 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
214 #define REF_ERR_DUP_INODE_REF           (1 << 5)
215 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
216 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
217 #define REF_ERR_NAME_TOO_LONG           (1 << 8) // 100
218 #define REF_ERR_NO_ROOT_REF             (1 << 9)
219 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
220 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
221 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
222
223 struct file_extent_hole {
224         struct rb_node node;
225         u64 start;
226         u64 len;
227 };
228
229 /* Compatible function to allow reuse of old codes */
230 static u64 first_extent_gap(struct rb_root *holes)
231 {
232         struct file_extent_hole *hole;
233
234         if (RB_EMPTY_ROOT(holes))
235                 return (u64)-1;
236
237         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
238         return hole->start;
239 }
240
241 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
242 {
243         struct file_extent_hole *hole1;
244         struct file_extent_hole *hole2;
245
246         hole1 = rb_entry(node1, struct file_extent_hole, node);
247         hole2 = rb_entry(node2, struct file_extent_hole, node);
248
249         if (hole1->start > hole2->start)
250                 return -1;
251         if (hole1->start < hole2->start)
252                 return 1;
253         /* Now hole1->start == hole2->start */
254         if (hole1->len >= hole2->len)
255                 /*
256                  * Hole 1 will be merge center
257                  * Same hole will be merged later
258                  */
259                 return -1;
260         /* Hole 2 will be merge center */
261         return 1;
262 }
263
264 /*
265  * Add a hole to the record
266  *
267  * This will do hole merge for copy_file_extent_holes(),
268  * which will ensure there won't be continuous holes.
269  */
270 static int add_file_extent_hole(struct rb_root *holes,
271                                 u64 start, u64 len)
272 {
273         struct file_extent_hole *hole;
274         struct file_extent_hole *prev = NULL;
275         struct file_extent_hole *next = NULL;
276
277         hole = malloc(sizeof(*hole));
278         if (!hole)
279                 return -ENOMEM;
280         hole->start = start;
281         hole->len = len;
282         /* Since compare will not return 0, no -EEXIST will happen */
283         rb_insert(holes, &hole->node, compare_hole);
284
285         /* simple merge with previous hole */
286         if (rb_prev(&hole->node))
287                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
288                                 node);
289         if (prev && prev->start + prev->len >= hole->start) {
290                 hole->len = hole->start + hole->len - prev->start;
291                 hole->start = prev->start;
292                 rb_erase(&prev->node, holes);
293                 free(prev);
294                 prev = NULL;
295         }
296
297         /* iterate merge with next holes */
298         while (1) {
299                 if (!rb_next(&hole->node))
300                         break;
301                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
302                                         node);
303                 if (hole->start + hole->len >= next->start) {
304                         if (hole->start + hole->len <= next->start + next->len)
305                                 hole->len = next->start + next->len -
306                                             hole->start;
307                         rb_erase(&next->node, holes);
308                         free(next);
309                         next = NULL;
310                 } else
311                         break;
312         }
313         return 0;
314 }
315
316 static int compare_hole_range(struct rb_node *node, void *data)
317 {
318         struct file_extent_hole *hole;
319         u64 start;
320
321         hole = (struct file_extent_hole *)data;
322         start = hole->start;
323
324         hole = rb_entry(node, struct file_extent_hole, node);
325         if (start < hole->start)
326                 return -1;
327         if (start >= hole->start && start < hole->start + hole->len)
328                 return 0;
329         return 1;
330 }
331
332 /*
333  * Delete a hole in the record
334  *
335  * This will do the hole split and is much restrict than add.
336  */
337 static int del_file_extent_hole(struct rb_root *holes,
338                                 u64 start, u64 len)
339 {
340         struct file_extent_hole *hole;
341         struct file_extent_hole tmp;
342         u64 prev_start = 0;
343         u64 prev_len = 0;
344         u64 next_start = 0;
345         u64 next_len = 0;
346         struct rb_node *node;
347         int have_prev = 0;
348         int have_next = 0;
349         int ret = 0;
350
351         tmp.start = start;
352         tmp.len = len;
353         node = rb_search(holes, &tmp, compare_hole_range, NULL);
354         if (!node)
355                 return -EEXIST;
356         hole = rb_entry(node, struct file_extent_hole, node);
357         if (start + len > hole->start + hole->len)
358                 return -EEXIST;
359
360         /*
361          * Now there will be no overflap, delete the hole and re-add the
362          * split(s) if they exists.
363          */
364         if (start > hole->start) {
365                 prev_start = hole->start;
366                 prev_len = start - hole->start;
367                 have_prev = 1;
368         }
369         if (hole->start + hole->len > start + len) {
370                 next_start = start + len;
371                 next_len = hole->start + hole->len - start - len;
372                 have_next = 1;
373         }
374         rb_erase(node, holes);
375         free(hole);
376         if (have_prev) {
377                 ret = add_file_extent_hole(holes, prev_start, prev_len);
378                 if (ret < 0)
379                         return ret;
380         }
381         if (have_next) {
382                 ret = add_file_extent_hole(holes, next_start, next_len);
383                 if (ret < 0)
384                         return ret;
385         }
386         return 0;
387 }
388
389 static int copy_file_extent_holes(struct rb_root *dst,
390                                   struct rb_root *src)
391 {
392         struct file_extent_hole *hole;
393         struct rb_node *node;
394         int ret = 0;
395
396         node = rb_first(src);
397         while (node) {
398                 hole = rb_entry(node, struct file_extent_hole, node);
399                 ret = add_file_extent_hole(dst, hole->start, hole->len);
400                 if (ret)
401                         break;
402                 node = rb_next(node);
403         }
404         return ret;
405 }
406
407 static void free_file_extent_holes(struct rb_root *holes)
408 {
409         struct rb_node *node;
410         struct file_extent_hole *hole;
411
412         node = rb_first(holes);
413         while (node) {
414                 hole = rb_entry(node, struct file_extent_hole, node);
415                 rb_erase(node, holes);
416                 free(hole);
417                 node = rb_first(holes);
418         }
419 }
420
421 struct inode_record {
422         struct list_head backrefs;
423         unsigned int checked:1;
424         unsigned int merging:1;
425         unsigned int found_inode_item:1;
426         unsigned int found_dir_item:1;
427         unsigned int found_file_extent:1;
428         unsigned int found_csum_item:1;
429         unsigned int some_csum_missing:1;
430         unsigned int nodatasum:1;
431         int errors;
432
433         u64 ino;
434         u32 nlink;
435         u32 imode;
436         u64 isize;
437         u64 nbytes;
438
439         u32 found_link;
440         u64 found_size;
441         u64 extent_start;
442         u64 extent_end;
443         struct rb_root holes;
444         struct list_head orphan_extents;
445
446         u32 refs;
447 };
448
449 #define I_ERR_NO_INODE_ITEM             (1 << 0)
450 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
451 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
452 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
453 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
454 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
455 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
456 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
457 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) // 100
458 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
459 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) // 400
460 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
461 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
462 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
463 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
464
465 struct root_backref {
466         struct list_head list;
467         unsigned int found_dir_item:1;
468         unsigned int found_dir_index:1;
469         unsigned int found_back_ref:1;
470         unsigned int found_forward_ref:1;
471         unsigned int reachable:1;
472         int errors;
473         u64 ref_root;
474         u64 dir;
475         u64 index;
476         u16 namelen;
477         char name[0];
478 };
479
480 struct root_record {
481         struct list_head backrefs;
482         struct cache_extent cache;
483         unsigned int found_root_item:1;
484         u64 objectid;
485         u32 found_ref;
486 };
487
488 struct ptr_node {
489         struct cache_extent cache;
490         void *data;
491 };
492
493 struct shared_node {
494         struct cache_extent cache;
495         struct cache_tree root_cache;
496         struct cache_tree inode_cache;
497         struct inode_record *current;
498         u32 refs;
499 };
500
501 struct block_info {
502         u64 start;
503         u32 size;
504 };
505
506 struct walk_control {
507         struct cache_tree shared;
508         struct shared_node *nodes[BTRFS_MAX_LEVEL];
509         int active_node;
510         int root_level;
511 };
512
513 struct bad_item {
514         struct btrfs_key key;
515         u64 root_id;
516         struct list_head list;
517 };
518
519 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
520
521 static void record_root_in_trans(struct btrfs_trans_handle *trans,
522                                  struct btrfs_root *root)
523 {
524         if (root->last_trans != trans->transid) {
525                 root->track_dirty = 1;
526                 root->last_trans = trans->transid;
527                 root->commit_root = root->node;
528                 extent_buffer_get(root->node);
529         }
530 }
531
532 static u8 imode_to_type(u32 imode)
533 {
534 #define S_SHIFT 12
535         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
536                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
537                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
538                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
539                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
540                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
541                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
542                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
543         };
544
545         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
546 #undef S_SHIFT
547 }
548
549 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
550 {
551         struct device_record *rec1;
552         struct device_record *rec2;
553
554         rec1 = rb_entry(node1, struct device_record, node);
555         rec2 = rb_entry(node2, struct device_record, node);
556         if (rec1->devid > rec2->devid)
557                 return -1;
558         else if (rec1->devid < rec2->devid)
559                 return 1;
560         else
561                 return 0;
562 }
563
564 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
565 {
566         struct inode_record *rec;
567         struct inode_backref *backref;
568         struct inode_backref *orig;
569         struct orphan_data_extent *src_orphan;
570         struct orphan_data_extent *dst_orphan;
571         size_t size;
572         int ret;
573
574         rec = malloc(sizeof(*rec));
575         memcpy(rec, orig_rec, sizeof(*rec));
576         rec->refs = 1;
577         INIT_LIST_HEAD(&rec->backrefs);
578         INIT_LIST_HEAD(&rec->orphan_extents);
579         rec->holes = RB_ROOT;
580
581         list_for_each_entry(orig, &orig_rec->backrefs, list) {
582                 size = sizeof(*orig) + orig->namelen + 1;
583                 backref = malloc(size);
584                 memcpy(backref, orig, size);
585                 list_add_tail(&backref->list, &rec->backrefs);
586         }
587         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
588                 dst_orphan = malloc(sizeof(*dst_orphan));
589                 /* TODO: Fix all the HELL of un-catched -ENOMEM case */
590                 BUG_ON(!dst_orphan);
591                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
592                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
593         }
594         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
595         BUG_ON(ret < 0);
596
597         return rec;
598 }
599
600 static void print_orphan_data_extents(struct list_head *orphan_extents,
601                                       u64 objectid)
602 {
603         struct orphan_data_extent *orphan;
604
605         if (list_empty(orphan_extents))
606                 return;
607         printf("The following data extent is lost in tree %llu:\n",
608                objectid);
609         list_for_each_entry(orphan, orphan_extents, list) {
610                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
611                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
612                        orphan->disk_len);
613         }
614 }
615
616 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
617 {
618         u64 root_objectid = root->root_key.objectid;
619         int errors = rec->errors;
620
621         if (!errors)
622                 return;
623         /* reloc root errors, we print its corresponding fs root objectid*/
624         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
625                 root_objectid = root->root_key.offset;
626                 fprintf(stderr, "reloc");
627         }
628         fprintf(stderr, "root %llu inode %llu errors %x",
629                 (unsigned long long) root_objectid,
630                 (unsigned long long) rec->ino, rec->errors);
631
632         if (errors & I_ERR_NO_INODE_ITEM)
633                 fprintf(stderr, ", no inode item");
634         if (errors & I_ERR_NO_ORPHAN_ITEM)
635                 fprintf(stderr, ", no orphan item");
636         if (errors & I_ERR_DUP_INODE_ITEM)
637                 fprintf(stderr, ", dup inode item");
638         if (errors & I_ERR_DUP_DIR_INDEX)
639                 fprintf(stderr, ", dup dir index");
640         if (errors & I_ERR_ODD_DIR_ITEM)
641                 fprintf(stderr, ", odd dir item");
642         if (errors & I_ERR_ODD_FILE_EXTENT)
643                 fprintf(stderr, ", odd file extent");
644         if (errors & I_ERR_BAD_FILE_EXTENT)
645                 fprintf(stderr, ", bad file extent");
646         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
647                 fprintf(stderr, ", file extent overlap");
648         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
649                 fprintf(stderr, ", file extent discount");
650         if (errors & I_ERR_DIR_ISIZE_WRONG)
651                 fprintf(stderr, ", dir isize wrong");
652         if (errors & I_ERR_FILE_NBYTES_WRONG)
653                 fprintf(stderr, ", nbytes wrong");
654         if (errors & I_ERR_ODD_CSUM_ITEM)
655                 fprintf(stderr, ", odd csum item");
656         if (errors & I_ERR_SOME_CSUM_MISSING)
657                 fprintf(stderr, ", some csum missing");
658         if (errors & I_ERR_LINK_COUNT_WRONG)
659                 fprintf(stderr, ", link count wrong");
660         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
661                 fprintf(stderr, ", orphan file extent");
662         fprintf(stderr, "\n");
663         /* Print the orphan extents if needed */
664         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
665                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
666
667         /* Print the holes if needed */
668         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
669                 struct file_extent_hole *hole;
670                 struct rb_node *node;
671                 int found = 0;
672
673                 node = rb_first(&rec->holes);
674                 fprintf(stderr, "Found file extent holes:\n");
675                 while (node) {
676                         found = 1;
677                         hole = rb_entry(node, struct file_extent_hole, node);
678                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
679                                 hole->start, hole->len);
680                         node = rb_next(node);
681                 }
682                 if (!found)
683                         fprintf(stderr, "\tstart: 0, len: %llu\n",
684                                 round_up(rec->isize, root->sectorsize));
685         }
686 }
687
688 static void print_ref_error(int errors)
689 {
690         if (errors & REF_ERR_NO_DIR_ITEM)
691                 fprintf(stderr, ", no dir item");
692         if (errors & REF_ERR_NO_DIR_INDEX)
693                 fprintf(stderr, ", no dir index");
694         if (errors & REF_ERR_NO_INODE_REF)
695                 fprintf(stderr, ", no inode ref");
696         if (errors & REF_ERR_DUP_DIR_ITEM)
697                 fprintf(stderr, ", dup dir item");
698         if (errors & REF_ERR_DUP_DIR_INDEX)
699                 fprintf(stderr, ", dup dir index");
700         if (errors & REF_ERR_DUP_INODE_REF)
701                 fprintf(stderr, ", dup inode ref");
702         if (errors & REF_ERR_INDEX_UNMATCH)
703                 fprintf(stderr, ", index unmatch");
704         if (errors & REF_ERR_FILETYPE_UNMATCH)
705                 fprintf(stderr, ", filetype unmatch");
706         if (errors & REF_ERR_NAME_TOO_LONG)
707                 fprintf(stderr, ", name too long");
708         if (errors & REF_ERR_NO_ROOT_REF)
709                 fprintf(stderr, ", no root ref");
710         if (errors & REF_ERR_NO_ROOT_BACKREF)
711                 fprintf(stderr, ", no root backref");
712         if (errors & REF_ERR_DUP_ROOT_REF)
713                 fprintf(stderr, ", dup root ref");
714         if (errors & REF_ERR_DUP_ROOT_BACKREF)
715                 fprintf(stderr, ", dup root backref");
716         fprintf(stderr, "\n");
717 }
718
719 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
720                                           u64 ino, int mod)
721 {
722         struct ptr_node *node;
723         struct cache_extent *cache;
724         struct inode_record *rec = NULL;
725         int ret;
726
727         cache = lookup_cache_extent(inode_cache, ino, 1);
728         if (cache) {
729                 node = container_of(cache, struct ptr_node, cache);
730                 rec = node->data;
731                 if (mod && rec->refs > 1) {
732                         node->data = clone_inode_rec(rec);
733                         rec->refs--;
734                         rec = node->data;
735                 }
736         } else if (mod) {
737                 rec = calloc(1, sizeof(*rec));
738                 rec->ino = ino;
739                 rec->extent_start = (u64)-1;
740                 rec->refs = 1;
741                 INIT_LIST_HEAD(&rec->backrefs);
742                 INIT_LIST_HEAD(&rec->orphan_extents);
743                 rec->holes = RB_ROOT;
744
745                 node = malloc(sizeof(*node));
746                 node->cache.start = ino;
747                 node->cache.size = 1;
748                 node->data = rec;
749
750                 if (ino == BTRFS_FREE_INO_OBJECTID)
751                         rec->found_link = 1;
752
753                 ret = insert_cache_extent(inode_cache, &node->cache);
754                 BUG_ON(ret);
755         }
756         return rec;
757 }
758
759 static void free_orphan_data_extents(struct list_head *orphan_extents)
760 {
761         struct orphan_data_extent *orphan;
762
763         while (!list_empty(orphan_extents)) {
764                 orphan = list_entry(orphan_extents->next,
765                                     struct orphan_data_extent, list);
766                 list_del(&orphan->list);
767                 free(orphan);
768         }
769 }
770
771 static void free_inode_rec(struct inode_record *rec)
772 {
773         struct inode_backref *backref;
774
775         if (--rec->refs > 0)
776                 return;
777
778         while (!list_empty(&rec->backrefs)) {
779                 backref = list_entry(rec->backrefs.next,
780                                      struct inode_backref, list);
781                 list_del(&backref->list);
782                 free(backref);
783         }
784         free_orphan_data_extents(&rec->orphan_extents);
785         free_file_extent_holes(&rec->holes);
786         free(rec);
787 }
788
789 static int can_free_inode_rec(struct inode_record *rec)
790 {
791         if (!rec->errors && rec->checked && rec->found_inode_item &&
792             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
793                 return 1;
794         return 0;
795 }
796
797 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
798                                  struct inode_record *rec)
799 {
800         struct cache_extent *cache;
801         struct inode_backref *tmp, *backref;
802         struct ptr_node *node;
803         unsigned char filetype;
804
805         if (!rec->found_inode_item)
806                 return;
807
808         filetype = imode_to_type(rec->imode);
809         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
810                 if (backref->found_dir_item && backref->found_dir_index) {
811                         if (backref->filetype != filetype)
812                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
813                         if (!backref->errors && backref->found_inode_ref &&
814                             rec->nlink == rec->found_link) {
815                                 list_del(&backref->list);
816                                 free(backref);
817                         }
818                 }
819         }
820
821         if (!rec->checked || rec->merging)
822                 return;
823
824         if (S_ISDIR(rec->imode)) {
825                 if (rec->found_size != rec->isize)
826                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
827                 if (rec->found_file_extent)
828                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
829         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
830                 if (rec->found_dir_item)
831                         rec->errors |= I_ERR_ODD_DIR_ITEM;
832                 if (rec->found_size != rec->nbytes)
833                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
834                 if (rec->nlink > 0 && !no_holes &&
835                     (rec->extent_end < rec->isize ||
836                      first_extent_gap(&rec->holes) < rec->isize))
837                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
838         }
839
840         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
841                 if (rec->found_csum_item && rec->nodatasum)
842                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
843                 if (rec->some_csum_missing && !rec->nodatasum)
844                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
845         }
846
847         BUG_ON(rec->refs != 1);
848         if (can_free_inode_rec(rec)) {
849                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
850                 node = container_of(cache, struct ptr_node, cache);
851                 BUG_ON(node->data != rec);
852                 remove_cache_extent(inode_cache, &node->cache);
853                 free(node);
854                 free_inode_rec(rec);
855         }
856 }
857
858 static int check_orphan_item(struct btrfs_root *root, u64 ino)
859 {
860         struct btrfs_path path;
861         struct btrfs_key key;
862         int ret;
863
864         key.objectid = BTRFS_ORPHAN_OBJECTID;
865         key.type = BTRFS_ORPHAN_ITEM_KEY;
866         key.offset = ino;
867
868         btrfs_init_path(&path);
869         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
870         btrfs_release_path(&path);
871         if (ret > 0)
872                 ret = -ENOENT;
873         return ret;
874 }
875
876 static int process_inode_item(struct extent_buffer *eb,
877                               int slot, struct btrfs_key *key,
878                               struct shared_node *active_node)
879 {
880         struct inode_record *rec;
881         struct btrfs_inode_item *item;
882
883         rec = active_node->current;
884         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
885         if (rec->found_inode_item) {
886                 rec->errors |= I_ERR_DUP_INODE_ITEM;
887                 return 1;
888         }
889         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
890         rec->nlink = btrfs_inode_nlink(eb, item);
891         rec->isize = btrfs_inode_size(eb, item);
892         rec->nbytes = btrfs_inode_nbytes(eb, item);
893         rec->imode = btrfs_inode_mode(eb, item);
894         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
895                 rec->nodatasum = 1;
896         rec->found_inode_item = 1;
897         if (rec->nlink == 0)
898                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
899         maybe_free_inode_rec(&active_node->inode_cache, rec);
900         return 0;
901 }
902
903 static struct inode_backref *get_inode_backref(struct inode_record *rec,
904                                                 const char *name,
905                                                 int namelen, u64 dir)
906 {
907         struct inode_backref *backref;
908
909         list_for_each_entry(backref, &rec->backrefs, list) {
910                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
911                         break;
912                 if (backref->dir != dir || backref->namelen != namelen)
913                         continue;
914                 if (memcmp(name, backref->name, namelen))
915                         continue;
916                 return backref;
917         }
918
919         backref = malloc(sizeof(*backref) + namelen + 1);
920         memset(backref, 0, sizeof(*backref));
921         backref->dir = dir;
922         backref->namelen = namelen;
923         memcpy(backref->name, name, namelen);
924         backref->name[namelen] = '\0';
925         list_add_tail(&backref->list, &rec->backrefs);
926         return backref;
927 }
928
929 static int add_inode_backref(struct cache_tree *inode_cache,
930                              u64 ino, u64 dir, u64 index,
931                              const char *name, int namelen,
932                              int filetype, int itemtype, int errors)
933 {
934         struct inode_record *rec;
935         struct inode_backref *backref;
936
937         rec = get_inode_rec(inode_cache, ino, 1);
938         backref = get_inode_backref(rec, name, namelen, dir);
939         if (errors)
940                 backref->errors |= errors;
941         if (itemtype == BTRFS_DIR_INDEX_KEY) {
942                 if (backref->found_dir_index)
943                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
944                 if (backref->found_inode_ref && backref->index != index)
945                         backref->errors |= REF_ERR_INDEX_UNMATCH;
946                 if (backref->found_dir_item && backref->filetype != filetype)
947                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
948
949                 backref->index = index;
950                 backref->filetype = filetype;
951                 backref->found_dir_index = 1;
952         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
953                 rec->found_link++;
954                 if (backref->found_dir_item)
955                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
956                 if (backref->found_dir_index && backref->filetype != filetype)
957                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
958
959                 backref->filetype = filetype;
960                 backref->found_dir_item = 1;
961         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
962                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
963                 if (backref->found_inode_ref)
964                         backref->errors |= REF_ERR_DUP_INODE_REF;
965                 if (backref->found_dir_index && backref->index != index)
966                         backref->errors |= REF_ERR_INDEX_UNMATCH;
967                 else
968                         backref->index = index;
969
970                 backref->ref_type = itemtype;
971                 backref->found_inode_ref = 1;
972         } else {
973                 BUG_ON(1);
974         }
975
976         maybe_free_inode_rec(inode_cache, rec);
977         return 0;
978 }
979
980 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
981                             struct cache_tree *dst_cache)
982 {
983         struct inode_backref *backref;
984         u32 dir_count = 0;
985         int ret = 0;
986
987         dst->merging = 1;
988         list_for_each_entry(backref, &src->backrefs, list) {
989                 if (backref->found_dir_index) {
990                         add_inode_backref(dst_cache, dst->ino, backref->dir,
991                                         backref->index, backref->name,
992                                         backref->namelen, backref->filetype,
993                                         BTRFS_DIR_INDEX_KEY, backref->errors);
994                 }
995                 if (backref->found_dir_item) {
996                         dir_count++;
997                         add_inode_backref(dst_cache, dst->ino,
998                                         backref->dir, 0, backref->name,
999                                         backref->namelen, backref->filetype,
1000                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1001                 }
1002                 if (backref->found_inode_ref) {
1003                         add_inode_backref(dst_cache, dst->ino,
1004                                         backref->dir, backref->index,
1005                                         backref->name, backref->namelen, 0,
1006                                         backref->ref_type, backref->errors);
1007                 }
1008         }
1009
1010         if (src->found_dir_item)
1011                 dst->found_dir_item = 1;
1012         if (src->found_file_extent)
1013                 dst->found_file_extent = 1;
1014         if (src->found_csum_item)
1015                 dst->found_csum_item = 1;
1016         if (src->some_csum_missing)
1017                 dst->some_csum_missing = 1;
1018         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1019                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1020                 if (ret < 0)
1021                         return ret;
1022         }
1023
1024         BUG_ON(src->found_link < dir_count);
1025         dst->found_link += src->found_link - dir_count;
1026         dst->found_size += src->found_size;
1027         if (src->extent_start != (u64)-1) {
1028                 if (dst->extent_start == (u64)-1) {
1029                         dst->extent_start = src->extent_start;
1030                         dst->extent_end = src->extent_end;
1031                 } else {
1032                         if (dst->extent_end > src->extent_start)
1033                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1034                         else if (dst->extent_end < src->extent_start) {
1035                                 ret = add_file_extent_hole(&dst->holes,
1036                                         dst->extent_end,
1037                                         src->extent_start - dst->extent_end);
1038                         }
1039                         if (dst->extent_end < src->extent_end)
1040                                 dst->extent_end = src->extent_end;
1041                 }
1042         }
1043
1044         dst->errors |= src->errors;
1045         if (src->found_inode_item) {
1046                 if (!dst->found_inode_item) {
1047                         dst->nlink = src->nlink;
1048                         dst->isize = src->isize;
1049                         dst->nbytes = src->nbytes;
1050                         dst->imode = src->imode;
1051                         dst->nodatasum = src->nodatasum;
1052                         dst->found_inode_item = 1;
1053                 } else {
1054                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1055                 }
1056         }
1057         dst->merging = 0;
1058
1059         return 0;
1060 }
1061
1062 static int splice_shared_node(struct shared_node *src_node,
1063                               struct shared_node *dst_node)
1064 {
1065         struct cache_extent *cache;
1066         struct ptr_node *node, *ins;
1067         struct cache_tree *src, *dst;
1068         struct inode_record *rec, *conflict;
1069         u64 current_ino = 0;
1070         int splice = 0;
1071         int ret;
1072
1073         if (--src_node->refs == 0)
1074                 splice = 1;
1075         if (src_node->current)
1076                 current_ino = src_node->current->ino;
1077
1078         src = &src_node->root_cache;
1079         dst = &dst_node->root_cache;
1080 again:
1081         cache = search_cache_extent(src, 0);
1082         while (cache) {
1083                 node = container_of(cache, struct ptr_node, cache);
1084                 rec = node->data;
1085                 cache = next_cache_extent(cache);
1086
1087                 if (splice) {
1088                         remove_cache_extent(src, &node->cache);
1089                         ins = node;
1090                 } else {
1091                         ins = malloc(sizeof(*ins));
1092                         ins->cache.start = node->cache.start;
1093                         ins->cache.size = node->cache.size;
1094                         ins->data = rec;
1095                         rec->refs++;
1096                 }
1097                 ret = insert_cache_extent(dst, &ins->cache);
1098                 if (ret == -EEXIST) {
1099                         conflict = get_inode_rec(dst, rec->ino, 1);
1100                         merge_inode_recs(rec, conflict, dst);
1101                         if (rec->checked) {
1102                                 conflict->checked = 1;
1103                                 if (dst_node->current == conflict)
1104                                         dst_node->current = NULL;
1105                         }
1106                         maybe_free_inode_rec(dst, conflict);
1107                         free_inode_rec(rec);
1108                         free(ins);
1109                 } else {
1110                         BUG_ON(ret);
1111                 }
1112         }
1113
1114         if (src == &src_node->root_cache) {
1115                 src = &src_node->inode_cache;
1116                 dst = &dst_node->inode_cache;
1117                 goto again;
1118         }
1119
1120         if (current_ino > 0 && (!dst_node->current ||
1121             current_ino > dst_node->current->ino)) {
1122                 if (dst_node->current) {
1123                         dst_node->current->checked = 1;
1124                         maybe_free_inode_rec(dst, dst_node->current);
1125                 }
1126                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1127         }
1128         return 0;
1129 }
1130
1131 static void free_inode_ptr(struct cache_extent *cache)
1132 {
1133         struct ptr_node *node;
1134         struct inode_record *rec;
1135
1136         node = container_of(cache, struct ptr_node, cache);
1137         rec = node->data;
1138         free_inode_rec(rec);
1139         free(node);
1140 }
1141
1142 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1143
1144 static struct shared_node *find_shared_node(struct cache_tree *shared,
1145                                             u64 bytenr)
1146 {
1147         struct cache_extent *cache;
1148         struct shared_node *node;
1149
1150         cache = lookup_cache_extent(shared, bytenr, 1);
1151         if (cache) {
1152                 node = container_of(cache, struct shared_node, cache);
1153                 return node;
1154         }
1155         return NULL;
1156 }
1157
1158 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1159 {
1160         int ret;
1161         struct shared_node *node;
1162
1163         node = calloc(1, sizeof(*node));
1164         node->cache.start = bytenr;
1165         node->cache.size = 1;
1166         cache_tree_init(&node->root_cache);
1167         cache_tree_init(&node->inode_cache);
1168         node->refs = refs;
1169
1170         ret = insert_cache_extent(shared, &node->cache);
1171         BUG_ON(ret);
1172         return 0;
1173 }
1174
1175 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1176                              struct walk_control *wc, int level)
1177 {
1178         struct shared_node *node;
1179         struct shared_node *dest;
1180
1181         if (level == wc->active_node)
1182                 return 0;
1183
1184         BUG_ON(wc->active_node <= level);
1185         node = find_shared_node(&wc->shared, bytenr);
1186         if (!node) {
1187                 add_shared_node(&wc->shared, bytenr, refs);
1188                 node = find_shared_node(&wc->shared, bytenr);
1189                 wc->nodes[level] = node;
1190                 wc->active_node = level;
1191                 return 0;
1192         }
1193
1194         if (wc->root_level == wc->active_node &&
1195             btrfs_root_refs(&root->root_item) == 0) {
1196                 if (--node->refs == 0) {
1197                         free_inode_recs_tree(&node->root_cache);
1198                         free_inode_recs_tree(&node->inode_cache);
1199                         remove_cache_extent(&wc->shared, &node->cache);
1200                         free(node);
1201                 }
1202                 return 1;
1203         }
1204
1205         dest = wc->nodes[wc->active_node];
1206         splice_shared_node(node, dest);
1207         if (node->refs == 0) {
1208                 remove_cache_extent(&wc->shared, &node->cache);
1209                 free(node);
1210         }
1211         return 1;
1212 }
1213
1214 static int leave_shared_node(struct btrfs_root *root,
1215                              struct walk_control *wc, int level)
1216 {
1217         struct shared_node *node;
1218         struct shared_node *dest;
1219         int i;
1220
1221         if (level == wc->root_level)
1222                 return 0;
1223
1224         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1225                 if (wc->nodes[i])
1226                         break;
1227         }
1228         BUG_ON(i >= BTRFS_MAX_LEVEL);
1229
1230         node = wc->nodes[wc->active_node];
1231         wc->nodes[wc->active_node] = NULL;
1232         wc->active_node = i;
1233
1234         dest = wc->nodes[wc->active_node];
1235         if (wc->active_node < wc->root_level ||
1236             btrfs_root_refs(&root->root_item) > 0) {
1237                 BUG_ON(node->refs <= 1);
1238                 splice_shared_node(node, dest);
1239         } else {
1240                 BUG_ON(node->refs < 2);
1241                 node->refs--;
1242         }
1243         return 0;
1244 }
1245
1246 /*
1247  * Returns:
1248  * < 0 - on error
1249  * 1   - if the root with id child_root_id is a child of root parent_root_id
1250  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1251  *       has other root(s) as parent(s)
1252  * 2   - if the root child_root_id doesn't have any parent roots
1253  */
1254 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1255                          u64 child_root_id)
1256 {
1257         struct btrfs_path path;
1258         struct btrfs_key key;
1259         struct extent_buffer *leaf;
1260         int has_parent = 0;
1261         int ret;
1262
1263         btrfs_init_path(&path);
1264
1265         key.objectid = parent_root_id;
1266         key.type = BTRFS_ROOT_REF_KEY;
1267         key.offset = child_root_id;
1268         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1269                                 0, 0);
1270         if (ret < 0)
1271                 return ret;
1272         btrfs_release_path(&path);
1273         if (!ret)
1274                 return 1;
1275
1276         key.objectid = child_root_id;
1277         key.type = BTRFS_ROOT_BACKREF_KEY;
1278         key.offset = 0;
1279         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1280                                 0, 0);
1281         if (ret < 0)
1282                 goto out;
1283
1284         while (1) {
1285                 leaf = path.nodes[0];
1286                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1287                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1288                         if (ret)
1289                                 break;
1290                         leaf = path.nodes[0];
1291                 }
1292
1293                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1294                 if (key.objectid != child_root_id ||
1295                     key.type != BTRFS_ROOT_BACKREF_KEY)
1296                         break;
1297
1298                 has_parent = 1;
1299
1300                 if (key.offset == parent_root_id) {
1301                         btrfs_release_path(&path);
1302                         return 1;
1303                 }
1304
1305                 path.slots[0]++;
1306         }
1307 out:
1308         btrfs_release_path(&path);
1309         if (ret < 0)
1310                 return ret;
1311         return has_parent ? 0 : 2;
1312 }
1313
1314 static int process_dir_item(struct btrfs_root *root,
1315                             struct extent_buffer *eb,
1316                             int slot, struct btrfs_key *key,
1317                             struct shared_node *active_node)
1318 {
1319         u32 total;
1320         u32 cur = 0;
1321         u32 len;
1322         u32 name_len;
1323         u32 data_len;
1324         int error;
1325         int nritems = 0;
1326         int filetype;
1327         struct btrfs_dir_item *di;
1328         struct inode_record *rec;
1329         struct cache_tree *root_cache;
1330         struct cache_tree *inode_cache;
1331         struct btrfs_key location;
1332         char namebuf[BTRFS_NAME_LEN];
1333
1334         root_cache = &active_node->root_cache;
1335         inode_cache = &active_node->inode_cache;
1336         rec = active_node->current;
1337         rec->found_dir_item = 1;
1338
1339         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1340         total = btrfs_item_size_nr(eb, slot);
1341         while (cur < total) {
1342                 nritems++;
1343                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1344                 name_len = btrfs_dir_name_len(eb, di);
1345                 data_len = btrfs_dir_data_len(eb, di);
1346                 filetype = btrfs_dir_type(eb, di);
1347
1348                 rec->found_size += name_len;
1349                 if (name_len <= BTRFS_NAME_LEN) {
1350                         len = name_len;
1351                         error = 0;
1352                 } else {
1353                         len = BTRFS_NAME_LEN;
1354                         error = REF_ERR_NAME_TOO_LONG;
1355                 }
1356                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1357
1358                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1359                         add_inode_backref(inode_cache, location.objectid,
1360                                           key->objectid, key->offset, namebuf,
1361                                           len, filetype, key->type, error);
1362                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1363                         add_inode_backref(root_cache, location.objectid,
1364                                           key->objectid, key->offset,
1365                                           namebuf, len, filetype,
1366                                           key->type, error);
1367                 } else {
1368                         fprintf(stderr, "invalid location in dir item %u\n",
1369                                 location.type);
1370                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1371                                           key->objectid, key->offset, namebuf,
1372                                           len, filetype, key->type, error);
1373                 }
1374
1375                 len = sizeof(*di) + name_len + data_len;
1376                 di = (struct btrfs_dir_item *)((char *)di + len);
1377                 cur += len;
1378         }
1379         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1380                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1381
1382         return 0;
1383 }
1384
1385 static int process_inode_ref(struct extent_buffer *eb,
1386                              int slot, struct btrfs_key *key,
1387                              struct shared_node *active_node)
1388 {
1389         u32 total;
1390         u32 cur = 0;
1391         u32 len;
1392         u32 name_len;
1393         u64 index;
1394         int error;
1395         struct cache_tree *inode_cache;
1396         struct btrfs_inode_ref *ref;
1397         char namebuf[BTRFS_NAME_LEN];
1398
1399         inode_cache = &active_node->inode_cache;
1400
1401         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1402         total = btrfs_item_size_nr(eb, slot);
1403         while (cur < total) {
1404                 name_len = btrfs_inode_ref_name_len(eb, ref);
1405                 index = btrfs_inode_ref_index(eb, ref);
1406                 if (name_len <= BTRFS_NAME_LEN) {
1407                         len = name_len;
1408                         error = 0;
1409                 } else {
1410                         len = BTRFS_NAME_LEN;
1411                         error = REF_ERR_NAME_TOO_LONG;
1412                 }
1413                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1414                 add_inode_backref(inode_cache, key->objectid, key->offset,
1415                                   index, namebuf, len, 0, key->type, error);
1416
1417                 len = sizeof(*ref) + name_len;
1418                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1419                 cur += len;
1420         }
1421         return 0;
1422 }
1423
1424 static int process_inode_extref(struct extent_buffer *eb,
1425                                 int slot, struct btrfs_key *key,
1426                                 struct shared_node *active_node)
1427 {
1428         u32 total;
1429         u32 cur = 0;
1430         u32 len;
1431         u32 name_len;
1432         u64 index;
1433         u64 parent;
1434         int error;
1435         struct cache_tree *inode_cache;
1436         struct btrfs_inode_extref *extref;
1437         char namebuf[BTRFS_NAME_LEN];
1438
1439         inode_cache = &active_node->inode_cache;
1440
1441         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1442         total = btrfs_item_size_nr(eb, slot);
1443         while (cur < total) {
1444                 name_len = btrfs_inode_extref_name_len(eb, extref);
1445                 index = btrfs_inode_extref_index(eb, extref);
1446                 parent = btrfs_inode_extref_parent(eb, extref);
1447                 if (name_len <= BTRFS_NAME_LEN) {
1448                         len = name_len;
1449                         error = 0;
1450                 } else {
1451                         len = BTRFS_NAME_LEN;
1452                         error = REF_ERR_NAME_TOO_LONG;
1453                 }
1454                 read_extent_buffer(eb, namebuf,
1455                                    (unsigned long)(extref + 1), len);
1456                 add_inode_backref(inode_cache, key->objectid, parent,
1457                                   index, namebuf, len, 0, key->type, error);
1458
1459                 len = sizeof(*extref) + name_len;
1460                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1461                 cur += len;
1462         }
1463         return 0;
1464
1465 }
1466
1467 static int count_csum_range(struct btrfs_root *root, u64 start,
1468                             u64 len, u64 *found)
1469 {
1470         struct btrfs_key key;
1471         struct btrfs_path path;
1472         struct extent_buffer *leaf;
1473         int ret;
1474         size_t size;
1475         *found = 0;
1476         u64 csum_end;
1477         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1478
1479         btrfs_init_path(&path);
1480
1481         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1482         key.offset = start;
1483         key.type = BTRFS_EXTENT_CSUM_KEY;
1484
1485         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1486                                 &key, &path, 0, 0);
1487         if (ret < 0)
1488                 goto out;
1489         if (ret > 0 && path.slots[0] > 0) {
1490                 leaf = path.nodes[0];
1491                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1492                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1493                     key.type == BTRFS_EXTENT_CSUM_KEY)
1494                         path.slots[0]--;
1495         }
1496
1497         while (len > 0) {
1498                 leaf = path.nodes[0];
1499                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1500                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1501                         if (ret > 0)
1502                                 break;
1503                         else if (ret < 0)
1504                                 goto out;
1505                         leaf = path.nodes[0];
1506                 }
1507
1508                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1509                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1510                     key.type != BTRFS_EXTENT_CSUM_KEY)
1511                         break;
1512
1513                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1514                 if (key.offset >= start + len)
1515                         break;
1516
1517                 if (key.offset > start)
1518                         start = key.offset;
1519
1520                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1521                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1522                 if (csum_end > start) {
1523                         size = min(csum_end - start, len);
1524                         len -= size;
1525                         start += size;
1526                         *found += size;
1527                 }
1528
1529                 path.slots[0]++;
1530         }
1531 out:
1532         btrfs_release_path(&path);
1533         if (ret < 0)
1534                 return ret;
1535         return 0;
1536 }
1537
1538 static int process_file_extent(struct btrfs_root *root,
1539                                 struct extent_buffer *eb,
1540                                 int slot, struct btrfs_key *key,
1541                                 struct shared_node *active_node)
1542 {
1543         struct inode_record *rec;
1544         struct btrfs_file_extent_item *fi;
1545         u64 num_bytes = 0;
1546         u64 disk_bytenr = 0;
1547         u64 extent_offset = 0;
1548         u64 mask = root->sectorsize - 1;
1549         int extent_type;
1550         int ret;
1551
1552         rec = active_node->current;
1553         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1554         rec->found_file_extent = 1;
1555
1556         if (rec->extent_start == (u64)-1) {
1557                 rec->extent_start = key->offset;
1558                 rec->extent_end = key->offset;
1559         }
1560
1561         if (rec->extent_end > key->offset)
1562                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1563         else if (rec->extent_end < key->offset) {
1564                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1565                                            key->offset - rec->extent_end);
1566                 if (ret < 0)
1567                         return ret;
1568         }
1569
1570         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1571         extent_type = btrfs_file_extent_type(eb, fi);
1572
1573         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1574                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1575                 if (num_bytes == 0)
1576                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1577                 rec->found_size += num_bytes;
1578                 num_bytes = (num_bytes + mask) & ~mask;
1579         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1580                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1581                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1582                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1583                 extent_offset = btrfs_file_extent_offset(eb, fi);
1584                 if (num_bytes == 0 || (num_bytes & mask))
1585                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1586                 if (num_bytes + extent_offset >
1587                     btrfs_file_extent_ram_bytes(eb, fi))
1588                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1589                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1590                     (btrfs_file_extent_compression(eb, fi) ||
1591                      btrfs_file_extent_encryption(eb, fi) ||
1592                      btrfs_file_extent_other_encoding(eb, fi)))
1593                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1594                 if (disk_bytenr > 0)
1595                         rec->found_size += num_bytes;
1596         } else {
1597                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1598         }
1599         rec->extent_end = key->offset + num_bytes;
1600
1601         /*
1602          * The data reloc tree will copy full extents into its inode and then
1603          * copy the corresponding csums.  Because the extent it copied could be
1604          * a preallocated extent that hasn't been written to yet there may be no
1605          * csums to copy, ergo we won't have csums for our file extent.  This is
1606          * ok so just don't bother checking csums if the inode belongs to the
1607          * data reloc tree.
1608          */
1609         if (disk_bytenr > 0 &&
1610             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1611                 u64 found;
1612                 if (btrfs_file_extent_compression(eb, fi))
1613                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1614                 else
1615                         disk_bytenr += extent_offset;
1616
1617                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1618                 if (ret < 0)
1619                         return ret;
1620                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1621                         if (found > 0)
1622                                 rec->found_csum_item = 1;
1623                         if (found < num_bytes)
1624                                 rec->some_csum_missing = 1;
1625                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1626                         if (found > 0)
1627                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1628                 }
1629         }
1630         return 0;
1631 }
1632
1633 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1634                             struct walk_control *wc)
1635 {
1636         struct btrfs_key key;
1637         u32 nritems;
1638         int i;
1639         int ret = 0;
1640         struct cache_tree *inode_cache;
1641         struct shared_node *active_node;
1642
1643         if (wc->root_level == wc->active_node &&
1644             btrfs_root_refs(&root->root_item) == 0)
1645                 return 0;
1646
1647         active_node = wc->nodes[wc->active_node];
1648         inode_cache = &active_node->inode_cache;
1649         nritems = btrfs_header_nritems(eb);
1650         for (i = 0; i < nritems; i++) {
1651                 btrfs_item_key_to_cpu(eb, &key, i);
1652
1653                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1654                         continue;
1655                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1656                         continue;
1657
1658                 if (active_node->current == NULL ||
1659                     active_node->current->ino < key.objectid) {
1660                         if (active_node->current) {
1661                                 active_node->current->checked = 1;
1662                                 maybe_free_inode_rec(inode_cache,
1663                                                      active_node->current);
1664                         }
1665                         active_node->current = get_inode_rec(inode_cache,
1666                                                              key.objectid, 1);
1667                 }
1668                 switch (key.type) {
1669                 case BTRFS_DIR_ITEM_KEY:
1670                 case BTRFS_DIR_INDEX_KEY:
1671                         ret = process_dir_item(root, eb, i, &key, active_node);
1672                         break;
1673                 case BTRFS_INODE_REF_KEY:
1674                         ret = process_inode_ref(eb, i, &key, active_node);
1675                         break;
1676                 case BTRFS_INODE_EXTREF_KEY:
1677                         ret = process_inode_extref(eb, i, &key, active_node);
1678                         break;
1679                 case BTRFS_INODE_ITEM_KEY:
1680                         ret = process_inode_item(eb, i, &key, active_node);
1681                         break;
1682                 case BTRFS_EXTENT_DATA_KEY:
1683                         ret = process_file_extent(root, eb, i, &key,
1684                                                   active_node);
1685                         break;
1686                 default:
1687                         break;
1688                 };
1689         }
1690         return ret;
1691 }
1692
1693 static void reada_walk_down(struct btrfs_root *root,
1694                             struct extent_buffer *node, int slot)
1695 {
1696         u64 bytenr;
1697         u64 ptr_gen;
1698         u32 nritems;
1699         u32 blocksize;
1700         int i;
1701         int level;
1702
1703         level = btrfs_header_level(node);
1704         if (level != 1)
1705                 return;
1706
1707         nritems = btrfs_header_nritems(node);
1708         blocksize = btrfs_level_size(root, level - 1);
1709         for (i = slot; i < nritems; i++) {
1710                 bytenr = btrfs_node_blockptr(node, i);
1711                 ptr_gen = btrfs_node_ptr_generation(node, i);
1712                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1713         }
1714 }
1715
1716 /*
1717  * Check the child node/leaf by the following condition:
1718  * 1. the first item key of the node/leaf should be the same with the one
1719  *    in parent.
1720  * 2. block in parent node should match the child node/leaf.
1721  * 3. generation of parent node and child's header should be consistent.
1722  *
1723  * Or the child node/leaf pointed by the key in parent is not valid.
1724  *
1725  * We hope to check leaf owner too, but since subvol may share leaves,
1726  * which makes leaf owner check not so strong, key check should be
1727  * sufficient enough for that case.
1728  */
1729 static int check_child_node(struct btrfs_root *root,
1730                             struct extent_buffer *parent, int slot,
1731                             struct extent_buffer *child)
1732 {
1733         struct btrfs_key parent_key;
1734         struct btrfs_key child_key;
1735         int ret = 0;
1736
1737         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1738         if (btrfs_header_level(child) == 0)
1739                 btrfs_item_key_to_cpu(child, &child_key, 0);
1740         else
1741                 btrfs_node_key_to_cpu(child, &child_key, 0);
1742
1743         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1744                 ret = -EINVAL;
1745                 fprintf(stderr,
1746                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1747                         parent_key.objectid, parent_key.type, parent_key.offset,
1748                         child_key.objectid, child_key.type, child_key.offset);
1749         }
1750         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1751                 ret = -EINVAL;
1752                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1753                         btrfs_node_blockptr(parent, slot),
1754                         btrfs_header_bytenr(child));
1755         }
1756         if (btrfs_node_ptr_generation(parent, slot) !=
1757             btrfs_header_generation(child)) {
1758                 ret = -EINVAL;
1759                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1760                         btrfs_header_generation(child),
1761                         btrfs_node_ptr_generation(parent, slot));
1762         }
1763         return ret;
1764 }
1765
1766 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1767                           struct walk_control *wc, int *level)
1768 {
1769         enum btrfs_tree_block_status status;
1770         u64 bytenr;
1771         u64 ptr_gen;
1772         struct extent_buffer *next;
1773         struct extent_buffer *cur;
1774         u32 blocksize;
1775         int ret, err = 0;
1776         u64 refs;
1777
1778         WARN_ON(*level < 0);
1779         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1780         ret = btrfs_lookup_extent_info(NULL, root,
1781                                        path->nodes[*level]->start,
1782                                        *level, 1, &refs, NULL);
1783         if (ret < 0) {
1784                 err = ret;
1785                 goto out;
1786         }
1787
1788         if (refs > 1) {
1789                 ret = enter_shared_node(root, path->nodes[*level]->start,
1790                                         refs, wc, *level);
1791                 if (ret > 0) {
1792                         err = ret;
1793                         goto out;
1794                 }
1795         }
1796
1797         while (*level >= 0) {
1798                 WARN_ON(*level < 0);
1799                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1800                 cur = path->nodes[*level];
1801
1802                 if (btrfs_header_level(cur) != *level)
1803                         WARN_ON(1);
1804
1805                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1806                         break;
1807                 if (*level == 0) {
1808                         ret = process_one_leaf(root, cur, wc);
1809                         if (ret < 0)
1810                                 err = ret;
1811                         break;
1812                 }
1813                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1814                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1815                 blocksize = btrfs_level_size(root, *level - 1);
1816                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1817                                                1, &refs, NULL);
1818                 if (ret < 0)
1819                         refs = 0;
1820
1821                 if (refs > 1) {
1822                         ret = enter_shared_node(root, bytenr, refs,
1823                                                 wc, *level - 1);
1824                         if (ret > 0) {
1825                                 path->slots[*level]++;
1826                                 continue;
1827                         }
1828                 }
1829
1830                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1831                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1832                         free_extent_buffer(next);
1833                         reada_walk_down(root, cur, path->slots[*level]);
1834                         next = read_tree_block(root, bytenr, blocksize,
1835                                                ptr_gen);
1836                         if (!extent_buffer_uptodate(next)) {
1837                                 struct btrfs_key node_key;
1838
1839                                 btrfs_node_key_to_cpu(path->nodes[*level],
1840                                                       &node_key,
1841                                                       path->slots[*level]);
1842                                 btrfs_add_corrupt_extent_record(root->fs_info,
1843                                                 &node_key,
1844                                                 path->nodes[*level]->start,
1845                                                 root->leafsize, *level);
1846                                 err = -EIO;
1847                                 goto out;
1848                         }
1849                 }
1850
1851                 ret = check_child_node(root, cur, path->slots[*level], next);
1852                 if (ret) {
1853                         err = ret;
1854                         goto out;
1855                 }
1856
1857                 if (btrfs_is_leaf(next))
1858                         status = btrfs_check_leaf(root, NULL, next);
1859                 else
1860                         status = btrfs_check_node(root, NULL, next);
1861                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1862                         free_extent_buffer(next);
1863                         err = -EIO;
1864                         goto out;
1865                 }
1866
1867                 *level = *level - 1;
1868                 free_extent_buffer(path->nodes[*level]);
1869                 path->nodes[*level] = next;
1870                 path->slots[*level] = 0;
1871         }
1872 out:
1873         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1874         return err;
1875 }
1876
1877 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1878                         struct walk_control *wc, int *level)
1879 {
1880         int i;
1881         struct extent_buffer *leaf;
1882
1883         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1884                 leaf = path->nodes[i];
1885                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1886                         path->slots[i]++;
1887                         *level = i;
1888                         return 0;
1889                 } else {
1890                         free_extent_buffer(path->nodes[*level]);
1891                         path->nodes[*level] = NULL;
1892                         BUG_ON(*level > wc->active_node);
1893                         if (*level == wc->active_node)
1894                                 leave_shared_node(root, wc, *level);
1895                         *level = i + 1;
1896                 }
1897         }
1898         return 1;
1899 }
1900
1901 static int check_root_dir(struct inode_record *rec)
1902 {
1903         struct inode_backref *backref;
1904         int ret = -1;
1905
1906         if (!rec->found_inode_item || rec->errors)
1907                 goto out;
1908         if (rec->nlink != 1 || rec->found_link != 0)
1909                 goto out;
1910         if (list_empty(&rec->backrefs))
1911                 goto out;
1912         backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1913         if (!backref->found_inode_ref)
1914                 goto out;
1915         if (backref->index != 0 || backref->namelen != 2 ||
1916             memcmp(backref->name, "..", 2))
1917                 goto out;
1918         if (backref->found_dir_index || backref->found_dir_item)
1919                 goto out;
1920         ret = 0;
1921 out:
1922         return ret;
1923 }
1924
1925 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1926                               struct btrfs_root *root, struct btrfs_path *path,
1927                               struct inode_record *rec)
1928 {
1929         struct btrfs_inode_item *ei;
1930         struct btrfs_key key;
1931         int ret;
1932
1933         key.objectid = rec->ino;
1934         key.type = BTRFS_INODE_ITEM_KEY;
1935         key.offset = (u64)-1;
1936
1937         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1938         if (ret < 0)
1939                 goto out;
1940         if (ret) {
1941                 if (!path->slots[0]) {
1942                         ret = -ENOENT;
1943                         goto out;
1944                 }
1945                 path->slots[0]--;
1946                 ret = 0;
1947         }
1948         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1949         if (key.objectid != rec->ino) {
1950                 ret = -ENOENT;
1951                 goto out;
1952         }
1953
1954         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1955                             struct btrfs_inode_item);
1956         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1957         btrfs_mark_buffer_dirty(path->nodes[0]);
1958         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1959         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
1960                root->root_key.objectid);
1961 out:
1962         btrfs_release_path(path);
1963         return ret;
1964 }
1965
1966 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1967                                     struct btrfs_root *root,
1968                                     struct btrfs_path *path,
1969                                     struct inode_record *rec)
1970 {
1971         int ret;
1972
1973         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
1974         btrfs_release_path(path);
1975         if (!ret)
1976                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1977         return ret;
1978 }
1979
1980 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
1981                                struct btrfs_root *root,
1982                                struct btrfs_path *path,
1983                                struct inode_record *rec)
1984 {
1985         struct btrfs_inode_item *ei;
1986         struct btrfs_key key;
1987         int ret = 0;
1988
1989         key.objectid = rec->ino;
1990         key.type = BTRFS_INODE_ITEM_KEY;
1991         key.offset = 0;
1992
1993         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1994         if (ret) {
1995                 if (ret > 0)
1996                         ret = -ENOENT;
1997                 goto out;
1998         }
1999
2000         /* Since ret == 0, no need to check anything */
2001         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2002                             struct btrfs_inode_item);
2003         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2004         btrfs_mark_buffer_dirty(path->nodes[0]);
2005         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2006         printf("reset nbytes for ino %llu root %llu\n",
2007                rec->ino, root->root_key.objectid);
2008 out:
2009         btrfs_release_path(path);
2010         return ret;
2011 }
2012
2013 static int add_missing_dir_index(struct btrfs_root *root,
2014                                  struct cache_tree *inode_cache,
2015                                  struct inode_record *rec,
2016                                  struct inode_backref *backref)
2017 {
2018         struct btrfs_path *path;
2019         struct btrfs_trans_handle *trans;
2020         struct btrfs_dir_item *dir_item;
2021         struct extent_buffer *leaf;
2022         struct btrfs_key key;
2023         struct btrfs_disk_key disk_key;
2024         struct inode_record *dir_rec;
2025         unsigned long name_ptr;
2026         u32 data_size = sizeof(*dir_item) + backref->namelen;
2027         int ret;
2028
2029         path = btrfs_alloc_path();
2030         if (!path)
2031                 return -ENOMEM;
2032
2033         trans = btrfs_start_transaction(root, 1);
2034         if (IS_ERR(trans)) {
2035                 btrfs_free_path(path);
2036                 return PTR_ERR(trans);
2037         }
2038
2039         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2040                 (unsigned long long)rec->ino);
2041         key.objectid = backref->dir;
2042         key.type = BTRFS_DIR_INDEX_KEY;
2043         key.offset = backref->index;
2044
2045         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2046         BUG_ON(ret);
2047
2048         leaf = path->nodes[0];
2049         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2050
2051         disk_key.objectid = cpu_to_le64(rec->ino);
2052         disk_key.type = BTRFS_INODE_ITEM_KEY;
2053         disk_key.offset = 0;
2054
2055         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2056         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2057         btrfs_set_dir_data_len(leaf, dir_item, 0);
2058         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2059         name_ptr = (unsigned long)(dir_item + 1);
2060         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2061         btrfs_mark_buffer_dirty(leaf);
2062         btrfs_free_path(path);
2063         btrfs_commit_transaction(trans, root);
2064
2065         backref->found_dir_index = 1;
2066         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2067         if (!dir_rec)
2068                 return 0;
2069         dir_rec->found_size += backref->namelen;
2070         if (dir_rec->found_size == dir_rec->isize &&
2071             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2072                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2073         if (dir_rec->found_size != dir_rec->isize)
2074                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2075
2076         return 0;
2077 }
2078
2079 static int delete_dir_index(struct btrfs_root *root,
2080                             struct cache_tree *inode_cache,
2081                             struct inode_record *rec,
2082                             struct inode_backref *backref)
2083 {
2084         struct btrfs_trans_handle *trans;
2085         struct btrfs_dir_item *di;
2086         struct btrfs_path *path;
2087         int ret = 0;
2088
2089         path = btrfs_alloc_path();
2090         if (!path)
2091                 return -ENOMEM;
2092
2093         trans = btrfs_start_transaction(root, 1);
2094         if (IS_ERR(trans)) {
2095                 btrfs_free_path(path);
2096                 return PTR_ERR(trans);
2097         }
2098
2099
2100         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2101                 (unsigned long long)backref->dir,
2102                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2103                 (unsigned long long)root->objectid);
2104
2105         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2106                                     backref->name, backref->namelen,
2107                                     backref->index, -1);
2108         if (IS_ERR(di)) {
2109                 ret = PTR_ERR(di);
2110                 btrfs_free_path(path);
2111                 btrfs_commit_transaction(trans, root);
2112                 if (ret == -ENOENT)
2113                         return 0;
2114                 return ret;
2115         }
2116
2117         if (!di)
2118                 ret = btrfs_del_item(trans, root, path);
2119         else
2120                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2121         BUG_ON(ret);
2122         btrfs_free_path(path);
2123         btrfs_commit_transaction(trans, root);
2124         return ret;
2125 }
2126
2127 static int create_inode_item(struct btrfs_root *root,
2128                              struct inode_record *rec,
2129                              struct inode_backref *backref, int root_dir)
2130 {
2131         struct btrfs_trans_handle *trans;
2132         struct btrfs_inode_item inode_item;
2133         time_t now = time(NULL);
2134         int ret;
2135
2136         trans = btrfs_start_transaction(root, 1);
2137         if (IS_ERR(trans)) {
2138                 ret = PTR_ERR(trans);
2139                 return ret;
2140         }
2141
2142         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2143                 "be incomplete, please check permissions and content after "
2144                 "the fsck completes.\n", (unsigned long long)root->objectid,
2145                 (unsigned long long)rec->ino);
2146
2147         memset(&inode_item, 0, sizeof(inode_item));
2148         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2149         if (root_dir)
2150                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2151         else
2152                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2153         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2154         if (rec->found_dir_item) {
2155                 if (rec->found_file_extent)
2156                         fprintf(stderr, "root %llu inode %llu has both a dir "
2157                                 "item and extents, unsure if it is a dir or a "
2158                                 "regular file so setting it as a directory\n",
2159                                 (unsigned long long)root->objectid,
2160                                 (unsigned long long)rec->ino);
2161                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2162                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2163         } else if (!rec->found_dir_item) {
2164                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2165                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2166         }
2167         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2168         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2169         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2170         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2171         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2172         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2173         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2174         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2175
2176         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2177         BUG_ON(ret);
2178         btrfs_commit_transaction(trans, root);
2179         return 0;
2180 }
2181
2182 static int repair_inode_backrefs(struct btrfs_root *root,
2183                                  struct inode_record *rec,
2184                                  struct cache_tree *inode_cache,
2185                                  int delete)
2186 {
2187         struct inode_backref *tmp, *backref;
2188         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2189         int ret = 0;
2190         int repaired = 0;
2191
2192         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2193                 if (!delete && rec->ino == root_dirid) {
2194                         if (!rec->found_inode_item) {
2195                                 ret = create_inode_item(root, rec, backref, 1);
2196                                 if (ret)
2197                                         break;
2198                                 repaired++;
2199                         }
2200                 }
2201
2202                 /* Index 0 for root dir's are special, don't mess with it */
2203                 if (rec->ino == root_dirid && backref->index == 0)
2204                         continue;
2205
2206                 if (delete &&
2207                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2208                      (backref->found_dir_index && backref->found_inode_ref &&
2209                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2210                         ret = delete_dir_index(root, inode_cache, rec, backref);
2211                         if (ret)
2212                                 break;
2213                         repaired++;
2214                         list_del(&backref->list);
2215                         free(backref);
2216                 }
2217
2218                 if (!delete && !backref->found_dir_index &&
2219                     backref->found_dir_item && backref->found_inode_ref) {
2220                         ret = add_missing_dir_index(root, inode_cache, rec,
2221                                                     backref);
2222                         if (ret)
2223                                 break;
2224                         repaired++;
2225                         if (backref->found_dir_item &&
2226                             backref->found_dir_index &&
2227                             backref->found_dir_index) {
2228                                 if (!backref->errors &&
2229                                     backref->found_inode_ref) {
2230                                         list_del(&backref->list);
2231                                         free(backref);
2232                                 }
2233                         }
2234                 }
2235
2236                 if (!delete && (!backref->found_dir_index &&
2237                                 !backref->found_dir_item &&
2238                                 backref->found_inode_ref)) {
2239                         struct btrfs_trans_handle *trans;
2240                         struct btrfs_key location;
2241
2242                         ret = check_dir_conflict(root, backref->name,
2243                                                  backref->namelen,
2244                                                  backref->dir,
2245                                                  backref->index);
2246                         if (ret) {
2247                                 /*
2248                                  * let nlink fixing routine to handle it,
2249                                  * which can do it better.
2250                                  */
2251                                 ret = 0;
2252                                 break;
2253                         }
2254                         location.objectid = rec->ino;
2255                         location.type = BTRFS_INODE_ITEM_KEY;
2256                         location.offset = 0;
2257
2258                         trans = btrfs_start_transaction(root, 1);
2259                         if (IS_ERR(trans)) {
2260                                 ret = PTR_ERR(trans);
2261                                 break;
2262                         }
2263                         fprintf(stderr, "adding missing dir index/item pair "
2264                                 "for inode %llu\n",
2265                                 (unsigned long long)rec->ino);
2266                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2267                                                     backref->namelen,
2268                                                     backref->dir, &location,
2269                                                     imode_to_type(rec->imode),
2270                                                     backref->index);
2271                         BUG_ON(ret);
2272                         btrfs_commit_transaction(trans, root);
2273                         repaired++;
2274                 }
2275
2276                 if (!delete && (backref->found_inode_ref &&
2277                                 backref->found_dir_index &&
2278                                 backref->found_dir_item &&
2279                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2280                                 !rec->found_inode_item)) {
2281                         ret = create_inode_item(root, rec, backref, 0);
2282                         if (ret)
2283                                 break;
2284                         repaired++;
2285                 }
2286
2287         }
2288         return ret ? ret : repaired;
2289 }
2290
2291 /*
2292  * To determine the file type for nlink/inode_item repair
2293  *
2294  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2295  * Return -ENOENT if file type is not found.
2296  */
2297 static int find_file_type(struct inode_record *rec, u8 *type)
2298 {
2299         struct inode_backref *backref;
2300
2301         /* For inode item recovered case */
2302         if (rec->found_inode_item) {
2303                 *type = imode_to_type(rec->imode);
2304                 return 0;
2305         }
2306
2307         list_for_each_entry(backref, &rec->backrefs, list) {
2308                 if (backref->found_dir_index || backref->found_dir_item) {
2309                         *type = backref->filetype;
2310                         return 0;
2311                 }
2312         }
2313         return -ENOENT;
2314 }
2315
2316 /*
2317  * To determine the file name for nlink repair
2318  *
2319  * Return 0 if file name is found, set name and namelen.
2320  * Return -ENOENT if file name is not found.
2321  */
2322 static int find_file_name(struct inode_record *rec,
2323                           char *name, int *namelen)
2324 {
2325         struct inode_backref *backref;
2326
2327         list_for_each_entry(backref, &rec->backrefs, list) {
2328                 if (backref->found_dir_index || backref->found_dir_item ||
2329                     backref->found_inode_ref) {
2330                         memcpy(name, backref->name, backref->namelen);
2331                         *namelen = backref->namelen;
2332                         return 0;
2333                 }
2334         }
2335         return -ENOENT;
2336 }
2337
2338 /* Reset the nlink of the inode to the correct one */
2339 static int reset_nlink(struct btrfs_trans_handle *trans,
2340                        struct btrfs_root *root,
2341                        struct btrfs_path *path,
2342                        struct inode_record *rec)
2343 {
2344         struct inode_backref *backref;
2345         struct inode_backref *tmp;
2346         struct btrfs_key key;
2347         struct btrfs_inode_item *inode_item;
2348         int ret = 0;
2349
2350         /* We don't believe this either, reset it and iterate backref */
2351         rec->found_link = 0;
2352
2353         /* Remove all backref including the valid ones */
2354         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2355                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2356                                    backref->index, backref->name,
2357                                    backref->namelen, 0);
2358                 if (ret < 0)
2359                         goto out;
2360
2361                 /* remove invalid backref, so it won't be added back */
2362                 if (!(backref->found_dir_index &&
2363                       backref->found_dir_item &&
2364                       backref->found_inode_ref)) {
2365                         list_del(&backref->list);
2366                         free(backref);
2367                 } else {
2368                         rec->found_link++;
2369                 }
2370         }
2371
2372         /* Set nlink to 0 */
2373         key.objectid = rec->ino;
2374         key.type = BTRFS_INODE_ITEM_KEY;
2375         key.offset = 0;
2376         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2377         if (ret < 0)
2378                 goto out;
2379         if (ret > 0) {
2380                 ret = -ENOENT;
2381                 goto out;
2382         }
2383         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2384                                     struct btrfs_inode_item);
2385         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2386         btrfs_mark_buffer_dirty(path->nodes[0]);
2387         btrfs_release_path(path);
2388
2389         /*
2390          * Add back valid inode_ref/dir_item/dir_index,
2391          * add_link() will handle the nlink inc, so new nlink must be correct
2392          */
2393         list_for_each_entry(backref, &rec->backrefs, list) {
2394                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2395                                      backref->name, backref->namelen,
2396                                      backref->filetype, &backref->index, 1);
2397                 if (ret < 0)
2398                         goto out;
2399         }
2400 out:
2401         btrfs_release_path(path);
2402         return ret;
2403 }
2404
2405 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2406                                struct btrfs_root *root,
2407                                struct btrfs_path *path,
2408                                struct inode_record *rec)
2409 {
2410         char *dir_name = "lost+found";
2411         char namebuf[BTRFS_NAME_LEN] = {0};
2412         u64 lost_found_ino;
2413         u32 mode = 0700;
2414         u8 type = 0;
2415         int namelen = 0;
2416         int name_recovered = 0;
2417         int type_recovered = 0;
2418         int ret = 0;
2419
2420         /*
2421          * Get file name and type first before these invalid inode ref
2422          * are deleted by remove_all_invalid_backref()
2423          */
2424         name_recovered = !find_file_name(rec, namebuf, &namelen);
2425         type_recovered = !find_file_type(rec, &type);
2426
2427         if (!name_recovered) {
2428                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2429                        rec->ino, rec->ino);
2430                 namelen = count_digits(rec->ino);
2431                 sprintf(namebuf, "%llu", rec->ino);
2432                 name_recovered = 1;
2433         }
2434         if (!type_recovered) {
2435                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2436                        rec->ino);
2437                 type = BTRFS_FT_REG_FILE;
2438                 type_recovered = 1;
2439         }
2440
2441         ret = reset_nlink(trans, root, path, rec);
2442         if (ret < 0) {
2443                 fprintf(stderr,
2444                         "Failed to reset nlink for inode %llu: %s\n",
2445                         rec->ino, strerror(-ret));
2446                 goto out;
2447         }
2448
2449         if (rec->found_link == 0) {
2450                 lost_found_ino = root->highest_inode;
2451                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2452                         ret = -EOVERFLOW;
2453                         goto out;
2454                 }
2455                 lost_found_ino++;
2456                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2457                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2458                                   mode);
2459                 if (ret < 0) {
2460                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2461                                 dir_name, strerror(-ret));
2462                         goto out;
2463                 }
2464                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2465                                      namebuf, namelen, type, NULL, 1);
2466                 /*
2467                  * Add ".INO" suffix several times to handle case where
2468                  * "FILENAME.INO" is already taken by another file.
2469                  */
2470                 while (ret == -EEXIST) {
2471                         /*
2472                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2473                          */
2474                         if (namelen + count_digits(rec->ino) + 1 >
2475                             BTRFS_NAME_LEN) {
2476                                 ret = -EFBIG;
2477                                 goto out;
2478                         }
2479                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2480                                  ".%llu", rec->ino);
2481                         namelen += count_digits(rec->ino) + 1;
2482                         ret = btrfs_add_link(trans, root, rec->ino,
2483                                              lost_found_ino, namebuf,
2484                                              namelen, type, NULL, 1);
2485                 }
2486                 if (ret < 0) {
2487                         fprintf(stderr,
2488                                 "Failed to link the inode %llu to %s dir: %s\n",
2489                                 rec->ino, dir_name, strerror(-ret));
2490                         goto out;
2491                 }
2492                 /*
2493                  * Just increase the found_link, don't actually add the
2494                  * backref. This will make things easier and this inode
2495                  * record will be freed after the repair is done.
2496                  * So fsck will not report problem about this inode.
2497                  */
2498                 rec->found_link++;
2499                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2500                        namelen, namebuf, dir_name);
2501         }
2502         printf("Fixed the nlink of inode %llu\n", rec->ino);
2503 out:
2504         /*
2505          * Clear the flag anyway, or we will loop forever for the same inode
2506          * as it will not be removed from the bad inode list and the dead loop
2507          * happens.
2508          */
2509         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2510         btrfs_release_path(path);
2511         return ret;
2512 }
2513
2514 /*
2515  * Check if there is any normal(reg or prealloc) file extent for given
2516  * ino.
2517  * This is used to determine the file type when neither its dir_index/item or
2518  * inode_item exists.
2519  *
2520  * This will *NOT* report error, if any error happens, just consider it does
2521  * not have any normal file extent.
2522  */
2523 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2524 {
2525         struct btrfs_path *path;
2526         struct btrfs_key key;
2527         struct btrfs_key found_key;
2528         struct btrfs_file_extent_item *fi;
2529         u8 type;
2530         int ret = 0;
2531
2532         path = btrfs_alloc_path();
2533         if (!path)
2534                 goto out;
2535         key.objectid = ino;
2536         key.type = BTRFS_EXTENT_DATA_KEY;
2537         key.offset = 0;
2538
2539         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2540         if (ret < 0) {
2541                 ret = 0;
2542                 goto out;
2543         }
2544         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2545                 ret = btrfs_next_leaf(root, path);
2546                 if (ret) {
2547                         ret = 0;
2548                         goto out;
2549                 }
2550         }
2551         while (1) {
2552                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2553                                       path->slots[0]);
2554                 if (found_key.objectid != ino ||
2555                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2556                         break;
2557                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2558                                     struct btrfs_file_extent_item);
2559                 type = btrfs_file_extent_type(path->nodes[0], fi);
2560                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2561                         ret = 1;
2562                         goto out;
2563                 }
2564         }
2565 out:
2566         btrfs_free_path(path);
2567         return ret;
2568 }
2569
2570 static u32 btrfs_type_to_imode(u8 type)
2571 {
2572         static u32 imode_by_btrfs_type[] = {
2573                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2574                 [BTRFS_FT_DIR]          = S_IFDIR,
2575                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2576                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2577                 [BTRFS_FT_FIFO]         = S_IFIFO,
2578                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2579                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2580         };
2581
2582         return imode_by_btrfs_type[(type)];
2583 }
2584
2585 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2586                                 struct btrfs_root *root,
2587                                 struct btrfs_path *path,
2588                                 struct inode_record *rec)
2589 {
2590         u8 filetype;
2591         u32 mode = 0700;
2592         int type_recovered = 0;
2593         int ret = 0;
2594
2595         printf("Trying to rebuild inode:%llu\n", rec->ino);
2596
2597         type_recovered = !find_file_type(rec, &filetype);
2598
2599         /*
2600          * Try to determine inode type if type not found.
2601          *
2602          * For found regular file extent, it must be FILE.
2603          * For found dir_item/index, it must be DIR.
2604          *
2605          * For undetermined one, use FILE as fallback.
2606          *
2607          * TODO:
2608          * 1. If found backref(inode_index/item is already handled) to it,
2609          *    it must be DIR.
2610          *    Need new inode-inode ref structure to allow search for that.
2611          */
2612         if (!type_recovered) {
2613                 if (rec->found_file_extent &&
2614                     find_normal_file_extent(root, rec->ino)) {
2615                         type_recovered = 1;
2616                         filetype = BTRFS_FT_REG_FILE;
2617                 } else if (rec->found_dir_item) {
2618                         type_recovered = 1;
2619                         filetype = BTRFS_FT_DIR;
2620                 } else if (!list_empty(&rec->orphan_extents)) {
2621                         type_recovered = 1;
2622                         filetype = BTRFS_FT_REG_FILE;
2623                 } else{
2624                         printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2625                                rec->ino);
2626                         type_recovered = 1;
2627                         filetype = BTRFS_FT_REG_FILE;
2628                 }
2629         }
2630
2631         ret = btrfs_new_inode(trans, root, rec->ino,
2632                               mode | btrfs_type_to_imode(filetype));
2633         if (ret < 0)
2634                 goto out;
2635
2636         /*
2637          * Here inode rebuild is done, we only rebuild the inode item,
2638          * don't repair the nlink(like move to lost+found).
2639          * That is the job of nlink repair.
2640          *
2641          * We just fill the record and return
2642          */
2643         rec->found_dir_item = 1;
2644         rec->imode = mode | btrfs_type_to_imode(filetype);
2645         rec->nlink = 0;
2646         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2647         /* Ensure the inode_nlinks repair function will be called */
2648         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2649 out:
2650         return ret;
2651 }
2652
2653 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2654                                       struct btrfs_root *root,
2655                                       struct btrfs_path *path,
2656                                       struct inode_record *rec)
2657 {
2658         struct orphan_data_extent *orphan;
2659         struct orphan_data_extent *tmp;
2660         int ret = 0;
2661
2662         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2663                 /*
2664                  * Check for conflicting file extents
2665                  *
2666                  * Here we don't know whether the extents is compressed or not,
2667                  * so we can only assume it not compressed nor data offset,
2668                  * and use its disk_len as extent length.
2669                  */
2670                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2671                                        orphan->offset, orphan->disk_len, 0);
2672                 btrfs_release_path(path);
2673                 if (ret < 0)
2674                         goto out;
2675                 if (!ret) {
2676                         fprintf(stderr,
2677                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2678                                 orphan->disk_bytenr, orphan->disk_len);
2679                         ret = btrfs_free_extent(trans,
2680                                         root->fs_info->extent_root,
2681                                         orphan->disk_bytenr, orphan->disk_len,
2682                                         0, root->objectid, orphan->objectid,
2683                                         orphan->offset);
2684                         if (ret < 0)
2685                                 goto out;
2686                 }
2687                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2688                                 orphan->offset, orphan->disk_bytenr,
2689                                 orphan->disk_len, orphan->disk_len);
2690                 if (ret < 0)
2691                         goto out;
2692
2693                 /* Update file size info */
2694                 rec->found_size += orphan->disk_len;
2695                 if (rec->found_size == rec->nbytes)
2696                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2697
2698                 /* Update the file extent hole info too */
2699                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2700                                            orphan->disk_len);
2701                 if (ret < 0)
2702                         goto out;
2703                 if (RB_EMPTY_ROOT(&rec->holes))
2704                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2705
2706                 list_del(&orphan->list);
2707                 free(orphan);
2708         }
2709         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2710 out:
2711         return ret;
2712 }
2713
2714 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2715                                         struct btrfs_root *root,
2716                                         struct btrfs_path *path,
2717                                         struct inode_record *rec)
2718 {
2719         struct rb_node *node;
2720         struct file_extent_hole *hole;
2721         int found = 0;
2722         int ret = 0;
2723
2724         node = rb_first(&rec->holes);
2725
2726         while (node) {
2727                 found = 1;
2728                 hole = rb_entry(node, struct file_extent_hole, node);
2729                 ret = btrfs_punch_hole(trans, root, rec->ino,
2730                                        hole->start, hole->len);
2731                 if (ret < 0)
2732                         goto out;
2733                 ret = del_file_extent_hole(&rec->holes, hole->start,
2734                                            hole->len);
2735                 if (ret < 0)
2736                         goto out;
2737                 if (RB_EMPTY_ROOT(&rec->holes))
2738                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2739                 node = rb_first(&rec->holes);
2740         }
2741         /* special case for a file losing all its file extent */
2742         if (!found) {
2743                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2744                                        round_up(rec->isize, root->sectorsize));
2745                 if (ret < 0)
2746                         goto out;
2747         }
2748         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2749                rec->ino, root->objectid);
2750 out:
2751         return ret;
2752 }
2753
2754 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2755 {
2756         struct btrfs_trans_handle *trans;
2757         struct btrfs_path *path;
2758         int ret = 0;
2759
2760         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2761                              I_ERR_NO_ORPHAN_ITEM |
2762                              I_ERR_LINK_COUNT_WRONG |
2763                              I_ERR_NO_INODE_ITEM |
2764                              I_ERR_FILE_EXTENT_ORPHAN |
2765                              I_ERR_FILE_EXTENT_DISCOUNT|
2766                              I_ERR_FILE_NBYTES_WRONG)))
2767                 return rec->errors;
2768
2769         path = btrfs_alloc_path();
2770         if (!path)
2771                 return -ENOMEM;
2772
2773         /*
2774          * For nlink repair, it may create a dir and add link, so
2775          * 2 for parent(256)'s dir_index and dir_item
2776          * 2 for lost+found dir's inode_item and inode_ref
2777          * 1 for the new inode_ref of the file
2778          * 2 for lost+found dir's dir_index and dir_item for the file
2779          */
2780         trans = btrfs_start_transaction(root, 7);
2781         if (IS_ERR(trans)) {
2782                 btrfs_free_path(path);
2783                 return PTR_ERR(trans);
2784         }
2785
2786         if (rec->errors & I_ERR_NO_INODE_ITEM)
2787                 ret = repair_inode_no_item(trans, root, path, rec);
2788         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2789                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2790         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2791                 ret = repair_inode_discount_extent(trans, root, path, rec);
2792         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2793                 ret = repair_inode_isize(trans, root, path, rec);
2794         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2795                 ret = repair_inode_orphan_item(trans, root, path, rec);
2796         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2797                 ret = repair_inode_nlinks(trans, root, path, rec);
2798         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2799                 ret = repair_inode_nbytes(trans, root, path, rec);
2800         btrfs_commit_transaction(trans, root);
2801         btrfs_free_path(path);
2802         return ret;
2803 }
2804
2805 static int check_inode_recs(struct btrfs_root *root,
2806                             struct cache_tree *inode_cache)
2807 {
2808         struct cache_extent *cache;
2809         struct ptr_node *node;
2810         struct inode_record *rec;
2811         struct inode_backref *backref;
2812         int stage = 0;
2813         int ret = 0;
2814         int err = 0;
2815         u64 error = 0;
2816         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2817
2818         if (btrfs_root_refs(&root->root_item) == 0) {
2819                 if (!cache_tree_empty(inode_cache))
2820                         fprintf(stderr, "warning line %d\n", __LINE__);
2821                 return 0;
2822         }
2823
2824         /*
2825          * We need to record the highest inode number for later 'lost+found'
2826          * dir creation.
2827          * We must select a ino not used/refered by any existing inode, or
2828          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2829          * this may cause 'lost+found' dir has wrong nlinks.
2830          */
2831         cache = last_cache_extent(inode_cache);
2832         if (cache) {
2833                 node = container_of(cache, struct ptr_node, cache);
2834                 rec = node->data;
2835                 if (rec->ino > root->highest_inode)
2836                         root->highest_inode = rec->ino;
2837         }
2838
2839         /*
2840          * We need to repair backrefs first because we could change some of the
2841          * errors in the inode recs.
2842          *
2843          * We also need to go through and delete invalid backrefs first and then
2844          * add the correct ones second.  We do this because we may get EEXIST
2845          * when adding back the correct index because we hadn't yet deleted the
2846          * invalid index.
2847          *
2848          * For example, if we were missing a dir index then the directories
2849          * isize would be wrong, so if we fixed the isize to what we thought it
2850          * would be and then fixed the backref we'd still have a invalid fs, so
2851          * we need to add back the dir index and then check to see if the isize
2852          * is still wrong.
2853          */
2854         while (stage < 3) {
2855                 stage++;
2856                 if (stage == 3 && !err)
2857                         break;
2858
2859                 cache = search_cache_extent(inode_cache, 0);
2860                 while (repair && cache) {
2861                         node = container_of(cache, struct ptr_node, cache);
2862                         rec = node->data;
2863                         cache = next_cache_extent(cache);
2864
2865                         /* Need to free everything up and rescan */
2866                         if (stage == 3) {
2867                                 remove_cache_extent(inode_cache, &node->cache);
2868                                 free(node);
2869                                 free_inode_rec(rec);
2870                                 continue;
2871                         }
2872
2873                         if (list_empty(&rec->backrefs))
2874                                 continue;
2875
2876                         ret = repair_inode_backrefs(root, rec, inode_cache,
2877                                                     stage == 1);
2878                         if (ret < 0) {
2879                                 err = ret;
2880                                 stage = 2;
2881                                 break;
2882                         } if (ret > 0) {
2883                                 err = -EAGAIN;
2884                         }
2885                 }
2886         }
2887         if (err)
2888                 return err;
2889
2890         rec = get_inode_rec(inode_cache, root_dirid, 0);
2891         if (rec) {
2892                 ret = check_root_dir(rec);
2893                 if (ret) {
2894                         fprintf(stderr, "root %llu root dir %llu error\n",
2895                                 (unsigned long long)root->root_key.objectid,
2896                                 (unsigned long long)root_dirid);
2897                         print_inode_error(root, rec);
2898                         error++;
2899                 }
2900         } else {
2901                 if (repair) {
2902                         struct btrfs_trans_handle *trans;
2903
2904                         trans = btrfs_start_transaction(root, 1);
2905                         if (IS_ERR(trans)) {
2906                                 err = PTR_ERR(trans);
2907                                 return err;
2908                         }
2909
2910                         fprintf(stderr,
2911                                 "root %llu missing its root dir, recreating\n",
2912                                 (unsigned long long)root->objectid);
2913
2914                         ret = btrfs_make_root_dir(trans, root, root_dirid);
2915                         BUG_ON(ret);
2916
2917                         btrfs_commit_transaction(trans, root);
2918                         return -EAGAIN;
2919                 }
2920
2921                 fprintf(stderr, "root %llu root dir %llu not found\n",
2922                         (unsigned long long)root->root_key.objectid,
2923                         (unsigned long long)root_dirid);
2924         }
2925
2926         while (1) {
2927                 cache = search_cache_extent(inode_cache, 0);
2928                 if (!cache)
2929                         break;
2930                 node = container_of(cache, struct ptr_node, cache);
2931                 rec = node->data;
2932                 remove_cache_extent(inode_cache, &node->cache);
2933                 free(node);
2934                 if (rec->ino == root_dirid ||
2935                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
2936                         free_inode_rec(rec);
2937                         continue;
2938                 }
2939
2940                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2941                         ret = check_orphan_item(root, rec->ino);
2942                         if (ret == 0)
2943                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2944                         if (can_free_inode_rec(rec)) {
2945                                 free_inode_rec(rec);
2946                                 continue;
2947                         }
2948                 }
2949
2950                 if (!rec->found_inode_item)
2951                         rec->errors |= I_ERR_NO_INODE_ITEM;
2952                 if (rec->found_link != rec->nlink)
2953                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2954                 if (repair) {
2955                         ret = try_repair_inode(root, rec);
2956                         if (ret == 0 && can_free_inode_rec(rec)) {
2957                                 free_inode_rec(rec);
2958                                 continue;
2959                         }
2960                         ret = 0;
2961                 }
2962
2963                 if (!(repair && ret == 0))
2964                         error++;
2965                 print_inode_error(root, rec);
2966                 list_for_each_entry(backref, &rec->backrefs, list) {
2967                         if (!backref->found_dir_item)
2968                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
2969                         if (!backref->found_dir_index)
2970                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
2971                         if (!backref->found_inode_ref)
2972                                 backref->errors |= REF_ERR_NO_INODE_REF;
2973                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
2974                                 " namelen %u name %s filetype %d errors %x",
2975                                 (unsigned long long)backref->dir,
2976                                 (unsigned long long)backref->index,
2977                                 backref->namelen, backref->name,
2978                                 backref->filetype, backref->errors);
2979                         print_ref_error(backref->errors);
2980                 }
2981                 free_inode_rec(rec);
2982         }
2983         return (error > 0) ? -1 : 0;
2984 }
2985
2986 static struct root_record *get_root_rec(struct cache_tree *root_cache,
2987                                         u64 objectid)
2988 {
2989         struct cache_extent *cache;
2990         struct root_record *rec = NULL;
2991         int ret;
2992
2993         cache = lookup_cache_extent(root_cache, objectid, 1);
2994         if (cache) {
2995                 rec = container_of(cache, struct root_record, cache);
2996         } else {
2997                 rec = calloc(1, sizeof(*rec));
2998                 rec->objectid = objectid;
2999                 INIT_LIST_HEAD(&rec->backrefs);
3000                 rec->cache.start = objectid;
3001                 rec->cache.size = 1;
3002
3003                 ret = insert_cache_extent(root_cache, &rec->cache);
3004                 BUG_ON(ret);
3005         }
3006         return rec;
3007 }
3008
3009 static struct root_backref *get_root_backref(struct root_record *rec,
3010                                              u64 ref_root, u64 dir, u64 index,
3011                                              const char *name, int namelen)
3012 {
3013         struct root_backref *backref;
3014
3015         list_for_each_entry(backref, &rec->backrefs, list) {
3016                 if (backref->ref_root != ref_root || backref->dir != dir ||
3017                     backref->namelen != namelen)
3018                         continue;
3019                 if (memcmp(name, backref->name, namelen))
3020                         continue;
3021                 return backref;
3022         }
3023
3024         backref = calloc(1, sizeof(*backref) + namelen + 1);
3025         backref->ref_root = ref_root;
3026         backref->dir = dir;
3027         backref->index = index;
3028         backref->namelen = namelen;
3029         memcpy(backref->name, name, namelen);
3030         backref->name[namelen] = '\0';
3031         list_add_tail(&backref->list, &rec->backrefs);
3032         return backref;
3033 }
3034
3035 static void free_root_record(struct cache_extent *cache)
3036 {
3037         struct root_record *rec;
3038         struct root_backref *backref;
3039
3040         rec = container_of(cache, struct root_record, cache);
3041         while (!list_empty(&rec->backrefs)) {
3042                 backref = list_entry(rec->backrefs.next,
3043                                      struct root_backref, list);
3044                 list_del(&backref->list);
3045                 free(backref);
3046         }
3047
3048         kfree(rec);
3049 }
3050
3051 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3052
3053 static int add_root_backref(struct cache_tree *root_cache,
3054                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3055                             const char *name, int namelen,
3056                             int item_type, int errors)
3057 {
3058         struct root_record *rec;
3059         struct root_backref *backref;
3060
3061         rec = get_root_rec(root_cache, root_id);
3062         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3063
3064         backref->errors |= errors;
3065
3066         if (item_type != BTRFS_DIR_ITEM_KEY) {
3067                 if (backref->found_dir_index || backref->found_back_ref ||
3068                     backref->found_forward_ref) {
3069                         if (backref->index != index)
3070                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3071                 } else {
3072                         backref->index = index;
3073                 }
3074         }
3075
3076         if (item_type == BTRFS_DIR_ITEM_KEY) {
3077                 if (backref->found_forward_ref)
3078                         rec->found_ref++;
3079                 backref->found_dir_item = 1;
3080         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3081                 backref->found_dir_index = 1;
3082         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3083                 if (backref->found_forward_ref)
3084                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3085                 else if (backref->found_dir_item)
3086                         rec->found_ref++;
3087                 backref->found_forward_ref = 1;
3088         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3089                 if (backref->found_back_ref)
3090                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3091                 backref->found_back_ref = 1;
3092         } else {
3093                 BUG_ON(1);
3094         }
3095
3096         if (backref->found_forward_ref && backref->found_dir_item)
3097                 backref->reachable = 1;
3098         return 0;
3099 }
3100
3101 static int merge_root_recs(struct btrfs_root *root,
3102                            struct cache_tree *src_cache,
3103                            struct cache_tree *dst_cache)
3104 {
3105         struct cache_extent *cache;
3106         struct ptr_node *node;
3107         struct inode_record *rec;
3108         struct inode_backref *backref;
3109         int ret = 0;
3110
3111         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3112                 free_inode_recs_tree(src_cache);
3113                 return 0;
3114         }
3115
3116         while (1) {
3117                 cache = search_cache_extent(src_cache, 0);
3118                 if (!cache)
3119                         break;
3120                 node = container_of(cache, struct ptr_node, cache);
3121                 rec = node->data;
3122                 remove_cache_extent(src_cache, &node->cache);
3123                 free(node);
3124
3125                 ret = is_child_root(root, root->objectid, rec->ino);
3126                 if (ret < 0)
3127                         break;
3128                 else if (ret == 0)
3129                         goto skip;
3130
3131                 list_for_each_entry(backref, &rec->backrefs, list) {
3132                         BUG_ON(backref->found_inode_ref);
3133                         if (backref->found_dir_item)
3134                                 add_root_backref(dst_cache, rec->ino,
3135                                         root->root_key.objectid, backref->dir,
3136                                         backref->index, backref->name,
3137                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3138                                         backref->errors);
3139                         if (backref->found_dir_index)
3140                                 add_root_backref(dst_cache, rec->ino,
3141                                         root->root_key.objectid, backref->dir,
3142                                         backref->index, backref->name,
3143                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3144                                         backref->errors);
3145                 }
3146 skip:
3147                 free_inode_rec(rec);
3148         }
3149         if (ret < 0)
3150                 return ret;
3151         return 0;
3152 }
3153
3154 static int check_root_refs(struct btrfs_root *root,
3155                            struct cache_tree *root_cache)
3156 {
3157         struct root_record *rec;
3158         struct root_record *ref_root;
3159         struct root_backref *backref;
3160         struct cache_extent *cache;
3161         int loop = 1;
3162         int ret;
3163         int error;
3164         int errors = 0;
3165
3166         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3167         rec->found_ref = 1;
3168
3169         /* fixme: this can not detect circular references */
3170         while (loop) {
3171                 loop = 0;
3172                 cache = search_cache_extent(root_cache, 0);
3173                 while (1) {
3174                         if (!cache)
3175                                 break;
3176                         rec = container_of(cache, struct root_record, cache);
3177                         cache = next_cache_extent(cache);
3178
3179                         if (rec->found_ref == 0)
3180                                 continue;
3181
3182                         list_for_each_entry(backref, &rec->backrefs, list) {
3183                                 if (!backref->reachable)
3184                                         continue;
3185
3186                                 ref_root = get_root_rec(root_cache,
3187                                                         backref->ref_root);
3188                                 if (ref_root->found_ref > 0)
3189                                         continue;
3190
3191                                 backref->reachable = 0;
3192                                 rec->found_ref--;
3193                                 if (rec->found_ref == 0)
3194                                         loop = 1;
3195                         }
3196                 }
3197         }
3198
3199         cache = search_cache_extent(root_cache, 0);
3200         while (1) {
3201                 if (!cache)
3202                         break;
3203                 rec = container_of(cache, struct root_record, cache);
3204                 cache = next_cache_extent(cache);
3205
3206                 if (rec->found_ref == 0 &&
3207                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3208                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3209                         ret = check_orphan_item(root->fs_info->tree_root,
3210                                                 rec->objectid);
3211                         if (ret == 0)
3212                                 continue;
3213
3214                         /*
3215                          * If we don't have a root item then we likely just have
3216                          * a dir item in a snapshot for this root but no actual
3217                          * ref key or anything so it's meaningless.
3218                          */
3219                         if (!rec->found_root_item)
3220                                 continue;
3221                         errors++;
3222                         fprintf(stderr, "fs tree %llu not referenced\n",
3223                                 (unsigned long long)rec->objectid);
3224                 }
3225
3226                 error = 0;
3227                 if (rec->found_ref > 0 && !rec->found_root_item)
3228                         error = 1;
3229                 list_for_each_entry(backref, &rec->backrefs, list) {
3230                         if (!backref->found_dir_item)
3231                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3232                         if (!backref->found_dir_index)
3233                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3234                         if (!backref->found_back_ref)
3235                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3236                         if (!backref->found_forward_ref)
3237                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3238                         if (backref->reachable && backref->errors)
3239                                 error = 1;
3240                 }
3241                 if (!error)
3242                         continue;
3243
3244                 errors++;
3245                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3246                         (unsigned long long)rec->objectid, rec->found_ref,
3247                          rec->found_root_item ? "" : "not found");
3248
3249                 list_for_each_entry(backref, &rec->backrefs, list) {
3250                         if (!backref->reachable)
3251                                 continue;
3252                         if (!backref->errors && rec->found_root_item)
3253                                 continue;
3254                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3255                                 " index %llu namelen %u name %s errors %x\n",
3256                                 (unsigned long long)backref->ref_root,
3257                                 (unsigned long long)backref->dir,
3258                                 (unsigned long long)backref->index,
3259                                 backref->namelen, backref->name,
3260                                 backref->errors);
3261                         print_ref_error(backref->errors);
3262                 }
3263         }
3264         return errors > 0 ? 1 : 0;
3265 }
3266
3267 static int process_root_ref(struct extent_buffer *eb, int slot,
3268                             struct btrfs_key *key,
3269                             struct cache_tree *root_cache)
3270 {
3271         u64 dirid;
3272         u64 index;
3273         u32 len;
3274         u32 name_len;
3275         struct btrfs_root_ref *ref;
3276         char namebuf[BTRFS_NAME_LEN];
3277         int error;
3278
3279         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3280
3281         dirid = btrfs_root_ref_dirid(eb, ref);
3282         index = btrfs_root_ref_sequence(eb, ref);
3283         name_len = btrfs_root_ref_name_len(eb, ref);
3284
3285         if (name_len <= BTRFS_NAME_LEN) {
3286                 len = name_len;
3287                 error = 0;
3288         } else {
3289                 len = BTRFS_NAME_LEN;
3290                 error = REF_ERR_NAME_TOO_LONG;
3291         }
3292         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3293
3294         if (key->type == BTRFS_ROOT_REF_KEY) {
3295                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3296                                  index, namebuf, len, key->type, error);
3297         } else {
3298                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3299                                  index, namebuf, len, key->type, error);
3300         }
3301         return 0;
3302 }
3303
3304 static void free_corrupt_block(struct cache_extent *cache)
3305 {
3306         struct btrfs_corrupt_block *corrupt;
3307
3308         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3309         free(corrupt);
3310 }
3311
3312 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3313
3314 /*
3315  * Repair the btree of the given root.
3316  *
3317  * The fix is to remove the node key in corrupt_blocks cache_tree.
3318  * and rebalance the tree.
3319  * After the fix, the btree should be writeable.
3320  */
3321 static int repair_btree(struct btrfs_root *root,
3322                         struct cache_tree *corrupt_blocks)
3323 {
3324         struct btrfs_trans_handle *trans;
3325         struct btrfs_path *path;
3326         struct btrfs_corrupt_block *corrupt;
3327         struct cache_extent *cache;
3328         struct btrfs_key key;
3329         u64 offset;
3330         int level;
3331         int ret = 0;
3332
3333         if (cache_tree_empty(corrupt_blocks))
3334                 return 0;
3335
3336         path = btrfs_alloc_path();
3337         if (!path)
3338                 return -ENOMEM;
3339
3340         trans = btrfs_start_transaction(root, 1);
3341         if (IS_ERR(trans)) {
3342                 ret = PTR_ERR(trans);
3343                 fprintf(stderr, "Error starting transaction: %s\n",
3344                         strerror(-ret));
3345                 goto out_free_path;
3346         }
3347         cache = first_cache_extent(corrupt_blocks);
3348         while (cache) {
3349                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3350                                        cache);
3351                 level = corrupt->level;
3352                 path->lowest_level = level;
3353                 key.objectid = corrupt->key.objectid;
3354                 key.type = corrupt->key.type;
3355                 key.offset = corrupt->key.offset;
3356
3357                 /*
3358                  * Here we don't want to do any tree balance, since it may
3359                  * cause a balance with corrupted brother leaf/node,
3360                  * so ins_len set to 0 here.
3361                  * Balance will be done after all corrupt node/leaf is deleted.
3362                  */
3363                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3364                 if (ret < 0)
3365                         goto out;
3366                 offset = btrfs_node_blockptr(path->nodes[level],
3367                                              path->slots[level]);
3368
3369                 /* Remove the ptr */
3370                 ret = btrfs_del_ptr(trans, root, path, level,
3371                                     path->slots[level]);
3372                 if (ret < 0)
3373                         goto out;
3374                 /*
3375                  * Remove the corresponding extent
3376                  * return value is not concerned.
3377                  */
3378                 btrfs_release_path(path);
3379                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3380                                         0, root->root_key.objectid,
3381                                         level - 1, 0);
3382                 cache = next_cache_extent(cache);
3383         }
3384
3385         /* Balance the btree using btrfs_search_slot() */
3386         cache = first_cache_extent(corrupt_blocks);
3387         while (cache) {
3388                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3389                                        cache);
3390                 memcpy(&key, &corrupt->key, sizeof(key));
3391                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3392                 if (ret < 0)
3393                         goto out;
3394                 /* return will always >0 since it won't find the item */
3395                 ret = 0;
3396                 btrfs_release_path(path);
3397                 cache = next_cache_extent(cache);
3398         }
3399 out:
3400         btrfs_commit_transaction(trans, root);
3401 out_free_path:
3402         btrfs_free_path(path);
3403         return ret;
3404 }
3405
3406 static int check_fs_root(struct btrfs_root *root,
3407                          struct cache_tree *root_cache,
3408                          struct walk_control *wc)
3409 {
3410         int ret = 0;
3411         int err = 0;
3412         int wret;
3413         int level;
3414         struct btrfs_path path;
3415         struct shared_node root_node;
3416         struct root_record *rec;
3417         struct btrfs_root_item *root_item = &root->root_item;
3418         struct cache_tree corrupt_blocks;
3419         struct orphan_data_extent *orphan;
3420         struct orphan_data_extent *tmp;
3421         enum btrfs_tree_block_status status;
3422
3423         /*
3424          * Reuse the corrupt_block cache tree to record corrupted tree block
3425          *
3426          * Unlike the usage in extent tree check, here we do it in a per
3427          * fs/subvol tree base.
3428          */
3429         cache_tree_init(&corrupt_blocks);
3430         root->fs_info->corrupt_blocks = &corrupt_blocks;
3431
3432         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3433                 rec = get_root_rec(root_cache, root->root_key.objectid);
3434                 if (btrfs_root_refs(root_item) > 0)
3435                         rec->found_root_item = 1;
3436         }
3437
3438         btrfs_init_path(&path);
3439         memset(&root_node, 0, sizeof(root_node));
3440         cache_tree_init(&root_node.root_cache);
3441         cache_tree_init(&root_node.inode_cache);
3442
3443         /* Move the orphan extent record to corresponding inode_record */
3444         list_for_each_entry_safe(orphan, tmp,
3445                                  &root->orphan_data_extents, list) {
3446                 struct inode_record *inode;
3447
3448                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3449                                       1);
3450                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3451                 list_move(&orphan->list, &inode->orphan_extents);
3452         }
3453
3454         level = btrfs_header_level(root->node);
3455         memset(wc->nodes, 0, sizeof(wc->nodes));
3456         wc->nodes[level] = &root_node;
3457         wc->active_node = level;
3458         wc->root_level = level;
3459
3460         /* We may not have checked the root block, lets do that now */
3461         if (btrfs_is_leaf(root->node))
3462                 status = btrfs_check_leaf(root, NULL, root->node);
3463         else
3464                 status = btrfs_check_node(root, NULL, root->node);
3465         if (status != BTRFS_TREE_BLOCK_CLEAN)
3466                 return -EIO;
3467
3468         if (btrfs_root_refs(root_item) > 0 ||
3469             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3470                 path.nodes[level] = root->node;
3471                 extent_buffer_get(root->node);
3472                 path.slots[level] = 0;
3473         } else {
3474                 struct btrfs_key key;
3475                 struct btrfs_disk_key found_key;
3476
3477                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3478                 level = root_item->drop_level;
3479                 path.lowest_level = level;
3480                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3481                 if (wret < 0)
3482                         goto skip_walking;
3483                 btrfs_node_key(path.nodes[level], &found_key,
3484                                 path.slots[level]);
3485                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3486                                         sizeof(found_key)));
3487         }
3488
3489         while (1) {
3490                 wret = walk_down_tree(root, &path, wc, &level);
3491                 if (wret < 0)
3492                         ret = wret;
3493                 if (wret != 0)
3494                         break;
3495
3496                 wret = walk_up_tree(root, &path, wc, &level);
3497                 if (wret < 0)
3498                         ret = wret;
3499                 if (wret != 0)
3500                         break;
3501         }
3502 skip_walking:
3503         btrfs_release_path(&path);
3504
3505         if (!cache_tree_empty(&corrupt_blocks)) {
3506                 struct cache_extent *cache;
3507                 struct btrfs_corrupt_block *corrupt;
3508
3509                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3510                        root->root_key.objectid);
3511                 cache = first_cache_extent(&corrupt_blocks);
3512                 while (cache) {
3513                         corrupt = container_of(cache,
3514                                                struct btrfs_corrupt_block,
3515                                                cache);
3516                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3517                                cache->start, corrupt->level,
3518                                corrupt->key.objectid, corrupt->key.type,
3519                                corrupt->key.offset);
3520                         cache = next_cache_extent(cache);
3521                 }
3522                 if (repair) {
3523                         printf("Try to repair the btree for root %llu\n",
3524                                root->root_key.objectid);
3525                         ret = repair_btree(root, &corrupt_blocks);
3526                         if (ret < 0)
3527                                 fprintf(stderr, "Failed to repair btree: %s\n",
3528                                         strerror(-ret));
3529                         if (!ret)
3530                                 printf("Btree for root %llu is fixed\n",
3531                                        root->root_key.objectid);
3532                 }
3533         }
3534
3535         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3536         if (err < 0)
3537                 ret = err;
3538
3539         if (root_node.current) {
3540                 root_node.current->checked = 1;
3541                 maybe_free_inode_rec(&root_node.inode_cache,
3542                                 root_node.current);
3543         }
3544
3545         err = check_inode_recs(root, &root_node.inode_cache);
3546         if (!ret)
3547                 ret = err;
3548
3549         free_corrupt_blocks_tree(&corrupt_blocks);
3550         root->fs_info->corrupt_blocks = NULL;
3551         free_orphan_data_extents(&root->orphan_data_extents);
3552         return ret;
3553 }
3554
3555 static int fs_root_objectid(u64 objectid)
3556 {
3557         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3558             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3559                 return 1;
3560         return is_fstree(objectid);
3561 }
3562
3563 static int check_fs_roots(struct btrfs_root *root,
3564                           struct cache_tree *root_cache)
3565 {
3566         struct btrfs_path path;
3567         struct btrfs_key key;
3568         struct walk_control wc;
3569         struct extent_buffer *leaf, *tree_node;
3570         struct btrfs_root *tmp_root;
3571         struct btrfs_root *tree_root = root->fs_info->tree_root;
3572         int ret;
3573         int err = 0;
3574
3575         if (ctx.progress_enabled) {
3576                 ctx.tp = TASK_FS_ROOTS;
3577                 task_start(ctx.info);
3578         }
3579
3580         /*
3581          * Just in case we made any changes to the extent tree that weren't
3582          * reflected into the free space cache yet.
3583          */
3584         if (repair)
3585                 reset_cached_block_groups(root->fs_info);
3586         memset(&wc, 0, sizeof(wc));
3587         cache_tree_init(&wc.shared);
3588         btrfs_init_path(&path);
3589
3590 again:
3591         key.offset = 0;
3592         key.objectid = 0;
3593         key.type = BTRFS_ROOT_ITEM_KEY;
3594         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3595         if (ret < 0) {
3596                 err = 1;
3597                 goto out;
3598         }
3599         tree_node = tree_root->node;
3600         while (1) {
3601                 if (tree_node != tree_root->node) {
3602                         free_root_recs_tree(root_cache);
3603                         btrfs_release_path(&path);
3604                         goto again;
3605                 }
3606                 leaf = path.nodes[0];
3607                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3608                         ret = btrfs_next_leaf(tree_root, &path);
3609                         if (ret) {
3610                                 if (ret < 0)
3611                                         err = 1;
3612                                 break;
3613                         }
3614                         leaf = path.nodes[0];
3615                 }
3616                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3617                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3618                     fs_root_objectid(key.objectid)) {
3619                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3620                                 tmp_root = btrfs_read_fs_root_no_cache(
3621                                                 root->fs_info, &key);
3622                         } else {
3623                                 key.offset = (u64)-1;
3624                                 tmp_root = btrfs_read_fs_root(
3625                                                 root->fs_info, &key);
3626                         }
3627                         if (IS_ERR(tmp_root)) {
3628                                 err = 1;
3629                                 goto next;
3630                         }
3631                         ret = check_fs_root(tmp_root, root_cache, &wc);
3632                         if (ret == -EAGAIN) {
3633                                 free_root_recs_tree(root_cache);
3634                                 btrfs_release_path(&path);
3635                                 goto again;
3636                         }
3637                         if (ret)
3638                                 err = 1;
3639                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3640                                 btrfs_free_fs_root(tmp_root);
3641                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3642                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3643                         process_root_ref(leaf, path.slots[0], &key,
3644                                          root_cache);
3645                 }
3646 next:
3647                 path.slots[0]++;
3648         }
3649 out:
3650         btrfs_release_path(&path);
3651         if (err)
3652                 free_extent_cache_tree(&wc.shared);
3653         if (!cache_tree_empty(&wc.shared))
3654                 fprintf(stderr, "warning line %d\n", __LINE__);
3655
3656         task_stop(ctx.info);
3657
3658         return err;
3659 }
3660
3661 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3662 {
3663         struct list_head *cur = rec->backrefs.next;
3664         struct extent_backref *back;
3665         struct tree_backref *tback;
3666         struct data_backref *dback;
3667         u64 found = 0;
3668         int err = 0;
3669
3670         while(cur != &rec->backrefs) {
3671                 back = list_entry(cur, struct extent_backref, list);
3672                 cur = cur->next;
3673                 if (!back->found_extent_tree) {
3674                         err = 1;
3675                         if (!print_errs)
3676                                 goto out;
3677                         if (back->is_data) {
3678                                 dback = (struct data_backref *)back;
3679                                 fprintf(stderr, "Backref %llu %s %llu"
3680                                         " owner %llu offset %llu num_refs %lu"
3681                                         " not found in extent tree\n",
3682                                         (unsigned long long)rec->start,
3683                                         back->full_backref ?
3684                                         "parent" : "root",
3685                                         back->full_backref ?
3686                                         (unsigned long long)dback->parent:
3687                                         (unsigned long long)dback->root,
3688                                         (unsigned long long)dback->owner,
3689                                         (unsigned long long)dback->offset,
3690                                         (unsigned long)dback->num_refs);
3691                         } else {
3692                                 tback = (struct tree_backref *)back;
3693                                 fprintf(stderr, "Backref %llu parent %llu"
3694                                         " root %llu not found in extent tree\n",
3695                                         (unsigned long long)rec->start,
3696                                         (unsigned long long)tback->parent,
3697                                         (unsigned long long)tback->root);
3698                         }
3699                 }
3700                 if (!back->is_data && !back->found_ref) {
3701                         err = 1;
3702                         if (!print_errs)
3703                                 goto out;
3704                         tback = (struct tree_backref *)back;
3705                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3706                                 (unsigned long long)rec->start,
3707                                 back->full_backref ? "parent" : "root",
3708                                 back->full_backref ?
3709                                 (unsigned long long)tback->parent :
3710                                 (unsigned long long)tback->root, back);
3711                 }
3712                 if (back->is_data) {
3713                         dback = (struct data_backref *)back;
3714                         if (dback->found_ref != dback->num_refs) {
3715                                 err = 1;
3716                                 if (!print_errs)
3717                                         goto out;
3718                                 fprintf(stderr, "Incorrect local backref count"
3719                                         " on %llu %s %llu owner %llu"
3720                                         " offset %llu found %u wanted %u back %p\n",
3721                                         (unsigned long long)rec->start,
3722                                         back->full_backref ?
3723                                         "parent" : "root",
3724                                         back->full_backref ?
3725                                         (unsigned long long)dback->parent:
3726                                         (unsigned long long)dback->root,
3727                                         (unsigned long long)dback->owner,
3728                                         (unsigned long long)dback->offset,
3729                                         dback->found_ref, dback->num_refs, back);
3730                         }
3731                         if (dback->disk_bytenr != rec->start) {
3732                                 err = 1;
3733                                 if (!print_errs)
3734                                         goto out;
3735                                 fprintf(stderr, "Backref disk bytenr does not"
3736                                         " match extent record, bytenr=%llu, "
3737                                         "ref bytenr=%llu\n",
3738                                         (unsigned long long)rec->start,
3739                                         (unsigned long long)dback->disk_bytenr);
3740                         }
3741
3742                         if (dback->bytes != rec->nr) {
3743                                 err = 1;
3744                                 if (!print_errs)
3745                                         goto out;
3746                                 fprintf(stderr, "Backref bytes do not match "
3747                                         "extent backref, bytenr=%llu, ref "
3748                                         "bytes=%llu, backref bytes=%llu\n",
3749                                         (unsigned long long)rec->start,
3750                                         (unsigned long long)rec->nr,
3751                                         (unsigned long long)dback->bytes);
3752                         }
3753                 }
3754                 if (!back->is_data) {
3755                         found += 1;
3756                 } else {
3757                         dback = (struct data_backref *)back;
3758                         found += dback->found_ref;
3759                 }
3760         }
3761         if (found != rec->refs) {
3762                 err = 1;
3763                 if (!print_errs)
3764                         goto out;
3765                 fprintf(stderr, "Incorrect global backref count "
3766                         "on %llu found %llu wanted %llu\n",
3767                         (unsigned long long)rec->start,
3768                         (unsigned long long)found,
3769                         (unsigned long long)rec->refs);
3770         }
3771 out:
3772         return err;
3773 }
3774
3775 static int free_all_extent_backrefs(struct extent_record *rec)
3776 {
3777         struct extent_backref *back;
3778         struct list_head *cur;
3779         while (!list_empty(&rec->backrefs)) {
3780                 cur = rec->backrefs.next;
3781                 back = list_entry(cur, struct extent_backref, list);
3782                 list_del(cur);
3783                 free(back);
3784         }
3785         return 0;
3786 }
3787
3788 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3789                                      struct cache_tree *extent_cache)
3790 {
3791         struct cache_extent *cache;
3792         struct extent_record *rec;
3793
3794         while (1) {
3795                 cache = first_cache_extent(extent_cache);
3796                 if (!cache)
3797                         break;
3798                 rec = container_of(cache, struct extent_record, cache);
3799                 remove_cache_extent(extent_cache, cache);
3800                 free_all_extent_backrefs(rec);
3801                 free(rec);
3802         }
3803 }
3804
3805 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3806                                  struct extent_record *rec)
3807 {
3808         if (rec->content_checked && rec->owner_ref_checked &&
3809             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3810             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3811             !rec->bad_full_backref && !rec->crossing_stripes &&
3812             !rec->wrong_chunk_type) {
3813                 remove_cache_extent(extent_cache, &rec->cache);
3814                 free_all_extent_backrefs(rec);
3815                 list_del_init(&rec->list);
3816                 free(rec);
3817         }
3818         return 0;
3819 }
3820
3821 static int check_owner_ref(struct btrfs_root *root,
3822                             struct extent_record *rec,
3823                             struct extent_buffer *buf)
3824 {
3825         struct extent_backref *node;
3826         struct tree_backref *back;
3827         struct btrfs_root *ref_root;
3828         struct btrfs_key key;
3829         struct btrfs_path path;
3830         struct extent_buffer *parent;
3831         int level;
3832         int found = 0;
3833         int ret;
3834
3835         list_for_each_entry(node, &rec->backrefs, list) {
3836                 if (node->is_data)
3837                         continue;
3838                 if (!node->found_ref)
3839                         continue;
3840                 if (node->full_backref)
3841                         continue;
3842                 back = (struct tree_backref *)node;
3843                 if (btrfs_header_owner(buf) == back->root)
3844                         return 0;
3845         }
3846         BUG_ON(rec->is_root);
3847
3848         /* try to find the block by search corresponding fs tree */
3849         key.objectid = btrfs_header_owner(buf);
3850         key.type = BTRFS_ROOT_ITEM_KEY;
3851         key.offset = (u64)-1;
3852
3853         ref_root = btrfs_read_fs_root(root->fs_info, &key);
3854         if (IS_ERR(ref_root))
3855                 return 1;
3856
3857         level = btrfs_header_level(buf);
3858         if (level == 0)
3859                 btrfs_item_key_to_cpu(buf, &key, 0);
3860         else
3861                 btrfs_node_key_to_cpu(buf, &key, 0);
3862
3863         btrfs_init_path(&path);
3864         path.lowest_level = level + 1;
3865         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3866         if (ret < 0)
3867                 return 0;
3868
3869         parent = path.nodes[level + 1];
3870         if (parent && buf->start == btrfs_node_blockptr(parent,
3871                                                         path.slots[level + 1]))
3872                 found = 1;
3873
3874         btrfs_release_path(&path);
3875         return found ? 0 : 1;
3876 }
3877
3878 static int is_extent_tree_record(struct extent_record *rec)
3879 {
3880         struct list_head *cur = rec->backrefs.next;
3881         struct extent_backref *node;
3882         struct tree_backref *back;
3883         int is_extent = 0;
3884
3885         while(cur != &rec->backrefs) {
3886                 node = list_entry(cur, struct extent_backref, list);
3887                 cur = cur->next;
3888                 if (node->is_data)
3889                         return 0;
3890                 back = (struct tree_backref *)node;
3891                 if (node->full_backref)
3892                         return 0;
3893                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3894                         is_extent = 1;
3895         }
3896         return is_extent;
3897 }
3898
3899
3900 static int record_bad_block_io(struct btrfs_fs_info *info,
3901                                struct cache_tree *extent_cache,
3902                                u64 start, u64 len)
3903 {
3904         struct extent_record *rec;
3905         struct cache_extent *cache;
3906         struct btrfs_key key;
3907
3908         cache = lookup_cache_extent(extent_cache, start, len);
3909         if (!cache)
3910                 return 0;
3911
3912         rec = container_of(cache, struct extent_record, cache);
3913         if (!is_extent_tree_record(rec))
3914                 return 0;
3915
3916         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3917         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
3918 }
3919
3920 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3921                        struct extent_buffer *buf, int slot)
3922 {
3923         if (btrfs_header_level(buf)) {
3924                 struct btrfs_key_ptr ptr1, ptr2;
3925
3926                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3927                                    sizeof(struct btrfs_key_ptr));
3928                 read_extent_buffer(buf, &ptr2,
3929                                    btrfs_node_key_ptr_offset(slot + 1),
3930                                    sizeof(struct btrfs_key_ptr));
3931                 write_extent_buffer(buf, &ptr1,
3932                                     btrfs_node_key_ptr_offset(slot + 1),
3933                                     sizeof(struct btrfs_key_ptr));
3934                 write_extent_buffer(buf, &ptr2,
3935                                     btrfs_node_key_ptr_offset(slot),
3936                                     sizeof(struct btrfs_key_ptr));
3937                 if (slot == 0) {
3938                         struct btrfs_disk_key key;
3939                         btrfs_node_key(buf, &key, 0);
3940                         btrfs_fixup_low_keys(root, path, &key,
3941                                              btrfs_header_level(buf) + 1);
3942                 }
3943         } else {
3944                 struct btrfs_item *item1, *item2;
3945                 struct btrfs_key k1, k2;
3946                 char *item1_data, *item2_data;
3947                 u32 item1_offset, item2_offset, item1_size, item2_size;
3948
3949                 item1 = btrfs_item_nr(slot);
3950                 item2 = btrfs_item_nr(slot + 1);
3951                 btrfs_item_key_to_cpu(buf, &k1, slot);
3952                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
3953                 item1_offset = btrfs_item_offset(buf, item1);
3954                 item2_offset = btrfs_item_offset(buf, item2);
3955                 item1_size = btrfs_item_size(buf, item1);
3956                 item2_size = btrfs_item_size(buf, item2);
3957
3958                 item1_data = malloc(item1_size);
3959                 if (!item1_data)
3960                         return -ENOMEM;
3961                 item2_data = malloc(item2_size);
3962                 if (!item2_data) {
3963                         free(item1_data);
3964                         return -ENOMEM;
3965                 }
3966
3967                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
3968                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
3969
3970                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
3971                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
3972                 free(item1_data);
3973                 free(item2_data);
3974
3975                 btrfs_set_item_offset(buf, item1, item2_offset);
3976                 btrfs_set_item_offset(buf, item2, item1_offset);
3977                 btrfs_set_item_size(buf, item1, item2_size);
3978                 btrfs_set_item_size(buf, item2, item1_size);
3979
3980                 path->slots[0] = slot;
3981                 btrfs_set_item_key_unsafe(root, path, &k2);
3982                 path->slots[0] = slot + 1;
3983                 btrfs_set_item_key_unsafe(root, path, &k1);
3984         }
3985         return 0;
3986 }
3987
3988 static int fix_key_order(struct btrfs_trans_handle *trans,
3989                          struct btrfs_root *root,
3990                          struct btrfs_path *path)
3991 {
3992         struct extent_buffer *buf;
3993         struct btrfs_key k1, k2;
3994         int i;
3995         int level = path->lowest_level;
3996         int ret = -EIO;
3997
3998         buf = path->nodes[level];
3999         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4000                 if (level) {
4001                         btrfs_node_key_to_cpu(buf, &k1, i);
4002                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4003                 } else {
4004                         btrfs_item_key_to_cpu(buf, &k1, i);
4005                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4006                 }
4007                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4008                         continue;
4009                 ret = swap_values(root, path, buf, i);
4010                 if (ret)
4011                         break;
4012                 btrfs_mark_buffer_dirty(buf);
4013                 i = 0;
4014         }
4015         return ret;
4016 }
4017
4018 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4019                              struct btrfs_root *root,
4020                              struct btrfs_path *path,
4021                              struct extent_buffer *buf, int slot)
4022 {
4023         struct btrfs_key key;
4024         int nritems = btrfs_header_nritems(buf);
4025
4026         btrfs_item_key_to_cpu(buf, &key, slot);
4027
4028         /* These are all the keys we can deal with missing. */
4029         if (key.type != BTRFS_DIR_INDEX_KEY &&
4030             key.type != BTRFS_EXTENT_ITEM_KEY &&
4031             key.type != BTRFS_METADATA_ITEM_KEY &&
4032             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4033             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4034                 return -1;
4035
4036         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4037                (unsigned long long)key.objectid, key.type,
4038                (unsigned long long)key.offset, slot, buf->start);
4039         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4040                               btrfs_item_nr_offset(slot + 1),
4041                               sizeof(struct btrfs_item) *
4042                               (nritems - slot - 1));
4043         btrfs_set_header_nritems(buf, nritems - 1);
4044         if (slot == 0) {
4045                 struct btrfs_disk_key disk_key;
4046
4047                 btrfs_item_key(buf, &disk_key, 0);
4048                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4049         }
4050         btrfs_mark_buffer_dirty(buf);
4051         return 0;
4052 }
4053
4054 static int fix_item_offset(struct btrfs_trans_handle *trans,
4055                            struct btrfs_root *root,
4056                            struct btrfs_path *path)
4057 {
4058         struct extent_buffer *buf;
4059         int i;
4060         int ret = 0;
4061
4062         /* We should only get this for leaves */
4063         BUG_ON(path->lowest_level);
4064         buf = path->nodes[0];
4065 again:
4066         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4067                 unsigned int shift = 0, offset;
4068
4069                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4070                     BTRFS_LEAF_DATA_SIZE(root)) {
4071                         if (btrfs_item_end_nr(buf, i) >
4072                             BTRFS_LEAF_DATA_SIZE(root)) {
4073                                 ret = delete_bogus_item(trans, root, path,
4074                                                         buf, i);
4075                                 if (!ret)
4076                                         goto again;
4077                                 fprintf(stderr, "item is off the end of the "
4078                                         "leaf, can't fix\n");
4079                                 ret = -EIO;
4080                                 break;
4081                         }
4082                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4083                                 btrfs_item_end_nr(buf, i);
4084                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4085                            btrfs_item_offset_nr(buf, i - 1)) {
4086                         if (btrfs_item_end_nr(buf, i) >
4087                             btrfs_item_offset_nr(buf, i - 1)) {
4088                                 ret = delete_bogus_item(trans, root, path,
4089                                                         buf, i);
4090                                 if (!ret)
4091                                         goto again;
4092                                 fprintf(stderr, "items overlap, can't fix\n");
4093                                 ret = -EIO;
4094                                 break;
4095                         }
4096                         shift = btrfs_item_offset_nr(buf, i - 1) -
4097                                 btrfs_item_end_nr(buf, i);
4098                 }
4099                 if (!shift)
4100                         continue;
4101
4102                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4103                        i, shift, (unsigned long long)buf->start);
4104                 offset = btrfs_item_offset_nr(buf, i);
4105                 memmove_extent_buffer(buf,
4106                                       btrfs_leaf_data(buf) + offset + shift,
4107                                       btrfs_leaf_data(buf) + offset,
4108                                       btrfs_item_size_nr(buf, i));
4109                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4110                                       offset + shift);
4111                 btrfs_mark_buffer_dirty(buf);
4112         }
4113
4114         /*
4115          * We may have moved things, in which case we want to exit so we don't
4116          * write those changes out.  Once we have proper abort functionality in
4117          * progs this can be changed to something nicer.
4118          */
4119         BUG_ON(ret);
4120         return ret;
4121 }
4122
4123 /*
4124  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4125  * then just return -EIO.
4126  */
4127 static int try_to_fix_bad_block(struct btrfs_root *root,
4128                                 struct extent_buffer *buf,
4129                                 enum btrfs_tree_block_status status)
4130 {
4131         struct btrfs_trans_handle *trans;
4132         struct ulist *roots;
4133         struct ulist_node *node;
4134         struct btrfs_root *search_root;
4135         struct btrfs_path *path;
4136         struct ulist_iterator iter;
4137         struct btrfs_key root_key, key;
4138         int ret;
4139
4140         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4141             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4142                 return -EIO;
4143
4144         path = btrfs_alloc_path();
4145         if (!path)
4146                 return -EIO;
4147
4148         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4149                                    0, &roots);
4150         if (ret) {
4151                 btrfs_free_path(path);
4152                 return -EIO;
4153         }
4154
4155         ULIST_ITER_INIT(&iter);
4156         while ((node = ulist_next(roots, &iter))) {
4157                 root_key.objectid = node->val;
4158                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4159                 root_key.offset = (u64)-1;
4160
4161                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4162                 if (IS_ERR(root)) {
4163                         ret = -EIO;
4164                         break;
4165                 }
4166
4167
4168                 trans = btrfs_start_transaction(search_root, 0);
4169                 if (IS_ERR(trans)) {
4170                         ret = PTR_ERR(trans);
4171                         break;
4172                 }
4173
4174                 path->lowest_level = btrfs_header_level(buf);
4175                 path->skip_check_block = 1;
4176                 if (path->lowest_level)
4177                         btrfs_node_key_to_cpu(buf, &key, 0);
4178                 else
4179                         btrfs_item_key_to_cpu(buf, &key, 0);
4180                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4181                 if (ret) {
4182                         ret = -EIO;
4183                         btrfs_commit_transaction(trans, search_root);
4184                         break;
4185                 }
4186                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4187                         ret = fix_key_order(trans, search_root, path);
4188                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4189                         ret = fix_item_offset(trans, search_root, path);
4190                 if (ret) {
4191                         btrfs_commit_transaction(trans, search_root);
4192                         break;
4193                 }
4194                 btrfs_release_path(path);
4195                 btrfs_commit_transaction(trans, search_root);
4196         }
4197         ulist_free(roots);
4198         btrfs_free_path(path);
4199         return ret;
4200 }
4201
4202 static int check_block(struct btrfs_root *root,
4203                        struct cache_tree *extent_cache,
4204                        struct extent_buffer *buf, u64 flags)
4205 {
4206         struct extent_record *rec;
4207         struct cache_extent *cache;
4208         struct btrfs_key key;
4209         enum btrfs_tree_block_status status;
4210         int ret = 0;
4211         int level;
4212
4213         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4214         if (!cache)
4215                 return 1;
4216         rec = container_of(cache, struct extent_record, cache);
4217         rec->generation = btrfs_header_generation(buf);
4218
4219         level = btrfs_header_level(buf);
4220         if (btrfs_header_nritems(buf) > 0) {
4221
4222                 if (level == 0)
4223                         btrfs_item_key_to_cpu(buf, &key, 0);
4224                 else
4225                         btrfs_node_key_to_cpu(buf, &key, 0);
4226
4227                 rec->info_objectid = key.objectid;
4228         }
4229         rec->info_level = level;
4230
4231         if (btrfs_is_leaf(buf))
4232                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4233         else
4234                 status = btrfs_check_node(root, &rec->parent_key, buf);
4235
4236         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4237                 if (repair)
4238                         status = try_to_fix_bad_block(root, buf, status);
4239                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4240                         ret = -EIO;
4241                         fprintf(stderr, "bad block %llu\n",
4242                                 (unsigned long long)buf->start);
4243                 } else {
4244                         /*
4245                          * Signal to callers we need to start the scan over
4246                          * again since we'll have cow'ed blocks.
4247                          */
4248                         ret = -EAGAIN;
4249                 }
4250         } else {
4251                 rec->content_checked = 1;
4252                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4253                         rec->owner_ref_checked = 1;
4254                 else {
4255                         ret = check_owner_ref(root, rec, buf);
4256                         if (!ret)
4257                                 rec->owner_ref_checked = 1;
4258                 }
4259         }
4260         if (!ret)
4261                 maybe_free_extent_rec(extent_cache, rec);
4262         return ret;
4263 }
4264
4265 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4266                                                 u64 parent, u64 root)
4267 {
4268         struct list_head *cur = rec->backrefs.next;
4269         struct extent_backref *node;
4270         struct tree_backref *back;
4271
4272         while(cur != &rec->backrefs) {
4273                 node = list_entry(cur, struct extent_backref, list);
4274                 cur = cur->next;
4275                 if (node->is_data)
4276                         continue;
4277                 back = (struct tree_backref *)node;
4278                 if (parent > 0) {
4279                         if (!node->full_backref)
4280                                 continue;
4281                         if (parent == back->parent)
4282                                 return back;
4283                 } else {
4284                         if (node->full_backref)
4285                                 continue;
4286                         if (back->root == root)
4287                                 return back;
4288                 }
4289         }
4290         return NULL;
4291 }
4292
4293 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4294                                                 u64 parent, u64 root)
4295 {
4296         struct tree_backref *ref = malloc(sizeof(*ref));
4297         memset(&ref->node, 0, sizeof(ref->node));
4298         if (parent > 0) {
4299                 ref->parent = parent;
4300                 ref->node.full_backref = 1;
4301         } else {
4302                 ref->root = root;
4303                 ref->node.full_backref = 0;
4304         }
4305         list_add_tail(&ref->node.list, &rec->backrefs);
4306
4307         return ref;
4308 }
4309
4310 static struct data_backref *find_data_backref(struct extent_record *rec,
4311                                                 u64 parent, u64 root,
4312                                                 u64 owner, u64 offset,
4313                                                 int found_ref,
4314                                                 u64 disk_bytenr, u64 bytes)
4315 {
4316         struct list_head *cur = rec->backrefs.next;
4317         struct extent_backref *node;
4318         struct data_backref *back;
4319
4320         while(cur != &rec->backrefs) {
4321                 node = list_entry(cur, struct extent_backref, list);
4322                 cur = cur->next;
4323                 if (!node->is_data)
4324                         continue;
4325                 back = (struct data_backref *)node;
4326                 if (parent > 0) {
4327                         if (!node->full_backref)
4328                                 continue;
4329                         if (parent == back->parent)
4330                                 return back;
4331                 } else {
4332                         if (node->full_backref)
4333                                 continue;
4334                         if (back->root == root && back->owner == owner &&
4335                             back->offset == offset) {
4336                                 if (found_ref && node->found_ref &&
4337                                     (back->bytes != bytes ||
4338                                     back->disk_bytenr != disk_bytenr))
4339                                         continue;
4340                                 return back;
4341                         }
4342                 }
4343         }
4344         return NULL;
4345 }
4346
4347 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4348                                                 u64 parent, u64 root,
4349                                                 u64 owner, u64 offset,
4350                                                 u64 max_size)
4351 {
4352         struct data_backref *ref = malloc(sizeof(*ref));
4353         memset(&ref->node, 0, sizeof(ref->node));
4354         ref->node.is_data = 1;
4355
4356         if (parent > 0) {
4357                 ref->parent = parent;
4358                 ref->owner = 0;
4359                 ref->offset = 0;
4360                 ref->node.full_backref = 1;
4361         } else {
4362                 ref->root = root;
4363                 ref->owner = owner;
4364                 ref->offset = offset;
4365                 ref->node.full_backref = 0;
4366         }
4367         ref->bytes = max_size;
4368         ref->found_ref = 0;
4369         ref->num_refs = 0;
4370         list_add_tail(&ref->node.list, &rec->backrefs);
4371         if (max_size > rec->max_size)
4372                 rec->max_size = max_size;
4373         return ref;
4374 }
4375
4376 /* Check if the type of extent matches with its chunk */
4377 static void check_extent_type(struct extent_record *rec)
4378 {
4379         struct btrfs_block_group_cache *bg_cache;
4380
4381         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4382         if (!bg_cache)
4383                 return;
4384
4385         /* data extent, check chunk directly*/
4386         if (!rec->metadata) {
4387                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4388                         rec->wrong_chunk_type = 1;
4389                 return;
4390         }
4391
4392         /* metadata extent, check the obvious case first */
4393         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4394                                  BTRFS_BLOCK_GROUP_METADATA))) {
4395                 rec->wrong_chunk_type = 1;
4396                 return;
4397         }
4398
4399         /*
4400          * Check SYSTEM extent, as it's also marked as metadata, we can only
4401          * make sure it's a SYSTEM extent by its backref
4402          */
4403         if (!list_empty(&rec->backrefs)) {
4404                 struct extent_backref *node;
4405                 struct tree_backref *tback;
4406                 u64 bg_type;
4407
4408                 node = list_entry(rec->backrefs.next, struct extent_backref,
4409                                   list);
4410                 if (node->is_data) {
4411                         /* tree block shouldn't have data backref */
4412                         rec->wrong_chunk_type = 1;
4413                         return;
4414                 }
4415                 tback = container_of(node, struct tree_backref, node);
4416
4417                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4418                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4419                 else
4420                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4421                 if (!(bg_cache->flags & bg_type))
4422                         rec->wrong_chunk_type = 1;
4423         }
4424 }
4425
4426 static int add_extent_rec(struct cache_tree *extent_cache,
4427                           struct btrfs_key *parent_key, u64 parent_gen,
4428                           u64 start, u64 nr, u64 extent_item_refs,
4429                           int is_root, int inc_ref, int set_checked,
4430                           int metadata, int extent_rec, u64 max_size)
4431 {
4432         struct extent_record *rec;
4433         struct cache_extent *cache;
4434         int ret = 0;
4435         int dup = 0;
4436
4437         cache = lookup_cache_extent(extent_cache, start, nr);
4438         if (cache) {
4439                 rec = container_of(cache, struct extent_record, cache);
4440                 if (inc_ref)
4441                         rec->refs++;
4442                 if (rec->nr == 1)
4443                         rec->nr = max(nr, max_size);
4444
4445                 /*
4446                  * We need to make sure to reset nr to whatever the extent
4447                  * record says was the real size, this way we can compare it to
4448                  * the backrefs.
4449                  */
4450                 if (extent_rec) {
4451                         if (start != rec->start || rec->found_rec) {
4452                                 struct extent_record *tmp;
4453
4454                                 dup = 1;
4455                                 if (list_empty(&rec->list))
4456                                         list_add_tail(&rec->list,
4457                                                       &duplicate_extents);
4458
4459                                 /*
4460                                  * We have to do this song and dance in case we
4461                                  * find an extent record that falls inside of
4462                                  * our current extent record but does not have
4463                                  * the same objectid.
4464                                  */
4465                                 tmp = malloc(sizeof(*tmp));
4466                                 if (!tmp)
4467                                         return -ENOMEM;
4468                                 tmp->start = start;
4469                                 tmp->max_size = max_size;
4470                                 tmp->nr = nr;
4471                                 tmp->found_rec = 1;
4472                                 tmp->metadata = metadata;
4473                                 tmp->extent_item_refs = extent_item_refs;
4474                                 INIT_LIST_HEAD(&tmp->list);
4475                                 list_add_tail(&tmp->list, &rec->dups);
4476                                 rec->num_duplicates++;
4477                         } else {
4478                                 rec->nr = nr;
4479                                 rec->found_rec = 1;
4480                         }
4481                 }
4482
4483                 if (extent_item_refs && !dup) {
4484                         if (rec->extent_item_refs) {
4485                                 fprintf(stderr, "block %llu rec "
4486                                         "extent_item_refs %llu, passed %llu\n",
4487                                         (unsigned long long)start,
4488                                         (unsigned long long)
4489                                                         rec->extent_item_refs,
4490                                         (unsigned long long)extent_item_refs);
4491                         }
4492                         rec->extent_item_refs = extent_item_refs;
4493                 }
4494                 if (is_root)
4495                         rec->is_root = 1;
4496                 if (set_checked) {
4497                         rec->content_checked = 1;
4498                         rec->owner_ref_checked = 1;
4499                 }
4500
4501                 if (parent_key)
4502                         btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4503                 if (parent_gen)
4504                         rec->parent_generation = parent_gen;
4505
4506                 if (rec->max_size < max_size)
4507                         rec->max_size = max_size;
4508
4509                 /*
4510                  * A metadata extent can't cross stripe_len boundary, otherwise
4511                  * kernel scrub won't be able to handle it.
4512                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4513                  * it.
4514                  */
4515                 if (metadata && check_crossing_stripes(rec->start,
4516                                                        rec->max_size))
4517                                 rec->crossing_stripes = 1;
4518                 check_extent_type(rec);
4519                 maybe_free_extent_rec(extent_cache, rec);
4520                 return ret;
4521         }
4522         rec = malloc(sizeof(*rec));
4523         rec->start = start;
4524         rec->max_size = max_size;
4525         rec->nr = max(nr, max_size);
4526         rec->found_rec = !!extent_rec;
4527         rec->content_checked = 0;
4528         rec->owner_ref_checked = 0;
4529         rec->num_duplicates = 0;
4530         rec->metadata = metadata;
4531         rec->flag_block_full_backref = -1;
4532         rec->bad_full_backref = 0;
4533         rec->crossing_stripes = 0;
4534         rec->wrong_chunk_type = 0;
4535         INIT_LIST_HEAD(&rec->backrefs);
4536         INIT_LIST_HEAD(&rec->dups);
4537         INIT_LIST_HEAD(&rec->list);
4538
4539         if (is_root)
4540                 rec->is_root = 1;
4541         else
4542                 rec->is_root = 0;
4543
4544         if (inc_ref)
4545                 rec->refs = 1;
4546         else
4547                 rec->refs = 0;
4548
4549         if (extent_item_refs)
4550                 rec->extent_item_refs = extent_item_refs;
4551         else
4552                 rec->extent_item_refs = 0;
4553
4554         if (parent_key)
4555                 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4556         else
4557                 memset(&rec->parent_key, 0, sizeof(*parent_key));
4558
4559         if (parent_gen)
4560                 rec->parent_generation = parent_gen;
4561         else
4562                 rec->parent_generation = 0;
4563
4564         rec->cache.start = start;
4565         rec->cache.size = nr;
4566         ret = insert_cache_extent(extent_cache, &rec->cache);
4567         BUG_ON(ret);
4568         bytes_used += nr;
4569         if (set_checked) {
4570                 rec->content_checked = 1;
4571                 rec->owner_ref_checked = 1;
4572         }
4573
4574         if (metadata)
4575                 if (check_crossing_stripes(rec->start, rec->max_size))
4576                         rec->crossing_stripes = 1;
4577         check_extent_type(rec);
4578         return ret;
4579 }
4580
4581 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4582                             u64 parent, u64 root, int found_ref)
4583 {
4584         struct extent_record *rec;
4585         struct tree_backref *back;
4586         struct cache_extent *cache;
4587
4588         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4589         if (!cache) {
4590                 add_extent_rec(extent_cache, NULL, 0, bytenr,
4591                                1, 0, 0, 0, 0, 1, 0, 0);
4592                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4593                 if (!cache)
4594                         abort();
4595         }
4596
4597         rec = container_of(cache, struct extent_record, cache);
4598         if (rec->start != bytenr) {
4599                 abort();
4600         }
4601
4602         back = find_tree_backref(rec, parent, root);
4603         if (!back)
4604                 back = alloc_tree_backref(rec, parent, root);
4605
4606         if (found_ref) {
4607                 if (back->node.found_ref) {
4608                         fprintf(stderr, "Extent back ref already exists "
4609                                 "for %llu parent %llu root %llu \n",
4610                                 (unsigned long long)bytenr,
4611                                 (unsigned long long)parent,
4612                                 (unsigned long long)root);
4613                 }
4614                 back->node.found_ref = 1;
4615         } else {
4616                 if (back->node.found_extent_tree) {
4617                         fprintf(stderr, "Extent back ref already exists "
4618                                 "for %llu parent %llu root %llu \n",
4619                                 (unsigned long long)bytenr,
4620                                 (unsigned long long)parent,
4621                                 (unsigned long long)root);
4622                 }
4623                 back->node.found_extent_tree = 1;
4624         }
4625         check_extent_type(rec);
4626         maybe_free_extent_rec(extent_cache, rec);
4627         return 0;
4628 }
4629
4630 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4631                             u64 parent, u64 root, u64 owner, u64 offset,
4632                             u32 num_refs, int found_ref, u64 max_size)
4633 {
4634         struct extent_record *rec;
4635         struct data_backref *back;
4636         struct cache_extent *cache;
4637
4638         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4639         if (!cache) {
4640                 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
4641                                0, 0, max_size);
4642                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4643                 if (!cache)
4644                         abort();
4645         }
4646
4647         rec = container_of(cache, struct extent_record, cache);
4648         if (rec->max_size < max_size)
4649                 rec->max_size = max_size;
4650
4651         /*
4652          * If found_ref is set then max_size is the real size and must match the
4653          * existing refs.  So if we have already found a ref then we need to
4654          * make sure that this ref matches the existing one, otherwise we need
4655          * to add a new backref so we can notice that the backrefs don't match
4656          * and we need to figure out who is telling the truth.  This is to
4657          * account for that awful fsync bug I introduced where we'd end up with
4658          * a btrfs_file_extent_item that would have its length include multiple
4659          * prealloc extents or point inside of a prealloc extent.
4660          */
4661         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4662                                  bytenr, max_size);
4663         if (!back)
4664                 back = alloc_data_backref(rec, parent, root, owner, offset,
4665                                           max_size);
4666
4667         if (found_ref) {
4668                 BUG_ON(num_refs != 1);
4669                 if (back->node.found_ref)
4670                         BUG_ON(back->bytes != max_size);
4671                 back->node.found_ref = 1;
4672                 back->found_ref += 1;
4673                 back->bytes = max_size;
4674                 back->disk_bytenr = bytenr;
4675                 rec->refs += 1;
4676                 rec->content_checked = 1;
4677                 rec->owner_ref_checked = 1;
4678         } else {
4679                 if (back->node.found_extent_tree) {
4680                         fprintf(stderr, "Extent back ref already exists "
4681                                 "for %llu parent %llu root %llu "
4682                                 "owner %llu offset %llu num_refs %lu\n",
4683                                 (unsigned long long)bytenr,
4684                                 (unsigned long long)parent,
4685                                 (unsigned long long)root,
4686                                 (unsigned long long)owner,
4687                                 (unsigned long long)offset,
4688                                 (unsigned long)num_refs);
4689                 }
4690                 back->num_refs = num_refs;
4691                 back->node.found_extent_tree = 1;
4692         }
4693         maybe_free_extent_rec(extent_cache, rec);
4694         return 0;
4695 }
4696
4697 static int add_pending(struct cache_tree *pending,
4698                        struct cache_tree *seen, u64 bytenr, u32 size)
4699 {
4700         int ret;
4701         ret = add_cache_extent(seen, bytenr, size);
4702         if (ret)
4703                 return ret;
4704         add_cache_extent(pending, bytenr, size);
4705         return 0;
4706 }
4707
4708 static int pick_next_pending(struct cache_tree *pending,
4709                         struct cache_tree *reada,
4710                         struct cache_tree *nodes,
4711                         u64 last, struct block_info *bits, int bits_nr,
4712                         int *reada_bits)
4713 {
4714         unsigned long node_start = last;
4715         struct cache_extent *cache;
4716         int ret;
4717
4718         cache = search_cache_extent(reada, 0);
4719         if (cache) {
4720                 bits[0].start = cache->start;
4721                 bits[0].size = cache->size;
4722                 *reada_bits = 1;
4723                 return 1;
4724         }
4725         *reada_bits = 0;
4726         if (node_start > 32768)
4727                 node_start -= 32768;
4728
4729         cache = search_cache_extent(nodes, node_start);
4730         if (!cache)
4731                 cache = search_cache_extent(nodes, 0);
4732
4733         if (!cache) {
4734                  cache = search_cache_extent(pending, 0);
4735                  if (!cache)
4736                          return 0;
4737                  ret = 0;
4738                  do {
4739                          bits[ret].start = cache->start;
4740                          bits[ret].size = cache->size;
4741                          cache = next_cache_extent(cache);
4742                          ret++;
4743                  } while (cache && ret < bits_nr);
4744                  return ret;
4745         }
4746
4747         ret = 0;
4748         do {
4749                 bits[ret].start = cache->start;
4750                 bits[ret].size = cache->size;
4751                 cache = next_cache_extent(cache);
4752                 ret++;
4753         } while (cache && ret < bits_nr);
4754
4755         if (bits_nr - ret > 8) {
4756                 u64 lookup = bits[0].start + bits[0].size;
4757                 struct cache_extent *next;
4758                 next = search_cache_extent(pending, lookup);
4759                 while(next) {
4760                         if (next->start - lookup > 32768)
4761                                 break;
4762                         bits[ret].start = next->start;
4763                         bits[ret].size = next->size;
4764                         lookup = next->start + next->size;
4765                         ret++;
4766                         if (ret == bits_nr)
4767                                 break;
4768                         next = next_cache_extent(next);
4769                         if (!next)
4770                                 break;
4771                 }
4772         }
4773         return ret;
4774 }
4775
4776 static void free_chunk_record(struct cache_extent *cache)
4777 {
4778         struct chunk_record *rec;
4779
4780         rec = container_of(cache, struct chunk_record, cache);
4781         list_del_init(&rec->list);
4782         list_del_init(&rec->dextents);
4783         free(rec);
4784 }
4785
4786 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4787 {
4788         cache_tree_free_extents(chunk_cache, free_chunk_record);
4789 }
4790
4791 static void free_device_record(struct rb_node *node)
4792 {
4793         struct device_record *rec;
4794
4795         rec = container_of(node, struct device_record, node);
4796         free(rec);
4797 }
4798
4799 FREE_RB_BASED_TREE(device_cache, free_device_record);
4800
4801 int insert_block_group_record(struct block_group_tree *tree,
4802                               struct block_group_record *bg_rec)
4803 {
4804         int ret;
4805
4806         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4807         if (ret)
4808                 return ret;
4809
4810         list_add_tail(&bg_rec->list, &tree->block_groups);
4811         return 0;
4812 }
4813
4814 static void free_block_group_record(struct cache_extent *cache)
4815 {
4816         struct block_group_record *rec;
4817
4818         rec = container_of(cache, struct block_group_record, cache);
4819         list_del_init(&rec->list);
4820         free(rec);
4821 }
4822
4823 void free_block_group_tree(struct block_group_tree *tree)
4824 {
4825         cache_tree_free_extents(&tree->tree, free_block_group_record);
4826 }
4827
4828 int insert_device_extent_record(struct device_extent_tree *tree,
4829                                 struct device_extent_record *de_rec)
4830 {
4831         int ret;
4832
4833         /*
4834          * Device extent is a bit different from the other extents, because
4835          * the extents which belong to the different devices may have the
4836          * same start and size, so we need use the special extent cache
4837          * search/insert functions.
4838          */
4839         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4840         if (ret)
4841                 return ret;
4842
4843         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4844         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4845         return 0;
4846 }
4847
4848 static void free_device_extent_record(struct cache_extent *cache)
4849 {
4850         struct device_extent_record *rec;
4851
4852         rec = container_of(cache, struct device_extent_record, cache);
4853         if (!list_empty(&rec->chunk_list))
4854                 list_del_init(&rec->chunk_list);
4855         if (!list_empty(&rec->device_list))
4856                 list_del_init(&rec->device_list);
4857         free(rec);
4858 }
4859
4860 void free_device_extent_tree(struct device_extent_tree *tree)
4861 {
4862         cache_tree_free_extents(&tree->tree, free_device_extent_record);
4863 }
4864
4865 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4866 static int process_extent_ref_v0(struct cache_tree *extent_cache,
4867                                  struct extent_buffer *leaf, int slot)
4868 {
4869         struct btrfs_extent_ref_v0 *ref0;
4870         struct btrfs_key key;
4871
4872         btrfs_item_key_to_cpu(leaf, &key, slot);
4873         ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
4874         if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
4875                 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
4876         } else {
4877                 add_data_backref(extent_cache, key.objectid, key.offset, 0,
4878                                  0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
4879         }
4880         return 0;
4881 }
4882 #endif
4883
4884 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4885                                             struct btrfs_key *key,
4886                                             int slot)
4887 {
4888         struct btrfs_chunk *ptr;
4889         struct chunk_record *rec;
4890         int num_stripes, i;
4891
4892         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4893         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4894
4895         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4896         if (!rec) {
4897                 fprintf(stderr, "memory allocation failed\n");
4898                 exit(-1);
4899         }
4900
4901         INIT_LIST_HEAD(&rec->list);
4902         INIT_LIST_HEAD(&rec->dextents);
4903         rec->bg_rec = NULL;
4904
4905         rec->cache.start = key->offset;
4906         rec->cache.size = btrfs_chunk_length(leaf, ptr);
4907
4908         rec->generation = btrfs_header_generation(leaf);
4909
4910         rec->objectid = key->objectid;
4911         rec->type = key->type;
4912         rec->offset = key->offset;
4913
4914         rec->length = rec->cache.size;
4915         rec->owner = btrfs_chunk_owner(leaf, ptr);
4916         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4917         rec->type_flags = btrfs_chunk_type(leaf, ptr);
4918         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4919         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4920         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4921         rec->num_stripes = num_stripes;
4922         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4923
4924         for (i = 0; i < rec->num_stripes; ++i) {
4925                 rec->stripes[i].devid =
4926                         btrfs_stripe_devid_nr(leaf, ptr, i);
4927                 rec->stripes[i].offset =
4928                         btrfs_stripe_offset_nr(leaf, ptr, i);
4929                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4930                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
4931                                 BTRFS_UUID_SIZE);
4932         }
4933
4934         return rec;
4935 }
4936
4937 static int process_chunk_item(struct cache_tree *chunk_cache,
4938                               struct btrfs_key *key, struct extent_buffer *eb,
4939                               int slot)
4940 {
4941         struct chunk_record *rec;
4942         int ret = 0;
4943
4944         rec = btrfs_new_chunk_record(eb, key, slot);
4945         ret = insert_cache_extent(chunk_cache, &rec->cache);
4946         if (ret) {
4947                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
4948                         rec->offset, rec->length);
4949                 free(rec);
4950         }
4951
4952         return ret;
4953 }
4954
4955 static int process_device_item(struct rb_root *dev_cache,
4956                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
4957 {
4958         struct btrfs_dev_item *ptr;
4959         struct device_record *rec;
4960         int ret = 0;
4961
4962         ptr = btrfs_item_ptr(eb,
4963                 slot, struct btrfs_dev_item);
4964
4965         rec = malloc(sizeof(*rec));
4966         if (!rec) {
4967                 fprintf(stderr, "memory allocation failed\n");
4968                 return -ENOMEM;
4969         }
4970
4971         rec->devid = key->offset;
4972         rec->generation = btrfs_header_generation(eb);
4973
4974         rec->objectid = key->objectid;
4975         rec->type = key->type;
4976         rec->offset = key->offset;
4977
4978         rec->devid = btrfs_device_id(eb, ptr);
4979         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
4980         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
4981
4982         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
4983         if (ret) {
4984                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
4985                 free(rec);
4986         }
4987
4988         return ret;
4989 }
4990
4991 struct block_group_record *
4992 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
4993                              int slot)
4994 {
4995         struct btrfs_block_group_item *ptr;
4996         struct block_group_record *rec;
4997
4998         rec = calloc(1, sizeof(*rec));
4999         if (!rec) {
5000                 fprintf(stderr, "memory allocation failed\n");
5001                 exit(-1);
5002         }
5003
5004         rec->cache.start = key->objectid;
5005         rec->cache.size = key->offset;
5006
5007         rec->generation = btrfs_header_generation(leaf);
5008
5009         rec->objectid = key->objectid;
5010         rec->type = key->type;
5011         rec->offset = key->offset;
5012
5013         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5014         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5015
5016         INIT_LIST_HEAD(&rec->list);
5017
5018         return rec;
5019 }
5020
5021 static int process_block_group_item(struct block_group_tree *block_group_cache,
5022                                     struct btrfs_key *key,
5023                                     struct extent_buffer *eb, int slot)
5024 {
5025         struct block_group_record *rec;
5026         int ret = 0;
5027
5028         rec = btrfs_new_block_group_record(eb, key, slot);
5029         ret = insert_block_group_record(block_group_cache, rec);
5030         if (ret) {
5031                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5032                         rec->objectid, rec->offset);
5033                 free(rec);
5034         }
5035
5036         return ret;
5037 }
5038
5039 struct device_extent_record *
5040 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5041                                struct btrfs_key *key, int slot)
5042 {
5043         struct device_extent_record *rec;
5044         struct btrfs_dev_extent *ptr;
5045
5046         rec = calloc(1, sizeof(*rec));
5047         if (!rec) {
5048                 fprintf(stderr, "memory allocation failed\n");
5049                 exit(-1);
5050         }
5051
5052         rec->cache.objectid = key->objectid;
5053         rec->cache.start = key->offset;
5054
5055         rec->generation = btrfs_header_generation(leaf);
5056
5057         rec->objectid = key->objectid;
5058         rec->type = key->type;
5059         rec->offset = key->offset;
5060
5061         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5062         rec->chunk_objecteid =
5063                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5064         rec->chunk_offset =
5065                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5066         rec->length = btrfs_dev_extent_length(leaf, ptr);
5067         rec->cache.size = rec->length;
5068
5069         INIT_LIST_HEAD(&rec->chunk_list);
5070         INIT_LIST_HEAD(&rec->device_list);
5071
5072         return rec;
5073 }
5074
5075 static int
5076 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5077                            struct btrfs_key *key, struct extent_buffer *eb,
5078                            int slot)
5079 {
5080         struct device_extent_record *rec;
5081         int ret;
5082
5083         rec = btrfs_new_device_extent_record(eb, key, slot);
5084         ret = insert_device_extent_record(dev_extent_cache, rec);
5085         if (ret) {
5086                 fprintf(stderr,
5087                         "Device extent[%llu, %llu, %llu] existed.\n",
5088                         rec->objectid, rec->offset, rec->length);
5089                 free(rec);
5090         }
5091
5092         return ret;
5093 }
5094
5095 static int process_extent_item(struct btrfs_root *root,
5096                                struct cache_tree *extent_cache,
5097                                struct extent_buffer *eb, int slot)
5098 {
5099         struct btrfs_extent_item *ei;
5100         struct btrfs_extent_inline_ref *iref;
5101         struct btrfs_extent_data_ref *dref;
5102         struct btrfs_shared_data_ref *sref;
5103         struct btrfs_key key;
5104         unsigned long end;
5105         unsigned long ptr;
5106         int type;
5107         u32 item_size = btrfs_item_size_nr(eb, slot);
5108         u64 refs = 0;
5109         u64 offset;
5110         u64 num_bytes;
5111         int metadata = 0;
5112
5113         btrfs_item_key_to_cpu(eb, &key, slot);
5114
5115         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5116                 metadata = 1;
5117                 num_bytes = root->leafsize;
5118         } else {
5119                 num_bytes = key.offset;
5120         }
5121
5122         if (item_size < sizeof(*ei)) {
5123 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5124                 struct btrfs_extent_item_v0 *ei0;
5125                 BUG_ON(item_size != sizeof(*ei0));
5126                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5127                 refs = btrfs_extent_refs_v0(eb, ei0);
5128 #else
5129                 BUG();
5130 #endif
5131                 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5132                                       num_bytes, refs, 0, 0, 0, metadata, 1,
5133                                       num_bytes);
5134         }
5135
5136         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5137         refs = btrfs_extent_refs(eb, ei);
5138         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5139                 metadata = 1;
5140         else
5141                 metadata = 0;
5142
5143         add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5144                        refs, 0, 0, 0, metadata, 1, num_bytes);
5145
5146         ptr = (unsigned long)(ei + 1);
5147         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5148             key.type == BTRFS_EXTENT_ITEM_KEY)
5149                 ptr += sizeof(struct btrfs_tree_block_info);
5150
5151         end = (unsigned long)ei + item_size;
5152         while (ptr < end) {
5153                 iref = (struct btrfs_extent_inline_ref *)ptr;
5154                 type = btrfs_extent_inline_ref_type(eb, iref);
5155                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5156                 switch (type) {
5157                 case BTRFS_TREE_BLOCK_REF_KEY:
5158                         add_tree_backref(extent_cache, key.objectid,
5159                                          0, offset, 0);
5160                         break;
5161                 case BTRFS_SHARED_BLOCK_REF_KEY:
5162                         add_tree_backref(extent_cache, key.objectid,
5163                                          offset, 0, 0);
5164                         break;
5165                 case BTRFS_EXTENT_DATA_REF_KEY:
5166                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5167                         add_data_backref(extent_cache, key.objectid, 0,
5168                                         btrfs_extent_data_ref_root(eb, dref),
5169                                         btrfs_extent_data_ref_objectid(eb,
5170                                                                        dref),
5171                                         btrfs_extent_data_ref_offset(eb, dref),
5172                                         btrfs_extent_data_ref_count(eb, dref),
5173                                         0, num_bytes);
5174                         break;
5175                 case BTRFS_SHARED_DATA_REF_KEY:
5176                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5177                         add_data_backref(extent_cache, key.objectid, offset,
5178                                         0, 0, 0,
5179                                         btrfs_shared_data_ref_count(eb, sref),
5180                                         0, num_bytes);
5181                         break;
5182                 default:
5183                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5184                                 key.objectid, key.type, num_bytes);
5185                         goto out;
5186                 }
5187                 ptr += btrfs_extent_inline_ref_size(type);
5188         }
5189         WARN_ON(ptr > end);
5190 out:
5191         return 0;
5192 }
5193
5194 static int check_cache_range(struct btrfs_root *root,
5195                              struct btrfs_block_group_cache *cache,
5196                              u64 offset, u64 bytes)
5197 {
5198         struct btrfs_free_space *entry;
5199         u64 *logical;
5200         u64 bytenr;
5201         int stripe_len;
5202         int i, nr, ret;
5203
5204         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5205                 bytenr = btrfs_sb_offset(i);
5206                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5207                                        cache->key.objectid, bytenr, 0,
5208                                        &logical, &nr, &stripe_len);
5209                 if (ret)
5210                         return ret;
5211
5212                 while (nr--) {
5213                         if (logical[nr] + stripe_len <= offset)
5214                                 continue;
5215                         if (offset + bytes <= logical[nr])
5216                                 continue;
5217                         if (logical[nr] == offset) {
5218                                 if (stripe_len >= bytes) {
5219                                         kfree(logical);
5220                                         return 0;
5221                                 }
5222                                 bytes -= stripe_len;
5223                                 offset += stripe_len;
5224                         } else if (logical[nr] < offset) {
5225                                 if (logical[nr] + stripe_len >=
5226                                     offset + bytes) {
5227                                         kfree(logical);
5228                                         return 0;
5229                                 }
5230                                 bytes = (offset + bytes) -
5231                                         (logical[nr] + stripe_len);
5232                                 offset = logical[nr] + stripe_len;
5233                         } else {
5234                                 /*
5235                                  * Could be tricky, the super may land in the
5236                                  * middle of the area we're checking.  First
5237                                  * check the easiest case, it's at the end.
5238                                  */
5239                                 if (logical[nr] + stripe_len >=
5240                                     bytes + offset) {
5241                                         bytes = logical[nr] - offset;
5242                                         continue;
5243                                 }
5244
5245                                 /* Check the left side */
5246                                 ret = check_cache_range(root, cache,
5247                                                         offset,
5248                                                         logical[nr] - offset);
5249                                 if (ret) {
5250                                         kfree(logical);
5251                                         return ret;
5252                                 }
5253
5254                                 /* Now we continue with the right side */
5255                                 bytes = (offset + bytes) -
5256                                         (logical[nr] + stripe_len);
5257                                 offset = logical[nr] + stripe_len;
5258                         }
5259                 }
5260
5261                 kfree(logical);
5262         }
5263
5264         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5265         if (!entry) {
5266                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5267                         offset, offset+bytes);
5268                 return -EINVAL;
5269         }
5270
5271         if (entry->offset != offset) {
5272                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5273                         entry->offset);
5274                 return -EINVAL;
5275         }
5276
5277         if (entry->bytes != bytes) {
5278                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5279                         bytes, entry->bytes, offset);
5280                 return -EINVAL;
5281         }
5282
5283         unlink_free_space(cache->free_space_ctl, entry);
5284         free(entry);
5285         return 0;
5286 }
5287
5288 static int verify_space_cache(struct btrfs_root *root,
5289                               struct btrfs_block_group_cache *cache)
5290 {
5291         struct btrfs_path *path;
5292         struct extent_buffer *leaf;
5293         struct btrfs_key key;
5294         u64 last;
5295         int ret = 0;
5296
5297         path = btrfs_alloc_path();
5298         if (!path)
5299                 return -ENOMEM;
5300
5301         root = root->fs_info->extent_root;
5302
5303         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5304
5305         key.objectid = last;
5306         key.offset = 0;
5307         key.type = BTRFS_EXTENT_ITEM_KEY;
5308
5309         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5310         if (ret < 0)
5311                 goto out;
5312         ret = 0;
5313         while (1) {
5314                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5315                         ret = btrfs_next_leaf(root, path);
5316                         if (ret < 0)
5317                                 goto out;
5318                         if (ret > 0) {
5319                                 ret = 0;
5320                                 break;
5321                         }
5322                 }
5323                 leaf = path->nodes[0];
5324                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5325                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5326                         break;
5327                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5328                     key.type != BTRFS_METADATA_ITEM_KEY) {
5329                         path->slots[0]++;
5330                         continue;
5331                 }
5332
5333                 if (last == key.objectid) {
5334                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5335                                 last = key.objectid + key.offset;
5336                         else
5337                                 last = key.objectid + root->leafsize;
5338                         path->slots[0]++;
5339                         continue;
5340                 }
5341
5342                 ret = check_cache_range(root, cache, last,
5343                                         key.objectid - last);
5344                 if (ret)
5345                         break;
5346                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5347                         last = key.objectid + key.offset;
5348                 else
5349                         last = key.objectid + root->leafsize;
5350                 path->slots[0]++;
5351         }
5352
5353         if (last < cache->key.objectid + cache->key.offset)
5354                 ret = check_cache_range(root, cache, last,
5355                                         cache->key.objectid +
5356                                         cache->key.offset - last);
5357
5358 out:
5359         btrfs_free_path(path);
5360
5361         if (!ret &&
5362             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5363                 fprintf(stderr, "There are still entries left in the space "
5364                         "cache\n");
5365                 ret = -EINVAL;
5366         }
5367
5368         return ret;
5369 }
5370
5371 static int check_space_cache(struct btrfs_root *root)
5372 {
5373         struct btrfs_block_group_cache *cache;
5374         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5375         int ret;
5376         int error = 0;
5377
5378         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5379             btrfs_super_generation(root->fs_info->super_copy) !=
5380             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5381                 printf("cache and super generation don't match, space cache "
5382                        "will be invalidated\n");
5383                 return 0;
5384         }
5385
5386         if (ctx.progress_enabled) {
5387                 ctx.tp = TASK_FREE_SPACE;
5388                 task_start(ctx.info);
5389         }
5390
5391         while (1) {
5392                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5393                 if (!cache)
5394                         break;
5395
5396                 start = cache->key.objectid + cache->key.offset;
5397                 if (!cache->free_space_ctl) {
5398                         if (btrfs_init_free_space_ctl(cache,
5399                                                       root->sectorsize)) {
5400                                 ret = -ENOMEM;
5401                                 break;
5402                         }
5403                 } else {
5404                         btrfs_remove_free_space_cache(cache);
5405                 }
5406
5407                 ret = load_free_space_cache(root->fs_info, cache);
5408                 if (!ret)
5409                         continue;
5410
5411                 ret = verify_space_cache(root, cache);
5412                 if (ret) {
5413                         fprintf(stderr, "cache appears valid but isnt %Lu\n",
5414                                 cache->key.objectid);
5415                         error++;
5416                 }
5417         }
5418
5419         task_stop(ctx.info);
5420
5421         return error ? -EINVAL : 0;
5422 }
5423
5424 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5425                         u64 num_bytes, unsigned long leaf_offset,
5426                         struct extent_buffer *eb) {
5427
5428         u64 offset = 0;
5429         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5430         char *data;
5431         unsigned long csum_offset;
5432         u32 csum;
5433         u32 csum_expected;
5434         u64 read_len;
5435         u64 data_checked = 0;
5436         u64 tmp;
5437         int ret = 0;
5438         int mirror;
5439         int num_copies;
5440
5441         if (num_bytes % root->sectorsize)
5442                 return -EINVAL;
5443
5444         data = malloc(num_bytes);
5445         if (!data)
5446                 return -ENOMEM;
5447
5448         while (offset < num_bytes) {
5449                 mirror = 0;
5450 again:
5451                 read_len = num_bytes - offset;
5452                 /* read as much space once a time */
5453                 ret = read_extent_data(root, data + offset,
5454                                 bytenr + offset, &read_len, mirror);
5455                 if (ret)
5456                         goto out;
5457                 data_checked = 0;
5458                 /* verify every 4k data's checksum */
5459                 while (data_checked < read_len) {
5460                         csum = ~(u32)0;
5461                         tmp = offset + data_checked;
5462
5463                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5464                                                csum, root->sectorsize);
5465                         btrfs_csum_final(csum, (char *)&csum);
5466
5467                         csum_offset = leaf_offset +
5468                                  tmp / root->sectorsize * csum_size;
5469                         read_extent_buffer(eb, (char *)&csum_expected,
5470                                            csum_offset, csum_size);
5471                         /* try another mirror */
5472                         if (csum != csum_expected) {
5473                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5474                                                 mirror, bytenr + tmp,
5475                                                 csum, csum_expected);
5476                                 num_copies = btrfs_num_copies(
5477                                                 &root->fs_info->mapping_tree,
5478                                                 bytenr, num_bytes);
5479                                 if (mirror < num_copies - 1) {
5480                                         mirror += 1;
5481                                         goto again;
5482                                 }
5483                         }
5484                         data_checked += root->sectorsize;
5485                 }
5486                 offset += read_len;
5487         }
5488 out:
5489         free(data);
5490         return ret;
5491 }
5492
5493 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5494                                u64 num_bytes)
5495 {
5496         struct btrfs_path *path;
5497         struct extent_buffer *leaf;
5498         struct btrfs_key key;
5499         int ret;
5500
5501         path = btrfs_alloc_path();
5502         if (!path) {
5503                 fprintf(stderr, "Error allocing path\n");
5504                 return -ENOMEM;
5505         }
5506
5507         key.objectid = bytenr;
5508         key.type = BTRFS_EXTENT_ITEM_KEY;
5509         key.offset = (u64)-1;
5510
5511 again:
5512         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5513                                 0, 0);
5514         if (ret < 0) {
5515                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5516                 btrfs_free_path(path);
5517                 return ret;
5518         } else if (ret) {
5519                 if (path->slots[0] > 0) {
5520                         path->slots[0]--;
5521                 } else {
5522                         ret = btrfs_prev_leaf(root, path);
5523                         if (ret < 0) {
5524                                 goto out;
5525                         } else if (ret > 0) {
5526                                 ret = 0;
5527                                 goto out;
5528                         }
5529                 }
5530         }
5531
5532         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5533
5534         /*
5535          * Block group items come before extent items if they have the same
5536          * bytenr, so walk back one more just in case.  Dear future traveler,
5537          * first congrats on mastering time travel.  Now if it's not too much
5538          * trouble could you go back to 2006 and tell Chris to make the
5539          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5540          * EXTENT_ITEM_KEY please?
5541          */
5542         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5543                 if (path->slots[0] > 0) {
5544                         path->slots[0]--;
5545                 } else {
5546                         ret = btrfs_prev_leaf(root, path);
5547                         if (ret < 0) {
5548                                 goto out;
5549                         } else if (ret > 0) {
5550                                 ret = 0;
5551                                 goto out;
5552                         }
5553                 }
5554                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5555         }
5556
5557         while (num_bytes) {
5558                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5559                         ret = btrfs_next_leaf(root, path);
5560                         if (ret < 0) {
5561                                 fprintf(stderr, "Error going to next leaf "
5562                                         "%d\n", ret);
5563                                 btrfs_free_path(path);
5564                                 return ret;
5565                         } else if (ret) {
5566                                 break;
5567                         }
5568                 }
5569                 leaf = path->nodes[0];
5570                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5571                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5572                         path->slots[0]++;
5573                         continue;
5574                 }
5575                 if (key.objectid + key.offset < bytenr) {
5576                         path->slots[0]++;
5577                         continue;
5578                 }
5579                 if (key.objectid > bytenr + num_bytes)
5580                         break;
5581
5582                 if (key.objectid == bytenr) {
5583                         if (key.offset >= num_bytes) {
5584                                 num_bytes = 0;
5585                                 break;
5586                         }
5587                         num_bytes -= key.offset;
5588                         bytenr += key.offset;
5589                 } else if (key.objectid < bytenr) {
5590                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5591                                 num_bytes = 0;
5592                                 break;
5593                         }
5594                         num_bytes = (bytenr + num_bytes) -
5595                                 (key.objectid + key.offset);
5596                         bytenr = key.objectid + key.offset;
5597                 } else {
5598                         if (key.objectid + key.offset < bytenr + num_bytes) {
5599                                 u64 new_start = key.objectid + key.offset;
5600                                 u64 new_bytes = bytenr + num_bytes - new_start;
5601
5602                                 /*
5603                                  * Weird case, the extent is in the middle of
5604                                  * our range, we'll have to search one side
5605                                  * and then the other.  Not sure if this happens
5606                                  * in real life, but no harm in coding it up
5607                                  * anyway just in case.
5608                                  */
5609                                 btrfs_release_path(path);
5610                                 ret = check_extent_exists(root, new_start,
5611                                                           new_bytes);
5612                                 if (ret) {
5613                                         fprintf(stderr, "Right section didn't "
5614                                                 "have a record\n");
5615                                         break;
5616                                 }
5617                                 num_bytes = key.objectid - bytenr;
5618                                 goto again;
5619                         }
5620                         num_bytes = key.objectid - bytenr;
5621                 }
5622                 path->slots[0]++;
5623         }
5624         ret = 0;
5625
5626 out:
5627         if (num_bytes && !ret) {
5628                 fprintf(stderr, "There are no extents for csum range "
5629                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5630                 ret = 1;
5631         }
5632
5633         btrfs_free_path(path);
5634         return ret;
5635 }
5636
5637 static int check_csums(struct btrfs_root *root)
5638 {
5639         struct btrfs_path *path;
5640         struct extent_buffer *leaf;
5641         struct btrfs_key key;
5642         u64 offset = 0, num_bytes = 0;
5643         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5644         int errors = 0;
5645         int ret;
5646         u64 data_len;
5647         unsigned long leaf_offset;
5648
5649         root = root->fs_info->csum_root;
5650         if (!extent_buffer_uptodate(root->node)) {
5651                 fprintf(stderr, "No valid csum tree found\n");
5652                 return -ENOENT;
5653         }
5654
5655         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5656         key.type = BTRFS_EXTENT_CSUM_KEY;
5657         key.offset = 0;
5658
5659         path = btrfs_alloc_path();
5660         if (!path)
5661                 return -ENOMEM;
5662
5663         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5664         if (ret < 0) {
5665                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5666                 btrfs_free_path(path);
5667                 return ret;
5668         }
5669
5670         if (ret > 0 && path->slots[0])
5671                 path->slots[0]--;
5672         ret = 0;
5673
5674         while (1) {
5675                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5676                         ret = btrfs_next_leaf(root, path);
5677                         if (ret < 0) {
5678                                 fprintf(stderr, "Error going to next leaf "
5679                                         "%d\n", ret);
5680                                 break;
5681                         }
5682                         if (ret)
5683                                 break;
5684                 }
5685                 leaf = path->nodes[0];
5686
5687                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5688                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5689                         path->slots[0]++;
5690                         continue;
5691                 }
5692
5693                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5694                               csum_size) * root->sectorsize;
5695                 if (!check_data_csum)
5696                         goto skip_csum_check;
5697                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5698                 ret = check_extent_csums(root, key.offset, data_len,
5699                                          leaf_offset, leaf);
5700                 if (ret)
5701                         break;
5702 skip_csum_check:
5703                 if (!num_bytes) {
5704                         offset = key.offset;
5705                 } else if (key.offset != offset + num_bytes) {
5706                         ret = check_extent_exists(root, offset, num_bytes);
5707                         if (ret) {
5708                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5709                                         "there is no extent record\n",
5710                                         offset, offset+num_bytes);
5711                                 errors++;
5712                         }
5713                         offset = key.offset;
5714                         num_bytes = 0;
5715                 }
5716                 num_bytes += data_len;
5717                 path->slots[0]++;
5718         }
5719
5720         btrfs_free_path(path);
5721         return errors;
5722 }
5723
5724 static int is_dropped_key(struct btrfs_key *key,
5725                           struct btrfs_key *drop_key) {
5726         if (key->objectid < drop_key->objectid)
5727                 return 1;
5728         else if (key->objectid == drop_key->objectid) {
5729                 if (key->type < drop_key->type)
5730                         return 1;
5731                 else if (key->type == drop_key->type) {
5732                         if (key->offset < drop_key->offset)
5733                                 return 1;
5734                 }
5735         }
5736         return 0;
5737 }
5738
5739 /*
5740  * Here are the rules for FULL_BACKREF.
5741  *
5742  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5743  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5744  *      FULL_BACKREF set.
5745  * 3) We cow'ed the block walking down a reloc tree.  This is impossible to tell
5746  *    if it happened after the relocation occurred since we'll have dropped the
5747  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5748  *    have no real way to know for sure.
5749  *
5750  * We process the blocks one root at a time, and we start from the lowest root
5751  * objectid and go to the highest.  So we can just lookup the owner backref for
5752  * the record and if we don't find it then we know it doesn't exist and we have
5753  * a FULL BACKREF.
5754  *
5755  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5756  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5757  * be set or not and then we can check later once we've gathered all the refs.
5758  */
5759 static int calc_extent_flag(struct btrfs_root *root,
5760                            struct cache_tree *extent_cache,
5761                            struct extent_buffer *buf,
5762                            struct root_item_record *ri,
5763                            u64 *flags)
5764 {
5765         struct extent_record *rec;
5766         struct cache_extent *cache;
5767         struct tree_backref *tback;
5768         u64 owner = 0;
5769
5770         cache = lookup_cache_extent(extent_cache, buf->start, 1);
5771         /* we have added this extent before */
5772         BUG_ON(!cache);
5773         rec = container_of(cache, struct extent_record, cache);
5774
5775         /*
5776          * Except file/reloc tree, we can not have
5777          * FULL BACKREF MODE
5778          */
5779         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5780                 goto normal;
5781         /*
5782          * root node
5783          */
5784         if (buf->start == ri->bytenr)
5785                 goto normal;
5786
5787         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5788                 goto full_backref;
5789
5790         owner = btrfs_header_owner(buf);
5791         if (owner == ri->objectid)
5792                 goto normal;
5793
5794         tback = find_tree_backref(rec, 0, owner);
5795         if (!tback)
5796                 goto full_backref;
5797 normal:
5798         *flags = 0;
5799         if (rec->flag_block_full_backref != -1 &&
5800             rec->flag_block_full_backref != 0)
5801                 rec->bad_full_backref = 1;
5802         return 0;
5803 full_backref:
5804         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5805         if (rec->flag_block_full_backref != -1 &&
5806             rec->flag_block_full_backref != 1)
5807                 rec->bad_full_backref = 1;
5808         return 0;
5809 }
5810
5811 static int run_next_block(struct btrfs_root *root,
5812                           struct block_info *bits,
5813                           int bits_nr,
5814                           u64 *last,
5815                           struct cache_tree *pending,
5816                           struct cache_tree *seen,
5817                           struct cache_tree *reada,
5818                           struct cache_tree *nodes,
5819                           struct cache_tree *extent_cache,
5820                           struct cache_tree *chunk_cache,
5821                           struct rb_root *dev_cache,
5822                           struct block_group_tree *block_group_cache,
5823                           struct device_extent_tree *dev_extent_cache,
5824                           struct root_item_record *ri)
5825 {
5826         struct extent_buffer *buf;
5827         struct extent_record *rec = NULL;
5828         u64 bytenr;
5829         u32 size;
5830         u64 parent;
5831         u64 owner;
5832         u64 flags;
5833         u64 ptr;
5834         u64 gen = 0;
5835         int ret = 0;
5836         int i;
5837         int nritems;
5838         struct btrfs_key key;
5839         struct cache_extent *cache;
5840         int reada_bits;
5841
5842         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5843                                     bits_nr, &reada_bits);
5844         if (nritems == 0)
5845                 return 1;
5846
5847         if (!reada_bits) {
5848                 for(i = 0; i < nritems; i++) {
5849                         ret = add_cache_extent(reada, bits[i].start,
5850                                                bits[i].size);
5851                         if (ret == -EEXIST)
5852                                 continue;
5853
5854                         /* fixme, get the parent transid */
5855                         readahead_tree_block(root, bits[i].start,
5856                                              bits[i].size, 0);
5857                 }
5858         }
5859         *last = bits[0].start;
5860         bytenr = bits[0].start;
5861         size = bits[0].size;
5862
5863         cache = lookup_cache_extent(pending, bytenr, size);
5864         if (cache) {
5865                 remove_cache_extent(pending, cache);
5866                 free(cache);
5867         }
5868         cache = lookup_cache_extent(reada, bytenr, size);
5869         if (cache) {
5870                 remove_cache_extent(reada, cache);
5871                 free(cache);
5872         }
5873         cache = lookup_cache_extent(nodes, bytenr, size);
5874         if (cache) {
5875                 remove_cache_extent(nodes, cache);
5876                 free(cache);
5877         }
5878         cache = lookup_cache_extent(extent_cache, bytenr, size);
5879         if (cache) {
5880                 rec = container_of(cache, struct extent_record, cache);
5881                 gen = rec->parent_generation;
5882         }
5883
5884         /* fixme, get the real parent transid */
5885         buf = read_tree_block(root, bytenr, size, gen);
5886         if (!extent_buffer_uptodate(buf)) {
5887                 record_bad_block_io(root->fs_info,
5888                                     extent_cache, bytenr, size);
5889                 goto out;
5890         }
5891
5892         nritems = btrfs_header_nritems(buf);
5893
5894         flags = 0;
5895         if (!init_extent_tree) {
5896                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5897                                        btrfs_header_level(buf), 1, NULL,
5898                                        &flags);
5899                 if (ret < 0) {
5900                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5901                         if (ret < 0) {
5902                                 fprintf(stderr, "Couldn't calc extent flags\n");
5903                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5904                         }
5905                 }
5906         } else {
5907                 flags = 0;
5908                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5909                 if (ret < 0) {
5910                         fprintf(stderr, "Couldn't calc extent flags\n");
5911                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5912                 }
5913         }
5914
5915         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5916                 if (ri != NULL &&
5917                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5918                     ri->objectid == btrfs_header_owner(buf)) {
5919                         /*
5920                          * Ok we got to this block from it's original owner and
5921                          * we have FULL_BACKREF set.  Relocation can leave
5922                          * converted blocks over so this is altogether possible,
5923                          * however it's not possible if the generation > the
5924                          * last snapshot, so check for this case.
5925                          */
5926                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5927                             btrfs_header_generation(buf) > ri->last_snapshot) {
5928                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5929                                 rec->bad_full_backref = 1;
5930                         }
5931                 }
5932         } else {
5933                 if (ri != NULL &&
5934                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
5935                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
5936                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5937                         rec->bad_full_backref = 1;
5938                 }
5939         }
5940
5941         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5942                 rec->flag_block_full_backref = 1;
5943                 parent = bytenr;
5944                 owner = 0;
5945         } else {
5946                 rec->flag_block_full_backref = 0;
5947                 parent = 0;
5948                 owner = btrfs_header_owner(buf);
5949         }
5950
5951         ret = check_block(root, extent_cache, buf, flags);
5952         if (ret)
5953                 goto out;
5954
5955         if (btrfs_is_leaf(buf)) {
5956                 btree_space_waste += btrfs_leaf_free_space(root, buf);
5957                 for (i = 0; i < nritems; i++) {
5958                         struct btrfs_file_extent_item *fi;
5959                         btrfs_item_key_to_cpu(buf, &key, i);
5960                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
5961                                 process_extent_item(root, extent_cache, buf,
5962                                                     i);
5963                                 continue;
5964                         }
5965                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5966                                 process_extent_item(root, extent_cache, buf,
5967                                                     i);
5968                                 continue;
5969                         }
5970                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
5971                                 total_csum_bytes +=
5972                                         btrfs_item_size_nr(buf, i);
5973                                 continue;
5974                         }
5975                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
5976                                 process_chunk_item(chunk_cache, &key, buf, i);
5977                                 continue;
5978                         }
5979                         if (key.type == BTRFS_DEV_ITEM_KEY) {
5980                                 process_device_item(dev_cache, &key, buf, i);
5981                                 continue;
5982                         }
5983                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
5984                                 process_block_group_item(block_group_cache,
5985                                         &key, buf, i);
5986                                 continue;
5987                         }
5988                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
5989                                 process_device_extent_item(dev_extent_cache,
5990                                         &key, buf, i);
5991                                 continue;
5992
5993                         }
5994                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
5995 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5996                                 process_extent_ref_v0(extent_cache, buf, i);
5997 #else
5998                                 BUG();
5999 #endif
6000                                 continue;
6001                         }
6002
6003                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6004                                 add_tree_backref(extent_cache, key.objectid, 0,
6005                                                  key.offset, 0);
6006                                 continue;
6007                         }
6008                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6009                                 add_tree_backref(extent_cache, key.objectid,
6010                                                  key.offset, 0, 0);
6011                                 continue;
6012                         }
6013                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6014                                 struct btrfs_extent_data_ref *ref;
6015                                 ref = btrfs_item_ptr(buf, i,
6016                                                 struct btrfs_extent_data_ref);
6017                                 add_data_backref(extent_cache,
6018                                         key.objectid, 0,
6019                                         btrfs_extent_data_ref_root(buf, ref),
6020                                         btrfs_extent_data_ref_objectid(buf,
6021                                                                        ref),
6022                                         btrfs_extent_data_ref_offset(buf, ref),
6023                                         btrfs_extent_data_ref_count(buf, ref),
6024                                         0, root->sectorsize);
6025                                 continue;
6026                         }
6027                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6028                                 struct btrfs_shared_data_ref *ref;
6029                                 ref = btrfs_item_ptr(buf, i,
6030                                                 struct btrfs_shared_data_ref);
6031                                 add_data_backref(extent_cache,
6032                                         key.objectid, key.offset, 0, 0, 0,
6033                                         btrfs_shared_data_ref_count(buf, ref),
6034                                         0, root->sectorsize);
6035                                 continue;
6036                         }
6037                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6038                                 struct bad_item *bad;
6039
6040                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6041                                         continue;
6042                                 if (!owner)
6043                                         continue;
6044                                 bad = malloc(sizeof(struct bad_item));
6045                                 if (!bad)
6046                                         continue;
6047                                 INIT_LIST_HEAD(&bad->list);
6048                                 memcpy(&bad->key, &key,
6049                                        sizeof(struct btrfs_key));
6050                                 bad->root_id = owner;
6051                                 list_add_tail(&bad->list, &delete_items);
6052                                 continue;
6053                         }
6054                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6055                                 continue;
6056                         fi = btrfs_item_ptr(buf, i,
6057                                             struct btrfs_file_extent_item);
6058                         if (btrfs_file_extent_type(buf, fi) ==
6059                             BTRFS_FILE_EXTENT_INLINE)
6060                                 continue;
6061                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6062                                 continue;
6063
6064                         data_bytes_allocated +=
6065                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6066                         if (data_bytes_allocated < root->sectorsize) {
6067                                 abort();
6068                         }
6069                         data_bytes_referenced +=
6070                                 btrfs_file_extent_num_bytes(buf, fi);
6071                         add_data_backref(extent_cache,
6072                                 btrfs_file_extent_disk_bytenr(buf, fi),
6073                                 parent, owner, key.objectid, key.offset -
6074                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6075                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6076                 }
6077         } else {
6078                 int level;
6079                 struct btrfs_key first_key;
6080
6081                 first_key.objectid = 0;
6082
6083                 if (nritems > 0)
6084                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6085                 level = btrfs_header_level(buf);
6086                 for (i = 0; i < nritems; i++) {
6087                         ptr = btrfs_node_blockptr(buf, i);
6088                         size = btrfs_level_size(root, level - 1);
6089                         btrfs_node_key_to_cpu(buf, &key, i);
6090                         if (ri != NULL) {
6091                                 if ((level == ri->drop_level)
6092                                     && is_dropped_key(&key, &ri->drop_key)) {
6093                                         continue;
6094                                 }
6095                         }
6096                         ret = add_extent_rec(extent_cache, &key,
6097                                              btrfs_node_ptr_generation(buf, i),
6098                                              ptr, size, 0, 0, 1, 0, 1, 0,
6099                                              size);
6100                         BUG_ON(ret);
6101
6102                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6103
6104                         if (level > 1) {
6105                                 add_pending(nodes, seen, ptr, size);
6106                         } else {
6107                                 add_pending(pending, seen, ptr, size);
6108                         }
6109                 }
6110                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6111                                       nritems) * sizeof(struct btrfs_key_ptr);
6112         }
6113         total_btree_bytes += buf->len;
6114         if (fs_root_objectid(btrfs_header_owner(buf)))
6115                 total_fs_tree_bytes += buf->len;
6116         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6117                 total_extent_tree_bytes += buf->len;
6118         if (!found_old_backref &&
6119             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6120             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6121             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6122                 found_old_backref = 1;
6123 out:
6124         free_extent_buffer(buf);
6125         return ret;
6126 }
6127
6128 static int add_root_to_pending(struct extent_buffer *buf,
6129                                struct cache_tree *extent_cache,
6130                                struct cache_tree *pending,
6131                                struct cache_tree *seen,
6132                                struct cache_tree *nodes,
6133                                u64 objectid)
6134 {
6135         if (btrfs_header_level(buf) > 0)
6136                 add_pending(nodes, seen, buf->start, buf->len);
6137         else
6138                 add_pending(pending, seen, buf->start, buf->len);
6139         add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6140                        0, 1, 1, 0, 1, 0, buf->len);
6141
6142         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6143             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6144                 add_tree_backref(extent_cache, buf->start, buf->start,
6145                                  0, 1);
6146         else
6147                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6148         return 0;
6149 }
6150
6151 /* as we fix the tree, we might be deleting blocks that
6152  * we're tracking for repair.  This hook makes sure we
6153  * remove any backrefs for blocks as we are fixing them.
6154  */
6155 static int free_extent_hook(struct btrfs_trans_handle *trans,
6156                             struct btrfs_root *root,
6157                             u64 bytenr, u64 num_bytes, u64 parent,
6158                             u64 root_objectid, u64 owner, u64 offset,
6159                             int refs_to_drop)
6160 {
6161         struct extent_record *rec;
6162         struct cache_extent *cache;
6163         int is_data;
6164         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6165
6166         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6167         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6168         if (!cache)
6169                 return 0;
6170
6171         rec = container_of(cache, struct extent_record, cache);
6172         if (is_data) {
6173                 struct data_backref *back;
6174                 back = find_data_backref(rec, parent, root_objectid, owner,
6175                                          offset, 1, bytenr, num_bytes);
6176                 if (!back)
6177                         goto out;
6178                 if (back->node.found_ref) {
6179                         back->found_ref -= refs_to_drop;
6180                         if (rec->refs)
6181                                 rec->refs -= refs_to_drop;
6182                 }
6183                 if (back->node.found_extent_tree) {
6184                         back->num_refs -= refs_to_drop;
6185                         if (rec->extent_item_refs)
6186                                 rec->extent_item_refs -= refs_to_drop;
6187                 }
6188                 if (back->found_ref == 0)
6189                         back->node.found_ref = 0;
6190                 if (back->num_refs == 0)
6191                         back->node.found_extent_tree = 0;
6192
6193                 if (!back->node.found_extent_tree && back->node.found_ref) {
6194                         list_del(&back->node.list);
6195                         free(back);
6196                 }
6197         } else {
6198                 struct tree_backref *back;
6199                 back = find_tree_backref(rec, parent, root_objectid);
6200                 if (!back)
6201                         goto out;
6202                 if (back->node.found_ref) {
6203                         if (rec->refs)
6204                                 rec->refs--;
6205                         back->node.found_ref = 0;
6206                 }
6207                 if (back->node.found_extent_tree) {
6208                         if (rec->extent_item_refs)
6209                                 rec->extent_item_refs--;
6210                         back->node.found_extent_tree = 0;
6211                 }
6212                 if (!back->node.found_extent_tree && back->node.found_ref) {
6213                         list_del(&back->node.list);
6214                         free(back);
6215                 }
6216         }
6217         maybe_free_extent_rec(extent_cache, rec);
6218 out:
6219         return 0;
6220 }
6221
6222 static int delete_extent_records(struct btrfs_trans_handle *trans,
6223                                  struct btrfs_root *root,
6224                                  struct btrfs_path *path,
6225                                  u64 bytenr, u64 new_len)
6226 {
6227         struct btrfs_key key;
6228         struct btrfs_key found_key;
6229         struct extent_buffer *leaf;
6230         int ret;
6231         int slot;
6232
6233
6234         key.objectid = bytenr;
6235         key.type = (u8)-1;
6236         key.offset = (u64)-1;
6237
6238         while(1) {
6239                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6240                                         &key, path, 0, 1);
6241                 if (ret < 0)
6242                         break;
6243
6244                 if (ret > 0) {
6245                         ret = 0;
6246                         if (path->slots[0] == 0)
6247                                 break;
6248                         path->slots[0]--;
6249                 }
6250                 ret = 0;
6251
6252                 leaf = path->nodes[0];
6253                 slot = path->slots[0];
6254
6255                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6256                 if (found_key.objectid != bytenr)
6257                         break;
6258
6259                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6260                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6261                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6262                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6263                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6264                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6265                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6266                         btrfs_release_path(path);
6267                         if (found_key.type == 0) {
6268                                 if (found_key.offset == 0)
6269                                         break;
6270                                 key.offset = found_key.offset - 1;
6271                                 key.type = found_key.type;
6272                         }
6273                         key.type = found_key.type - 1;
6274                         key.offset = (u64)-1;
6275                         continue;
6276                 }
6277
6278                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6279                         found_key.objectid, found_key.type, found_key.offset);
6280
6281                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6282                 if (ret)
6283                         break;
6284                 btrfs_release_path(path);
6285
6286                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6287                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6288                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6289                                 found_key.offset : root->leafsize;
6290
6291                         ret = btrfs_update_block_group(trans, root, bytenr,
6292                                                        bytes, 0, 0);
6293                         if (ret)
6294                                 break;
6295                 }
6296         }
6297
6298         btrfs_release_path(path);
6299         return ret;
6300 }
6301
6302 /*
6303  * for a single backref, this will allocate a new extent
6304  * and add the backref to it.
6305  */
6306 static int record_extent(struct btrfs_trans_handle *trans,
6307                          struct btrfs_fs_info *info,
6308                          struct btrfs_path *path,
6309                          struct extent_record *rec,
6310                          struct extent_backref *back,
6311                          int allocated, u64 flags)
6312 {
6313         int ret;
6314         struct btrfs_root *extent_root = info->extent_root;
6315         struct extent_buffer *leaf;
6316         struct btrfs_key ins_key;
6317         struct btrfs_extent_item *ei;
6318         struct tree_backref *tback;
6319         struct data_backref *dback;
6320         struct btrfs_tree_block_info *bi;
6321
6322         if (!back->is_data)
6323                 rec->max_size = max_t(u64, rec->max_size,
6324                                     info->extent_root->leafsize);
6325
6326         if (!allocated) {
6327                 u32 item_size = sizeof(*ei);
6328
6329                 if (!back->is_data)
6330                         item_size += sizeof(*bi);
6331
6332                 ins_key.objectid = rec->start;
6333                 ins_key.offset = rec->max_size;
6334                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6335
6336                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6337                                         &ins_key, item_size);
6338                 if (ret)
6339                         goto fail;
6340
6341                 leaf = path->nodes[0];
6342                 ei = btrfs_item_ptr(leaf, path->slots[0],
6343                                     struct btrfs_extent_item);
6344
6345                 btrfs_set_extent_refs(leaf, ei, 0);
6346                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6347
6348                 if (back->is_data) {
6349                         btrfs_set_extent_flags(leaf, ei,
6350                                                BTRFS_EXTENT_FLAG_DATA);
6351                 } else {
6352                         struct btrfs_disk_key copy_key;;
6353
6354                         tback = (struct tree_backref *)back;
6355                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6356                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6357                                              sizeof(*bi));
6358
6359                         btrfs_set_disk_key_objectid(&copy_key,
6360                                                     rec->info_objectid);
6361                         btrfs_set_disk_key_type(&copy_key, 0);
6362                         btrfs_set_disk_key_offset(&copy_key, 0);
6363
6364                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6365                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6366
6367                         btrfs_set_extent_flags(leaf, ei,
6368                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6369                 }
6370
6371                 btrfs_mark_buffer_dirty(leaf);
6372                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6373                                                rec->max_size, 1, 0);
6374                 if (ret)
6375                         goto fail;
6376                 btrfs_release_path(path);
6377         }
6378
6379         if (back->is_data) {
6380                 u64 parent;
6381                 int i;
6382
6383                 dback = (struct data_backref *)back;
6384                 if (back->full_backref)
6385                         parent = dback->parent;
6386                 else
6387                         parent = 0;
6388
6389                 for (i = 0; i < dback->found_ref; i++) {
6390                         /* if parent != 0, we're doing a full backref
6391                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6392                          * just makes the backref allocator create a data
6393                          * backref
6394                          */
6395                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6396                                                    rec->start, rec->max_size,
6397                                                    parent,
6398                                                    dback->root,
6399                                                    parent ?
6400                                                    BTRFS_FIRST_FREE_OBJECTID :
6401                                                    dback->owner,
6402                                                    dback->offset);
6403                         if (ret)
6404                                 break;
6405                 }
6406                 fprintf(stderr, "adding new data backref"
6407                                 " on %llu %s %llu owner %llu"
6408                                 " offset %llu found %d\n",
6409                                 (unsigned long long)rec->start,
6410                                 back->full_backref ?
6411                                 "parent" : "root",
6412                                 back->full_backref ?
6413                                 (unsigned long long)parent :
6414                                 (unsigned long long)dback->root,
6415                                 (unsigned long long)dback->owner,
6416                                 (unsigned long long)dback->offset,
6417                                 dback->found_ref);
6418         } else {
6419                 u64 parent;
6420
6421                 tback = (struct tree_backref *)back;
6422                 if (back->full_backref)
6423                         parent = tback->parent;
6424                 else
6425                         parent = 0;
6426
6427                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6428                                            rec->start, rec->max_size,
6429                                            parent, tback->root, 0, 0);
6430                 fprintf(stderr, "adding new tree backref on "
6431                         "start %llu len %llu parent %llu root %llu\n",
6432                         rec->start, rec->max_size, parent, tback->root);
6433         }
6434 fail:
6435         btrfs_release_path(path);
6436         return ret;
6437 }
6438
6439 struct extent_entry {
6440         u64 bytenr;
6441         u64 bytes;
6442         int count;
6443         int broken;
6444         struct list_head list;
6445 };
6446
6447 static struct extent_entry *find_entry(struct list_head *entries,
6448                                        u64 bytenr, u64 bytes)
6449 {
6450         struct extent_entry *entry = NULL;
6451
6452         list_for_each_entry(entry, entries, list) {
6453                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6454                         return entry;
6455         }
6456
6457         return NULL;
6458 }
6459
6460 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6461 {
6462         struct extent_entry *entry, *best = NULL, *prev = NULL;
6463
6464         list_for_each_entry(entry, entries, list) {
6465                 if (!prev) {
6466                         prev = entry;
6467                         continue;
6468                 }
6469
6470                 /*
6471                  * If there are as many broken entries as entries then we know
6472                  * not to trust this particular entry.
6473                  */
6474                 if (entry->broken == entry->count)
6475                         continue;
6476
6477                 /*
6478                  * If our current entry == best then we can't be sure our best
6479                  * is really the best, so we need to keep searching.
6480                  */
6481                 if (best && best->count == entry->count) {
6482                         prev = entry;
6483                         best = NULL;
6484                         continue;
6485                 }
6486
6487                 /* Prev == entry, not good enough, have to keep searching */
6488                 if (!prev->broken && prev->count == entry->count)
6489                         continue;
6490
6491                 if (!best)
6492                         best = (prev->count > entry->count) ? prev : entry;
6493                 else if (best->count < entry->count)
6494                         best = entry;
6495                 prev = entry;
6496         }
6497
6498         return best;
6499 }
6500
6501 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6502                       struct data_backref *dback, struct extent_entry *entry)
6503 {
6504         struct btrfs_trans_handle *trans;
6505         struct btrfs_root *root;
6506         struct btrfs_file_extent_item *fi;
6507         struct extent_buffer *leaf;
6508         struct btrfs_key key;
6509         u64 bytenr, bytes;
6510         int ret, err;
6511
6512         key.objectid = dback->root;
6513         key.type = BTRFS_ROOT_ITEM_KEY;
6514         key.offset = (u64)-1;
6515         root = btrfs_read_fs_root(info, &key);
6516         if (IS_ERR(root)) {
6517                 fprintf(stderr, "Couldn't find root for our ref\n");
6518                 return -EINVAL;
6519         }
6520
6521         /*
6522          * The backref points to the original offset of the extent if it was
6523          * split, so we need to search down to the offset we have and then walk
6524          * forward until we find the backref we're looking for.
6525          */
6526         key.objectid = dback->owner;
6527         key.type = BTRFS_EXTENT_DATA_KEY;
6528         key.offset = dback->offset;
6529         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6530         if (ret < 0) {
6531                 fprintf(stderr, "Error looking up ref %d\n", ret);
6532                 return ret;
6533         }
6534
6535         while (1) {
6536                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6537                         ret = btrfs_next_leaf(root, path);
6538                         if (ret) {
6539                                 fprintf(stderr, "Couldn't find our ref, next\n");
6540                                 return -EINVAL;
6541                         }
6542                 }
6543                 leaf = path->nodes[0];
6544                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6545                 if (key.objectid != dback->owner ||
6546                     key.type != BTRFS_EXTENT_DATA_KEY) {
6547                         fprintf(stderr, "Couldn't find our ref, search\n");
6548                         return -EINVAL;
6549                 }
6550                 fi = btrfs_item_ptr(leaf, path->slots[0],
6551                                     struct btrfs_file_extent_item);
6552                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6553                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6554
6555                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6556                         break;
6557                 path->slots[0]++;
6558         }
6559
6560         btrfs_release_path(path);
6561
6562         trans = btrfs_start_transaction(root, 1);
6563         if (IS_ERR(trans))
6564                 return PTR_ERR(trans);
6565
6566         /*
6567          * Ok we have the key of the file extent we want to fix, now we can cow
6568          * down to the thing and fix it.
6569          */
6570         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6571         if (ret < 0) {
6572                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6573                         key.objectid, key.type, key.offset, ret);
6574                 goto out;
6575         }
6576         if (ret > 0) {
6577                 fprintf(stderr, "Well that's odd, we just found this key "
6578                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6579                         key.offset);
6580                 ret = -EINVAL;
6581                 goto out;
6582         }
6583         leaf = path->nodes[0];
6584         fi = btrfs_item_ptr(leaf, path->slots[0],
6585                             struct btrfs_file_extent_item);
6586
6587         if (btrfs_file_extent_compression(leaf, fi) &&
6588             dback->disk_bytenr != entry->bytenr) {
6589                 fprintf(stderr, "Ref doesn't match the record start and is "
6590                         "compressed, please take a btrfs-image of this file "
6591                         "system and send it to a btrfs developer so they can "
6592                         "complete this functionality for bytenr %Lu\n",
6593                         dback->disk_bytenr);
6594                 ret = -EINVAL;
6595                 goto out;
6596         }
6597
6598         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6599                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6600         } else if (dback->disk_bytenr > entry->bytenr) {
6601                 u64 off_diff, offset;
6602
6603                 off_diff = dback->disk_bytenr - entry->bytenr;
6604                 offset = btrfs_file_extent_offset(leaf, fi);
6605                 if (dback->disk_bytenr + offset +
6606                     btrfs_file_extent_num_bytes(leaf, fi) >
6607                     entry->bytenr + entry->bytes) {
6608                         fprintf(stderr, "Ref is past the entry end, please "
6609                                 "take a btrfs-image of this file system and "
6610                                 "send it to a btrfs developer, ref %Lu\n",
6611                                 dback->disk_bytenr);
6612                         ret = -EINVAL;
6613                         goto out;
6614                 }
6615                 offset += off_diff;
6616                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6617                 btrfs_set_file_extent_offset(leaf, fi, offset);
6618         } else if (dback->disk_bytenr < entry->bytenr) {
6619                 u64 offset;
6620
6621                 offset = btrfs_file_extent_offset(leaf, fi);
6622                 if (dback->disk_bytenr + offset < entry->bytenr) {
6623                         fprintf(stderr, "Ref is before the entry start, please"
6624                                 " take a btrfs-image of this file system and "
6625                                 "send it to a btrfs developer, ref %Lu\n",
6626                                 dback->disk_bytenr);
6627                         ret = -EINVAL;
6628                         goto out;
6629                 }
6630
6631                 offset += dback->disk_bytenr;
6632                 offset -= entry->bytenr;
6633                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6634                 btrfs_set_file_extent_offset(leaf, fi, offset);
6635         }
6636
6637         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6638
6639         /*
6640          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6641          * only do this if we aren't using compression, otherwise it's a
6642          * trickier case.
6643          */
6644         if (!btrfs_file_extent_compression(leaf, fi))
6645                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6646         else
6647                 printf("ram bytes may be wrong?\n");
6648         btrfs_mark_buffer_dirty(leaf);
6649 out:
6650         err = btrfs_commit_transaction(trans, root);
6651         btrfs_release_path(path);
6652         return ret ? ret : err;
6653 }
6654
6655 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6656                            struct extent_record *rec)
6657 {
6658         struct extent_backref *back;
6659         struct data_backref *dback;
6660         struct extent_entry *entry, *best = NULL;
6661         LIST_HEAD(entries);
6662         int nr_entries = 0;
6663         int broken_entries = 0;
6664         int ret = 0;
6665         short mismatch = 0;
6666
6667         /*
6668          * Metadata is easy and the backrefs should always agree on bytenr and
6669          * size, if not we've got bigger issues.
6670          */
6671         if (rec->metadata)
6672                 return 0;
6673
6674         list_for_each_entry(back, &rec->backrefs, list) {
6675                 if (back->full_backref || !back->is_data)
6676                         continue;
6677
6678                 dback = (struct data_backref *)back;
6679
6680                 /*
6681                  * We only pay attention to backrefs that we found a real
6682                  * backref for.
6683                  */
6684                 if (dback->found_ref == 0)
6685                         continue;
6686
6687                 /*
6688                  * For now we only catch when the bytes don't match, not the
6689                  * bytenr.  We can easily do this at the same time, but I want
6690                  * to have a fs image to test on before we just add repair
6691                  * functionality willy-nilly so we know we won't screw up the
6692                  * repair.
6693                  */
6694
6695                 entry = find_entry(&entries, dback->disk_bytenr,
6696                                    dback->bytes);
6697                 if (!entry) {
6698                         entry = malloc(sizeof(struct extent_entry));
6699                         if (!entry) {
6700                                 ret = -ENOMEM;
6701                                 goto out;
6702                         }
6703                         memset(entry, 0, sizeof(*entry));
6704                         entry->bytenr = dback->disk_bytenr;
6705                         entry->bytes = dback->bytes;
6706                         list_add_tail(&entry->list, &entries);
6707                         nr_entries++;
6708                 }
6709
6710                 /*
6711                  * If we only have on entry we may think the entries agree when
6712                  * in reality they don't so we have to do some extra checking.
6713                  */
6714                 if (dback->disk_bytenr != rec->start ||
6715                     dback->bytes != rec->nr || back->broken)
6716                         mismatch = 1;
6717
6718                 if (back->broken) {
6719                         entry->broken++;
6720                         broken_entries++;
6721                 }
6722
6723                 entry->count++;
6724         }
6725
6726         /* Yay all the backrefs agree, carry on good sir */
6727         if (nr_entries <= 1 && !mismatch)
6728                 goto out;
6729
6730         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6731                 "%Lu\n", rec->start);
6732
6733         /*
6734          * First we want to see if the backrefs can agree amongst themselves who
6735          * is right, so figure out which one of the entries has the highest
6736          * count.
6737          */
6738         best = find_most_right_entry(&entries);
6739
6740         /*
6741          * Ok so we may have an even split between what the backrefs think, so
6742          * this is where we use the extent ref to see what it thinks.
6743          */
6744         if (!best) {
6745                 entry = find_entry(&entries, rec->start, rec->nr);
6746                 if (!entry && (!broken_entries || !rec->found_rec)) {
6747                         fprintf(stderr, "Backrefs don't agree with each other "
6748                                 "and extent record doesn't agree with anybody,"
6749                                 " so we can't fix bytenr %Lu bytes %Lu\n",
6750                                 rec->start, rec->nr);
6751                         ret = -EINVAL;
6752                         goto out;
6753                 } else if (!entry) {
6754                         /*
6755                          * Ok our backrefs were broken, we'll assume this is the
6756                          * correct value and add an entry for this range.
6757                          */
6758                         entry = malloc(sizeof(struct extent_entry));
6759                         if (!entry) {
6760                                 ret = -ENOMEM;
6761                                 goto out;
6762                         }
6763                         memset(entry, 0, sizeof(*entry));
6764                         entry->bytenr = rec->start;
6765                         entry->bytes = rec->nr;
6766                         list_add_tail(&entry->list, &entries);
6767                         nr_entries++;
6768                 }
6769                 entry->count++;
6770                 best = find_most_right_entry(&entries);
6771                 if (!best) {
6772                         fprintf(stderr, "Backrefs and extent record evenly "
6773                                 "split on who is right, this is going to "
6774                                 "require user input to fix bytenr %Lu bytes "
6775                                 "%Lu\n", rec->start, rec->nr);
6776                         ret = -EINVAL;
6777                         goto out;
6778                 }
6779         }
6780
6781         /*
6782          * I don't think this can happen currently as we'll abort() if we catch
6783          * this case higher up, but in case somebody removes that we still can't
6784          * deal with it properly here yet, so just bail out of that's the case.
6785          */
6786         if (best->bytenr != rec->start) {
6787                 fprintf(stderr, "Extent start and backref starts don't match, "
6788                         "please use btrfs-image on this file system and send "
6789                         "it to a btrfs developer so they can make fsck fix "
6790                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
6791                         rec->start, rec->nr);
6792                 ret = -EINVAL;
6793                 goto out;
6794         }
6795
6796         /*
6797          * Ok great we all agreed on an extent record, let's go find the real
6798          * references and fix up the ones that don't match.
6799          */
6800         list_for_each_entry(back, &rec->backrefs, list) {
6801                 if (back->full_backref || !back->is_data)
6802                         continue;
6803
6804                 dback = (struct data_backref *)back;
6805
6806                 /*
6807                  * Still ignoring backrefs that don't have a real ref attached
6808                  * to them.
6809                  */
6810                 if (dback->found_ref == 0)
6811                         continue;
6812
6813                 if (dback->bytes == best->bytes &&
6814                     dback->disk_bytenr == best->bytenr)
6815                         continue;
6816
6817                 ret = repair_ref(info, path, dback, best);
6818                 if (ret)
6819                         goto out;
6820         }
6821
6822         /*
6823          * Ok we messed with the actual refs, which means we need to drop our
6824          * entire cache and go back and rescan.  I know this is a huge pain and
6825          * adds a lot of extra work, but it's the only way to be safe.  Once all
6826          * the backrefs agree we may not need to do anything to the extent
6827          * record itself.
6828          */
6829         ret = -EAGAIN;
6830 out:
6831         while (!list_empty(&entries)) {
6832                 entry = list_entry(entries.next, struct extent_entry, list);
6833                 list_del_init(&entry->list);
6834                 free(entry);
6835         }
6836         return ret;
6837 }
6838
6839 static int process_duplicates(struct btrfs_root *root,
6840                               struct cache_tree *extent_cache,
6841                               struct extent_record *rec)
6842 {
6843         struct extent_record *good, *tmp;
6844         struct cache_extent *cache;
6845         int ret;
6846
6847         /*
6848          * If we found a extent record for this extent then return, or if we
6849          * have more than one duplicate we are likely going to need to delete
6850          * something.
6851          */
6852         if (rec->found_rec || rec->num_duplicates > 1)
6853                 return 0;
6854
6855         /* Shouldn't happen but just in case */
6856         BUG_ON(!rec->num_duplicates);
6857
6858         /*
6859          * So this happens if we end up with a backref that doesn't match the
6860          * actual extent entry.  So either the backref is bad or the extent
6861          * entry is bad.  Either way we want to have the extent_record actually
6862          * reflect what we found in the extent_tree, so we need to take the
6863          * duplicate out and use that as the extent_record since the only way we
6864          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6865          */
6866         remove_cache_extent(extent_cache, &rec->cache);
6867
6868         good = list_entry(rec->dups.next, struct extent_record, list);
6869         list_del_init(&good->list);
6870         INIT_LIST_HEAD(&good->backrefs);
6871         INIT_LIST_HEAD(&good->dups);
6872         good->cache.start = good->start;
6873         good->cache.size = good->nr;
6874         good->content_checked = 0;
6875         good->owner_ref_checked = 0;
6876         good->num_duplicates = 0;
6877         good->refs = rec->refs;
6878         list_splice_init(&rec->backrefs, &good->backrefs);
6879         while (1) {
6880                 cache = lookup_cache_extent(extent_cache, good->start,
6881                                             good->nr);
6882                 if (!cache)
6883                         break;
6884                 tmp = container_of(cache, struct extent_record, cache);
6885
6886                 /*
6887                  * If we find another overlapping extent and it's found_rec is
6888                  * set then it's a duplicate and we need to try and delete
6889                  * something.
6890                  */
6891                 if (tmp->found_rec || tmp->num_duplicates > 0) {
6892                         if (list_empty(&good->list))
6893                                 list_add_tail(&good->list,
6894                                               &duplicate_extents);
6895                         good->num_duplicates += tmp->num_duplicates + 1;
6896                         list_splice_init(&tmp->dups, &good->dups);
6897                         list_del_init(&tmp->list);
6898                         list_add_tail(&tmp->list, &good->dups);
6899                         remove_cache_extent(extent_cache, &tmp->cache);
6900                         continue;
6901                 }
6902
6903                 /*
6904                  * Ok we have another non extent item backed extent rec, so lets
6905                  * just add it to this extent and carry on like we did above.
6906                  */
6907                 good->refs += tmp->refs;
6908                 list_splice_init(&tmp->backrefs, &good->backrefs);
6909                 remove_cache_extent(extent_cache, &tmp->cache);
6910                 free(tmp);
6911         }
6912         ret = insert_cache_extent(extent_cache, &good->cache);
6913         BUG_ON(ret);
6914         free(rec);
6915         return good->num_duplicates ? 0 : 1;
6916 }
6917
6918 static int delete_duplicate_records(struct btrfs_root *root,
6919                                     struct extent_record *rec)
6920 {
6921         struct btrfs_trans_handle *trans;
6922         LIST_HEAD(delete_list);
6923         struct btrfs_path *path;
6924         struct extent_record *tmp, *good, *n;
6925         int nr_del = 0;
6926         int ret = 0, err;
6927         struct btrfs_key key;
6928
6929         path = btrfs_alloc_path();
6930         if (!path) {
6931                 ret = -ENOMEM;
6932                 goto out;
6933         }
6934
6935         good = rec;
6936         /* Find the record that covers all of the duplicates. */
6937         list_for_each_entry(tmp, &rec->dups, list) {
6938                 if (good->start < tmp->start)
6939                         continue;
6940                 if (good->nr > tmp->nr)
6941                         continue;
6942
6943                 if (tmp->start + tmp->nr < good->start + good->nr) {
6944                         fprintf(stderr, "Ok we have overlapping extents that "
6945                                 "aren't completely covered by eachother, this "
6946                                 "is going to require more careful thought.  "
6947                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
6948                                 tmp->start, tmp->nr, good->start, good->nr);
6949                         abort();
6950                 }
6951                 good = tmp;
6952         }
6953
6954         if (good != rec)
6955                 list_add_tail(&rec->list, &delete_list);
6956
6957         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
6958                 if (tmp == good)
6959                         continue;
6960                 list_move_tail(&tmp->list, &delete_list);
6961         }
6962
6963         root = root->fs_info->extent_root;
6964         trans = btrfs_start_transaction(root, 1);
6965         if (IS_ERR(trans)) {
6966                 ret = PTR_ERR(trans);
6967                 goto out;
6968         }
6969
6970         list_for_each_entry(tmp, &delete_list, list) {
6971                 if (tmp->found_rec == 0)
6972                         continue;
6973                 key.objectid = tmp->start;
6974                 key.type = BTRFS_EXTENT_ITEM_KEY;
6975                 key.offset = tmp->nr;
6976
6977                 /* Shouldn't happen but just in case */
6978                 if (tmp->metadata) {
6979                         fprintf(stderr, "Well this shouldn't happen, extent "
6980                                 "record overlaps but is metadata? "
6981                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
6982                         abort();
6983                 }
6984
6985                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6986                 if (ret) {
6987                         if (ret > 0)
6988                                 ret = -EINVAL;
6989                         break;
6990                 }
6991                 ret = btrfs_del_item(trans, root, path);
6992                 if (ret)
6993                         break;
6994                 btrfs_release_path(path);
6995                 nr_del++;
6996         }
6997         err = btrfs_commit_transaction(trans, root);
6998         if (err && !ret)
6999                 ret = err;
7000 out:
7001         while (!list_empty(&delete_list)) {
7002                 tmp = list_entry(delete_list.next, struct extent_record, list);
7003                 list_del_init(&tmp->list);
7004                 if (tmp == rec)
7005                         continue;
7006                 free(tmp);
7007         }
7008
7009         while (!list_empty(&rec->dups)) {
7010                 tmp = list_entry(rec->dups.next, struct extent_record, list);
7011                 list_del_init(&tmp->list);
7012                 free(tmp);
7013         }
7014
7015         btrfs_free_path(path);
7016
7017         if (!ret && !nr_del)
7018                 rec->num_duplicates = 0;
7019
7020         return ret ? ret : nr_del;
7021 }
7022
7023 static int find_possible_backrefs(struct btrfs_fs_info *info,
7024                                   struct btrfs_path *path,
7025                                   struct cache_tree *extent_cache,
7026                                   struct extent_record *rec)
7027 {
7028         struct btrfs_root *root;
7029         struct extent_backref *back;
7030         struct data_backref *dback;
7031         struct cache_extent *cache;
7032         struct btrfs_file_extent_item *fi;
7033         struct btrfs_key key;
7034         u64 bytenr, bytes;
7035         int ret;
7036
7037         list_for_each_entry(back, &rec->backrefs, list) {
7038                 /* Don't care about full backrefs (poor unloved backrefs) */
7039                 if (back->full_backref || !back->is_data)
7040                         continue;
7041
7042                 dback = (struct data_backref *)back;
7043
7044                 /* We found this one, we don't need to do a lookup */
7045                 if (dback->found_ref)
7046                         continue;
7047
7048                 key.objectid = dback->root;
7049                 key.type = BTRFS_ROOT_ITEM_KEY;
7050                 key.offset = (u64)-1;
7051
7052                 root = btrfs_read_fs_root(info, &key);
7053
7054                 /* No root, definitely a bad ref, skip */
7055                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7056                         continue;
7057                 /* Other err, exit */
7058                 if (IS_ERR(root))
7059                         return PTR_ERR(root);
7060
7061                 key.objectid = dback->owner;
7062                 key.type = BTRFS_EXTENT_DATA_KEY;
7063                 key.offset = dback->offset;
7064                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7065                 if (ret) {
7066                         btrfs_release_path(path);
7067                         if (ret < 0)
7068                                 return ret;
7069                         /* Didn't find it, we can carry on */
7070                         ret = 0;
7071                         continue;
7072                 }
7073
7074                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7075                                     struct btrfs_file_extent_item);
7076                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7077                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7078                 btrfs_release_path(path);
7079                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7080                 if (cache) {
7081                         struct extent_record *tmp;
7082                         tmp = container_of(cache, struct extent_record, cache);
7083
7084                         /*
7085                          * If we found an extent record for the bytenr for this
7086                          * particular backref then we can't add it to our
7087                          * current extent record.  We only want to add backrefs
7088                          * that don't have a corresponding extent item in the
7089                          * extent tree since they likely belong to this record
7090                          * and we need to fix it if it doesn't match bytenrs.
7091                          */
7092                         if  (tmp->found_rec)
7093                                 continue;
7094                 }
7095
7096                 dback->found_ref += 1;
7097                 dback->disk_bytenr = bytenr;
7098                 dback->bytes = bytes;
7099
7100                 /*
7101                  * Set this so the verify backref code knows not to trust the
7102                  * values in this backref.
7103                  */
7104                 back->broken = 1;
7105         }
7106
7107         return 0;
7108 }
7109
7110 /*
7111  * Record orphan data ref into corresponding root.
7112  *
7113  * Return 0 if the extent item contains data ref and recorded.
7114  * Return 1 if the extent item contains no useful data ref
7115  *   On that case, it may contains only shared_dataref or metadata backref
7116  *   or the file extent exists(this should be handled by the extent bytenr
7117  *   recovery routine)
7118  * Return <0 if something goes wrong.
7119  */
7120 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7121                                       struct extent_record *rec)
7122 {
7123         struct btrfs_key key;
7124         struct btrfs_root *dest_root;
7125         struct extent_backref *back;
7126         struct data_backref *dback;
7127         struct orphan_data_extent *orphan;
7128         struct btrfs_path *path;
7129         int recorded_data_ref = 0;
7130         int ret = 0;
7131
7132         if (rec->metadata)
7133                 return 1;
7134         path = btrfs_alloc_path();
7135         if (!path)
7136                 return -ENOMEM;
7137         list_for_each_entry(back, &rec->backrefs, list) {
7138                 if (back->full_backref || !back->is_data ||
7139                     !back->found_extent_tree)
7140                         continue;
7141                 dback = (struct data_backref *)back;
7142                 if (dback->found_ref)
7143                         continue;
7144                 key.objectid = dback->root;
7145                 key.type = BTRFS_ROOT_ITEM_KEY;
7146                 key.offset = (u64)-1;
7147
7148                 dest_root = btrfs_read_fs_root(fs_info, &key);
7149
7150                 /* For non-exist root we just skip it */
7151                 if (IS_ERR(dest_root) || !dest_root)
7152                         continue;
7153
7154                 key.objectid = dback->owner;
7155                 key.type = BTRFS_EXTENT_DATA_KEY;
7156                 key.offset = dback->offset;
7157
7158                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7159                 /*
7160                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7161                  * we need to record it for inode/file extent rebuild.
7162                  * For ret > 0, we record it only for file extent rebuild.
7163                  * For ret == 0, the file extent exists but only bytenr
7164                  * mismatch, let the original bytenr fix routine to handle,
7165                  * don't record it.
7166                  */
7167                 if (ret == 0)
7168                         continue;
7169                 ret = 0;
7170                 orphan = malloc(sizeof(*orphan));
7171                 if (!orphan) {
7172                         ret = -ENOMEM;
7173                         goto out;
7174                 }
7175                 INIT_LIST_HEAD(&orphan->list);
7176                 orphan->root = dback->root;
7177                 orphan->objectid = dback->owner;
7178                 orphan->offset = dback->offset;
7179                 orphan->disk_bytenr = rec->cache.start;
7180                 orphan->disk_len = rec->cache.size;
7181                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7182                 recorded_data_ref = 1;
7183         }
7184 out:
7185         btrfs_free_path(path);
7186         if (!ret)
7187                 return !recorded_data_ref;
7188         else
7189                 return ret;
7190 }
7191
7192 /*
7193  * when an incorrect extent item is found, this will delete
7194  * all of the existing entries for it and recreate them
7195  * based on what the tree scan found.
7196  */
7197 static int fixup_extent_refs(struct btrfs_fs_info *info,
7198                              struct cache_tree *extent_cache,
7199                              struct extent_record *rec)
7200 {
7201         struct btrfs_trans_handle *trans = NULL;
7202         int ret;
7203         struct btrfs_path *path;
7204         struct list_head *cur = rec->backrefs.next;
7205         struct cache_extent *cache;
7206         struct extent_backref *back;
7207         int allocated = 0;
7208         u64 flags = 0;
7209
7210         if (rec->flag_block_full_backref)
7211                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7212
7213         path = btrfs_alloc_path();
7214         if (!path)
7215                 return -ENOMEM;
7216
7217         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7218                 /*
7219                  * Sometimes the backrefs themselves are so broken they don't
7220                  * get attached to any meaningful rec, so first go back and
7221                  * check any of our backrefs that we couldn't find and throw
7222                  * them into the list if we find the backref so that
7223                  * verify_backrefs can figure out what to do.
7224                  */
7225                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7226                 if (ret < 0)
7227                         goto out;
7228         }
7229
7230         /* step one, make sure all of the backrefs agree */
7231         ret = verify_backrefs(info, path, rec);
7232         if (ret < 0)
7233                 goto out;
7234
7235         trans = btrfs_start_transaction(info->extent_root, 1);
7236         if (IS_ERR(trans)) {
7237                 ret = PTR_ERR(trans);
7238                 goto out;
7239         }
7240
7241         /* step two, delete all the existing records */
7242         ret = delete_extent_records(trans, info->extent_root, path,
7243                                     rec->start, rec->max_size);
7244
7245         if (ret < 0)
7246                 goto out;
7247
7248         /* was this block corrupt?  If so, don't add references to it */
7249         cache = lookup_cache_extent(info->corrupt_blocks,
7250                                     rec->start, rec->max_size);
7251         if (cache) {
7252                 ret = 0;
7253                 goto out;
7254         }
7255
7256         /* step three, recreate all the refs we did find */
7257         while(cur != &rec->backrefs) {
7258                 back = list_entry(cur, struct extent_backref, list);
7259                 cur = cur->next;
7260
7261                 /*
7262                  * if we didn't find any references, don't create a
7263                  * new extent record
7264                  */
7265                 if (!back->found_ref)
7266                         continue;
7267
7268                 rec->bad_full_backref = 0;
7269                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7270                 allocated = 1;
7271
7272                 if (ret)
7273                         goto out;
7274         }
7275 out:
7276         if (trans) {
7277                 int err = btrfs_commit_transaction(trans, info->extent_root);
7278                 if (!ret)
7279                         ret = err;
7280         }
7281
7282         btrfs_free_path(path);
7283         return ret;
7284 }
7285
7286 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7287                               struct extent_record *rec)
7288 {
7289         struct btrfs_trans_handle *trans;
7290         struct btrfs_root *root = fs_info->extent_root;
7291         struct btrfs_path *path;
7292         struct btrfs_extent_item *ei;
7293         struct btrfs_key key;
7294         u64 flags;
7295         int ret = 0;
7296
7297         key.objectid = rec->start;
7298         if (rec->metadata) {
7299                 key.type = BTRFS_METADATA_ITEM_KEY;
7300                 key.offset = rec->info_level;
7301         } else {
7302                 key.type = BTRFS_EXTENT_ITEM_KEY;
7303                 key.offset = rec->max_size;
7304         }
7305
7306         path = btrfs_alloc_path();
7307         if (!path)
7308                 return -ENOMEM;
7309
7310         trans = btrfs_start_transaction(root, 0);
7311         if (IS_ERR(trans)) {
7312                 btrfs_free_path(path);
7313                 return PTR_ERR(trans);
7314         }
7315
7316         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7317         if (ret < 0) {
7318                 btrfs_free_path(path);
7319                 btrfs_commit_transaction(trans, root);
7320                 return ret;
7321         } else if (ret) {
7322                 fprintf(stderr, "Didn't find extent for %llu\n",
7323                         (unsigned long long)rec->start);
7324                 btrfs_free_path(path);
7325                 btrfs_commit_transaction(trans, root);
7326                 return -ENOENT;
7327         }
7328
7329         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7330                             struct btrfs_extent_item);
7331         flags = btrfs_extent_flags(path->nodes[0], ei);
7332         if (rec->flag_block_full_backref) {
7333                 fprintf(stderr, "setting full backref on %llu\n",
7334                         (unsigned long long)key.objectid);
7335                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7336         } else {
7337                 fprintf(stderr, "clearing full backref on %llu\n",
7338                         (unsigned long long)key.objectid);
7339                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7340         }
7341         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7342         btrfs_mark_buffer_dirty(path->nodes[0]);
7343         btrfs_free_path(path);
7344         return btrfs_commit_transaction(trans, root);
7345 }
7346
7347 /* right now we only prune from the extent allocation tree */
7348 static int prune_one_block(struct btrfs_trans_handle *trans,
7349                            struct btrfs_fs_info *info,
7350                            struct btrfs_corrupt_block *corrupt)
7351 {
7352         int ret;
7353         struct btrfs_path path;
7354         struct extent_buffer *eb;
7355         u64 found;
7356         int slot;
7357         int nritems;
7358         int level = corrupt->level + 1;
7359
7360         btrfs_init_path(&path);
7361 again:
7362         /* we want to stop at the parent to our busted block */
7363         path.lowest_level = level;
7364
7365         ret = btrfs_search_slot(trans, info->extent_root,
7366                                 &corrupt->key, &path, -1, 1);
7367
7368         if (ret < 0)
7369                 goto out;
7370
7371         eb = path.nodes[level];
7372         if (!eb) {
7373                 ret = -ENOENT;
7374                 goto out;
7375         }
7376
7377         /*
7378          * hopefully the search gave us the block we want to prune,
7379          * lets try that first
7380          */
7381         slot = path.slots[level];
7382         found =  btrfs_node_blockptr(eb, slot);
7383         if (found == corrupt->cache.start)
7384                 goto del_ptr;
7385
7386         nritems = btrfs_header_nritems(eb);
7387
7388         /* the search failed, lets scan this node and hope we find it */
7389         for (slot = 0; slot < nritems; slot++) {
7390                 found =  btrfs_node_blockptr(eb, slot);
7391                 if (found == corrupt->cache.start)
7392                         goto del_ptr;
7393         }
7394         /*
7395          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7396          * to this block
7397          */
7398         if (eb == info->extent_root->node) {
7399                 ret = -ENOENT;
7400                 goto out;
7401         } else {
7402                 level++;
7403                 btrfs_release_path(&path);
7404                 goto again;
7405         }
7406
7407 del_ptr:
7408         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7409         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7410
7411 out:
7412         btrfs_release_path(&path);
7413         return ret;
7414 }
7415
7416 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7417 {
7418         struct btrfs_trans_handle *trans = NULL;
7419         struct cache_extent *cache;
7420         struct btrfs_corrupt_block *corrupt;
7421
7422         while (1) {
7423                 cache = search_cache_extent(info->corrupt_blocks, 0);
7424                 if (!cache)
7425                         break;
7426                 if (!trans) {
7427                         trans = btrfs_start_transaction(info->extent_root, 1);
7428                         if (IS_ERR(trans))
7429                                 return PTR_ERR(trans);
7430                 }
7431                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7432                 prune_one_block(trans, info, corrupt);
7433                 remove_cache_extent(info->corrupt_blocks, cache);
7434         }
7435         if (trans)
7436                 return btrfs_commit_transaction(trans, info->extent_root);
7437         return 0;
7438 }
7439
7440 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7441 {
7442         struct btrfs_block_group_cache *cache;
7443         u64 start, end;
7444         int ret;
7445
7446         while (1) {
7447                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7448                                             &start, &end, EXTENT_DIRTY);
7449                 if (ret)
7450                         break;
7451                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7452                                    GFP_NOFS);
7453         }
7454
7455         start = 0;
7456         while (1) {
7457                 cache = btrfs_lookup_first_block_group(fs_info, start);
7458                 if (!cache)
7459                         break;
7460                 if (cache->cached)
7461                         cache->cached = 0;
7462                 start = cache->key.objectid + cache->key.offset;
7463         }
7464 }
7465
7466 static int check_extent_refs(struct btrfs_root *root,
7467                              struct cache_tree *extent_cache)
7468 {
7469         struct extent_record *rec;
7470         struct cache_extent *cache;
7471         int err = 0;
7472         int ret = 0;
7473         int fixed = 0;
7474         int had_dups = 0;
7475         int recorded = 0;
7476
7477         if (repair) {
7478                 /*
7479                  * if we're doing a repair, we have to make sure
7480                  * we don't allocate from the problem extents.
7481                  * In the worst case, this will be all the
7482                  * extents in the FS
7483                  */
7484                 cache = search_cache_extent(extent_cache, 0);
7485                 while(cache) {
7486                         rec = container_of(cache, struct extent_record, cache);
7487                         set_extent_dirty(root->fs_info->excluded_extents,
7488                                          rec->start,
7489                                          rec->start + rec->max_size - 1,
7490                                          GFP_NOFS);
7491                         cache = next_cache_extent(cache);
7492                 }
7493
7494                 /* pin down all the corrupted blocks too */
7495                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7496                 while(cache) {
7497                         set_extent_dirty(root->fs_info->excluded_extents,
7498                                          cache->start,
7499                                          cache->start + cache->size - 1,
7500                                          GFP_NOFS);
7501                         cache = next_cache_extent(cache);
7502                 }
7503                 prune_corrupt_blocks(root->fs_info);
7504                 reset_cached_block_groups(root->fs_info);
7505         }
7506
7507         reset_cached_block_groups(root->fs_info);
7508
7509         /*
7510          * We need to delete any duplicate entries we find first otherwise we
7511          * could mess up the extent tree when we have backrefs that actually
7512          * belong to a different extent item and not the weird duplicate one.
7513          */
7514         while (repair && !list_empty(&duplicate_extents)) {
7515                 rec = list_entry(duplicate_extents.next, struct extent_record,
7516                                  list);
7517                 list_del_init(&rec->list);
7518
7519                 /* Sometimes we can find a backref before we find an actual
7520                  * extent, so we need to process it a little bit to see if there
7521                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7522                  * if this is a backref screwup.  If we need to delete stuff
7523                  * process_duplicates() will return 0, otherwise it will return
7524                  * 1 and we
7525                  */
7526                 if (process_duplicates(root, extent_cache, rec))
7527                         continue;
7528                 ret = delete_duplicate_records(root, rec);
7529                 if (ret < 0)
7530                         return ret;
7531                 /*
7532                  * delete_duplicate_records will return the number of entries
7533                  * deleted, so if it's greater than 0 then we know we actually
7534                  * did something and we need to remove.
7535                  */
7536                 if (ret)
7537                         had_dups = 1;
7538         }
7539
7540         if (had_dups)
7541                 return -EAGAIN;
7542
7543         while(1) {
7544                 int cur_err = 0;
7545
7546                 fixed = 0;
7547                 recorded = 0;
7548                 cache = search_cache_extent(extent_cache, 0);
7549                 if (!cache)
7550                         break;
7551                 rec = container_of(cache, struct extent_record, cache);
7552                 if (rec->num_duplicates) {
7553                         fprintf(stderr, "extent item %llu has multiple extent "
7554                                 "items\n", (unsigned long long)rec->start);
7555                         err = 1;
7556                         cur_err = 1;
7557                 }
7558
7559                 if (rec->refs != rec->extent_item_refs) {
7560                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7561                                 (unsigned long long)rec->start,
7562                                 (unsigned long long)rec->nr);
7563                         fprintf(stderr, "extent item %llu, found %llu\n",
7564                                 (unsigned long long)rec->extent_item_refs,
7565                                 (unsigned long long)rec->refs);
7566                         ret = record_orphan_data_extents(root->fs_info, rec);
7567                         if (ret < 0)
7568                                 goto repair_abort;
7569                         if (ret == 0) {
7570                                 recorded = 1;
7571                         } else {
7572                                 /*
7573                                  * we can't use the extent to repair file
7574                                  * extent, let the fallback method handle it.
7575                                  */
7576                                 if (!fixed && repair) {
7577                                         ret = fixup_extent_refs(
7578                                                         root->fs_info,
7579                                                         extent_cache, rec);
7580                                         if (ret)
7581                                                 goto repair_abort;
7582                                         fixed = 1;
7583                                 }
7584                         }
7585                         err = 1;
7586                         cur_err = 1;
7587                 }
7588                 if (all_backpointers_checked(rec, 1)) {
7589                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7590                                 (unsigned long long)rec->start,
7591                                 (unsigned long long)rec->nr);
7592
7593                         if (!fixed && !recorded && repair) {
7594                                 ret = fixup_extent_refs(root->fs_info,
7595                                                         extent_cache, rec);
7596                                 if (ret)
7597                                         goto repair_abort;
7598                                 fixed = 1;
7599                         }
7600                         cur_err = 1;
7601                         err = 1;
7602                 }
7603                 if (!rec->owner_ref_checked) {
7604                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7605                                 (unsigned long long)rec->start,
7606                                 (unsigned long long)rec->nr);
7607                         if (!fixed && !recorded && repair) {
7608                                 ret = fixup_extent_refs(root->fs_info,
7609                                                         extent_cache, rec);
7610                                 if (ret)
7611                                         goto repair_abort;
7612                                 fixed = 1;
7613                         }
7614                         err = 1;
7615                         cur_err = 1;
7616                 }
7617                 if (rec->bad_full_backref) {
7618                         fprintf(stderr, "bad full backref, on [%llu]\n",
7619                                 (unsigned long long)rec->start);
7620                         if (repair) {
7621                                 ret = fixup_extent_flags(root->fs_info, rec);
7622                                 if (ret)
7623                                         goto repair_abort;
7624                                 fixed = 1;
7625                         }
7626                         err = 1;
7627                         cur_err = 1;
7628                 }
7629                 /*
7630                  * Although it's not a extent ref's problem, we reuse this
7631                  * routine for error reporting.
7632                  * No repair function yet.
7633                  */
7634                 if (rec->crossing_stripes) {
7635                         fprintf(stderr,
7636                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7637                                 rec->start, rec->start + rec->max_size);
7638                         err = 1;
7639                         cur_err = 1;
7640                 }
7641
7642                 if (rec->wrong_chunk_type) {
7643                         fprintf(stderr,
7644                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7645                                 rec->start, rec->start + rec->max_size);
7646                         err = 1;
7647                         cur_err = 1;
7648                 }
7649
7650                 remove_cache_extent(extent_cache, cache);
7651                 free_all_extent_backrefs(rec);
7652                 if (!init_extent_tree && repair && (!cur_err || fixed))
7653                         clear_extent_dirty(root->fs_info->excluded_extents,
7654                                            rec->start,
7655                                            rec->start + rec->max_size - 1,
7656                                            GFP_NOFS);
7657                 free(rec);
7658         }
7659 repair_abort:
7660         if (repair) {
7661                 if (ret && ret != -EAGAIN) {
7662                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7663                         exit(1);
7664                 } else if (!ret) {
7665                         struct btrfs_trans_handle *trans;
7666
7667                         root = root->fs_info->extent_root;
7668                         trans = btrfs_start_transaction(root, 1);
7669                         if (IS_ERR(trans)) {
7670                                 ret = PTR_ERR(trans);
7671                                 goto repair_abort;
7672                         }
7673
7674                         btrfs_fix_block_accounting(trans, root);
7675                         ret = btrfs_commit_transaction(trans, root);
7676                         if (ret)
7677                                 goto repair_abort;
7678                 }
7679                 if (err)
7680                         fprintf(stderr, "repaired damaged extent references\n");
7681                 return ret;
7682         }
7683         return err;
7684 }
7685
7686 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7687 {
7688         u64 stripe_size;
7689
7690         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7691                 stripe_size = length;
7692                 stripe_size /= num_stripes;
7693         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7694                 stripe_size = length * 2;
7695                 stripe_size /= num_stripes;
7696         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7697                 stripe_size = length;
7698                 stripe_size /= (num_stripes - 1);
7699         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7700                 stripe_size = length;
7701                 stripe_size /= (num_stripes - 2);
7702         } else {
7703                 stripe_size = length;
7704         }
7705         return stripe_size;
7706 }
7707
7708 /*
7709  * Check the chunk with its block group/dev list ref:
7710  * Return 0 if all refs seems valid.
7711  * Return 1 if part of refs seems valid, need later check for rebuild ref
7712  * like missing block group and needs to search extent tree to rebuild them.
7713  * Return -1 if essential refs are missing and unable to rebuild.
7714  */
7715 static int check_chunk_refs(struct chunk_record *chunk_rec,
7716                             struct block_group_tree *block_group_cache,
7717                             struct device_extent_tree *dev_extent_cache,
7718                             int silent)
7719 {
7720         struct cache_extent *block_group_item;
7721         struct block_group_record *block_group_rec;
7722         struct cache_extent *dev_extent_item;
7723         struct device_extent_record *dev_extent_rec;
7724         u64 devid;
7725         u64 offset;
7726         u64 length;
7727         int metadump_v2 = 0;
7728         int i;
7729         int ret = 0;
7730
7731         block_group_item = lookup_cache_extent(&block_group_cache->tree,
7732                                                chunk_rec->offset,
7733                                                chunk_rec->length);
7734         if (block_group_item) {
7735                 block_group_rec = container_of(block_group_item,
7736                                                struct block_group_record,
7737                                                cache);
7738                 if (chunk_rec->length != block_group_rec->offset ||
7739                     chunk_rec->offset != block_group_rec->objectid ||
7740                     (!metadump_v2 &&
7741                      chunk_rec->type_flags != block_group_rec->flags)) {
7742                         if (!silent)
7743                                 fprintf(stderr,
7744                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7745                                         chunk_rec->objectid,
7746                                         chunk_rec->type,
7747                                         chunk_rec->offset,
7748                                         chunk_rec->length,
7749                                         chunk_rec->offset,
7750                                         chunk_rec->type_flags,
7751                                         block_group_rec->objectid,
7752                                         block_group_rec->type,
7753                                         block_group_rec->offset,
7754                                         block_group_rec->offset,
7755                                         block_group_rec->objectid,
7756                                         block_group_rec->flags);
7757                         ret = -1;
7758                 } else {
7759                         list_del_init(&block_group_rec->list);
7760                         chunk_rec->bg_rec = block_group_rec;
7761                 }
7762         } else {
7763                 if (!silent)
7764                         fprintf(stderr,
7765                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7766                                 chunk_rec->objectid,
7767                                 chunk_rec->type,
7768                                 chunk_rec->offset,
7769                                 chunk_rec->length,
7770                                 chunk_rec->offset,
7771                                 chunk_rec->type_flags);
7772                 ret = 1;
7773         }
7774
7775         if (metadump_v2)
7776                 return ret;
7777
7778         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7779                                     chunk_rec->num_stripes);
7780         for (i = 0; i < chunk_rec->num_stripes; ++i) {
7781                 devid = chunk_rec->stripes[i].devid;
7782                 offset = chunk_rec->stripes[i].offset;
7783                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7784                                                        devid, offset, length);
7785                 if (dev_extent_item) {
7786                         dev_extent_rec = container_of(dev_extent_item,
7787                                                 struct device_extent_record,
7788                                                 cache);
7789                         if (dev_extent_rec->objectid != devid ||
7790                             dev_extent_rec->offset != offset ||
7791                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
7792                             dev_extent_rec->length != length) {
7793                                 if (!silent)
7794                                         fprintf(stderr,
7795                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7796                                                 chunk_rec->objectid,
7797                                                 chunk_rec->type,
7798                                                 chunk_rec->offset,
7799                                                 chunk_rec->stripes[i].devid,
7800                                                 chunk_rec->stripes[i].offset,
7801                                                 dev_extent_rec->objectid,
7802                                                 dev_extent_rec->offset,
7803                                                 dev_extent_rec->length);
7804                                 ret = -1;
7805                         } else {
7806                                 list_move(&dev_extent_rec->chunk_list,
7807                                           &chunk_rec->dextents);
7808                         }
7809                 } else {
7810                         if (!silent)
7811                                 fprintf(stderr,
7812                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7813                                         chunk_rec->objectid,
7814                                         chunk_rec->type,
7815                                         chunk_rec->offset,
7816                                         chunk_rec->stripes[i].devid,
7817                                         chunk_rec->stripes[i].offset);
7818                         ret = -1;
7819                 }
7820         }
7821         return ret;
7822 }
7823
7824 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7825 int check_chunks(struct cache_tree *chunk_cache,
7826                  struct block_group_tree *block_group_cache,
7827                  struct device_extent_tree *dev_extent_cache,
7828                  struct list_head *good, struct list_head *bad,
7829                  struct list_head *rebuild, int silent)
7830 {
7831         struct cache_extent *chunk_item;
7832         struct chunk_record *chunk_rec;
7833         struct block_group_record *bg_rec;
7834         struct device_extent_record *dext_rec;
7835         int err;
7836         int ret = 0;
7837
7838         chunk_item = first_cache_extent(chunk_cache);
7839         while (chunk_item) {
7840                 chunk_rec = container_of(chunk_item, struct chunk_record,
7841                                          cache);
7842                 err = check_chunk_refs(chunk_rec, block_group_cache,
7843                                        dev_extent_cache, silent);
7844                 if (err < 0)
7845                         ret = err;
7846                 if (err == 0 && good)
7847                         list_add_tail(&chunk_rec->list, good);
7848                 if (err > 0 && rebuild)
7849                         list_add_tail(&chunk_rec->list, rebuild);
7850                 if (err < 0 && bad)
7851                         list_add_tail(&chunk_rec->list, bad);
7852                 chunk_item = next_cache_extent(chunk_item);
7853         }
7854
7855         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7856                 if (!silent)
7857                         fprintf(stderr,
7858                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7859                                 bg_rec->objectid,
7860                                 bg_rec->offset,
7861                                 bg_rec->flags);
7862                 if (!ret)
7863                         ret = 1;
7864         }
7865
7866         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7867                             chunk_list) {
7868                 if (!silent)
7869                         fprintf(stderr,
7870                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7871                                 dext_rec->objectid,
7872                                 dext_rec->offset,
7873                                 dext_rec->length);
7874                 if (!ret)
7875                         ret = 1;
7876         }
7877         return ret;
7878 }
7879
7880
7881 static int check_device_used(struct device_record *dev_rec,
7882                              struct device_extent_tree *dext_cache)
7883 {
7884         struct cache_extent *cache;
7885         struct device_extent_record *dev_extent_rec;
7886         u64 total_byte = 0;
7887
7888         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7889         while (cache) {
7890                 dev_extent_rec = container_of(cache,
7891                                               struct device_extent_record,
7892                                               cache);
7893                 if (dev_extent_rec->objectid != dev_rec->devid)
7894                         break;
7895
7896                 list_del_init(&dev_extent_rec->device_list);
7897                 total_byte += dev_extent_rec->length;
7898                 cache = next_cache_extent(cache);
7899         }
7900
7901         if (total_byte != dev_rec->byte_used) {
7902                 fprintf(stderr,
7903                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7904                         total_byte, dev_rec->byte_used, dev_rec->objectid,
7905                         dev_rec->type, dev_rec->offset);
7906                 return -1;
7907         } else {
7908                 return 0;
7909         }
7910 }
7911
7912 /* check btrfs_dev_item -> btrfs_dev_extent */
7913 static int check_devices(struct rb_root *dev_cache,
7914                          struct device_extent_tree *dev_extent_cache)
7915 {
7916         struct rb_node *dev_node;
7917         struct device_record *dev_rec;
7918         struct device_extent_record *dext_rec;
7919         int err;
7920         int ret = 0;
7921
7922         dev_node = rb_first(dev_cache);
7923         while (dev_node) {
7924                 dev_rec = container_of(dev_node, struct device_record, node);
7925                 err = check_device_used(dev_rec, dev_extent_cache);
7926                 if (err)
7927                         ret = err;
7928
7929                 dev_node = rb_next(dev_node);
7930         }
7931         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7932                             device_list) {
7933                 fprintf(stderr,
7934                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7935                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
7936                 if (!ret)
7937                         ret = 1;
7938         }
7939         return ret;
7940 }
7941
7942 static int add_root_item_to_list(struct list_head *head,
7943                                   u64 objectid, u64 bytenr, u64 last_snapshot,
7944                                   u8 level, u8 drop_level,
7945                                   int level_size, struct btrfs_key *drop_key)
7946 {
7947
7948         struct root_item_record *ri_rec;
7949         ri_rec = malloc(sizeof(*ri_rec));
7950         if (!ri_rec)
7951                 return -ENOMEM;
7952         ri_rec->bytenr = bytenr;
7953         ri_rec->objectid = objectid;
7954         ri_rec->level = level;
7955         ri_rec->level_size = level_size;
7956         ri_rec->drop_level = drop_level;
7957         ri_rec->last_snapshot = last_snapshot;
7958         if (drop_key)
7959                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
7960         list_add_tail(&ri_rec->list, head);
7961
7962         return 0;
7963 }
7964
7965 static void free_root_item_list(struct list_head *list)
7966 {
7967         struct root_item_record *ri_rec;
7968
7969         while (!list_empty(list)) {
7970                 ri_rec = list_first_entry(list, struct root_item_record,
7971                                           list);
7972                 list_del_init(&ri_rec->list);
7973                 free(ri_rec);
7974         }
7975 }
7976
7977 static int deal_root_from_list(struct list_head *list,
7978                                struct btrfs_root *root,
7979                                struct block_info *bits,
7980                                int bits_nr,
7981                                struct cache_tree *pending,
7982                                struct cache_tree *seen,
7983                                struct cache_tree *reada,
7984                                struct cache_tree *nodes,
7985                                struct cache_tree *extent_cache,
7986                                struct cache_tree *chunk_cache,
7987                                struct rb_root *dev_cache,
7988                                struct block_group_tree *block_group_cache,
7989                                struct device_extent_tree *dev_extent_cache)
7990 {
7991         int ret = 0;
7992         u64 last;
7993
7994         while (!list_empty(list)) {
7995                 struct root_item_record *rec;
7996                 struct extent_buffer *buf;
7997                 rec = list_entry(list->next,
7998                                  struct root_item_record, list);
7999                 last = 0;
8000                 buf = read_tree_block(root->fs_info->tree_root,
8001                                       rec->bytenr, rec->level_size, 0);
8002                 if (!extent_buffer_uptodate(buf)) {
8003                         free_extent_buffer(buf);
8004                         ret = -EIO;
8005                         break;
8006                 }
8007                 add_root_to_pending(buf, extent_cache, pending,
8008                                     seen, nodes, rec->objectid);
8009                 /*
8010                  * To rebuild extent tree, we need deal with snapshot
8011                  * one by one, otherwise we deal with node firstly which
8012                  * can maximize readahead.
8013                  */
8014                 while (1) {
8015                         ret = run_next_block(root, bits, bits_nr, &last,
8016                                              pending, seen, reada, nodes,
8017                                              extent_cache, chunk_cache,
8018                                              dev_cache, block_group_cache,
8019                                              dev_extent_cache, rec);
8020                         if (ret != 0)
8021                                 break;
8022                 }
8023                 free_extent_buffer(buf);
8024                 list_del(&rec->list);
8025                 free(rec);
8026                 if (ret < 0)
8027                         break;
8028         }
8029         while (ret >= 0) {
8030                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8031                                      reada, nodes, extent_cache, chunk_cache,
8032                                      dev_cache, block_group_cache,
8033                                      dev_extent_cache, NULL);
8034                 if (ret != 0) {
8035                         if (ret > 0)
8036                                 ret = 0;
8037                         break;
8038                 }
8039         }
8040         return ret;
8041 }
8042
8043 static int check_chunks_and_extents(struct btrfs_root *root)
8044 {
8045         struct rb_root dev_cache;
8046         struct cache_tree chunk_cache;
8047         struct block_group_tree block_group_cache;
8048         struct device_extent_tree dev_extent_cache;
8049         struct cache_tree extent_cache;
8050         struct cache_tree seen;
8051         struct cache_tree pending;
8052         struct cache_tree reada;
8053         struct cache_tree nodes;
8054         struct extent_io_tree excluded_extents;
8055         struct cache_tree corrupt_blocks;
8056         struct btrfs_path path;
8057         struct btrfs_key key;
8058         struct btrfs_key found_key;
8059         int ret, err = 0;
8060         struct block_info *bits;
8061         int bits_nr;
8062         struct extent_buffer *leaf;
8063         int slot;
8064         struct btrfs_root_item ri;
8065         struct list_head dropping_trees;
8066         struct list_head normal_trees;
8067         struct btrfs_root *root1;
8068         u64 objectid;
8069         u32 level_size;
8070         u8 level;
8071
8072         dev_cache = RB_ROOT;
8073         cache_tree_init(&chunk_cache);
8074         block_group_tree_init(&block_group_cache);
8075         device_extent_tree_init(&dev_extent_cache);
8076
8077         cache_tree_init(&extent_cache);
8078         cache_tree_init(&seen);
8079         cache_tree_init(&pending);
8080         cache_tree_init(&nodes);
8081         cache_tree_init(&reada);
8082         cache_tree_init(&corrupt_blocks);
8083         extent_io_tree_init(&excluded_extents);
8084         INIT_LIST_HEAD(&dropping_trees);
8085         INIT_LIST_HEAD(&normal_trees);
8086
8087         if (repair) {
8088                 root->fs_info->excluded_extents = &excluded_extents;
8089                 root->fs_info->fsck_extent_cache = &extent_cache;
8090                 root->fs_info->free_extent_hook = free_extent_hook;
8091                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8092         }
8093
8094         bits_nr = 1024;
8095         bits = malloc(bits_nr * sizeof(struct block_info));
8096         if (!bits) {
8097                 perror("malloc");
8098                 exit(1);
8099         }
8100
8101         if (ctx.progress_enabled) {
8102                 ctx.tp = TASK_EXTENTS;
8103                 task_start(ctx.info);
8104         }
8105
8106 again:
8107         root1 = root->fs_info->tree_root;
8108         level = btrfs_header_level(root1->node);
8109         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8110                                     root1->node->start, 0, level, 0,
8111                                     btrfs_level_size(root1, level), NULL);
8112         if (ret < 0)
8113                 goto out;
8114         root1 = root->fs_info->chunk_root;
8115         level = btrfs_header_level(root1->node);
8116         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8117                                     root1->node->start, 0, level, 0,
8118                                     btrfs_level_size(root1, level), NULL);
8119         if (ret < 0)
8120                 goto out;
8121         btrfs_init_path(&path);
8122         key.offset = 0;
8123         key.objectid = 0;
8124         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8125         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8126                                         &key, &path, 0, 0);
8127         if (ret < 0)
8128                 goto out;
8129         while(1) {
8130                 leaf = path.nodes[0];
8131                 slot = path.slots[0];
8132                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8133                         ret = btrfs_next_leaf(root, &path);
8134                         if (ret != 0)
8135                                 break;
8136                         leaf = path.nodes[0];
8137                         slot = path.slots[0];
8138                 }
8139                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8140                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8141                         unsigned long offset;
8142                         u64 last_snapshot;
8143
8144                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8145                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8146                         last_snapshot = btrfs_root_last_snapshot(&ri);
8147                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8148                                 level = btrfs_root_level(&ri);
8149                                 level_size = btrfs_level_size(root, level);
8150                                 ret = add_root_item_to_list(&normal_trees,
8151                                                 found_key.objectid,
8152                                                 btrfs_root_bytenr(&ri),
8153                                                 last_snapshot, level,
8154                                                 0, level_size, NULL);
8155                                 if (ret < 0)
8156                                         goto out;
8157                         } else {
8158                                 level = btrfs_root_level(&ri);
8159                                 level_size = btrfs_level_size(root, level);
8160                                 objectid = found_key.objectid;
8161                                 btrfs_disk_key_to_cpu(&found_key,
8162                                                       &ri.drop_progress);
8163                                 ret = add_root_item_to_list(&dropping_trees,
8164                                                 objectid,
8165                                                 btrfs_root_bytenr(&ri),
8166                                                 last_snapshot, level,
8167                                                 ri.drop_level,
8168                                                 level_size, &found_key);
8169                                 if (ret < 0)
8170                                         goto out;
8171                         }
8172                 }
8173                 path.slots[0]++;
8174         }
8175         btrfs_release_path(&path);
8176
8177         /*
8178          * check_block can return -EAGAIN if it fixes something, please keep
8179          * this in mind when dealing with return values from these functions, if
8180          * we get -EAGAIN we want to fall through and restart the loop.
8181          */
8182         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8183                                   &seen, &reada, &nodes, &extent_cache,
8184                                   &chunk_cache, &dev_cache, &block_group_cache,
8185                                   &dev_extent_cache);
8186         if (ret < 0) {
8187                 if (ret == -EAGAIN)
8188                         goto loop;
8189                 goto out;
8190         }
8191         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8192                                   &pending, &seen, &reada, &nodes,
8193                                   &extent_cache, &chunk_cache, &dev_cache,
8194                                   &block_group_cache, &dev_extent_cache);
8195         if (ret < 0) {
8196                 if (ret == -EAGAIN)
8197                         goto loop;
8198                 goto out;
8199         }
8200
8201         ret = check_chunks(&chunk_cache, &block_group_cache,
8202                            &dev_extent_cache, NULL, NULL, NULL, 0);
8203         if (ret) {
8204                 if (ret == -EAGAIN)
8205                         goto loop;
8206                 err = ret;
8207         }
8208
8209         ret = check_extent_refs(root, &extent_cache);
8210         if (ret < 0) {
8211                 if (ret == -EAGAIN)
8212                         goto loop;
8213                 goto out;
8214         }
8215
8216         ret = check_devices(&dev_cache, &dev_extent_cache);
8217         if (ret && err)
8218                 ret = err;
8219
8220 out:
8221         task_stop(ctx.info);
8222         if (repair) {
8223                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8224                 extent_io_tree_cleanup(&excluded_extents);
8225                 root->fs_info->fsck_extent_cache = NULL;
8226                 root->fs_info->free_extent_hook = NULL;
8227                 root->fs_info->corrupt_blocks = NULL;
8228                 root->fs_info->excluded_extents = NULL;
8229         }
8230         free(bits);
8231         free_chunk_cache_tree(&chunk_cache);
8232         free_device_cache_tree(&dev_cache);
8233         free_block_group_tree(&block_group_cache);
8234         free_device_extent_tree(&dev_extent_cache);
8235         free_extent_cache_tree(&seen);
8236         free_extent_cache_tree(&pending);
8237         free_extent_cache_tree(&reada);
8238         free_extent_cache_tree(&nodes);
8239         return ret;
8240 loop:
8241         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8242         free_extent_cache_tree(&seen);
8243         free_extent_cache_tree(&pending);
8244         free_extent_cache_tree(&reada);
8245         free_extent_cache_tree(&nodes);
8246         free_chunk_cache_tree(&chunk_cache);
8247         free_block_group_tree(&block_group_cache);
8248         free_device_cache_tree(&dev_cache);
8249         free_device_extent_tree(&dev_extent_cache);
8250         free_extent_record_cache(root->fs_info, &extent_cache);
8251         free_root_item_list(&normal_trees);
8252         free_root_item_list(&dropping_trees);
8253         extent_io_tree_cleanup(&excluded_extents);
8254         goto again;
8255 }
8256
8257 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8258                            struct btrfs_root *root, int overwrite)
8259 {
8260         struct extent_buffer *c;
8261         struct extent_buffer *old = root->node;
8262         int level;
8263         int ret;
8264         struct btrfs_disk_key disk_key = {0,0,0};
8265
8266         level = 0;
8267
8268         if (overwrite) {
8269                 c = old;
8270                 extent_buffer_get(c);
8271                 goto init;
8272         }
8273         c = btrfs_alloc_free_block(trans, root,
8274                                    btrfs_level_size(root, 0),
8275                                    root->root_key.objectid,
8276                                    &disk_key, level, 0, 0);
8277         if (IS_ERR(c)) {
8278                 c = old;
8279                 extent_buffer_get(c);
8280                 overwrite = 1;
8281         }
8282 init:
8283         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8284         btrfs_set_header_level(c, level);
8285         btrfs_set_header_bytenr(c, c->start);
8286         btrfs_set_header_generation(c, trans->transid);
8287         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8288         btrfs_set_header_owner(c, root->root_key.objectid);
8289
8290         write_extent_buffer(c, root->fs_info->fsid,
8291                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8292
8293         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8294                             btrfs_header_chunk_tree_uuid(c),
8295                             BTRFS_UUID_SIZE);
8296
8297         btrfs_mark_buffer_dirty(c);
8298         /*
8299          * this case can happen in the following case:
8300          *
8301          * 1.overwrite previous root.
8302          *
8303          * 2.reinit reloc data root, this is because we skip pin
8304          * down reloc data tree before which means we can allocate
8305          * same block bytenr here.
8306          */
8307         if (old->start == c->start) {
8308                 btrfs_set_root_generation(&root->root_item,
8309                                           trans->transid);
8310                 root->root_item.level = btrfs_header_level(root->node);
8311                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8312                                         &root->root_key, &root->root_item);
8313                 if (ret) {
8314                         free_extent_buffer(c);
8315                         return ret;
8316                 }
8317         }
8318         free_extent_buffer(old);
8319         root->node = c;
8320         add_root_to_dirty_list(root);
8321         return 0;
8322 }
8323
8324 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8325                                 struct extent_buffer *eb, int tree_root)
8326 {
8327         struct extent_buffer *tmp;
8328         struct btrfs_root_item *ri;
8329         struct btrfs_key key;
8330         u64 bytenr;
8331         u32 leafsize;
8332         int level = btrfs_header_level(eb);
8333         int nritems;
8334         int ret;
8335         int i;
8336
8337         /*
8338          * If we have pinned this block before, don't pin it again.
8339          * This can not only avoid forever loop with broken filesystem
8340          * but also give us some speedups.
8341          */
8342         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8343                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8344                 return 0;
8345
8346         btrfs_pin_extent(fs_info, eb->start, eb->len);
8347
8348         leafsize = btrfs_super_leafsize(fs_info->super_copy);
8349         nritems = btrfs_header_nritems(eb);
8350         for (i = 0; i < nritems; i++) {
8351                 if (level == 0) {
8352                         btrfs_item_key_to_cpu(eb, &key, i);
8353                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8354                                 continue;
8355                         /* Skip the extent root and reloc roots */
8356                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8357                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8358                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8359                                 continue;
8360                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8361                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8362
8363                         /*
8364                          * If at any point we start needing the real root we
8365                          * will have to build a stump root for the root we are
8366                          * in, but for now this doesn't actually use the root so
8367                          * just pass in extent_root.
8368                          */
8369                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8370                                               leafsize, 0);
8371                         if (!extent_buffer_uptodate(tmp)) {
8372                                 fprintf(stderr, "Error reading root block\n");
8373                                 return -EIO;
8374                         }
8375                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8376                         free_extent_buffer(tmp);
8377                         if (ret)
8378                                 return ret;
8379                 } else {
8380                         bytenr = btrfs_node_blockptr(eb, i);
8381
8382                         /* If we aren't the tree root don't read the block */
8383                         if (level == 1 && !tree_root) {
8384                                 btrfs_pin_extent(fs_info, bytenr, leafsize);
8385                                 continue;
8386                         }
8387
8388                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8389                                               leafsize, 0);
8390                         if (!extent_buffer_uptodate(tmp)) {
8391                                 fprintf(stderr, "Error reading tree block\n");
8392                                 return -EIO;
8393                         }
8394                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8395                         free_extent_buffer(tmp);
8396                         if (ret)
8397                                 return ret;
8398                 }
8399         }
8400
8401         return 0;
8402 }
8403
8404 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8405 {
8406         int ret;
8407
8408         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8409         if (ret)
8410                 return ret;
8411
8412         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8413 }
8414
8415 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8416 {
8417         struct btrfs_block_group_cache *cache;
8418         struct btrfs_path *path;
8419         struct extent_buffer *leaf;
8420         struct btrfs_chunk *chunk;
8421         struct btrfs_key key;
8422         int ret;
8423         u64 start;
8424
8425         path = btrfs_alloc_path();
8426         if (!path)
8427                 return -ENOMEM;
8428
8429         key.objectid = 0;
8430         key.type = BTRFS_CHUNK_ITEM_KEY;
8431         key.offset = 0;
8432
8433         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8434         if (ret < 0) {
8435                 btrfs_free_path(path);
8436                 return ret;
8437         }
8438
8439         /*
8440          * We do this in case the block groups were screwed up and had alloc
8441          * bits that aren't actually set on the chunks.  This happens with
8442          * restored images every time and could happen in real life I guess.
8443          */
8444         fs_info->avail_data_alloc_bits = 0;
8445         fs_info->avail_metadata_alloc_bits = 0;
8446         fs_info->avail_system_alloc_bits = 0;
8447
8448         /* First we need to create the in-memory block groups */
8449         while (1) {
8450                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8451                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
8452                         if (ret < 0) {
8453                                 btrfs_free_path(path);
8454                                 return ret;
8455                         }
8456                         if (ret) {
8457                                 ret = 0;
8458                                 break;
8459                         }
8460                 }
8461                 leaf = path->nodes[0];
8462                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8463                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8464                         path->slots[0]++;
8465                         continue;
8466                 }
8467
8468                 chunk = btrfs_item_ptr(leaf, path->slots[0],
8469                                        struct btrfs_chunk);
8470                 btrfs_add_block_group(fs_info, 0,
8471                                       btrfs_chunk_type(leaf, chunk),
8472                                       key.objectid, key.offset,
8473                                       btrfs_chunk_length(leaf, chunk));
8474                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8475                                  key.offset + btrfs_chunk_length(leaf, chunk),
8476                                  GFP_NOFS);
8477                 path->slots[0]++;
8478         }
8479         start = 0;
8480         while (1) {
8481                 cache = btrfs_lookup_first_block_group(fs_info, start);
8482                 if (!cache)
8483                         break;
8484                 cache->cached = 1;
8485                 start = cache->key.objectid + cache->key.offset;
8486         }
8487
8488         btrfs_free_path(path);
8489         return 0;
8490 }
8491
8492 static int reset_balance(struct btrfs_trans_handle *trans,
8493                          struct btrfs_fs_info *fs_info)
8494 {
8495         struct btrfs_root *root = fs_info->tree_root;
8496         struct btrfs_path *path;
8497         struct extent_buffer *leaf;
8498         struct btrfs_key key;
8499         int del_slot, del_nr = 0;
8500         int ret;
8501         int found = 0;
8502
8503         path = btrfs_alloc_path();
8504         if (!path)
8505                 return -ENOMEM;
8506
8507         key.objectid = BTRFS_BALANCE_OBJECTID;
8508         key.type = BTRFS_BALANCE_ITEM_KEY;
8509         key.offset = 0;
8510
8511         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8512         if (ret) {
8513                 if (ret > 0)
8514                         ret = 0;
8515                 if (!ret)
8516                         goto reinit_data_reloc;
8517                 else
8518                         goto out;
8519         }
8520
8521         ret = btrfs_del_item(trans, root, path);
8522         if (ret)
8523                 goto out;
8524         btrfs_release_path(path);
8525
8526         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8527         key.type = BTRFS_ROOT_ITEM_KEY;
8528         key.offset = 0;
8529
8530         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8531         if (ret < 0)
8532                 goto out;
8533         while (1) {
8534                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8535                         if (!found)
8536                                 break;
8537
8538                         if (del_nr) {
8539                                 ret = btrfs_del_items(trans, root, path,
8540                                                       del_slot, del_nr);
8541                                 del_nr = 0;
8542                                 if (ret)
8543                                         goto out;
8544                         }
8545                         key.offset++;
8546                         btrfs_release_path(path);
8547
8548                         found = 0;
8549                         ret = btrfs_search_slot(trans, root, &key, path,
8550                                                 -1, 1);
8551                         if (ret < 0)
8552                                 goto out;
8553                         continue;
8554                 }
8555                 found = 1;
8556                 leaf = path->nodes[0];
8557                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8558                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8559                         break;
8560                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8561                         path->slots[0]++;
8562                         continue;
8563                 }
8564                 if (!del_nr) {
8565                         del_slot = path->slots[0];
8566                         del_nr = 1;
8567                 } else {
8568                         del_nr++;
8569                 }
8570                 path->slots[0]++;
8571         }
8572
8573         if (del_nr) {
8574                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8575                 if (ret)
8576                         goto out;
8577         }
8578         btrfs_release_path(path);
8579
8580 reinit_data_reloc:
8581         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8582         key.type = BTRFS_ROOT_ITEM_KEY;
8583         key.offset = (u64)-1;
8584         root = btrfs_read_fs_root(fs_info, &key);
8585         if (IS_ERR(root)) {
8586                 fprintf(stderr, "Error reading data reloc tree\n");
8587                 ret = PTR_ERR(root);
8588                 goto out;
8589         }
8590         record_root_in_trans(trans, root);
8591         ret = btrfs_fsck_reinit_root(trans, root, 0);
8592         if (ret)
8593                 goto out;
8594         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8595 out:
8596         btrfs_free_path(path);
8597         return ret;
8598 }
8599
8600 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8601                               struct btrfs_fs_info *fs_info)
8602 {
8603         u64 start = 0;
8604         int ret;
8605
8606         /*
8607          * The only reason we don't do this is because right now we're just
8608          * walking the trees we find and pinning down their bytes, we don't look
8609          * at any of the leaves.  In order to do mixed groups we'd have to check
8610          * the leaves of any fs roots and pin down the bytes for any file
8611          * extents we find.  Not hard but why do it if we don't have to?
8612          */
8613         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8614                 fprintf(stderr, "We don't support re-initing the extent tree "
8615                         "for mixed block groups yet, please notify a btrfs "
8616                         "developer you want to do this so they can add this "
8617                         "functionality.\n");
8618                 return -EINVAL;
8619         }
8620
8621         /*
8622          * first we need to walk all of the trees except the extent tree and pin
8623          * down the bytes that are in use so we don't overwrite any existing
8624          * metadata.
8625          */
8626         ret = pin_metadata_blocks(fs_info);
8627         if (ret) {
8628                 fprintf(stderr, "error pinning down used bytes\n");
8629                 return ret;
8630         }
8631
8632         /*
8633          * Need to drop all the block groups since we're going to recreate all
8634          * of them again.
8635          */
8636         btrfs_free_block_groups(fs_info);
8637         ret = reset_block_groups(fs_info);
8638         if (ret) {
8639                 fprintf(stderr, "error resetting the block groups\n");
8640                 return ret;
8641         }
8642
8643         /* Ok we can allocate now, reinit the extent root */
8644         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8645         if (ret) {
8646                 fprintf(stderr, "extent root initialization failed\n");
8647                 /*
8648                  * When the transaction code is updated we should end the
8649                  * transaction, but for now progs only knows about commit so
8650                  * just return an error.
8651                  */
8652                 return ret;
8653         }
8654
8655         /*
8656          * Now we have all the in-memory block groups setup so we can make
8657          * allocations properly, and the metadata we care about is safe since we
8658          * pinned all of it above.
8659          */
8660         while (1) {
8661                 struct btrfs_block_group_cache *cache;
8662
8663                 cache = btrfs_lookup_first_block_group(fs_info, start);
8664                 if (!cache)
8665                         break;
8666                 start = cache->key.objectid + cache->key.offset;
8667                 ret = btrfs_insert_item(trans, fs_info->extent_root,
8668                                         &cache->key, &cache->item,
8669                                         sizeof(cache->item));
8670                 if (ret) {
8671                         fprintf(stderr, "Error adding block group\n");
8672                         return ret;
8673                 }
8674                 btrfs_extent_post_op(trans, fs_info->extent_root);
8675         }
8676
8677         ret = reset_balance(trans, fs_info);
8678         if (ret)
8679                 fprintf(stderr, "error reseting the pending balance\n");
8680
8681         return ret;
8682 }
8683
8684 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8685 {
8686         struct btrfs_path *path;
8687         struct btrfs_trans_handle *trans;
8688         struct btrfs_key key;
8689         int ret;
8690
8691         printf("Recowing metadata block %llu\n", eb->start);
8692         key.objectid = btrfs_header_owner(eb);
8693         key.type = BTRFS_ROOT_ITEM_KEY;
8694         key.offset = (u64)-1;
8695
8696         root = btrfs_read_fs_root(root->fs_info, &key);
8697         if (IS_ERR(root)) {
8698                 fprintf(stderr, "Couldn't find owner root %llu\n",
8699                         key.objectid);
8700                 return PTR_ERR(root);
8701         }
8702
8703         path = btrfs_alloc_path();
8704         if (!path)
8705                 return -ENOMEM;
8706
8707         trans = btrfs_start_transaction(root, 1);
8708         if (IS_ERR(trans)) {
8709                 btrfs_free_path(path);
8710                 return PTR_ERR(trans);
8711         }
8712
8713         path->lowest_level = btrfs_header_level(eb);
8714         if (path->lowest_level)
8715                 btrfs_node_key_to_cpu(eb, &key, 0);
8716         else
8717                 btrfs_item_key_to_cpu(eb, &key, 0);
8718
8719         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8720         btrfs_commit_transaction(trans, root);
8721         btrfs_free_path(path);
8722         return ret;
8723 }
8724
8725 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8726 {
8727         struct btrfs_path *path;
8728         struct btrfs_trans_handle *trans;
8729         struct btrfs_key key;
8730         int ret;
8731
8732         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8733                bad->key.type, bad->key.offset);
8734         key.objectid = bad->root_id;
8735         key.type = BTRFS_ROOT_ITEM_KEY;
8736         key.offset = (u64)-1;
8737
8738         root = btrfs_read_fs_root(root->fs_info, &key);
8739         if (IS_ERR(root)) {
8740                 fprintf(stderr, "Couldn't find owner root %llu\n",
8741                         key.objectid);
8742                 return PTR_ERR(root);
8743         }
8744
8745         path = btrfs_alloc_path();
8746         if (!path)
8747                 return -ENOMEM;
8748
8749         trans = btrfs_start_transaction(root, 1);
8750         if (IS_ERR(trans)) {
8751                 btrfs_free_path(path);
8752                 return PTR_ERR(trans);
8753         }
8754
8755         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8756         if (ret) {
8757                 if (ret > 0)
8758                         ret = 0;
8759                 goto out;
8760         }
8761         ret = btrfs_del_item(trans, root, path);
8762 out:
8763         btrfs_commit_transaction(trans, root);
8764         btrfs_free_path(path);
8765         return ret;
8766 }
8767
8768 static int zero_log_tree(struct btrfs_root *root)
8769 {
8770         struct btrfs_trans_handle *trans;
8771         int ret;
8772
8773         trans = btrfs_start_transaction(root, 1);
8774         if (IS_ERR(trans)) {
8775                 ret = PTR_ERR(trans);
8776                 return ret;
8777         }
8778         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8779         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8780         ret = btrfs_commit_transaction(trans, root);
8781         return ret;
8782 }
8783
8784 static int populate_csum(struct btrfs_trans_handle *trans,
8785                          struct btrfs_root *csum_root, char *buf, u64 start,
8786                          u64 len)
8787 {
8788         u64 offset = 0;
8789         u64 sectorsize;
8790         int ret = 0;
8791
8792         while (offset < len) {
8793                 sectorsize = csum_root->sectorsize;
8794                 ret = read_extent_data(csum_root, buf, start + offset,
8795                                        &sectorsize, 0);
8796                 if (ret)
8797                         break;
8798                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8799                                             start + offset, buf, sectorsize);
8800                 if (ret)
8801                         break;
8802                 offset += sectorsize;
8803         }
8804         return ret;
8805 }
8806
8807 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8808                                       struct btrfs_root *csum_root,
8809                                       struct btrfs_root *cur_root)
8810 {
8811         struct btrfs_path *path;
8812         struct btrfs_key key;
8813         struct extent_buffer *node;
8814         struct btrfs_file_extent_item *fi;
8815         char *buf = NULL;
8816         u64 start = 0;
8817         u64 len = 0;
8818         int slot = 0;
8819         int ret = 0;
8820
8821         path = btrfs_alloc_path();
8822         if (!path)
8823                 return -ENOMEM;
8824         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8825         if (!buf) {
8826                 ret = -ENOMEM;
8827                 goto out;
8828         }
8829
8830         key.objectid = 0;
8831         key.offset = 0;
8832         key.type = 0;
8833
8834         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8835         if (ret < 0)
8836                 goto out;
8837         /* Iterate all regular file extents and fill its csum */
8838         while (1) {
8839                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8840
8841                 if (key.type != BTRFS_EXTENT_DATA_KEY)
8842                         goto next;
8843                 node = path->nodes[0];
8844                 slot = path->slots[0];
8845                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8846                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8847                         goto next;
8848                 start = btrfs_file_extent_disk_bytenr(node, fi);
8849                 len = btrfs_file_extent_disk_num_bytes(node, fi);
8850
8851                 ret = populate_csum(trans, csum_root, buf, start, len);
8852                 if (ret == -EEXIST)
8853                         ret = 0;
8854                 if (ret < 0)
8855                         goto out;
8856 next:
8857                 /*
8858                  * TODO: if next leaf is corrupted, jump to nearest next valid
8859                  * leaf.
8860                  */
8861                 ret = btrfs_next_item(cur_root, path);
8862                 if (ret < 0)
8863                         goto out;
8864                 if (ret > 0) {
8865                         ret = 0;
8866                         goto out;
8867                 }
8868         }
8869
8870 out:
8871         btrfs_free_path(path);
8872         free(buf);
8873         return ret;
8874 }
8875
8876 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8877                                   struct btrfs_root *csum_root)
8878 {
8879         struct btrfs_fs_info *fs_info = csum_root->fs_info;
8880         struct btrfs_path *path;
8881         struct btrfs_root *tree_root = fs_info->tree_root;
8882         struct btrfs_root *cur_root;
8883         struct extent_buffer *node;
8884         struct btrfs_key key;
8885         int slot = 0;
8886         int ret = 0;
8887
8888         path = btrfs_alloc_path();
8889         if (!path)
8890                 return -ENOMEM;
8891
8892         key.objectid = BTRFS_FS_TREE_OBJECTID;
8893         key.offset = 0;
8894         key.type = BTRFS_ROOT_ITEM_KEY;
8895
8896         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
8897         if (ret < 0)
8898                 goto out;
8899         if (ret > 0) {
8900                 ret = -ENOENT;
8901                 goto out;
8902         }
8903
8904         while (1) {
8905                 node = path->nodes[0];
8906                 slot = path->slots[0];
8907                 btrfs_item_key_to_cpu(node, &key, slot);
8908                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8909                         goto out;
8910                 if (key.type != BTRFS_ROOT_ITEM_KEY)
8911                         goto next;
8912                 if (!is_fstree(key.objectid))
8913                         goto next;
8914                 key.offset = (u64)-1;
8915
8916                 cur_root = btrfs_read_fs_root(fs_info, &key);
8917                 if (IS_ERR(cur_root) || !cur_root) {
8918                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8919                                 key.objectid);
8920                         goto out;
8921                 }
8922                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8923                                 cur_root);
8924                 if (ret < 0)
8925                         goto out;
8926 next:
8927                 ret = btrfs_next_item(tree_root, path);
8928                 if (ret > 0) {
8929                         ret = 0;
8930                         goto out;
8931                 }
8932                 if (ret < 0)
8933                         goto out;
8934         }
8935
8936 out:
8937         btrfs_free_path(path);
8938         return ret;
8939 }
8940
8941 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
8942                                       struct btrfs_root *csum_root)
8943 {
8944         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
8945         struct btrfs_path *path;
8946         struct btrfs_extent_item *ei;
8947         struct extent_buffer *leaf;
8948         char *buf;
8949         struct btrfs_key key;
8950         int ret;
8951
8952         path = btrfs_alloc_path();
8953         if (!path)
8954                 return -ENOMEM;
8955
8956         key.objectid = 0;
8957         key.type = BTRFS_EXTENT_ITEM_KEY;
8958         key.offset = 0;
8959
8960         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
8961         if (ret < 0) {
8962                 btrfs_free_path(path);
8963                 return ret;
8964         }
8965
8966         buf = malloc(csum_root->sectorsize);
8967         if (!buf) {
8968                 btrfs_free_path(path);
8969                 return -ENOMEM;
8970         }
8971
8972         while (1) {
8973                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8974                         ret = btrfs_next_leaf(extent_root, path);
8975                         if (ret < 0)
8976                                 break;
8977                         if (ret) {
8978                                 ret = 0;
8979                                 break;
8980                         }
8981                 }
8982                 leaf = path->nodes[0];
8983
8984                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8985                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8986                         path->slots[0]++;
8987                         continue;
8988                 }
8989
8990                 ei = btrfs_item_ptr(leaf, path->slots[0],
8991                                     struct btrfs_extent_item);
8992                 if (!(btrfs_extent_flags(leaf, ei) &
8993                       BTRFS_EXTENT_FLAG_DATA)) {
8994                         path->slots[0]++;
8995                         continue;
8996                 }
8997
8998                 ret = populate_csum(trans, csum_root, buf, key.objectid,
8999                                     key.offset);
9000                 if (ret)
9001                         break;
9002                 path->slots[0]++;
9003         }
9004
9005         btrfs_free_path(path);
9006         free(buf);
9007         return ret;
9008 }
9009
9010 /*
9011  * Recalculate the csum and put it into the csum tree.
9012  *
9013  * Extent tree init will wipe out all the extent info, so in that case, we
9014  * can't depend on extent tree, but use fs tree.  If search_fs_tree is set, we
9015  * will use fs/subvol trees to init the csum tree.
9016  */
9017 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9018                           struct btrfs_root *csum_root,
9019                           int search_fs_tree)
9020 {
9021         if (search_fs_tree)
9022                 return fill_csum_tree_from_fs(trans, csum_root);
9023         else
9024                 return fill_csum_tree_from_extent(trans, csum_root);
9025 }
9026
9027 struct root_item_info {
9028         /* level of the root */
9029         u8 level;
9030         /* number of nodes at this level, must be 1 for a root */
9031         int node_count;
9032         u64 bytenr;
9033         u64 gen;
9034         struct cache_extent cache_extent;
9035 };
9036
9037 static struct cache_tree *roots_info_cache = NULL;
9038
9039 static void free_roots_info_cache(void)
9040 {
9041         if (!roots_info_cache)
9042                 return;
9043
9044         while (!cache_tree_empty(roots_info_cache)) {
9045                 struct cache_extent *entry;
9046                 struct root_item_info *rii;
9047
9048                 entry = first_cache_extent(roots_info_cache);
9049                 if (!entry)
9050                         break;
9051                 remove_cache_extent(roots_info_cache, entry);
9052                 rii = container_of(entry, struct root_item_info, cache_extent);
9053                 free(rii);
9054         }
9055
9056         free(roots_info_cache);
9057         roots_info_cache = NULL;
9058 }
9059
9060 static int build_roots_info_cache(struct btrfs_fs_info *info)
9061 {
9062         int ret = 0;
9063         struct btrfs_key key;
9064         struct extent_buffer *leaf;
9065         struct btrfs_path *path;
9066
9067         if (!roots_info_cache) {
9068                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9069                 if (!roots_info_cache)
9070                         return -ENOMEM;
9071                 cache_tree_init(roots_info_cache);
9072         }
9073
9074         path = btrfs_alloc_path();
9075         if (!path)
9076                 return -ENOMEM;
9077
9078         key.objectid = 0;
9079         key.type = BTRFS_EXTENT_ITEM_KEY;
9080         key.offset = 0;
9081
9082         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9083         if (ret < 0)
9084                 goto out;
9085         leaf = path->nodes[0];
9086
9087         while (1) {
9088                 struct btrfs_key found_key;
9089                 struct btrfs_extent_item *ei;
9090                 struct btrfs_extent_inline_ref *iref;
9091                 int slot = path->slots[0];
9092                 int type;
9093                 u64 flags;
9094                 u64 root_id;
9095                 u8 level;
9096                 struct cache_extent *entry;
9097                 struct root_item_info *rii;
9098
9099                 if (slot >= btrfs_header_nritems(leaf)) {
9100                         ret = btrfs_next_leaf(info->extent_root, path);
9101                         if (ret < 0) {
9102                                 break;
9103                         } else if (ret) {
9104                                 ret = 0;
9105                                 break;
9106                         }
9107                         leaf = path->nodes[0];
9108                         slot = path->slots[0];
9109                 }
9110
9111                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9112
9113                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9114                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9115                         goto next;
9116
9117                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9118                 flags = btrfs_extent_flags(leaf, ei);
9119
9120                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9121                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9122                         goto next;
9123
9124                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9125                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9126                         level = found_key.offset;
9127                 } else {
9128                         struct btrfs_tree_block_info *info;
9129
9130                         info = (struct btrfs_tree_block_info *)(ei + 1);
9131                         iref = (struct btrfs_extent_inline_ref *)(info + 1);
9132                         level = btrfs_tree_block_level(leaf, info);
9133                 }
9134
9135                 /*
9136                  * For a root extent, it must be of the following type and the
9137                  * first (and only one) iref in the item.
9138                  */
9139                 type = btrfs_extent_inline_ref_type(leaf, iref);
9140                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9141                         goto next;
9142
9143                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9144                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9145                 if (!entry) {
9146                         rii = malloc(sizeof(struct root_item_info));
9147                         if (!rii) {
9148                                 ret = -ENOMEM;
9149                                 goto out;
9150                         }
9151                         rii->cache_extent.start = root_id;
9152                         rii->cache_extent.size = 1;
9153                         rii->level = (u8)-1;
9154                         entry = &rii->cache_extent;
9155                         ret = insert_cache_extent(roots_info_cache, entry);
9156                         ASSERT(ret == 0);
9157                 } else {
9158                         rii = container_of(entry, struct root_item_info,
9159                                            cache_extent);
9160                 }
9161
9162                 ASSERT(rii->cache_extent.start == root_id);
9163                 ASSERT(rii->cache_extent.size == 1);
9164
9165                 if (level > rii->level || rii->level == (u8)-1) {
9166                         rii->level = level;
9167                         rii->bytenr = found_key.objectid;
9168                         rii->gen = btrfs_extent_generation(leaf, ei);
9169                         rii->node_count = 1;
9170                 } else if (level == rii->level) {
9171                         rii->node_count++;
9172                 }
9173 next:
9174                 path->slots[0]++;
9175         }
9176
9177 out:
9178         btrfs_free_path(path);
9179
9180         return ret;
9181 }
9182
9183 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9184                                   struct btrfs_path *path,
9185                                   const struct btrfs_key *root_key,
9186                                   const int read_only_mode)
9187 {
9188         const u64 root_id = root_key->objectid;
9189         struct cache_extent *entry;
9190         struct root_item_info *rii;
9191         struct btrfs_root_item ri;
9192         unsigned long offset;
9193
9194         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9195         if (!entry) {
9196                 fprintf(stderr,
9197                         "Error: could not find extent items for root %llu\n",
9198                         root_key->objectid);
9199                 return -ENOENT;
9200         }
9201
9202         rii = container_of(entry, struct root_item_info, cache_extent);
9203         ASSERT(rii->cache_extent.start == root_id);
9204         ASSERT(rii->cache_extent.size == 1);
9205
9206         if (rii->node_count != 1) {
9207                 fprintf(stderr,
9208                         "Error: could not find btree root extent for root %llu\n",
9209                         root_id);
9210                 return -ENOENT;
9211         }
9212
9213         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9214         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9215
9216         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9217             btrfs_root_level(&ri) != rii->level ||
9218             btrfs_root_generation(&ri) != rii->gen) {
9219
9220                 /*
9221                  * If we're in repair mode but our caller told us to not update
9222                  * the root item, i.e. just check if it needs to be updated, don't
9223                  * print this message, since the caller will call us again shortly
9224                  * for the same root item without read only mode (the caller will
9225                  * open a transaction first).
9226                  */
9227                 if (!(read_only_mode && repair))
9228                         fprintf(stderr,
9229                                 "%sroot item for root %llu,"
9230                                 " current bytenr %llu, current gen %llu, current level %u,"
9231                                 " new bytenr %llu, new gen %llu, new level %u\n",
9232                                 (read_only_mode ? "" : "fixing "),
9233                                 root_id,
9234                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9235                                 btrfs_root_level(&ri),
9236                                 rii->bytenr, rii->gen, rii->level);
9237
9238                 if (btrfs_root_generation(&ri) > rii->gen) {
9239                         fprintf(stderr,
9240                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9241                                 root_id, btrfs_root_generation(&ri), rii->gen);
9242                         return -EINVAL;
9243                 }
9244
9245                 if (!read_only_mode) {
9246                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9247                         btrfs_set_root_level(&ri, rii->level);
9248                         btrfs_set_root_generation(&ri, rii->gen);
9249                         write_extent_buffer(path->nodes[0], &ri,
9250                                             offset, sizeof(ri));
9251                 }
9252
9253                 return 1;
9254         }
9255
9256         return 0;
9257 }
9258
9259 /*
9260  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9261  * caused read-only snapshots to be corrupted if they were created at a moment
9262  * when the source subvolume/snapshot had orphan items. The issue was that the
9263  * on-disk root items became incorrect, referring to the pre orphan cleanup root
9264  * node instead of the post orphan cleanup root node.
9265  * So this function, and its callees, just detects and fixes those cases. Even
9266  * though the regression was for read-only snapshots, this function applies to
9267  * any snapshot/subvolume root.
9268  * This must be run before any other repair code - not doing it so, makes other
9269  * repair code delete or modify backrefs in the extent tree for example, which
9270  * will result in an inconsistent fs after repairing the root items.
9271  */
9272 static int repair_root_items(struct btrfs_fs_info *info)
9273 {
9274         struct btrfs_path *path = NULL;
9275         struct btrfs_key key;
9276         struct extent_buffer *leaf;
9277         struct btrfs_trans_handle *trans = NULL;
9278         int ret = 0;
9279         int bad_roots = 0;
9280         int need_trans = 0;
9281
9282         ret = build_roots_info_cache(info);
9283         if (ret)
9284                 goto out;
9285
9286         path = btrfs_alloc_path();
9287         if (!path) {
9288                 ret = -ENOMEM;
9289                 goto out;
9290         }
9291
9292         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9293         key.type = BTRFS_ROOT_ITEM_KEY;
9294         key.offset = 0;
9295
9296 again:
9297         /*
9298          * Avoid opening and committing transactions if a leaf doesn't have
9299          * any root items that need to be fixed, so that we avoid rotating
9300          * backup roots unnecessarily.
9301          */
9302         if (need_trans) {
9303                 trans = btrfs_start_transaction(info->tree_root, 1);
9304                 if (IS_ERR(trans)) {
9305                         ret = PTR_ERR(trans);
9306                         goto out;
9307                 }
9308         }
9309
9310         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9311                                 0, trans ? 1 : 0);
9312         if (ret < 0)
9313                 goto out;
9314         leaf = path->nodes[0];
9315
9316         while (1) {
9317                 struct btrfs_key found_key;
9318
9319                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9320                         int no_more_keys = find_next_key(path, &key);
9321
9322                         btrfs_release_path(path);
9323                         if (trans) {
9324                                 ret = btrfs_commit_transaction(trans,
9325                                                                info->tree_root);
9326                                 trans = NULL;
9327                                 if (ret < 0)
9328                                         goto out;
9329                         }
9330                         need_trans = 0;
9331                         if (no_more_keys)
9332                                 break;
9333                         goto again;
9334                 }
9335
9336                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9337
9338                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9339                         goto next;
9340                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9341                         goto next;
9342
9343                 ret = maybe_repair_root_item(info, path, &found_key,
9344                                              trans ? 0 : 1);
9345                 if (ret < 0)
9346                         goto out;
9347                 if (ret) {
9348                         if (!trans && repair) {
9349                                 need_trans = 1;
9350                                 key = found_key;
9351                                 btrfs_release_path(path);
9352                                 goto again;
9353                         }
9354                         bad_roots++;
9355                 }
9356 next:
9357                 path->slots[0]++;
9358         }
9359         ret = 0;
9360 out:
9361         free_roots_info_cache();
9362         btrfs_free_path(path);
9363         if (trans)
9364                 btrfs_commit_transaction(trans, info->tree_root);
9365         if (ret < 0)
9366                 return ret;
9367
9368         return bad_roots;
9369 }
9370
9371 const char * const cmd_check_usage[] = {
9372         "btrfs check [options] <device>",
9373         "Check structural inegrity of a filesystem (unmounted).",
9374         "Check structural inegrity of an unmounted filesystem. Verify internal",
9375         "trees' consistency and item connectivity. In the repair mode try to",
9376         "fix the problems found.",
9377         "WARNING: the repair mode is considered dangerous",
9378         "",
9379         "-s|--super <superblock>     use this superblock copy",
9380         "-b|--backup                 use the backup root copy",
9381         "--repair                    try to repair the filesystem",
9382         "--readonly                  run in read-only mode (default)",
9383         "--init-csum-tree            create a new CRC tree",
9384         "--init-extent-tree          create a new extent tree",
9385         "--check-data-csum           verify checkums of data blocks",
9386         "-Q|--qgroup-report           print a report on qgroup consistency",
9387         "-E|--subvol-extents <subvolid>",
9388         "                            print subvolume extents and sharing state",
9389         "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
9390         "-p|--progress               indicate progress",
9391         NULL
9392 };
9393
9394 int cmd_check(int argc, char **argv)
9395 {
9396         struct cache_tree root_cache;
9397         struct btrfs_root *root;
9398         struct btrfs_fs_info *info;
9399         u64 bytenr = 0;
9400         u64 subvolid = 0;
9401         u64 tree_root_bytenr = 0;
9402         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9403         int ret;
9404         u64 num;
9405         int init_csum_tree = 0;
9406         int readonly = 0;
9407         int qgroup_report = 0;
9408         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9409
9410         while(1) {
9411                 int c;
9412                 enum { OPT_REPAIR = 257, OPT_INIT_CSUM, OPT_INIT_EXTENT,
9413                         OPT_CHECK_CSUM, OPT_READONLY };
9414                 static const struct option long_options[] = {
9415                         { "super", required_argument, NULL, 's' },
9416                         { "repair", no_argument, NULL, OPT_REPAIR },
9417                         { "readonly", no_argument, NULL, OPT_READONLY },
9418                         { "init-csum-tree", no_argument, NULL, OPT_INIT_CSUM },
9419                         { "init-extent-tree", no_argument, NULL, OPT_INIT_EXTENT },
9420                         { "check-data-csum", no_argument, NULL, OPT_CHECK_CSUM },
9421                         { "backup", no_argument, NULL, 'b' },
9422                         { "subvol-extents", required_argument, NULL, 'E' },
9423                         { "qgroup-report", no_argument, NULL, 'Q' },
9424                         { "tree-root", required_argument, NULL, 'r' },
9425                         { "progress", no_argument, NULL, 'p' },
9426                         { NULL, 0, NULL, 0}
9427                 };
9428
9429                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9430                 if (c < 0)
9431                         break;
9432                 switch(c) {
9433                         case 'a': /* ignored */ break;
9434                         case 'b':
9435                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9436                                 break;
9437                         case 's':
9438                                 num = arg_strtou64(optarg);
9439                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9440                                         fprintf(stderr,
9441                                                 "ERROR: super mirror should be less than: %d\n",
9442                                                 BTRFS_SUPER_MIRROR_MAX);
9443                                         exit(1);
9444                                 }
9445                                 bytenr = btrfs_sb_offset(((int)num));
9446                                 printf("using SB copy %llu, bytenr %llu\n", num,
9447                                        (unsigned long long)bytenr);
9448                                 break;
9449                         case 'Q':
9450                                 qgroup_report = 1;
9451                                 break;
9452                         case 'E':
9453                                 subvolid = arg_strtou64(optarg);
9454                                 break;
9455                         case 'r':
9456                                 tree_root_bytenr = arg_strtou64(optarg);
9457                                 break;
9458                         case 'p':
9459                                 ctx.progress_enabled = true;
9460                                 break;
9461                         case '?':
9462                         case 'h':
9463                                 usage(cmd_check_usage);
9464                         case OPT_REPAIR:
9465                                 printf("enabling repair mode\n");
9466                                 repair = 1;
9467                                 ctree_flags |= OPEN_CTREE_WRITES;
9468                                 break;
9469                         case OPT_READONLY:
9470                                 readonly = 1;
9471                                 break;
9472                         case OPT_INIT_CSUM:
9473                                 printf("Creating a new CRC tree\n");
9474                                 init_csum_tree = 1;
9475                                 repair = 1;
9476                                 ctree_flags |= OPEN_CTREE_WRITES;
9477                                 break;
9478                         case OPT_INIT_EXTENT:
9479                                 init_extent_tree = 1;
9480                                 ctree_flags |= (OPEN_CTREE_WRITES |
9481                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9482                                 repair = 1;
9483                                 break;
9484                         case OPT_CHECK_CSUM:
9485                                 check_data_csum = 1;
9486                                 break;
9487                 }
9488         }
9489         argc = argc - optind;
9490
9491         if (check_argc_exact(argc, 1))
9492                 usage(cmd_check_usage);
9493
9494         if (ctx.progress_enabled) {
9495                 ctx.tp = TASK_NOTHING;
9496                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9497         }
9498
9499         /* This check is the only reason for --readonly to exist */
9500         if (readonly && repair) {
9501                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9502                 exit(1);
9503         }
9504
9505         radix_tree_init();
9506         cache_tree_init(&root_cache);
9507
9508         if((ret = check_mounted(argv[optind])) < 0) {
9509                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9510                 goto err_out;
9511         } else if(ret) {
9512                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9513                 ret = -EBUSY;
9514                 goto err_out;
9515         }
9516
9517         /* only allow partial opening under repair mode */
9518         if (repair)
9519                 ctree_flags |= OPEN_CTREE_PARTIAL;
9520
9521         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9522                                   ctree_flags);
9523         if (!info) {
9524                 fprintf(stderr, "Couldn't open file system\n");
9525                 ret = -EIO;
9526                 goto err_out;
9527         }
9528
9529         global_info = info;
9530         root = info->fs_root;
9531
9532         /*
9533          * repair mode will force us to commit transaction which
9534          * will make us fail to load log tree when mounting.
9535          */
9536         if (repair && btrfs_super_log_root(info->super_copy)) {
9537                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9538                 if (!ret) {
9539                         ret = 1;
9540                         goto close_out;
9541                 }
9542                 ret = zero_log_tree(root);
9543                 if (ret) {
9544                         fprintf(stderr, "fail to zero log tree\n");
9545                         goto close_out;
9546                 }
9547         }
9548
9549         uuid_unparse(info->super_copy->fsid, uuidbuf);
9550         if (qgroup_report) {
9551                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9552                        uuidbuf);
9553                 ret = qgroup_verify_all(info);
9554                 if (ret == 0)
9555                         print_qgroup_report(1);
9556                 goto close_out;
9557         }
9558         if (subvolid) {
9559                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9560                        subvolid, argv[optind], uuidbuf);
9561                 ret = print_extent_state(info, subvolid);
9562                 goto close_out;
9563         }
9564         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9565
9566         if (!extent_buffer_uptodate(info->tree_root->node) ||
9567             !extent_buffer_uptodate(info->dev_root->node) ||
9568             !extent_buffer_uptodate(info->chunk_root->node)) {
9569                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9570                 ret = -EIO;
9571                 goto close_out;
9572         }
9573
9574         if (init_extent_tree || init_csum_tree) {
9575                 struct btrfs_trans_handle *trans;
9576
9577                 trans = btrfs_start_transaction(info->extent_root, 0);
9578                 if (IS_ERR(trans)) {
9579                         fprintf(stderr, "Error starting transaction\n");
9580                         ret = PTR_ERR(trans);
9581                         goto close_out;
9582                 }
9583
9584                 if (init_extent_tree) {
9585                         printf("Creating a new extent tree\n");
9586                         ret = reinit_extent_tree(trans, info);
9587                         if (ret)
9588                                 goto close_out;
9589                 }
9590
9591                 if (init_csum_tree) {
9592                         fprintf(stderr, "Reinit crc root\n");
9593                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9594                         if (ret) {
9595                                 fprintf(stderr, "crc root initialization failed\n");
9596                                 ret = -EIO;
9597                                 goto close_out;
9598                         }
9599
9600                         ret = fill_csum_tree(trans, info->csum_root,
9601                                              init_extent_tree);
9602                         if (ret) {
9603                                 fprintf(stderr, "crc refilling failed\n");
9604                                 return -EIO;
9605                         }
9606                 }
9607                 /*
9608                  * Ok now we commit and run the normal fsck, which will add
9609                  * extent entries for all of the items it finds.
9610                  */
9611                 ret = btrfs_commit_transaction(trans, info->extent_root);
9612                 if (ret)
9613                         goto close_out;
9614         }
9615         if (!extent_buffer_uptodate(info->extent_root->node)) {
9616                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9617                 ret = -EIO;
9618                 goto close_out;
9619         }
9620         if (!extent_buffer_uptodate(info->csum_root->node)) {
9621                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9622                 ret = -EIO;
9623                 goto close_out;
9624         }
9625
9626         if (!ctx.progress_enabled)
9627                 fprintf(stderr, "checking extents\n");
9628         ret = check_chunks_and_extents(root);
9629         if (ret)
9630                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9631
9632         ret = repair_root_items(info);
9633         if (ret < 0)
9634                 goto close_out;
9635         if (repair) {
9636                 fprintf(stderr, "Fixed %d roots.\n", ret);
9637                 ret = 0;
9638         } else if (ret > 0) {
9639                 fprintf(stderr,
9640                        "Found %d roots with an outdated root item.\n",
9641                        ret);
9642                 fprintf(stderr,
9643                         "Please run a filesystem check with the option --repair to fix them.\n");
9644                 ret = 1;
9645                 goto close_out;
9646         }
9647
9648         if (!ctx.progress_enabled)
9649                 fprintf(stderr, "checking free space cache\n");
9650         ret = check_space_cache(root);
9651         if (ret)
9652                 goto out;
9653
9654         /*
9655          * We used to have to have these hole extents in between our real
9656          * extents so if we don't have this flag set we need to make sure there
9657          * are no gaps in the file extents for inodes, otherwise we can just
9658          * ignore it when this happens.
9659          */
9660         no_holes = btrfs_fs_incompat(root->fs_info,
9661                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9662         if (!ctx.progress_enabled)
9663                 fprintf(stderr, "checking fs roots\n");
9664         ret = check_fs_roots(root, &root_cache);
9665         if (ret)
9666                 goto out;
9667
9668         fprintf(stderr, "checking csums\n");
9669         ret = check_csums(root);
9670         if (ret)
9671                 goto out;
9672
9673         fprintf(stderr, "checking root refs\n");
9674         ret = check_root_refs(root, &root_cache);
9675         if (ret)
9676                 goto out;
9677
9678         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9679                 struct extent_buffer *eb;
9680
9681                 eb = list_first_entry(&root->fs_info->recow_ebs,
9682                                       struct extent_buffer, recow);
9683                 list_del_init(&eb->recow);
9684                 ret = recow_extent_buffer(root, eb);
9685                 if (ret)
9686                         break;
9687         }
9688
9689         while (!list_empty(&delete_items)) {
9690                 struct bad_item *bad;
9691
9692                 bad = list_first_entry(&delete_items, struct bad_item, list);
9693                 list_del_init(&bad->list);
9694                 if (repair)
9695                         ret = delete_bad_item(root, bad);
9696                 free(bad);
9697         }
9698
9699         if (info->quota_enabled) {
9700                 int err;
9701                 fprintf(stderr, "checking quota groups\n");
9702                 err = qgroup_verify_all(info);
9703                 if (err)
9704                         goto out;
9705         }
9706
9707         if (!list_empty(&root->fs_info->recow_ebs)) {
9708                 fprintf(stderr, "Transid errors in file system\n");
9709                 ret = 1;
9710         }
9711 out:
9712         print_qgroup_report(0);
9713         if (found_old_backref) { /*
9714                  * there was a disk format change when mixed
9715                  * backref was in testing tree. The old format
9716                  * existed about one week.
9717                  */
9718                 printf("\n * Found old mixed backref format. "
9719                        "The old format is not supported! *"
9720                        "\n * Please mount the FS in readonly mode, "
9721                        "backup data and re-format the FS. *\n\n");
9722                 ret = 1;
9723         }
9724         printf("found %llu bytes used err is %d\n",
9725                (unsigned long long)bytes_used, ret);
9726         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9727         printf("total tree bytes: %llu\n",
9728                (unsigned long long)total_btree_bytes);
9729         printf("total fs tree bytes: %llu\n",
9730                (unsigned long long)total_fs_tree_bytes);
9731         printf("total extent tree bytes: %llu\n",
9732                (unsigned long long)total_extent_tree_bytes);
9733         printf("btree space waste bytes: %llu\n",
9734                (unsigned long long)btree_space_waste);
9735         printf("file data blocks allocated: %llu\n referenced %llu\n",
9736                 (unsigned long long)data_bytes_allocated,
9737                 (unsigned long long)data_bytes_referenced);
9738
9739         free_root_recs_tree(&root_cache);
9740 close_out:
9741         close_ctree(root);
9742 err_out:
9743         if (ctx.progress_enabled)
9744                 task_deinit(ctx.info);
9745
9746         return ret;
9747 }