btrfs-progs: check: original mode: Check inline extent size
[platform/upstream/btrfs-progs.git] / check / main.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46 #include "check/mode-common.h"
47 #include "check/mode-original.h"
48 #include "check/mode-lowmem.h"
49
50 enum task_position {
51         TASK_EXTENTS,
52         TASK_FREE_SPACE,
53         TASK_FS_ROOTS,
54         TASK_NOTHING, /* have to be the last element */
55 };
56
57 struct task_ctx {
58         int progress_enabled;
59         enum task_position tp;
60
61         struct task_info *info;
62 };
63
64 u64 bytes_used = 0;
65 u64 total_csum_bytes = 0;
66 u64 total_btree_bytes = 0;
67 u64 total_fs_tree_bytes = 0;
68 u64 total_extent_tree_bytes = 0;
69 u64 btree_space_waste = 0;
70 u64 data_bytes_allocated = 0;
71 u64 data_bytes_referenced = 0;
72 LIST_HEAD(duplicate_extents);
73 LIST_HEAD(delete_items);
74 int no_holes = 0;
75 int init_extent_tree = 0;
76 int check_data_csum = 0;
77 struct btrfs_fs_info *global_info;
78 struct task_ctx ctx = { 0 };
79 struct cache_tree *roots_info_cache = NULL;
80
81 enum btrfs_check_mode {
82         CHECK_MODE_ORIGINAL,
83         CHECK_MODE_LOWMEM,
84         CHECK_MODE_UNKNOWN,
85         CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
86 };
87
88 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
89
90 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
91 {
92         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
93         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
94         struct data_backref *back1 = to_data_backref(ext1);
95         struct data_backref *back2 = to_data_backref(ext2);
96
97         WARN_ON(!ext1->is_data);
98         WARN_ON(!ext2->is_data);
99
100         /* parent and root are a union, so this covers both */
101         if (back1->parent > back2->parent)
102                 return 1;
103         if (back1->parent < back2->parent)
104                 return -1;
105
106         /* This is a full backref and the parents match. */
107         if (back1->node.full_backref)
108                 return 0;
109
110         if (back1->owner > back2->owner)
111                 return 1;
112         if (back1->owner < back2->owner)
113                 return -1;
114
115         if (back1->offset > back2->offset)
116                 return 1;
117         if (back1->offset < back2->offset)
118                 return -1;
119
120         if (back1->found_ref && back2->found_ref) {
121                 if (back1->disk_bytenr > back2->disk_bytenr)
122                         return 1;
123                 if (back1->disk_bytenr < back2->disk_bytenr)
124                         return -1;
125
126                 if (back1->bytes > back2->bytes)
127                         return 1;
128                 if (back1->bytes < back2->bytes)
129                         return -1;
130         }
131
132         return 0;
133 }
134
135 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
136 {
137         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
138         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
139         struct tree_backref *back1 = to_tree_backref(ext1);
140         struct tree_backref *back2 = to_tree_backref(ext2);
141
142         WARN_ON(ext1->is_data);
143         WARN_ON(ext2->is_data);
144
145         /* parent and root are a union, so this covers both */
146         if (back1->parent > back2->parent)
147                 return 1;
148         if (back1->parent < back2->parent)
149                 return -1;
150
151         return 0;
152 }
153
154 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
155 {
156         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
157         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
158
159         if (ext1->is_data > ext2->is_data)
160                 return 1;
161
162         if (ext1->is_data < ext2->is_data)
163                 return -1;
164
165         if (ext1->full_backref > ext2->full_backref)
166                 return 1;
167         if (ext1->full_backref < ext2->full_backref)
168                 return -1;
169
170         if (ext1->is_data)
171                 return compare_data_backref(node1, node2);
172         else
173                 return compare_tree_backref(node1, node2);
174 }
175
176
177 static void *print_status_check(void *p)
178 {
179         struct task_ctx *priv = p;
180         const char work_indicator[] = { '.', 'o', 'O', 'o' };
181         uint32_t count = 0;
182         static char *task_position_string[] = {
183                 "checking extents",
184                 "checking free space cache",
185                 "checking fs roots",
186         };
187
188         task_period_start(priv->info, 1000 /* 1s */);
189
190         if (priv->tp == TASK_NOTHING)
191                 return NULL;
192
193         while (1) {
194                 printf("%s [%c]\r", task_position_string[priv->tp],
195                                 work_indicator[count % 4]);
196                 count++;
197                 fflush(stdout);
198                 task_period_wait(priv->info);
199         }
200         return NULL;
201 }
202
203 static int print_status_return(void *p)
204 {
205         printf("\n");
206         fflush(stdout);
207
208         return 0;
209 }
210
211 static enum btrfs_check_mode parse_check_mode(const char *str)
212 {
213         if (strcmp(str, "lowmem") == 0)
214                 return CHECK_MODE_LOWMEM;
215         if (strcmp(str, "orig") == 0)
216                 return CHECK_MODE_ORIGINAL;
217         if (strcmp(str, "original") == 0)
218                 return CHECK_MODE_ORIGINAL;
219
220         return CHECK_MODE_UNKNOWN;
221 }
222
223 /* Compatible function to allow reuse of old codes */
224 static u64 first_extent_gap(struct rb_root *holes)
225 {
226         struct file_extent_hole *hole;
227
228         if (RB_EMPTY_ROOT(holes))
229                 return (u64)-1;
230
231         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
232         return hole->start;
233 }
234
235 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
236 {
237         struct file_extent_hole *hole1;
238         struct file_extent_hole *hole2;
239
240         hole1 = rb_entry(node1, struct file_extent_hole, node);
241         hole2 = rb_entry(node2, struct file_extent_hole, node);
242
243         if (hole1->start > hole2->start)
244                 return -1;
245         if (hole1->start < hole2->start)
246                 return 1;
247         /* Now hole1->start == hole2->start */
248         if (hole1->len >= hole2->len)
249                 /*
250                  * Hole 1 will be merge center
251                  * Same hole will be merged later
252                  */
253                 return -1;
254         /* Hole 2 will be merge center */
255         return 1;
256 }
257
258 /*
259  * Add a hole to the record
260  *
261  * This will do hole merge for copy_file_extent_holes(),
262  * which will ensure there won't be continuous holes.
263  */
264 static int add_file_extent_hole(struct rb_root *holes,
265                                 u64 start, u64 len)
266 {
267         struct file_extent_hole *hole;
268         struct file_extent_hole *prev = NULL;
269         struct file_extent_hole *next = NULL;
270
271         hole = malloc(sizeof(*hole));
272         if (!hole)
273                 return -ENOMEM;
274         hole->start = start;
275         hole->len = len;
276         /* Since compare will not return 0, no -EEXIST will happen */
277         rb_insert(holes, &hole->node, compare_hole);
278
279         /* simple merge with previous hole */
280         if (rb_prev(&hole->node))
281                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
282                                 node);
283         if (prev && prev->start + prev->len >= hole->start) {
284                 hole->len = hole->start + hole->len - prev->start;
285                 hole->start = prev->start;
286                 rb_erase(&prev->node, holes);
287                 free(prev);
288                 prev = NULL;
289         }
290
291         /* iterate merge with next holes */
292         while (1) {
293                 if (!rb_next(&hole->node))
294                         break;
295                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
296                                         node);
297                 if (hole->start + hole->len >= next->start) {
298                         if (hole->start + hole->len <= next->start + next->len)
299                                 hole->len = next->start + next->len -
300                                             hole->start;
301                         rb_erase(&next->node, holes);
302                         free(next);
303                         next = NULL;
304                 } else
305                         break;
306         }
307         return 0;
308 }
309
310 static int compare_hole_range(struct rb_node *node, void *data)
311 {
312         struct file_extent_hole *hole;
313         u64 start;
314
315         hole = (struct file_extent_hole *)data;
316         start = hole->start;
317
318         hole = rb_entry(node, struct file_extent_hole, node);
319         if (start < hole->start)
320                 return -1;
321         if (start >= hole->start && start < hole->start + hole->len)
322                 return 0;
323         return 1;
324 }
325
326 /*
327  * Delete a hole in the record
328  *
329  * This will do the hole split and is much restrict than add.
330  */
331 static int del_file_extent_hole(struct rb_root *holes,
332                                 u64 start, u64 len)
333 {
334         struct file_extent_hole *hole;
335         struct file_extent_hole tmp;
336         u64 prev_start = 0;
337         u64 prev_len = 0;
338         u64 next_start = 0;
339         u64 next_len = 0;
340         struct rb_node *node;
341         int have_prev = 0;
342         int have_next = 0;
343         int ret = 0;
344
345         tmp.start = start;
346         tmp.len = len;
347         node = rb_search(holes, &tmp, compare_hole_range, NULL);
348         if (!node)
349                 return -EEXIST;
350         hole = rb_entry(node, struct file_extent_hole, node);
351         if (start + len > hole->start + hole->len)
352                 return -EEXIST;
353
354         /*
355          * Now there will be no overlap, delete the hole and re-add the
356          * split(s) if they exists.
357          */
358         if (start > hole->start) {
359                 prev_start = hole->start;
360                 prev_len = start - hole->start;
361                 have_prev = 1;
362         }
363         if (hole->start + hole->len > start + len) {
364                 next_start = start + len;
365                 next_len = hole->start + hole->len - start - len;
366                 have_next = 1;
367         }
368         rb_erase(node, holes);
369         free(hole);
370         if (have_prev) {
371                 ret = add_file_extent_hole(holes, prev_start, prev_len);
372                 if (ret < 0)
373                         return ret;
374         }
375         if (have_next) {
376                 ret = add_file_extent_hole(holes, next_start, next_len);
377                 if (ret < 0)
378                         return ret;
379         }
380         return 0;
381 }
382
383 static int copy_file_extent_holes(struct rb_root *dst,
384                                   struct rb_root *src)
385 {
386         struct file_extent_hole *hole;
387         struct rb_node *node;
388         int ret = 0;
389
390         node = rb_first(src);
391         while (node) {
392                 hole = rb_entry(node, struct file_extent_hole, node);
393                 ret = add_file_extent_hole(dst, hole->start, hole->len);
394                 if (ret)
395                         break;
396                 node = rb_next(node);
397         }
398         return ret;
399 }
400
401 static void free_file_extent_holes(struct rb_root *holes)
402 {
403         struct rb_node *node;
404         struct file_extent_hole *hole;
405
406         node = rb_first(holes);
407         while (node) {
408                 hole = rb_entry(node, struct file_extent_hole, node);
409                 rb_erase(node, holes);
410                 free(hole);
411                 node = rb_first(holes);
412         }
413 }
414
415 static void record_root_in_trans(struct btrfs_trans_handle *trans,
416                                  struct btrfs_root *root)
417 {
418         if (root->last_trans != trans->transid) {
419                 root->track_dirty = 1;
420                 root->last_trans = trans->transid;
421                 root->commit_root = root->node;
422                 extent_buffer_get(root->node);
423         }
424 }
425
426 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
427 {
428         struct device_record *rec1;
429         struct device_record *rec2;
430
431         rec1 = rb_entry(node1, struct device_record, node);
432         rec2 = rb_entry(node2, struct device_record, node);
433         if (rec1->devid > rec2->devid)
434                 return -1;
435         else if (rec1->devid < rec2->devid)
436                 return 1;
437         else
438                 return 0;
439 }
440
441 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
442 {
443         struct inode_record *rec;
444         struct inode_backref *backref;
445         struct inode_backref *orig;
446         struct inode_backref *tmp;
447         struct orphan_data_extent *src_orphan;
448         struct orphan_data_extent *dst_orphan;
449         struct rb_node *rb;
450         size_t size;
451         int ret;
452
453         rec = malloc(sizeof(*rec));
454         if (!rec)
455                 return ERR_PTR(-ENOMEM);
456         memcpy(rec, orig_rec, sizeof(*rec));
457         rec->refs = 1;
458         INIT_LIST_HEAD(&rec->backrefs);
459         INIT_LIST_HEAD(&rec->orphan_extents);
460         rec->holes = RB_ROOT;
461
462         list_for_each_entry(orig, &orig_rec->backrefs, list) {
463                 size = sizeof(*orig) + orig->namelen + 1;
464                 backref = malloc(size);
465                 if (!backref) {
466                         ret = -ENOMEM;
467                         goto cleanup;
468                 }
469                 memcpy(backref, orig, size);
470                 list_add_tail(&backref->list, &rec->backrefs);
471         }
472         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
473                 dst_orphan = malloc(sizeof(*dst_orphan));
474                 if (!dst_orphan) {
475                         ret = -ENOMEM;
476                         goto cleanup;
477                 }
478                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
479                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
480         }
481         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
482         if (ret < 0)
483                 goto cleanup_rb;
484
485         return rec;
486
487 cleanup_rb:
488         rb = rb_first(&rec->holes);
489         while (rb) {
490                 struct file_extent_hole *hole;
491
492                 hole = rb_entry(rb, struct file_extent_hole, node);
493                 rb = rb_next(rb);
494                 free(hole);
495         }
496
497 cleanup:
498         if (!list_empty(&rec->backrefs))
499                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
500                         list_del(&orig->list);
501                         free(orig);
502                 }
503
504         if (!list_empty(&rec->orphan_extents))
505                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
506                         list_del(&orig->list);
507                         free(orig);
508                 }
509
510         free(rec);
511
512         return ERR_PTR(ret);
513 }
514
515 static void print_orphan_data_extents(struct list_head *orphan_extents,
516                                       u64 objectid)
517 {
518         struct orphan_data_extent *orphan;
519
520         if (list_empty(orphan_extents))
521                 return;
522         printf("The following data extent is lost in tree %llu:\n",
523                objectid);
524         list_for_each_entry(orphan, orphan_extents, list) {
525                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
526                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
527                        orphan->disk_len);
528         }
529 }
530
531 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
532 {
533         u64 root_objectid = root->root_key.objectid;
534         int errors = rec->errors;
535
536         if (!errors)
537                 return;
538         /* reloc root errors, we print its corresponding fs root objectid*/
539         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
540                 root_objectid = root->root_key.offset;
541                 fprintf(stderr, "reloc");
542         }
543         fprintf(stderr, "root %llu inode %llu errors %x",
544                 (unsigned long long) root_objectid,
545                 (unsigned long long) rec->ino, rec->errors);
546
547         if (errors & I_ERR_NO_INODE_ITEM)
548                 fprintf(stderr, ", no inode item");
549         if (errors & I_ERR_NO_ORPHAN_ITEM)
550                 fprintf(stderr, ", no orphan item");
551         if (errors & I_ERR_DUP_INODE_ITEM)
552                 fprintf(stderr, ", dup inode item");
553         if (errors & I_ERR_DUP_DIR_INDEX)
554                 fprintf(stderr, ", dup dir index");
555         if (errors & I_ERR_ODD_DIR_ITEM)
556                 fprintf(stderr, ", odd dir item");
557         if (errors & I_ERR_ODD_FILE_EXTENT)
558                 fprintf(stderr, ", odd file extent");
559         if (errors & I_ERR_BAD_FILE_EXTENT)
560                 fprintf(stderr, ", bad file extent");
561         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
562                 fprintf(stderr, ", file extent overlap");
563         if (errors & I_ERR_FILE_EXTENT_TOO_LARGE)
564                 fprintf(stderr, ", inline file extent too large");
565         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
566                 fprintf(stderr, ", file extent discount");
567         if (errors & I_ERR_DIR_ISIZE_WRONG)
568                 fprintf(stderr, ", dir isize wrong");
569         if (errors & I_ERR_FILE_NBYTES_WRONG)
570                 fprintf(stderr, ", nbytes wrong");
571         if (errors & I_ERR_ODD_CSUM_ITEM)
572                 fprintf(stderr, ", odd csum item");
573         if (errors & I_ERR_SOME_CSUM_MISSING)
574                 fprintf(stderr, ", some csum missing");
575         if (errors & I_ERR_LINK_COUNT_WRONG)
576                 fprintf(stderr, ", link count wrong");
577         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
578                 fprintf(stderr, ", orphan file extent");
579         fprintf(stderr, "\n");
580         /* Print the orphan extents if needed */
581         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
582                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
583
584         /* Print the holes if needed */
585         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
586                 struct file_extent_hole *hole;
587                 struct rb_node *node;
588                 int found = 0;
589
590                 node = rb_first(&rec->holes);
591                 fprintf(stderr, "Found file extent holes:\n");
592                 while (node) {
593                         found = 1;
594                         hole = rb_entry(node, struct file_extent_hole, node);
595                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
596                                 hole->start, hole->len);
597                         node = rb_next(node);
598                 }
599                 if (!found)
600                         fprintf(stderr, "\tstart: 0, len: %llu\n",
601                                 round_up(rec->isize,
602                                          root->fs_info->sectorsize));
603         }
604 }
605
606 static void print_ref_error(int errors)
607 {
608         if (errors & REF_ERR_NO_DIR_ITEM)
609                 fprintf(stderr, ", no dir item");
610         if (errors & REF_ERR_NO_DIR_INDEX)
611                 fprintf(stderr, ", no dir index");
612         if (errors & REF_ERR_NO_INODE_REF)
613                 fprintf(stderr, ", no inode ref");
614         if (errors & REF_ERR_DUP_DIR_ITEM)
615                 fprintf(stderr, ", dup dir item");
616         if (errors & REF_ERR_DUP_DIR_INDEX)
617                 fprintf(stderr, ", dup dir index");
618         if (errors & REF_ERR_DUP_INODE_REF)
619                 fprintf(stderr, ", dup inode ref");
620         if (errors & REF_ERR_INDEX_UNMATCH)
621                 fprintf(stderr, ", index mismatch");
622         if (errors & REF_ERR_FILETYPE_UNMATCH)
623                 fprintf(stderr, ", filetype mismatch");
624         if (errors & REF_ERR_NAME_TOO_LONG)
625                 fprintf(stderr, ", name too long");
626         if (errors & REF_ERR_NO_ROOT_REF)
627                 fprintf(stderr, ", no root ref");
628         if (errors & REF_ERR_NO_ROOT_BACKREF)
629                 fprintf(stderr, ", no root backref");
630         if (errors & REF_ERR_DUP_ROOT_REF)
631                 fprintf(stderr, ", dup root ref");
632         if (errors & REF_ERR_DUP_ROOT_BACKREF)
633                 fprintf(stderr, ", dup root backref");
634         fprintf(stderr, "\n");
635 }
636
637 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
638                                           u64 ino, int mod)
639 {
640         struct ptr_node *node;
641         struct cache_extent *cache;
642         struct inode_record *rec = NULL;
643         int ret;
644
645         cache = lookup_cache_extent(inode_cache, ino, 1);
646         if (cache) {
647                 node = container_of(cache, struct ptr_node, cache);
648                 rec = node->data;
649                 if (mod && rec->refs > 1) {
650                         node->data = clone_inode_rec(rec);
651                         if (IS_ERR(node->data))
652                                 return node->data;
653                         rec->refs--;
654                         rec = node->data;
655                 }
656         } else if (mod) {
657                 rec = calloc(1, sizeof(*rec));
658                 if (!rec)
659                         return ERR_PTR(-ENOMEM);
660                 rec->ino = ino;
661                 rec->extent_start = (u64)-1;
662                 rec->refs = 1;
663                 INIT_LIST_HEAD(&rec->backrefs);
664                 INIT_LIST_HEAD(&rec->orphan_extents);
665                 rec->holes = RB_ROOT;
666
667                 node = malloc(sizeof(*node));
668                 if (!node) {
669                         free(rec);
670                         return ERR_PTR(-ENOMEM);
671                 }
672                 node->cache.start = ino;
673                 node->cache.size = 1;
674                 node->data = rec;
675
676                 if (ino == BTRFS_FREE_INO_OBJECTID)
677                         rec->found_link = 1;
678
679                 ret = insert_cache_extent(inode_cache, &node->cache);
680                 if (ret)
681                         return ERR_PTR(-EEXIST);
682         }
683         return rec;
684 }
685
686 static void free_orphan_data_extents(struct list_head *orphan_extents)
687 {
688         struct orphan_data_extent *orphan;
689
690         while (!list_empty(orphan_extents)) {
691                 orphan = list_entry(orphan_extents->next,
692                                     struct orphan_data_extent, list);
693                 list_del(&orphan->list);
694                 free(orphan);
695         }
696 }
697
698 static void free_inode_rec(struct inode_record *rec)
699 {
700         struct inode_backref *backref;
701
702         if (--rec->refs > 0)
703                 return;
704
705         while (!list_empty(&rec->backrefs)) {
706                 backref = to_inode_backref(rec->backrefs.next);
707                 list_del(&backref->list);
708                 free(backref);
709         }
710         free_orphan_data_extents(&rec->orphan_extents);
711         free_file_extent_holes(&rec->holes);
712         free(rec);
713 }
714
715 static int can_free_inode_rec(struct inode_record *rec)
716 {
717         if (!rec->errors && rec->checked && rec->found_inode_item &&
718             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
719                 return 1;
720         return 0;
721 }
722
723 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
724                                  struct inode_record *rec)
725 {
726         struct cache_extent *cache;
727         struct inode_backref *tmp, *backref;
728         struct ptr_node *node;
729         u8 filetype;
730
731         if (!rec->found_inode_item)
732                 return;
733
734         filetype = imode_to_type(rec->imode);
735         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
736                 if (backref->found_dir_item && backref->found_dir_index) {
737                         if (backref->filetype != filetype)
738                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
739                         if (!backref->errors && backref->found_inode_ref &&
740                             rec->nlink == rec->found_link) {
741                                 list_del(&backref->list);
742                                 free(backref);
743                         }
744                 }
745         }
746
747         if (!rec->checked || rec->merging)
748                 return;
749
750         if (S_ISDIR(rec->imode)) {
751                 if (rec->found_size != rec->isize)
752                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
753                 if (rec->found_file_extent)
754                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
755         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
756                 if (rec->found_dir_item)
757                         rec->errors |= I_ERR_ODD_DIR_ITEM;
758                 if (rec->found_size != rec->nbytes)
759                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
760                 if (rec->nlink > 0 && !no_holes &&
761                     (rec->extent_end < rec->isize ||
762                      first_extent_gap(&rec->holes) < rec->isize))
763                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
764         }
765
766         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
767                 if (rec->found_csum_item && rec->nodatasum)
768                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
769                 if (rec->some_csum_missing && !rec->nodatasum)
770                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
771         }
772
773         BUG_ON(rec->refs != 1);
774         if (can_free_inode_rec(rec)) {
775                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
776                 node = container_of(cache, struct ptr_node, cache);
777                 BUG_ON(node->data != rec);
778                 remove_cache_extent(inode_cache, &node->cache);
779                 free(node);
780                 free_inode_rec(rec);
781         }
782 }
783
784 static int check_orphan_item(struct btrfs_root *root, u64 ino)
785 {
786         struct btrfs_path path;
787         struct btrfs_key key;
788         int ret;
789
790         key.objectid = BTRFS_ORPHAN_OBJECTID;
791         key.type = BTRFS_ORPHAN_ITEM_KEY;
792         key.offset = ino;
793
794         btrfs_init_path(&path);
795         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
796         btrfs_release_path(&path);
797         if (ret > 0)
798                 ret = -ENOENT;
799         return ret;
800 }
801
802 static int process_inode_item(struct extent_buffer *eb,
803                               int slot, struct btrfs_key *key,
804                               struct shared_node *active_node)
805 {
806         struct inode_record *rec;
807         struct btrfs_inode_item *item;
808
809         rec = active_node->current;
810         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
811         if (rec->found_inode_item) {
812                 rec->errors |= I_ERR_DUP_INODE_ITEM;
813                 return 1;
814         }
815         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
816         rec->nlink = btrfs_inode_nlink(eb, item);
817         rec->isize = btrfs_inode_size(eb, item);
818         rec->nbytes = btrfs_inode_nbytes(eb, item);
819         rec->imode = btrfs_inode_mode(eb, item);
820         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
821                 rec->nodatasum = 1;
822         rec->found_inode_item = 1;
823         if (rec->nlink == 0)
824                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
825         maybe_free_inode_rec(&active_node->inode_cache, rec);
826         return 0;
827 }
828
829 static struct inode_backref *get_inode_backref(struct inode_record *rec,
830                                                 const char *name,
831                                                 int namelen, u64 dir)
832 {
833         struct inode_backref *backref;
834
835         list_for_each_entry(backref, &rec->backrefs, list) {
836                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
837                         break;
838                 if (backref->dir != dir || backref->namelen != namelen)
839                         continue;
840                 if (memcmp(name, backref->name, namelen))
841                         continue;
842                 return backref;
843         }
844
845         backref = malloc(sizeof(*backref) + namelen + 1);
846         if (!backref)
847                 return NULL;
848         memset(backref, 0, sizeof(*backref));
849         backref->dir = dir;
850         backref->namelen = namelen;
851         memcpy(backref->name, name, namelen);
852         backref->name[namelen] = '\0';
853         list_add_tail(&backref->list, &rec->backrefs);
854         return backref;
855 }
856
857 static int add_inode_backref(struct cache_tree *inode_cache,
858                              u64 ino, u64 dir, u64 index,
859                              const char *name, int namelen,
860                              u8 filetype, u8 itemtype, int errors)
861 {
862         struct inode_record *rec;
863         struct inode_backref *backref;
864
865         rec = get_inode_rec(inode_cache, ino, 1);
866         BUG_ON(IS_ERR(rec));
867         backref = get_inode_backref(rec, name, namelen, dir);
868         BUG_ON(!backref);
869         if (errors)
870                 backref->errors |= errors;
871         if (itemtype == BTRFS_DIR_INDEX_KEY) {
872                 if (backref->found_dir_index)
873                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
874                 if (backref->found_inode_ref && backref->index != index)
875                         backref->errors |= REF_ERR_INDEX_UNMATCH;
876                 if (backref->found_dir_item && backref->filetype != filetype)
877                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
878
879                 backref->index = index;
880                 backref->filetype = filetype;
881                 backref->found_dir_index = 1;
882         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
883                 rec->found_link++;
884                 if (backref->found_dir_item)
885                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
886                 if (backref->found_dir_index && backref->filetype != filetype)
887                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
888
889                 backref->filetype = filetype;
890                 backref->found_dir_item = 1;
891         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
892                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
893                 if (backref->found_inode_ref)
894                         backref->errors |= REF_ERR_DUP_INODE_REF;
895                 if (backref->found_dir_index && backref->index != index)
896                         backref->errors |= REF_ERR_INDEX_UNMATCH;
897                 else
898                         backref->index = index;
899
900                 backref->ref_type = itemtype;
901                 backref->found_inode_ref = 1;
902         } else {
903                 BUG_ON(1);
904         }
905
906         maybe_free_inode_rec(inode_cache, rec);
907         return 0;
908 }
909
910 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
911                             struct cache_tree *dst_cache)
912 {
913         struct inode_backref *backref;
914         u32 dir_count = 0;
915         int ret = 0;
916
917         dst->merging = 1;
918         list_for_each_entry(backref, &src->backrefs, list) {
919                 if (backref->found_dir_index) {
920                         add_inode_backref(dst_cache, dst->ino, backref->dir,
921                                         backref->index, backref->name,
922                                         backref->namelen, backref->filetype,
923                                         BTRFS_DIR_INDEX_KEY, backref->errors);
924                 }
925                 if (backref->found_dir_item) {
926                         dir_count++;
927                         add_inode_backref(dst_cache, dst->ino,
928                                         backref->dir, 0, backref->name,
929                                         backref->namelen, backref->filetype,
930                                         BTRFS_DIR_ITEM_KEY, backref->errors);
931                 }
932                 if (backref->found_inode_ref) {
933                         add_inode_backref(dst_cache, dst->ino,
934                                         backref->dir, backref->index,
935                                         backref->name, backref->namelen, 0,
936                                         backref->ref_type, backref->errors);
937                 }
938         }
939
940         if (src->found_dir_item)
941                 dst->found_dir_item = 1;
942         if (src->found_file_extent)
943                 dst->found_file_extent = 1;
944         if (src->found_csum_item)
945                 dst->found_csum_item = 1;
946         if (src->some_csum_missing)
947                 dst->some_csum_missing = 1;
948         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
949                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
950                 if (ret < 0)
951                         return ret;
952         }
953
954         BUG_ON(src->found_link < dir_count);
955         dst->found_link += src->found_link - dir_count;
956         dst->found_size += src->found_size;
957         if (src->extent_start != (u64)-1) {
958                 if (dst->extent_start == (u64)-1) {
959                         dst->extent_start = src->extent_start;
960                         dst->extent_end = src->extent_end;
961                 } else {
962                         if (dst->extent_end > src->extent_start)
963                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
964                         else if (dst->extent_end < src->extent_start) {
965                                 ret = add_file_extent_hole(&dst->holes,
966                                         dst->extent_end,
967                                         src->extent_start - dst->extent_end);
968                         }
969                         if (dst->extent_end < src->extent_end)
970                                 dst->extent_end = src->extent_end;
971                 }
972         }
973
974         dst->errors |= src->errors;
975         if (src->found_inode_item) {
976                 if (!dst->found_inode_item) {
977                         dst->nlink = src->nlink;
978                         dst->isize = src->isize;
979                         dst->nbytes = src->nbytes;
980                         dst->imode = src->imode;
981                         dst->nodatasum = src->nodatasum;
982                         dst->found_inode_item = 1;
983                 } else {
984                         dst->errors |= I_ERR_DUP_INODE_ITEM;
985                 }
986         }
987         dst->merging = 0;
988
989         return 0;
990 }
991
992 static int splice_shared_node(struct shared_node *src_node,
993                               struct shared_node *dst_node)
994 {
995         struct cache_extent *cache;
996         struct ptr_node *node, *ins;
997         struct cache_tree *src, *dst;
998         struct inode_record *rec, *conflict;
999         u64 current_ino = 0;
1000         int splice = 0;
1001         int ret;
1002
1003         if (--src_node->refs == 0)
1004                 splice = 1;
1005         if (src_node->current)
1006                 current_ino = src_node->current->ino;
1007
1008         src = &src_node->root_cache;
1009         dst = &dst_node->root_cache;
1010 again:
1011         cache = search_cache_extent(src, 0);
1012         while (cache) {
1013                 node = container_of(cache, struct ptr_node, cache);
1014                 rec = node->data;
1015                 cache = next_cache_extent(cache);
1016
1017                 if (splice) {
1018                         remove_cache_extent(src, &node->cache);
1019                         ins = node;
1020                 } else {
1021                         ins = malloc(sizeof(*ins));
1022                         BUG_ON(!ins);
1023                         ins->cache.start = node->cache.start;
1024                         ins->cache.size = node->cache.size;
1025                         ins->data = rec;
1026                         rec->refs++;
1027                 }
1028                 ret = insert_cache_extent(dst, &ins->cache);
1029                 if (ret == -EEXIST) {
1030                         conflict = get_inode_rec(dst, rec->ino, 1);
1031                         BUG_ON(IS_ERR(conflict));
1032                         merge_inode_recs(rec, conflict, dst);
1033                         if (rec->checked) {
1034                                 conflict->checked = 1;
1035                                 if (dst_node->current == conflict)
1036                                         dst_node->current = NULL;
1037                         }
1038                         maybe_free_inode_rec(dst, conflict);
1039                         free_inode_rec(rec);
1040                         free(ins);
1041                 } else {
1042                         BUG_ON(ret);
1043                 }
1044         }
1045
1046         if (src == &src_node->root_cache) {
1047                 src = &src_node->inode_cache;
1048                 dst = &dst_node->inode_cache;
1049                 goto again;
1050         }
1051
1052         if (current_ino > 0 && (!dst_node->current ||
1053             current_ino > dst_node->current->ino)) {
1054                 if (dst_node->current) {
1055                         dst_node->current->checked = 1;
1056                         maybe_free_inode_rec(dst, dst_node->current);
1057                 }
1058                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1059                 BUG_ON(IS_ERR(dst_node->current));
1060         }
1061         return 0;
1062 }
1063
1064 static void free_inode_ptr(struct cache_extent *cache)
1065 {
1066         struct ptr_node *node;
1067         struct inode_record *rec;
1068
1069         node = container_of(cache, struct ptr_node, cache);
1070         rec = node->data;
1071         free_inode_rec(rec);
1072         free(node);
1073 }
1074
1075 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1076
1077 static struct shared_node *find_shared_node(struct cache_tree *shared,
1078                                             u64 bytenr)
1079 {
1080         struct cache_extent *cache;
1081         struct shared_node *node;
1082
1083         cache = lookup_cache_extent(shared, bytenr, 1);
1084         if (cache) {
1085                 node = container_of(cache, struct shared_node, cache);
1086                 return node;
1087         }
1088         return NULL;
1089 }
1090
1091 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1092 {
1093         int ret;
1094         struct shared_node *node;
1095
1096         node = calloc(1, sizeof(*node));
1097         if (!node)
1098                 return -ENOMEM;
1099         node->cache.start = bytenr;
1100         node->cache.size = 1;
1101         cache_tree_init(&node->root_cache);
1102         cache_tree_init(&node->inode_cache);
1103         node->refs = refs;
1104
1105         ret = insert_cache_extent(shared, &node->cache);
1106
1107         return ret;
1108 }
1109
1110 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1111                              struct walk_control *wc, int level)
1112 {
1113         struct shared_node *node;
1114         struct shared_node *dest;
1115         int ret;
1116
1117         if (level == wc->active_node)
1118                 return 0;
1119
1120         BUG_ON(wc->active_node <= level);
1121         node = find_shared_node(&wc->shared, bytenr);
1122         if (!node) {
1123                 ret = add_shared_node(&wc->shared, bytenr, refs);
1124                 BUG_ON(ret);
1125                 node = find_shared_node(&wc->shared, bytenr);
1126                 wc->nodes[level] = node;
1127                 wc->active_node = level;
1128                 return 0;
1129         }
1130
1131         if (wc->root_level == wc->active_node &&
1132             btrfs_root_refs(&root->root_item) == 0) {
1133                 if (--node->refs == 0) {
1134                         free_inode_recs_tree(&node->root_cache);
1135                         free_inode_recs_tree(&node->inode_cache);
1136                         remove_cache_extent(&wc->shared, &node->cache);
1137                         free(node);
1138                 }
1139                 return 1;
1140         }
1141
1142         dest = wc->nodes[wc->active_node];
1143         splice_shared_node(node, dest);
1144         if (node->refs == 0) {
1145                 remove_cache_extent(&wc->shared, &node->cache);
1146                 free(node);
1147         }
1148         return 1;
1149 }
1150
1151 static int leave_shared_node(struct btrfs_root *root,
1152                              struct walk_control *wc, int level)
1153 {
1154         struct shared_node *node;
1155         struct shared_node *dest;
1156         int i;
1157
1158         if (level == wc->root_level)
1159                 return 0;
1160
1161         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1162                 if (wc->nodes[i])
1163                         break;
1164         }
1165         BUG_ON(i >= BTRFS_MAX_LEVEL);
1166
1167         node = wc->nodes[wc->active_node];
1168         wc->nodes[wc->active_node] = NULL;
1169         wc->active_node = i;
1170
1171         dest = wc->nodes[wc->active_node];
1172         if (wc->active_node < wc->root_level ||
1173             btrfs_root_refs(&root->root_item) > 0) {
1174                 BUG_ON(node->refs <= 1);
1175                 splice_shared_node(node, dest);
1176         } else {
1177                 BUG_ON(node->refs < 2);
1178                 node->refs--;
1179         }
1180         return 0;
1181 }
1182
1183 /*
1184  * Returns:
1185  * < 0 - on error
1186  * 1   - if the root with id child_root_id is a child of root parent_root_id
1187  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1188  *       has other root(s) as parent(s)
1189  * 2   - if the root child_root_id doesn't have any parent roots
1190  */
1191 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1192                          u64 child_root_id)
1193 {
1194         struct btrfs_path path;
1195         struct btrfs_key key;
1196         struct extent_buffer *leaf;
1197         int has_parent = 0;
1198         int ret;
1199
1200         btrfs_init_path(&path);
1201
1202         key.objectid = parent_root_id;
1203         key.type = BTRFS_ROOT_REF_KEY;
1204         key.offset = child_root_id;
1205         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1206                                 0, 0);
1207         if (ret < 0)
1208                 return ret;
1209         btrfs_release_path(&path);
1210         if (!ret)
1211                 return 1;
1212
1213         key.objectid = child_root_id;
1214         key.type = BTRFS_ROOT_BACKREF_KEY;
1215         key.offset = 0;
1216         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1217                                 0, 0);
1218         if (ret < 0)
1219                 goto out;
1220
1221         while (1) {
1222                 leaf = path.nodes[0];
1223                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1224                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1225                         if (ret)
1226                                 break;
1227                         leaf = path.nodes[0];
1228                 }
1229
1230                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1231                 if (key.objectid != child_root_id ||
1232                     key.type != BTRFS_ROOT_BACKREF_KEY)
1233                         break;
1234
1235                 has_parent = 1;
1236
1237                 if (key.offset == parent_root_id) {
1238                         btrfs_release_path(&path);
1239                         return 1;
1240                 }
1241
1242                 path.slots[0]++;
1243         }
1244 out:
1245         btrfs_release_path(&path);
1246         if (ret < 0)
1247                 return ret;
1248         return has_parent ? 0 : 2;
1249 }
1250
1251 static int process_dir_item(struct extent_buffer *eb,
1252                             int slot, struct btrfs_key *key,
1253                             struct shared_node *active_node)
1254 {
1255         u32 total;
1256         u32 cur = 0;
1257         u32 len;
1258         u32 name_len;
1259         u32 data_len;
1260         int error;
1261         int nritems = 0;
1262         u8 filetype;
1263         struct btrfs_dir_item *di;
1264         struct inode_record *rec;
1265         struct cache_tree *root_cache;
1266         struct cache_tree *inode_cache;
1267         struct btrfs_key location;
1268         char namebuf[BTRFS_NAME_LEN];
1269
1270         root_cache = &active_node->root_cache;
1271         inode_cache = &active_node->inode_cache;
1272         rec = active_node->current;
1273         rec->found_dir_item = 1;
1274
1275         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1276         total = btrfs_item_size_nr(eb, slot);
1277         while (cur < total) {
1278                 nritems++;
1279                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1280                 name_len = btrfs_dir_name_len(eb, di);
1281                 data_len = btrfs_dir_data_len(eb, di);
1282                 filetype = btrfs_dir_type(eb, di);
1283
1284                 rec->found_size += name_len;
1285                 if (cur + sizeof(*di) + name_len > total ||
1286                     name_len > BTRFS_NAME_LEN) {
1287                         error = REF_ERR_NAME_TOO_LONG;
1288
1289                         if (cur + sizeof(*di) > total)
1290                                 break;
1291                         len = min_t(u32, total - cur - sizeof(*di),
1292                                     BTRFS_NAME_LEN);
1293                 } else {
1294                         len = name_len;
1295                         error = 0;
1296                 }
1297
1298                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1299
1300                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1301                     key->offset != btrfs_name_hash(namebuf, len)) {
1302                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1303                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1304                         key->objectid, key->offset, namebuf, len, filetype,
1305                         key->offset, btrfs_name_hash(namebuf, len));
1306                 }
1307
1308                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1309                         add_inode_backref(inode_cache, location.objectid,
1310                                           key->objectid, key->offset, namebuf,
1311                                           len, filetype, key->type, error);
1312                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1313                         add_inode_backref(root_cache, location.objectid,
1314                                           key->objectid, key->offset,
1315                                           namebuf, len, filetype,
1316                                           key->type, error);
1317                 } else {
1318                         fprintf(stderr,
1319                                 "unknown location type %d in DIR_ITEM[%llu %llu]\n",
1320                                 location.type, key->objectid, key->offset);
1321                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1322                                           key->objectid, key->offset, namebuf,
1323                                           len, filetype, key->type, error);
1324                 }
1325
1326                 len = sizeof(*di) + name_len + data_len;
1327                 di = (struct btrfs_dir_item *)((char *)di + len);
1328                 cur += len;
1329         }
1330         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1331                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1332
1333         return 0;
1334 }
1335
1336 static int process_inode_ref(struct extent_buffer *eb,
1337                              int slot, struct btrfs_key *key,
1338                              struct shared_node *active_node)
1339 {
1340         u32 total;
1341         u32 cur = 0;
1342         u32 len;
1343         u32 name_len;
1344         u64 index;
1345         int error;
1346         struct cache_tree *inode_cache;
1347         struct btrfs_inode_ref *ref;
1348         char namebuf[BTRFS_NAME_LEN];
1349
1350         inode_cache = &active_node->inode_cache;
1351
1352         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1353         total = btrfs_item_size_nr(eb, slot);
1354         while (cur < total) {
1355                 name_len = btrfs_inode_ref_name_len(eb, ref);
1356                 index = btrfs_inode_ref_index(eb, ref);
1357
1358                 /* inode_ref + namelen should not cross item boundary */
1359                 if (cur + sizeof(*ref) + name_len > total ||
1360                     name_len > BTRFS_NAME_LEN) {
1361                         if (total < cur + sizeof(*ref))
1362                                 break;
1363
1364                         /* Still try to read out the remaining part */
1365                         len = min_t(u32, total - cur - sizeof(*ref),
1366                                     BTRFS_NAME_LEN);
1367                         error = REF_ERR_NAME_TOO_LONG;
1368                 } else {
1369                         len = name_len;
1370                         error = 0;
1371                 }
1372
1373                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1374                 add_inode_backref(inode_cache, key->objectid, key->offset,
1375                                   index, namebuf, len, 0, key->type, error);
1376
1377                 len = sizeof(*ref) + name_len;
1378                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1379                 cur += len;
1380         }
1381         return 0;
1382 }
1383
1384 static int process_inode_extref(struct extent_buffer *eb,
1385                                 int slot, struct btrfs_key *key,
1386                                 struct shared_node *active_node)
1387 {
1388         u32 total;
1389         u32 cur = 0;
1390         u32 len;
1391         u32 name_len;
1392         u64 index;
1393         u64 parent;
1394         int error;
1395         struct cache_tree *inode_cache;
1396         struct btrfs_inode_extref *extref;
1397         char namebuf[BTRFS_NAME_LEN];
1398
1399         inode_cache = &active_node->inode_cache;
1400
1401         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1402         total = btrfs_item_size_nr(eb, slot);
1403         while (cur < total) {
1404                 name_len = btrfs_inode_extref_name_len(eb, extref);
1405                 index = btrfs_inode_extref_index(eb, extref);
1406                 parent = btrfs_inode_extref_parent(eb, extref);
1407                 if (name_len <= BTRFS_NAME_LEN) {
1408                         len = name_len;
1409                         error = 0;
1410                 } else {
1411                         len = BTRFS_NAME_LEN;
1412                         error = REF_ERR_NAME_TOO_LONG;
1413                 }
1414                 read_extent_buffer(eb, namebuf,
1415                                    (unsigned long)(extref + 1), len);
1416                 add_inode_backref(inode_cache, key->objectid, parent,
1417                                   index, namebuf, len, 0, key->type, error);
1418
1419                 len = sizeof(*extref) + name_len;
1420                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1421                 cur += len;
1422         }
1423         return 0;
1424
1425 }
1426
1427 static int process_file_extent(struct btrfs_root *root,
1428                                 struct extent_buffer *eb,
1429                                 int slot, struct btrfs_key *key,
1430                                 struct shared_node *active_node)
1431 {
1432         struct inode_record *rec;
1433         struct btrfs_file_extent_item *fi;
1434         u64 num_bytes = 0;
1435         u64 disk_bytenr = 0;
1436         u64 extent_offset = 0;
1437         u64 mask = root->fs_info->sectorsize - 1;
1438         u32 max_inline_size = min_t(u32, mask,
1439                                 BTRFS_MAX_INLINE_DATA_SIZE(root->fs_info));
1440         int extent_type;
1441         int ret;
1442
1443         rec = active_node->current;
1444         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1445         rec->found_file_extent = 1;
1446
1447         if (rec->extent_start == (u64)-1) {
1448                 rec->extent_start = key->offset;
1449                 rec->extent_end = key->offset;
1450         }
1451
1452         if (rec->extent_end > key->offset)
1453                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1454         else if (rec->extent_end < key->offset) {
1455                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1456                                            key->offset - rec->extent_end);
1457                 if (ret < 0)
1458                         return ret;
1459         }
1460
1461         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1462         extent_type = btrfs_file_extent_type(eb, fi);
1463
1464         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1465                 u8 compression = btrfs_file_extent_compression(eb, fi);
1466                 struct btrfs_item *item = btrfs_item_nr(slot);
1467
1468                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1469                 if (num_bytes == 0)
1470                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1471                 if (compression) {
1472                         if (btrfs_file_extent_inline_item_len(eb, item) >
1473                             max_inline_size ||
1474                             num_bytes > root->fs_info->sectorsize)
1475                                 rec->errors |= I_ERR_FILE_EXTENT_TOO_LARGE;
1476                 } else {
1477                         if (num_bytes > max_inline_size)
1478                                 rec->errors |= I_ERR_FILE_EXTENT_TOO_LARGE;
1479                 }
1480                 rec->found_size += num_bytes;
1481                 num_bytes = (num_bytes + mask) & ~mask;
1482         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1483                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1484                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1485                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1486                 extent_offset = btrfs_file_extent_offset(eb, fi);
1487                 if (num_bytes == 0 || (num_bytes & mask))
1488                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1489                 if (num_bytes + extent_offset >
1490                     btrfs_file_extent_ram_bytes(eb, fi))
1491                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1492                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1493                     (btrfs_file_extent_compression(eb, fi) ||
1494                      btrfs_file_extent_encryption(eb, fi) ||
1495                      btrfs_file_extent_other_encoding(eb, fi)))
1496                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1497                 if (disk_bytenr > 0)
1498                         rec->found_size += num_bytes;
1499         } else {
1500                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1501         }
1502         rec->extent_end = key->offset + num_bytes;
1503
1504         /*
1505          * The data reloc tree will copy full extents into its inode and then
1506          * copy the corresponding csums.  Because the extent it copied could be
1507          * a preallocated extent that hasn't been written to yet there may be no
1508          * csums to copy, ergo we won't have csums for our file extent.  This is
1509          * ok so just don't bother checking csums if the inode belongs to the
1510          * data reloc tree.
1511          */
1512         if (disk_bytenr > 0 &&
1513             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1514                 u64 found;
1515                 if (btrfs_file_extent_compression(eb, fi))
1516                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1517                 else
1518                         disk_bytenr += extent_offset;
1519
1520                 ret = count_csum_range(root->fs_info, disk_bytenr, num_bytes,
1521                                        &found);
1522                 if (ret < 0)
1523                         return ret;
1524                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1525                         if (found > 0)
1526                                 rec->found_csum_item = 1;
1527                         if (found < num_bytes)
1528                                 rec->some_csum_missing = 1;
1529                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1530                         if (found > 0) {
1531                                 ret = check_prealloc_extent_written(root->fs_info,
1532                                                                     disk_bytenr,
1533                                                                     num_bytes);
1534                                 if (ret < 0)
1535                                         return ret;
1536                                 if (ret == 0)
1537                                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1538                         }
1539                 }
1540         }
1541         return 0;
1542 }
1543
1544 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1545                             struct walk_control *wc)
1546 {
1547         struct btrfs_key key;
1548         u32 nritems;
1549         int i;
1550         int ret = 0;
1551         struct cache_tree *inode_cache;
1552         struct shared_node *active_node;
1553
1554         if (wc->root_level == wc->active_node &&
1555             btrfs_root_refs(&root->root_item) == 0)
1556                 return 0;
1557
1558         active_node = wc->nodes[wc->active_node];
1559         inode_cache = &active_node->inode_cache;
1560         nritems = btrfs_header_nritems(eb);
1561         for (i = 0; i < nritems; i++) {
1562                 btrfs_item_key_to_cpu(eb, &key, i);
1563
1564                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1565                         continue;
1566                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1567                         continue;
1568
1569                 if (active_node->current == NULL ||
1570                     active_node->current->ino < key.objectid) {
1571                         if (active_node->current) {
1572                                 active_node->current->checked = 1;
1573                                 maybe_free_inode_rec(inode_cache,
1574                                                      active_node->current);
1575                         }
1576                         active_node->current = get_inode_rec(inode_cache,
1577                                                              key.objectid, 1);
1578                         BUG_ON(IS_ERR(active_node->current));
1579                 }
1580                 switch (key.type) {
1581                 case BTRFS_DIR_ITEM_KEY:
1582                 case BTRFS_DIR_INDEX_KEY:
1583                         ret = process_dir_item(eb, i, &key, active_node);
1584                         break;
1585                 case BTRFS_INODE_REF_KEY:
1586                         ret = process_inode_ref(eb, i, &key, active_node);
1587                         break;
1588                 case BTRFS_INODE_EXTREF_KEY:
1589                         ret = process_inode_extref(eb, i, &key, active_node);
1590                         break;
1591                 case BTRFS_INODE_ITEM_KEY:
1592                         ret = process_inode_item(eb, i, &key, active_node);
1593                         break;
1594                 case BTRFS_EXTENT_DATA_KEY:
1595                         ret = process_file_extent(root, eb, i, &key,
1596                                                   active_node);
1597                         break;
1598                 default:
1599                         break;
1600                 };
1601         }
1602         return ret;
1603 }
1604
1605 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1606                           struct walk_control *wc, int *level,
1607                           struct node_refs *nrefs)
1608 {
1609         enum btrfs_tree_block_status status;
1610         u64 bytenr;
1611         u64 ptr_gen;
1612         struct btrfs_fs_info *fs_info = root->fs_info;
1613         struct extent_buffer *next;
1614         struct extent_buffer *cur;
1615         int ret, err = 0;
1616         u64 refs;
1617
1618         WARN_ON(*level < 0);
1619         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1620
1621         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1622                 refs = nrefs->refs[*level];
1623                 ret = 0;
1624         } else {
1625                 ret = btrfs_lookup_extent_info(NULL, root,
1626                                        path->nodes[*level]->start,
1627                                        *level, 1, &refs, NULL);
1628                 if (ret < 0) {
1629                         err = ret;
1630                         goto out;
1631                 }
1632                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1633                 nrefs->refs[*level] = refs;
1634         }
1635
1636         if (refs > 1) {
1637                 ret = enter_shared_node(root, path->nodes[*level]->start,
1638                                         refs, wc, *level);
1639                 if (ret > 0) {
1640                         err = ret;
1641                         goto out;
1642                 }
1643         }
1644
1645         while (*level >= 0) {
1646                 WARN_ON(*level < 0);
1647                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1648                 cur = path->nodes[*level];
1649
1650                 if (btrfs_header_level(cur) != *level)
1651                         WARN_ON(1);
1652
1653                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1654                         break;
1655                 if (*level == 0) {
1656                         ret = process_one_leaf(root, cur, wc);
1657                         if (ret < 0)
1658                                 err = ret;
1659                         break;
1660                 }
1661                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1662                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1663
1664                 if (bytenr == nrefs->bytenr[*level - 1]) {
1665                         refs = nrefs->refs[*level - 1];
1666                 } else {
1667                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1668                                         *level - 1, 1, &refs, NULL);
1669                         if (ret < 0) {
1670                                 refs = 0;
1671                         } else {
1672                                 nrefs->bytenr[*level - 1] = bytenr;
1673                                 nrefs->refs[*level - 1] = refs;
1674                         }
1675                 }
1676
1677                 if (refs > 1) {
1678                         ret = enter_shared_node(root, bytenr, refs,
1679                                                 wc, *level - 1);
1680                         if (ret > 0) {
1681                                 path->slots[*level]++;
1682                                 continue;
1683                         }
1684                 }
1685
1686                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
1687                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1688                         free_extent_buffer(next);
1689                         reada_walk_down(root, cur, path->slots[*level]);
1690                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
1691                         if (!extent_buffer_uptodate(next)) {
1692                                 struct btrfs_key node_key;
1693
1694                                 btrfs_node_key_to_cpu(path->nodes[*level],
1695                                                       &node_key,
1696                                                       path->slots[*level]);
1697                                 btrfs_add_corrupt_extent_record(root->fs_info,
1698                                                 &node_key,
1699                                                 path->nodes[*level]->start,
1700                                                 root->fs_info->nodesize,
1701                                                 *level);
1702                                 err = -EIO;
1703                                 goto out;
1704                         }
1705                 }
1706
1707                 ret = check_child_node(cur, path->slots[*level], next);
1708                 if (ret) {
1709                         free_extent_buffer(next);
1710                         err = ret;
1711                         goto out;
1712                 }
1713
1714                 if (btrfs_is_leaf(next))
1715                         status = btrfs_check_leaf(root, NULL, next);
1716                 else
1717                         status = btrfs_check_node(root, NULL, next);
1718                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1719                         free_extent_buffer(next);
1720                         err = -EIO;
1721                         goto out;
1722                 }
1723
1724                 *level = *level - 1;
1725                 free_extent_buffer(path->nodes[*level]);
1726                 path->nodes[*level] = next;
1727                 path->slots[*level] = 0;
1728         }
1729 out:
1730         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1731         return err;
1732 }
1733
1734 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1735                         struct walk_control *wc, int *level)
1736 {
1737         int i;
1738         struct extent_buffer *leaf;
1739
1740         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1741                 leaf = path->nodes[i];
1742                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1743                         path->slots[i]++;
1744                         *level = i;
1745                         return 0;
1746                 }
1747                 free_extent_buffer(path->nodes[*level]);
1748                 path->nodes[*level] = NULL;
1749                 BUG_ON(*level > wc->active_node);
1750                 if (*level == wc->active_node)
1751                         leave_shared_node(root, wc, *level);
1752                 *level = i + 1;
1753         }
1754         return 1;
1755 }
1756
1757 static int check_root_dir(struct inode_record *rec)
1758 {
1759         struct inode_backref *backref;
1760         int ret = -1;
1761
1762         if (!rec->found_inode_item || rec->errors)
1763                 goto out;
1764         if (rec->nlink != 1 || rec->found_link != 0)
1765                 goto out;
1766         if (list_empty(&rec->backrefs))
1767                 goto out;
1768         backref = to_inode_backref(rec->backrefs.next);
1769         if (!backref->found_inode_ref)
1770                 goto out;
1771         if (backref->index != 0 || backref->namelen != 2 ||
1772             memcmp(backref->name, "..", 2))
1773                 goto out;
1774         if (backref->found_dir_index || backref->found_dir_item)
1775                 goto out;
1776         ret = 0;
1777 out:
1778         return ret;
1779 }
1780
1781 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1782                               struct btrfs_root *root, struct btrfs_path *path,
1783                               struct inode_record *rec)
1784 {
1785         struct btrfs_inode_item *ei;
1786         struct btrfs_key key;
1787         int ret;
1788
1789         key.objectid = rec->ino;
1790         key.type = BTRFS_INODE_ITEM_KEY;
1791         key.offset = (u64)-1;
1792
1793         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1794         if (ret < 0)
1795                 goto out;
1796         if (ret) {
1797                 if (!path->slots[0]) {
1798                         ret = -ENOENT;
1799                         goto out;
1800                 }
1801                 path->slots[0]--;
1802                 ret = 0;
1803         }
1804         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1805         if (key.objectid != rec->ino) {
1806                 ret = -ENOENT;
1807                 goto out;
1808         }
1809
1810         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1811                             struct btrfs_inode_item);
1812         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1813         btrfs_mark_buffer_dirty(path->nodes[0]);
1814         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1815         printf("reset isize for dir %llu root %llu\n", rec->ino,
1816                root->root_key.objectid);
1817 out:
1818         btrfs_release_path(path);
1819         return ret;
1820 }
1821
1822 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1823                                     struct btrfs_root *root,
1824                                     struct btrfs_path *path,
1825                                     struct inode_record *rec)
1826 {
1827         int ret;
1828
1829         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
1830         btrfs_release_path(path);
1831         if (!ret)
1832                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1833         return ret;
1834 }
1835
1836 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
1837                                struct btrfs_root *root,
1838                                struct btrfs_path *path,
1839                                struct inode_record *rec)
1840 {
1841         struct btrfs_inode_item *ei;
1842         struct btrfs_key key;
1843         int ret = 0;
1844
1845         key.objectid = rec->ino;
1846         key.type = BTRFS_INODE_ITEM_KEY;
1847         key.offset = 0;
1848
1849         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1850         if (ret) {
1851                 if (ret > 0)
1852                         ret = -ENOENT;
1853                 goto out;
1854         }
1855
1856         /* Since ret == 0, no need to check anything */
1857         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1858                             struct btrfs_inode_item);
1859         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
1860         btrfs_mark_buffer_dirty(path->nodes[0]);
1861         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
1862         printf("reset nbytes for ino %llu root %llu\n",
1863                rec->ino, root->root_key.objectid);
1864 out:
1865         btrfs_release_path(path);
1866         return ret;
1867 }
1868
1869 static int add_missing_dir_index(struct btrfs_root *root,
1870                                  struct cache_tree *inode_cache,
1871                                  struct inode_record *rec,
1872                                  struct inode_backref *backref)
1873 {
1874         struct btrfs_path path;
1875         struct btrfs_trans_handle *trans;
1876         struct btrfs_dir_item *dir_item;
1877         struct extent_buffer *leaf;
1878         struct btrfs_key key;
1879         struct btrfs_disk_key disk_key;
1880         struct inode_record *dir_rec;
1881         unsigned long name_ptr;
1882         u32 data_size = sizeof(*dir_item) + backref->namelen;
1883         int ret;
1884
1885         trans = btrfs_start_transaction(root, 1);
1886         if (IS_ERR(trans))
1887                 return PTR_ERR(trans);
1888
1889         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
1890                 (unsigned long long)rec->ino);
1891
1892         btrfs_init_path(&path);
1893         key.objectid = backref->dir;
1894         key.type = BTRFS_DIR_INDEX_KEY;
1895         key.offset = backref->index;
1896         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
1897         BUG_ON(ret);
1898
1899         leaf = path.nodes[0];
1900         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
1901
1902         disk_key.objectid = cpu_to_le64(rec->ino);
1903         disk_key.type = BTRFS_INODE_ITEM_KEY;
1904         disk_key.offset = 0;
1905
1906         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
1907         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
1908         btrfs_set_dir_data_len(leaf, dir_item, 0);
1909         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
1910         name_ptr = (unsigned long)(dir_item + 1);
1911         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
1912         btrfs_mark_buffer_dirty(leaf);
1913         btrfs_release_path(&path);
1914         btrfs_commit_transaction(trans, root);
1915
1916         backref->found_dir_index = 1;
1917         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
1918         BUG_ON(IS_ERR(dir_rec));
1919         if (!dir_rec)
1920                 return 0;
1921         dir_rec->found_size += backref->namelen;
1922         if (dir_rec->found_size == dir_rec->isize &&
1923             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
1924                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1925         if (dir_rec->found_size != dir_rec->isize)
1926                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1927
1928         return 0;
1929 }
1930
1931 static int delete_dir_index(struct btrfs_root *root,
1932                             struct inode_backref *backref)
1933 {
1934         struct btrfs_trans_handle *trans;
1935         struct btrfs_dir_item *di;
1936         struct btrfs_path path;
1937         int ret = 0;
1938
1939         trans = btrfs_start_transaction(root, 1);
1940         if (IS_ERR(trans))
1941                 return PTR_ERR(trans);
1942
1943         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
1944                 (unsigned long long)backref->dir,
1945                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
1946                 (unsigned long long)root->objectid);
1947
1948         btrfs_init_path(&path);
1949         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
1950                                     backref->name, backref->namelen,
1951                                     backref->index, -1);
1952         if (IS_ERR(di)) {
1953                 ret = PTR_ERR(di);
1954                 btrfs_release_path(&path);
1955                 btrfs_commit_transaction(trans, root);
1956                 if (ret == -ENOENT)
1957                         return 0;
1958                 return ret;
1959         }
1960
1961         if (!di)
1962                 ret = btrfs_del_item(trans, root, &path);
1963         else
1964                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
1965         BUG_ON(ret);
1966         btrfs_release_path(&path);
1967         btrfs_commit_transaction(trans, root);
1968         return ret;
1969 }
1970
1971 static int create_inode_item(struct btrfs_root *root,
1972                              struct inode_record *rec, int root_dir)
1973 {
1974         struct btrfs_trans_handle *trans;
1975         u64 nlink = 0;
1976         u32 mode = 0;
1977         u64 size = 0;
1978         int ret;
1979
1980         trans = btrfs_start_transaction(root, 1);
1981         if (IS_ERR(trans)) {
1982                 ret = PTR_ERR(trans);
1983                 return ret;
1984         }
1985
1986         nlink = root_dir ? 1 : rec->found_link;
1987         if (rec->found_dir_item) {
1988                 if (rec->found_file_extent)
1989                         fprintf(stderr, "root %llu inode %llu has both a dir "
1990                                 "item and extents, unsure if it is a dir or a "
1991                                 "regular file so setting it as a directory\n",
1992                                 (unsigned long long)root->objectid,
1993                                 (unsigned long long)rec->ino);
1994                 mode = S_IFDIR | 0755;
1995                 size = rec->found_size;
1996         } else if (!rec->found_dir_item) {
1997                 size = rec->extent_end;
1998                 mode =  S_IFREG | 0755;
1999         }
2000
2001         ret = insert_inode_item(trans, root, rec->ino, size, rec->nbytes,
2002                                   nlink, mode);
2003         btrfs_commit_transaction(trans, root);
2004         return 0;
2005 }
2006
2007 static int repair_inode_backrefs(struct btrfs_root *root,
2008                                  struct inode_record *rec,
2009                                  struct cache_tree *inode_cache,
2010                                  int delete)
2011 {
2012         struct inode_backref *tmp, *backref;
2013         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2014         int ret = 0;
2015         int repaired = 0;
2016
2017         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2018                 if (!delete && rec->ino == root_dirid) {
2019                         if (!rec->found_inode_item) {
2020                                 ret = create_inode_item(root, rec, 1);
2021                                 if (ret)
2022                                         break;
2023                                 repaired++;
2024                         }
2025                 }
2026
2027                 /* Index 0 for root dir's are special, don't mess with it */
2028                 if (rec->ino == root_dirid && backref->index == 0)
2029                         continue;
2030
2031                 if (delete &&
2032                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2033                      (backref->found_dir_index && backref->found_inode_ref &&
2034                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2035                         ret = delete_dir_index(root, backref);
2036                         if (ret)
2037                                 break;
2038                         repaired++;
2039                         list_del(&backref->list);
2040                         free(backref);
2041                         continue;
2042                 }
2043
2044                 if (!delete && !backref->found_dir_index &&
2045                     backref->found_dir_item && backref->found_inode_ref) {
2046                         ret = add_missing_dir_index(root, inode_cache, rec,
2047                                                     backref);
2048                         if (ret)
2049                                 break;
2050                         repaired++;
2051                         if (backref->found_dir_item &&
2052                             backref->found_dir_index) {
2053                                 if (!backref->errors &&
2054                                     backref->found_inode_ref) {
2055                                         list_del(&backref->list);
2056                                         free(backref);
2057                                         continue;
2058                                 }
2059                         }
2060                 }
2061
2062                 if (!delete && (!backref->found_dir_index &&
2063                                 !backref->found_dir_item &&
2064                                 backref->found_inode_ref)) {
2065                         struct btrfs_trans_handle *trans;
2066                         struct btrfs_key location;
2067
2068                         ret = check_dir_conflict(root, backref->name,
2069                                                  backref->namelen,
2070                                                  backref->dir,
2071                                                  backref->index);
2072                         if (ret) {
2073                                 /*
2074                                  * let nlink fixing routine to handle it,
2075                                  * which can do it better.
2076                                  */
2077                                 ret = 0;
2078                                 break;
2079                         }
2080                         location.objectid = rec->ino;
2081                         location.type = BTRFS_INODE_ITEM_KEY;
2082                         location.offset = 0;
2083
2084                         trans = btrfs_start_transaction(root, 1);
2085                         if (IS_ERR(trans)) {
2086                                 ret = PTR_ERR(trans);
2087                                 break;
2088                         }
2089                         fprintf(stderr, "adding missing dir index/item pair "
2090                                 "for inode %llu\n",
2091                                 (unsigned long long)rec->ino);
2092                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2093                                                     backref->namelen,
2094                                                     backref->dir, &location,
2095                                                     imode_to_type(rec->imode),
2096                                                     backref->index);
2097                         BUG_ON(ret);
2098                         btrfs_commit_transaction(trans, root);
2099                         repaired++;
2100                 }
2101
2102                 if (!delete && (backref->found_inode_ref &&
2103                                 backref->found_dir_index &&
2104                                 backref->found_dir_item &&
2105                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2106                                 !rec->found_inode_item)) {
2107                         ret = create_inode_item(root, rec, 0);
2108                         if (ret)
2109                                 break;
2110                         repaired++;
2111                 }
2112
2113         }
2114         return ret ? ret : repaired;
2115 }
2116
2117 /*
2118  * To determine the file type for nlink/inode_item repair
2119  *
2120  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2121  * Return -ENOENT if file type is not found.
2122  */
2123 static int find_file_type(struct inode_record *rec, u8 *type)
2124 {
2125         struct inode_backref *backref;
2126
2127         /* For inode item recovered case */
2128         if (rec->found_inode_item) {
2129                 *type = imode_to_type(rec->imode);
2130                 return 0;
2131         }
2132
2133         list_for_each_entry(backref, &rec->backrefs, list) {
2134                 if (backref->found_dir_index || backref->found_dir_item) {
2135                         *type = backref->filetype;
2136                         return 0;
2137                 }
2138         }
2139         return -ENOENT;
2140 }
2141
2142 /*
2143  * To determine the file name for nlink repair
2144  *
2145  * Return 0 if file name is found, set name and namelen.
2146  * Return -ENOENT if file name is not found.
2147  */
2148 static int find_file_name(struct inode_record *rec,
2149                           char *name, int *namelen)
2150 {
2151         struct inode_backref *backref;
2152
2153         list_for_each_entry(backref, &rec->backrefs, list) {
2154                 if (backref->found_dir_index || backref->found_dir_item ||
2155                     backref->found_inode_ref) {
2156                         memcpy(name, backref->name, backref->namelen);
2157                         *namelen = backref->namelen;
2158                         return 0;
2159                 }
2160         }
2161         return -ENOENT;
2162 }
2163
2164 /* Reset the nlink of the inode to the correct one */
2165 static int reset_nlink(struct btrfs_trans_handle *trans,
2166                        struct btrfs_root *root,
2167                        struct btrfs_path *path,
2168                        struct inode_record *rec)
2169 {
2170         struct inode_backref *backref;
2171         struct inode_backref *tmp;
2172         struct btrfs_key key;
2173         struct btrfs_inode_item *inode_item;
2174         int ret = 0;
2175
2176         /* We don't believe this either, reset it and iterate backref */
2177         rec->found_link = 0;
2178
2179         /* Remove all backref including the valid ones */
2180         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2181                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2182                                    backref->index, backref->name,
2183                                    backref->namelen, 0);
2184                 if (ret < 0)
2185                         goto out;
2186
2187                 /* remove invalid backref, so it won't be added back */
2188                 if (!(backref->found_dir_index &&
2189                       backref->found_dir_item &&
2190                       backref->found_inode_ref)) {
2191                         list_del(&backref->list);
2192                         free(backref);
2193                 } else {
2194                         rec->found_link++;
2195                 }
2196         }
2197
2198         /* Set nlink to 0 */
2199         key.objectid = rec->ino;
2200         key.type = BTRFS_INODE_ITEM_KEY;
2201         key.offset = 0;
2202         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2203         if (ret < 0)
2204                 goto out;
2205         if (ret > 0) {
2206                 ret = -ENOENT;
2207                 goto out;
2208         }
2209         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2210                                     struct btrfs_inode_item);
2211         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2212         btrfs_mark_buffer_dirty(path->nodes[0]);
2213         btrfs_release_path(path);
2214
2215         /*
2216          * Add back valid inode_ref/dir_item/dir_index,
2217          * add_link() will handle the nlink inc, so new nlink must be correct
2218          */
2219         list_for_each_entry(backref, &rec->backrefs, list) {
2220                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2221                                      backref->name, backref->namelen,
2222                                      backref->filetype, &backref->index, 1, 0);
2223                 if (ret < 0)
2224                         goto out;
2225         }
2226 out:
2227         btrfs_release_path(path);
2228         return ret;
2229 }
2230
2231 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2232                                struct btrfs_root *root,
2233                                struct btrfs_path *path,
2234                                struct inode_record *rec)
2235 {
2236         char namebuf[BTRFS_NAME_LEN] = {0};
2237         u8 type = 0;
2238         int namelen = 0;
2239         int name_recovered = 0;
2240         int type_recovered = 0;
2241         int ret = 0;
2242
2243         /*
2244          * Get file name and type first before these invalid inode ref
2245          * are deleted by remove_all_invalid_backref()
2246          */
2247         name_recovered = !find_file_name(rec, namebuf, &namelen);
2248         type_recovered = !find_file_type(rec, &type);
2249
2250         if (!name_recovered) {
2251                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2252                        rec->ino, rec->ino);
2253                 namelen = count_digits(rec->ino);
2254                 sprintf(namebuf, "%llu", rec->ino);
2255                 name_recovered = 1;
2256         }
2257         if (!type_recovered) {
2258                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2259                        rec->ino);
2260                 type = BTRFS_FT_REG_FILE;
2261                 type_recovered = 1;
2262         }
2263
2264         ret = reset_nlink(trans, root, path, rec);
2265         if (ret < 0) {
2266                 fprintf(stderr,
2267                         "Failed to reset nlink for inode %llu: %s\n",
2268                         rec->ino, strerror(-ret));
2269                 goto out;
2270         }
2271
2272         if (rec->found_link == 0) {
2273                 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
2274                                               namebuf, namelen, type,
2275                                               (u64 *)&rec->found_link);
2276                 if (ret)
2277                         goto out;
2278         }
2279         printf("Fixed the nlink of inode %llu\n", rec->ino);
2280 out:
2281         /*
2282          * Clear the flag anyway, or we will loop forever for the same inode
2283          * as it will not be removed from the bad inode list and the dead loop
2284          * happens.
2285          */
2286         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2287         btrfs_release_path(path);
2288         return ret;
2289 }
2290
2291 /*
2292  * Check if there is any normal(reg or prealloc) file extent for given
2293  * ino.
2294  * This is used to determine the file type when neither its dir_index/item or
2295  * inode_item exists.
2296  *
2297  * This will *NOT* report error, if any error happens, just consider it does
2298  * not have any normal file extent.
2299  */
2300 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2301 {
2302         struct btrfs_path path;
2303         struct btrfs_key key;
2304         struct btrfs_key found_key;
2305         struct btrfs_file_extent_item *fi;
2306         u8 type;
2307         int ret = 0;
2308
2309         btrfs_init_path(&path);
2310         key.objectid = ino;
2311         key.type = BTRFS_EXTENT_DATA_KEY;
2312         key.offset = 0;
2313
2314         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2315         if (ret < 0) {
2316                 ret = 0;
2317                 goto out;
2318         }
2319         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2320                 ret = btrfs_next_leaf(root, &path);
2321                 if (ret) {
2322                         ret = 0;
2323                         goto out;
2324                 }
2325         }
2326         while (1) {
2327                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2328                                       path.slots[0]);
2329                 if (found_key.objectid != ino ||
2330                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2331                         break;
2332                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2333                                     struct btrfs_file_extent_item);
2334                 type = btrfs_file_extent_type(path.nodes[0], fi);
2335                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2336                         ret = 1;
2337                         goto out;
2338                 }
2339         }
2340 out:
2341         btrfs_release_path(&path);
2342         return ret;
2343 }
2344
2345 static u32 btrfs_type_to_imode(u8 type)
2346 {
2347         static u32 imode_by_btrfs_type[] = {
2348                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2349                 [BTRFS_FT_DIR]          = S_IFDIR,
2350                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2351                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2352                 [BTRFS_FT_FIFO]         = S_IFIFO,
2353                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2354                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2355         };
2356
2357         return imode_by_btrfs_type[(type)];
2358 }
2359
2360 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2361                                 struct btrfs_root *root,
2362                                 struct btrfs_path *path,
2363                                 struct inode_record *rec)
2364 {
2365         u8 filetype;
2366         u32 mode = 0700;
2367         int type_recovered = 0;
2368         int ret = 0;
2369
2370         printf("Trying to rebuild inode:%llu\n", rec->ino);
2371
2372         type_recovered = !find_file_type(rec, &filetype);
2373
2374         /*
2375          * Try to determine inode type if type not found.
2376          *
2377          * For found regular file extent, it must be FILE.
2378          * For found dir_item/index, it must be DIR.
2379          *
2380          * For undetermined one, use FILE as fallback.
2381          *
2382          * TODO:
2383          * 1. If found backref(inode_index/item is already handled) to it,
2384          *    it must be DIR.
2385          *    Need new inode-inode ref structure to allow search for that.
2386          */
2387         if (!type_recovered) {
2388                 if (rec->found_file_extent &&
2389                     find_normal_file_extent(root, rec->ino)) {
2390                         type_recovered = 1;
2391                         filetype = BTRFS_FT_REG_FILE;
2392                 } else if (rec->found_dir_item) {
2393                         type_recovered = 1;
2394                         filetype = BTRFS_FT_DIR;
2395                 } else if (!list_empty(&rec->orphan_extents)) {
2396                         type_recovered = 1;
2397                         filetype = BTRFS_FT_REG_FILE;
2398                 } else{
2399                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2400                                rec->ino);
2401                         type_recovered = 1;
2402                         filetype = BTRFS_FT_REG_FILE;
2403                 }
2404         }
2405
2406         ret = btrfs_new_inode(trans, root, rec->ino,
2407                               mode | btrfs_type_to_imode(filetype));
2408         if (ret < 0)
2409                 goto out;
2410
2411         /*
2412          * Here inode rebuild is done, we only rebuild the inode item,
2413          * don't repair the nlink(like move to lost+found).
2414          * That is the job of nlink repair.
2415          *
2416          * We just fill the record and return
2417          */
2418         rec->found_dir_item = 1;
2419         rec->imode = mode | btrfs_type_to_imode(filetype);
2420         rec->nlink = 0;
2421         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2422         /* Ensure the inode_nlinks repair function will be called */
2423         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2424 out:
2425         return ret;
2426 }
2427
2428 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2429                                       struct btrfs_root *root,
2430                                       struct btrfs_path *path,
2431                                       struct inode_record *rec)
2432 {
2433         struct orphan_data_extent *orphan;
2434         struct orphan_data_extent *tmp;
2435         int ret = 0;
2436
2437         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2438                 /*
2439                  * Check for conflicting file extents
2440                  *
2441                  * Here we don't know whether the extents is compressed or not,
2442                  * so we can only assume it not compressed nor data offset,
2443                  * and use its disk_len as extent length.
2444                  */
2445                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2446                                        orphan->offset, orphan->disk_len, 0);
2447                 btrfs_release_path(path);
2448                 if (ret < 0)
2449                         goto out;
2450                 if (!ret) {
2451                         fprintf(stderr,
2452                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2453                                 orphan->disk_bytenr, orphan->disk_len);
2454                         ret = btrfs_free_extent(trans,
2455                                         root->fs_info->extent_root,
2456                                         orphan->disk_bytenr, orphan->disk_len,
2457                                         0, root->objectid, orphan->objectid,
2458                                         orphan->offset);
2459                         if (ret < 0)
2460                                 goto out;
2461                 }
2462                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2463                                 orphan->offset, orphan->disk_bytenr,
2464                                 orphan->disk_len, orphan->disk_len);
2465                 if (ret < 0)
2466                         goto out;
2467
2468                 /* Update file size info */
2469                 rec->found_size += orphan->disk_len;
2470                 if (rec->found_size == rec->nbytes)
2471                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2472
2473                 /* Update the file extent hole info too */
2474                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2475                                            orphan->disk_len);
2476                 if (ret < 0)
2477                         goto out;
2478                 if (RB_EMPTY_ROOT(&rec->holes))
2479                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2480
2481                 list_del(&orphan->list);
2482                 free(orphan);
2483         }
2484         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2485 out:
2486         return ret;
2487 }
2488
2489 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2490                                         struct btrfs_root *root,
2491                                         struct btrfs_path *path,
2492                                         struct inode_record *rec)
2493 {
2494         struct rb_node *node;
2495         struct file_extent_hole *hole;
2496         int found = 0;
2497         int ret = 0;
2498
2499         node = rb_first(&rec->holes);
2500
2501         while (node) {
2502                 found = 1;
2503                 hole = rb_entry(node, struct file_extent_hole, node);
2504                 ret = btrfs_punch_hole(trans, root, rec->ino,
2505                                        hole->start, hole->len);
2506                 if (ret < 0)
2507                         goto out;
2508                 ret = del_file_extent_hole(&rec->holes, hole->start,
2509                                            hole->len);
2510                 if (ret < 0)
2511                         goto out;
2512                 if (RB_EMPTY_ROOT(&rec->holes))
2513                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2514                 node = rb_first(&rec->holes);
2515         }
2516         /* special case for a file losing all its file extent */
2517         if (!found) {
2518                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2519                                        round_up(rec->isize,
2520                                                 root->fs_info->sectorsize));
2521                 if (ret < 0)
2522                         goto out;
2523         }
2524         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2525                rec->ino, root->objectid);
2526 out:
2527         return ret;
2528 }
2529
2530 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2531 {
2532         struct btrfs_trans_handle *trans;
2533         struct btrfs_path path;
2534         int ret = 0;
2535
2536         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2537                              I_ERR_NO_ORPHAN_ITEM |
2538                              I_ERR_LINK_COUNT_WRONG |
2539                              I_ERR_NO_INODE_ITEM |
2540                              I_ERR_FILE_EXTENT_ORPHAN |
2541                              I_ERR_FILE_EXTENT_DISCOUNT|
2542                              I_ERR_FILE_NBYTES_WRONG)))
2543                 return rec->errors;
2544
2545         /*
2546          * For nlink repair, it may create a dir and add link, so
2547          * 2 for parent(256)'s dir_index and dir_item
2548          * 2 for lost+found dir's inode_item and inode_ref
2549          * 1 for the new inode_ref of the file
2550          * 2 for lost+found dir's dir_index and dir_item for the file
2551          */
2552         trans = btrfs_start_transaction(root, 7);
2553         if (IS_ERR(trans))
2554                 return PTR_ERR(trans);
2555
2556         btrfs_init_path(&path);
2557         if (rec->errors & I_ERR_NO_INODE_ITEM)
2558                 ret = repair_inode_no_item(trans, root, &path, rec);
2559         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2560                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2561         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2562                 ret = repair_inode_discount_extent(trans, root, &path, rec);
2563         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2564                 ret = repair_inode_isize(trans, root, &path, rec);
2565         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2566                 ret = repair_inode_orphan_item(trans, root, &path, rec);
2567         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2568                 ret = repair_inode_nlinks(trans, root, &path, rec);
2569         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2570                 ret = repair_inode_nbytes(trans, root, &path, rec);
2571         btrfs_commit_transaction(trans, root);
2572         btrfs_release_path(&path);
2573         return ret;
2574 }
2575
2576 static int check_inode_recs(struct btrfs_root *root,
2577                             struct cache_tree *inode_cache)
2578 {
2579         struct cache_extent *cache;
2580         struct ptr_node *node;
2581         struct inode_record *rec;
2582         struct inode_backref *backref;
2583         int stage = 0;
2584         int ret = 0;
2585         int err = 0;
2586         u64 error = 0;
2587         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2588
2589         if (btrfs_root_refs(&root->root_item) == 0) {
2590                 if (!cache_tree_empty(inode_cache))
2591                         fprintf(stderr, "warning line %d\n", __LINE__);
2592                 return 0;
2593         }
2594
2595         /*
2596          * We need to repair backrefs first because we could change some of the
2597          * errors in the inode recs.
2598          *
2599          * We also need to go through and delete invalid backrefs first and then
2600          * add the correct ones second.  We do this because we may get EEXIST
2601          * when adding back the correct index because we hadn't yet deleted the
2602          * invalid index.
2603          *
2604          * For example, if we were missing a dir index then the directories
2605          * isize would be wrong, so if we fixed the isize to what we thought it
2606          * would be and then fixed the backref we'd still have a invalid fs, so
2607          * we need to add back the dir index and then check to see if the isize
2608          * is still wrong.
2609          */
2610         while (stage < 3) {
2611                 stage++;
2612                 if (stage == 3 && !err)
2613                         break;
2614
2615                 cache = search_cache_extent(inode_cache, 0);
2616                 while (repair && cache) {
2617                         node = container_of(cache, struct ptr_node, cache);
2618                         rec = node->data;
2619                         cache = next_cache_extent(cache);
2620
2621                         /* Need to free everything up and rescan */
2622                         if (stage == 3) {
2623                                 remove_cache_extent(inode_cache, &node->cache);
2624                                 free(node);
2625                                 free_inode_rec(rec);
2626                                 continue;
2627                         }
2628
2629                         if (list_empty(&rec->backrefs))
2630                                 continue;
2631
2632                         ret = repair_inode_backrefs(root, rec, inode_cache,
2633                                                     stage == 1);
2634                         if (ret < 0) {
2635                                 err = ret;
2636                                 stage = 2;
2637                                 break;
2638                         } if (ret > 0) {
2639                                 err = -EAGAIN;
2640                         }
2641                 }
2642         }
2643         if (err)
2644                 return err;
2645
2646         rec = get_inode_rec(inode_cache, root_dirid, 0);
2647         BUG_ON(IS_ERR(rec));
2648         if (rec) {
2649                 ret = check_root_dir(rec);
2650                 if (ret) {
2651                         fprintf(stderr, "root %llu root dir %llu error\n",
2652                                 (unsigned long long)root->root_key.objectid,
2653                                 (unsigned long long)root_dirid);
2654                         print_inode_error(root, rec);
2655                         error++;
2656                 }
2657         } else {
2658                 if (repair) {
2659                         struct btrfs_trans_handle *trans;
2660
2661                         trans = btrfs_start_transaction(root, 1);
2662                         if (IS_ERR(trans)) {
2663                                 err = PTR_ERR(trans);
2664                                 return err;
2665                         }
2666
2667                         fprintf(stderr,
2668                                 "root %llu missing its root dir, recreating\n",
2669                                 (unsigned long long)root->objectid);
2670
2671                         ret = btrfs_make_root_dir(trans, root, root_dirid);
2672                         BUG_ON(ret);
2673
2674                         btrfs_commit_transaction(trans, root);
2675                         return -EAGAIN;
2676                 }
2677
2678                 fprintf(stderr, "root %llu root dir %llu not found\n",
2679                         (unsigned long long)root->root_key.objectid,
2680                         (unsigned long long)root_dirid);
2681         }
2682
2683         while (1) {
2684                 cache = search_cache_extent(inode_cache, 0);
2685                 if (!cache)
2686                         break;
2687                 node = container_of(cache, struct ptr_node, cache);
2688                 rec = node->data;
2689                 remove_cache_extent(inode_cache, &node->cache);
2690                 free(node);
2691                 if (rec->ino == root_dirid ||
2692                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
2693                         free_inode_rec(rec);
2694                         continue;
2695                 }
2696
2697                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2698                         ret = check_orphan_item(root, rec->ino);
2699                         if (ret == 0)
2700                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2701                         if (can_free_inode_rec(rec)) {
2702                                 free_inode_rec(rec);
2703                                 continue;
2704                         }
2705                 }
2706
2707                 if (!rec->found_inode_item)
2708                         rec->errors |= I_ERR_NO_INODE_ITEM;
2709                 if (rec->found_link != rec->nlink)
2710                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2711                 if (repair) {
2712                         ret = try_repair_inode(root, rec);
2713                         if (ret == 0 && can_free_inode_rec(rec)) {
2714                                 free_inode_rec(rec);
2715                                 continue;
2716                         }
2717                         ret = 0;
2718                 }
2719
2720                 if (!(repair && ret == 0))
2721                         error++;
2722                 print_inode_error(root, rec);
2723                 list_for_each_entry(backref, &rec->backrefs, list) {
2724                         if (!backref->found_dir_item)
2725                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
2726                         if (!backref->found_dir_index)
2727                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
2728                         if (!backref->found_inode_ref)
2729                                 backref->errors |= REF_ERR_NO_INODE_REF;
2730                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
2731                                 " namelen %u name %s filetype %d errors %x",
2732                                 (unsigned long long)backref->dir,
2733                                 (unsigned long long)backref->index,
2734                                 backref->namelen, backref->name,
2735                                 backref->filetype, backref->errors);
2736                         print_ref_error(backref->errors);
2737                 }
2738                 free_inode_rec(rec);
2739         }
2740         return (error > 0) ? -1 : 0;
2741 }
2742
2743 static struct root_record *get_root_rec(struct cache_tree *root_cache,
2744                                         u64 objectid)
2745 {
2746         struct cache_extent *cache;
2747         struct root_record *rec = NULL;
2748         int ret;
2749
2750         cache = lookup_cache_extent(root_cache, objectid, 1);
2751         if (cache) {
2752                 rec = container_of(cache, struct root_record, cache);
2753         } else {
2754                 rec = calloc(1, sizeof(*rec));
2755                 if (!rec)
2756                         return ERR_PTR(-ENOMEM);
2757                 rec->objectid = objectid;
2758                 INIT_LIST_HEAD(&rec->backrefs);
2759                 rec->cache.start = objectid;
2760                 rec->cache.size = 1;
2761
2762                 ret = insert_cache_extent(root_cache, &rec->cache);
2763                 if (ret)
2764                         return ERR_PTR(-EEXIST);
2765         }
2766         return rec;
2767 }
2768
2769 static struct root_backref *get_root_backref(struct root_record *rec,
2770                                              u64 ref_root, u64 dir, u64 index,
2771                                              const char *name, int namelen)
2772 {
2773         struct root_backref *backref;
2774
2775         list_for_each_entry(backref, &rec->backrefs, list) {
2776                 if (backref->ref_root != ref_root || backref->dir != dir ||
2777                     backref->namelen != namelen)
2778                         continue;
2779                 if (memcmp(name, backref->name, namelen))
2780                         continue;
2781                 return backref;
2782         }
2783
2784         backref = calloc(1, sizeof(*backref) + namelen + 1);
2785         if (!backref)
2786                 return NULL;
2787         backref->ref_root = ref_root;
2788         backref->dir = dir;
2789         backref->index = index;
2790         backref->namelen = namelen;
2791         memcpy(backref->name, name, namelen);
2792         backref->name[namelen] = '\0';
2793         list_add_tail(&backref->list, &rec->backrefs);
2794         return backref;
2795 }
2796
2797 static void free_root_record(struct cache_extent *cache)
2798 {
2799         struct root_record *rec;
2800         struct root_backref *backref;
2801
2802         rec = container_of(cache, struct root_record, cache);
2803         while (!list_empty(&rec->backrefs)) {
2804                 backref = to_root_backref(rec->backrefs.next);
2805                 list_del(&backref->list);
2806                 free(backref);
2807         }
2808
2809         free(rec);
2810 }
2811
2812 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
2813
2814 static int add_root_backref(struct cache_tree *root_cache,
2815                             u64 root_id, u64 ref_root, u64 dir, u64 index,
2816                             const char *name, int namelen,
2817                             int item_type, int errors)
2818 {
2819         struct root_record *rec;
2820         struct root_backref *backref;
2821
2822         rec = get_root_rec(root_cache, root_id);
2823         BUG_ON(IS_ERR(rec));
2824         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
2825         BUG_ON(!backref);
2826
2827         backref->errors |= errors;
2828
2829         if (item_type != BTRFS_DIR_ITEM_KEY) {
2830                 if (backref->found_dir_index || backref->found_back_ref ||
2831                     backref->found_forward_ref) {
2832                         if (backref->index != index)
2833                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
2834                 } else {
2835                         backref->index = index;
2836                 }
2837         }
2838
2839         if (item_type == BTRFS_DIR_ITEM_KEY) {
2840                 if (backref->found_forward_ref)
2841                         rec->found_ref++;
2842                 backref->found_dir_item = 1;
2843         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
2844                 backref->found_dir_index = 1;
2845         } else if (item_type == BTRFS_ROOT_REF_KEY) {
2846                 if (backref->found_forward_ref)
2847                         backref->errors |= REF_ERR_DUP_ROOT_REF;
2848                 else if (backref->found_dir_item)
2849                         rec->found_ref++;
2850                 backref->found_forward_ref = 1;
2851         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
2852                 if (backref->found_back_ref)
2853                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
2854                 backref->found_back_ref = 1;
2855         } else {
2856                 BUG_ON(1);
2857         }
2858
2859         if (backref->found_forward_ref && backref->found_dir_item)
2860                 backref->reachable = 1;
2861         return 0;
2862 }
2863
2864 static int merge_root_recs(struct btrfs_root *root,
2865                            struct cache_tree *src_cache,
2866                            struct cache_tree *dst_cache)
2867 {
2868         struct cache_extent *cache;
2869         struct ptr_node *node;
2870         struct inode_record *rec;
2871         struct inode_backref *backref;
2872         int ret = 0;
2873
2874         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
2875                 free_inode_recs_tree(src_cache);
2876                 return 0;
2877         }
2878
2879         while (1) {
2880                 cache = search_cache_extent(src_cache, 0);
2881                 if (!cache)
2882                         break;
2883                 node = container_of(cache, struct ptr_node, cache);
2884                 rec = node->data;
2885                 remove_cache_extent(src_cache, &node->cache);
2886                 free(node);
2887
2888                 ret = is_child_root(root, root->objectid, rec->ino);
2889                 if (ret < 0)
2890                         break;
2891                 else if (ret == 0)
2892                         goto skip;
2893
2894                 list_for_each_entry(backref, &rec->backrefs, list) {
2895                         BUG_ON(backref->found_inode_ref);
2896                         if (backref->found_dir_item)
2897                                 add_root_backref(dst_cache, rec->ino,
2898                                         root->root_key.objectid, backref->dir,
2899                                         backref->index, backref->name,
2900                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
2901                                         backref->errors);
2902                         if (backref->found_dir_index)
2903                                 add_root_backref(dst_cache, rec->ino,
2904                                         root->root_key.objectid, backref->dir,
2905                                         backref->index, backref->name,
2906                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
2907                                         backref->errors);
2908                 }
2909 skip:
2910                 free_inode_rec(rec);
2911         }
2912         if (ret < 0)
2913                 return ret;
2914         return 0;
2915 }
2916
2917 static int check_root_refs(struct btrfs_root *root,
2918                            struct cache_tree *root_cache)
2919 {
2920         struct root_record *rec;
2921         struct root_record *ref_root;
2922         struct root_backref *backref;
2923         struct cache_extent *cache;
2924         int loop = 1;
2925         int ret;
2926         int error;
2927         int errors = 0;
2928
2929         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
2930         BUG_ON(IS_ERR(rec));
2931         rec->found_ref = 1;
2932
2933         /* fixme: this can not detect circular references */
2934         while (loop) {
2935                 loop = 0;
2936                 cache = search_cache_extent(root_cache, 0);
2937                 while (1) {
2938                         if (!cache)
2939                                 break;
2940                         rec = container_of(cache, struct root_record, cache);
2941                         cache = next_cache_extent(cache);
2942
2943                         if (rec->found_ref == 0)
2944                                 continue;
2945
2946                         list_for_each_entry(backref, &rec->backrefs, list) {
2947                                 if (!backref->reachable)
2948                                         continue;
2949
2950                                 ref_root = get_root_rec(root_cache,
2951                                                         backref->ref_root);
2952                                 BUG_ON(IS_ERR(ref_root));
2953                                 if (ref_root->found_ref > 0)
2954                                         continue;
2955
2956                                 backref->reachable = 0;
2957                                 rec->found_ref--;
2958                                 if (rec->found_ref == 0)
2959                                         loop = 1;
2960                         }
2961                 }
2962         }
2963
2964         cache = search_cache_extent(root_cache, 0);
2965         while (1) {
2966                 if (!cache)
2967                         break;
2968                 rec = container_of(cache, struct root_record, cache);
2969                 cache = next_cache_extent(cache);
2970
2971                 if (rec->found_ref == 0 &&
2972                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
2973                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
2974                         ret = check_orphan_item(root->fs_info->tree_root,
2975                                                 rec->objectid);
2976                         if (ret == 0)
2977                                 continue;
2978
2979                         /*
2980                          * If we don't have a root item then we likely just have
2981                          * a dir item in a snapshot for this root but no actual
2982                          * ref key or anything so it's meaningless.
2983                          */
2984                         if (!rec->found_root_item)
2985                                 continue;
2986                         errors++;
2987                         fprintf(stderr, "fs tree %llu not referenced\n",
2988                                 (unsigned long long)rec->objectid);
2989                 }
2990
2991                 error = 0;
2992                 if (rec->found_ref > 0 && !rec->found_root_item)
2993                         error = 1;
2994                 list_for_each_entry(backref, &rec->backrefs, list) {
2995                         if (!backref->found_dir_item)
2996                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
2997                         if (!backref->found_dir_index)
2998                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
2999                         if (!backref->found_back_ref)
3000                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3001                         if (!backref->found_forward_ref)
3002                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3003                         if (backref->reachable && backref->errors)
3004                                 error = 1;
3005                 }
3006                 if (!error)
3007                         continue;
3008
3009                 errors++;
3010                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3011                         (unsigned long long)rec->objectid, rec->found_ref,
3012                          rec->found_root_item ? "" : "not found");
3013
3014                 list_for_each_entry(backref, &rec->backrefs, list) {
3015                         if (!backref->reachable)
3016                                 continue;
3017                         if (!backref->errors && rec->found_root_item)
3018                                 continue;
3019                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3020                                 " index %llu namelen %u name %s errors %x\n",
3021                                 (unsigned long long)backref->ref_root,
3022                                 (unsigned long long)backref->dir,
3023                                 (unsigned long long)backref->index,
3024                                 backref->namelen, backref->name,
3025                                 backref->errors);
3026                         print_ref_error(backref->errors);
3027                 }
3028         }
3029         return errors > 0 ? 1 : 0;
3030 }
3031
3032 static int process_root_ref(struct extent_buffer *eb, int slot,
3033                             struct btrfs_key *key,
3034                             struct cache_tree *root_cache)
3035 {
3036         u64 dirid;
3037         u64 index;
3038         u32 len;
3039         u32 name_len;
3040         struct btrfs_root_ref *ref;
3041         char namebuf[BTRFS_NAME_LEN];
3042         int error;
3043
3044         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3045
3046         dirid = btrfs_root_ref_dirid(eb, ref);
3047         index = btrfs_root_ref_sequence(eb, ref);
3048         name_len = btrfs_root_ref_name_len(eb, ref);
3049
3050         if (name_len <= BTRFS_NAME_LEN) {
3051                 len = name_len;
3052                 error = 0;
3053         } else {
3054                 len = BTRFS_NAME_LEN;
3055                 error = REF_ERR_NAME_TOO_LONG;
3056         }
3057         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3058
3059         if (key->type == BTRFS_ROOT_REF_KEY) {
3060                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3061                                  index, namebuf, len, key->type, error);
3062         } else {
3063                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3064                                  index, namebuf, len, key->type, error);
3065         }
3066         return 0;
3067 }
3068
3069 static void free_corrupt_block(struct cache_extent *cache)
3070 {
3071         struct btrfs_corrupt_block *corrupt;
3072
3073         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3074         free(corrupt);
3075 }
3076
3077 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3078
3079 /*
3080  * Repair the btree of the given root.
3081  *
3082  * The fix is to remove the node key in corrupt_blocks cache_tree.
3083  * and rebalance the tree.
3084  * After the fix, the btree should be writeable.
3085  */
3086 static int repair_btree(struct btrfs_root *root,
3087                         struct cache_tree *corrupt_blocks)
3088 {
3089         struct btrfs_trans_handle *trans;
3090         struct btrfs_path path;
3091         struct btrfs_corrupt_block *corrupt;
3092         struct cache_extent *cache;
3093         struct btrfs_key key;
3094         u64 offset;
3095         int level;
3096         int ret = 0;
3097
3098         if (cache_tree_empty(corrupt_blocks))
3099                 return 0;
3100
3101         trans = btrfs_start_transaction(root, 1);
3102         if (IS_ERR(trans)) {
3103                 ret = PTR_ERR(trans);
3104                 fprintf(stderr, "Error starting transaction: %s\n",
3105                         strerror(-ret));
3106                 return ret;
3107         }
3108         btrfs_init_path(&path);
3109         cache = first_cache_extent(corrupt_blocks);
3110         while (cache) {
3111                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3112                                        cache);
3113                 level = corrupt->level;
3114                 path.lowest_level = level;
3115                 key.objectid = corrupt->key.objectid;
3116                 key.type = corrupt->key.type;
3117                 key.offset = corrupt->key.offset;
3118
3119                 /*
3120                  * Here we don't want to do any tree balance, since it may
3121                  * cause a balance with corrupted brother leaf/node,
3122                  * so ins_len set to 0 here.
3123                  * Balance will be done after all corrupt node/leaf is deleted.
3124                  */
3125                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3126                 if (ret < 0)
3127                         goto out;
3128                 offset = btrfs_node_blockptr(path.nodes[level],
3129                                              path.slots[level]);
3130
3131                 /* Remove the ptr */
3132                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3133                 if (ret < 0)
3134                         goto out;
3135                 /*
3136                  * Remove the corresponding extent
3137                  * return value is not concerned.
3138                  */
3139                 btrfs_release_path(&path);
3140                 ret = btrfs_free_extent(trans, root, offset,
3141                                 root->fs_info->nodesize, 0,
3142                                 root->root_key.objectid, level - 1, 0);
3143                 cache = next_cache_extent(cache);
3144         }
3145
3146         /* Balance the btree using btrfs_search_slot() */
3147         cache = first_cache_extent(corrupt_blocks);
3148         while (cache) {
3149                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3150                                        cache);
3151                 memcpy(&key, &corrupt->key, sizeof(key));
3152                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3153                 if (ret < 0)
3154                         goto out;
3155                 /* return will always >0 since it won't find the item */
3156                 ret = 0;
3157                 btrfs_release_path(&path);
3158                 cache = next_cache_extent(cache);
3159         }
3160 out:
3161         btrfs_commit_transaction(trans, root);
3162         btrfs_release_path(&path);
3163         return ret;
3164 }
3165
3166 static int check_fs_root(struct btrfs_root *root,
3167                          struct cache_tree *root_cache,
3168                          struct walk_control *wc)
3169 {
3170         int ret = 0;
3171         int err = 0;
3172         int wret;
3173         int level;
3174         struct btrfs_path path;
3175         struct shared_node root_node;
3176         struct root_record *rec;
3177         struct btrfs_root_item *root_item = &root->root_item;
3178         struct cache_tree corrupt_blocks;
3179         struct orphan_data_extent *orphan;
3180         struct orphan_data_extent *tmp;
3181         enum btrfs_tree_block_status status;
3182         struct node_refs nrefs;
3183
3184         /*
3185          * Reuse the corrupt_block cache tree to record corrupted tree block
3186          *
3187          * Unlike the usage in extent tree check, here we do it in a per
3188          * fs/subvol tree base.
3189          */
3190         cache_tree_init(&corrupt_blocks);
3191         root->fs_info->corrupt_blocks = &corrupt_blocks;
3192
3193         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3194                 rec = get_root_rec(root_cache, root->root_key.objectid);
3195                 BUG_ON(IS_ERR(rec));
3196                 if (btrfs_root_refs(root_item) > 0)
3197                         rec->found_root_item = 1;
3198         }
3199
3200         btrfs_init_path(&path);
3201         memset(&root_node, 0, sizeof(root_node));
3202         cache_tree_init(&root_node.root_cache);
3203         cache_tree_init(&root_node.inode_cache);
3204         memset(&nrefs, 0, sizeof(nrefs));
3205
3206         /* Move the orphan extent record to corresponding inode_record */
3207         list_for_each_entry_safe(orphan, tmp,
3208                                  &root->orphan_data_extents, list) {
3209                 struct inode_record *inode;
3210
3211                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3212                                       1);
3213                 BUG_ON(IS_ERR(inode));
3214                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3215                 list_move(&orphan->list, &inode->orphan_extents);
3216         }
3217
3218         level = btrfs_header_level(root->node);
3219         memset(wc->nodes, 0, sizeof(wc->nodes));
3220         wc->nodes[level] = &root_node;
3221         wc->active_node = level;
3222         wc->root_level = level;
3223
3224         /* We may not have checked the root block, lets do that now */
3225         if (btrfs_is_leaf(root->node))
3226                 status = btrfs_check_leaf(root, NULL, root->node);
3227         else
3228                 status = btrfs_check_node(root, NULL, root->node);
3229         if (status != BTRFS_TREE_BLOCK_CLEAN)
3230                 return -EIO;
3231
3232         if (btrfs_root_refs(root_item) > 0 ||
3233             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3234                 path.nodes[level] = root->node;
3235                 extent_buffer_get(root->node);
3236                 path.slots[level] = 0;
3237         } else {
3238                 struct btrfs_key key;
3239                 struct btrfs_disk_key found_key;
3240
3241                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3242                 level = root_item->drop_level;
3243                 path.lowest_level = level;
3244                 if (level > btrfs_header_level(root->node) ||
3245                     level >= BTRFS_MAX_LEVEL) {
3246                         error("ignoring invalid drop level: %u", level);
3247                         goto skip_walking;
3248                 }
3249                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3250                 if (wret < 0)
3251                         goto skip_walking;
3252                 btrfs_node_key(path.nodes[level], &found_key,
3253                                 path.slots[level]);
3254                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3255                                         sizeof(found_key)));
3256         }
3257
3258         while (1) {
3259                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3260                 if (wret < 0)
3261                         ret = wret;
3262                 if (wret != 0)
3263                         break;
3264
3265                 wret = walk_up_tree(root, &path, wc, &level);
3266                 if (wret < 0)
3267                         ret = wret;
3268                 if (wret != 0)
3269                         break;
3270         }
3271 skip_walking:
3272         btrfs_release_path(&path);
3273
3274         if (!cache_tree_empty(&corrupt_blocks)) {
3275                 struct cache_extent *cache;
3276                 struct btrfs_corrupt_block *corrupt;
3277
3278                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3279                        root->root_key.objectid);
3280                 cache = first_cache_extent(&corrupt_blocks);
3281                 while (cache) {
3282                         corrupt = container_of(cache,
3283                                                struct btrfs_corrupt_block,
3284                                                cache);
3285                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3286                                cache->start, corrupt->level,
3287                                corrupt->key.objectid, corrupt->key.type,
3288                                corrupt->key.offset);
3289                         cache = next_cache_extent(cache);
3290                 }
3291                 if (repair) {
3292                         printf("Try to repair the btree for root %llu\n",
3293                                root->root_key.objectid);
3294                         ret = repair_btree(root, &corrupt_blocks);
3295                         if (ret < 0)
3296                                 fprintf(stderr, "Failed to repair btree: %s\n",
3297                                         strerror(-ret));
3298                         if (!ret)
3299                                 printf("Btree for root %llu is fixed\n",
3300                                        root->root_key.objectid);
3301                 }
3302         }
3303
3304         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3305         if (err < 0)
3306                 ret = err;
3307
3308         if (root_node.current) {
3309                 root_node.current->checked = 1;
3310                 maybe_free_inode_rec(&root_node.inode_cache,
3311                                 root_node.current);
3312         }
3313
3314         err = check_inode_recs(root, &root_node.inode_cache);
3315         if (!ret)
3316                 ret = err;
3317
3318         free_corrupt_blocks_tree(&corrupt_blocks);
3319         root->fs_info->corrupt_blocks = NULL;
3320         free_orphan_data_extents(&root->orphan_data_extents);
3321         return ret;
3322 }
3323
3324 static int check_fs_roots(struct btrfs_fs_info *fs_info,
3325                           struct cache_tree *root_cache)
3326 {
3327         struct btrfs_path path;
3328         struct btrfs_key key;
3329         struct walk_control wc;
3330         struct extent_buffer *leaf, *tree_node;
3331         struct btrfs_root *tmp_root;
3332         struct btrfs_root *tree_root = fs_info->tree_root;
3333         int ret;
3334         int err = 0;
3335
3336         if (ctx.progress_enabled) {
3337                 ctx.tp = TASK_FS_ROOTS;
3338                 task_start(ctx.info);
3339         }
3340
3341         /*
3342          * Just in case we made any changes to the extent tree that weren't
3343          * reflected into the free space cache yet.
3344          */
3345         if (repair)
3346                 reset_cached_block_groups(fs_info);
3347         memset(&wc, 0, sizeof(wc));
3348         cache_tree_init(&wc.shared);
3349         btrfs_init_path(&path);
3350
3351 again:
3352         key.offset = 0;
3353         key.objectid = 0;
3354         key.type = BTRFS_ROOT_ITEM_KEY;
3355         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3356         if (ret < 0) {
3357                 err = 1;
3358                 goto out;
3359         }
3360         tree_node = tree_root->node;
3361         while (1) {
3362                 if (tree_node != tree_root->node) {
3363                         free_root_recs_tree(root_cache);
3364                         btrfs_release_path(&path);
3365                         goto again;
3366                 }
3367                 leaf = path.nodes[0];
3368                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3369                         ret = btrfs_next_leaf(tree_root, &path);
3370                         if (ret) {
3371                                 if (ret < 0)
3372                                         err = 1;
3373                                 break;
3374                         }
3375                         leaf = path.nodes[0];
3376                 }
3377                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3378                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3379                     fs_root_objectid(key.objectid)) {
3380                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3381                                 tmp_root = btrfs_read_fs_root_no_cache(
3382                                                 fs_info, &key);
3383                         } else {
3384                                 key.offset = (u64)-1;
3385                                 tmp_root = btrfs_read_fs_root(
3386                                                 fs_info, &key);
3387                         }
3388                         if (IS_ERR(tmp_root)) {
3389                                 err = 1;
3390                                 goto next;
3391                         }
3392                         ret = check_fs_root(tmp_root, root_cache, &wc);
3393                         if (ret == -EAGAIN) {
3394                                 free_root_recs_tree(root_cache);
3395                                 btrfs_release_path(&path);
3396                                 goto again;
3397                         }
3398                         if (ret)
3399                                 err = 1;
3400                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3401                                 btrfs_free_fs_root(tmp_root);
3402                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3403                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3404                         process_root_ref(leaf, path.slots[0], &key,
3405                                          root_cache);
3406                 }
3407 next:
3408                 path.slots[0]++;
3409         }
3410 out:
3411         btrfs_release_path(&path);
3412         if (err)
3413                 free_extent_cache_tree(&wc.shared);
3414         if (!cache_tree_empty(&wc.shared))
3415                 fprintf(stderr, "warning line %d\n", __LINE__);
3416
3417         task_stop(ctx.info);
3418
3419         return err;
3420 }
3421
3422 static struct tree_backref *find_tree_backref(struct extent_record *rec,
3423                                                 u64 parent, u64 root)
3424 {
3425         struct rb_node *node;
3426         struct tree_backref *back = NULL;
3427         struct tree_backref match = {
3428                 .node = {
3429                         .is_data = 0,
3430                 },
3431         };
3432
3433         if (parent) {
3434                 match.parent = parent;
3435                 match.node.full_backref = 1;
3436         } else {
3437                 match.root = root;
3438         }
3439
3440         node = rb_search(&rec->backref_tree, &match.node.node,
3441                          (rb_compare_keys)compare_extent_backref, NULL);
3442         if (node)
3443                 back = to_tree_backref(rb_node_to_extent_backref(node));
3444
3445         return back;
3446 }
3447
3448 static struct data_backref *find_data_backref(struct extent_record *rec,
3449                                                 u64 parent, u64 root,
3450                                                 u64 owner, u64 offset,
3451                                                 int found_ref,
3452                                                 u64 disk_bytenr, u64 bytes)
3453 {
3454         struct rb_node *node;
3455         struct data_backref *back = NULL;
3456         struct data_backref match = {
3457                 .node = {
3458                         .is_data = 1,
3459                 },
3460                 .owner = owner,
3461                 .offset = offset,
3462                 .bytes = bytes,
3463                 .found_ref = found_ref,
3464                 .disk_bytenr = disk_bytenr,
3465         };
3466
3467         if (parent) {
3468                 match.parent = parent;
3469                 match.node.full_backref = 1;
3470         } else {
3471                 match.root = root;
3472         }
3473
3474         node = rb_search(&rec->backref_tree, &match.node.node,
3475                          (rb_compare_keys)compare_extent_backref, NULL);
3476         if (node)
3477                 back = to_data_backref(rb_node_to_extent_backref(node));
3478
3479         return back;
3480 }
3481
3482 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
3483                           struct cache_tree *root_cache)
3484 {
3485         int ret;
3486
3487         if (!ctx.progress_enabled)
3488                 fprintf(stderr, "checking fs roots\n");
3489         if (check_mode == CHECK_MODE_LOWMEM)
3490                 ret = check_fs_roots_lowmem(fs_info);
3491         else
3492                 ret = check_fs_roots(fs_info, root_cache);
3493
3494         return ret;
3495 }
3496
3497 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3498 {
3499         struct extent_backref *back, *tmp;
3500         struct tree_backref *tback;
3501         struct data_backref *dback;
3502         u64 found = 0;
3503         int err = 0;
3504
3505         rbtree_postorder_for_each_entry_safe(back, tmp,
3506                                              &rec->backref_tree, node) {
3507                 if (!back->found_extent_tree) {
3508                         err = 1;
3509                         if (!print_errs)
3510                                 goto out;
3511                         if (back->is_data) {
3512                                 dback = to_data_backref(back);
3513                                 fprintf(stderr,
3514 "data backref %llu %s %llu owner %llu offset %llu num_refs %lu not found in extent tree\n",
3515                                         (unsigned long long)rec->start,
3516                                         back->full_backref ?
3517                                         "parent" : "root",
3518                                         back->full_backref ?
3519                                         (unsigned long long)dback->parent :
3520                                         (unsigned long long)dback->root,
3521                                         (unsigned long long)dback->owner,
3522                                         (unsigned long long)dback->offset,
3523                                         (unsigned long)dback->num_refs);
3524                         } else {
3525                                 tback = to_tree_backref(back);
3526                                 fprintf(stderr,
3527 "tree backref %llu parent %llu root %llu not found in extent tree\n",
3528                                         (unsigned long long)rec->start,
3529                                         (unsigned long long)tback->parent,
3530                                         (unsigned long long)tback->root);
3531                         }
3532                 }
3533                 if (!back->is_data && !back->found_ref) {
3534                         err = 1;
3535                         if (!print_errs)
3536                                 goto out;
3537                         tback = to_tree_backref(back);
3538                         fprintf(stderr,
3539                                 "backref %llu %s %llu not referenced back %p\n",
3540                                 (unsigned long long)rec->start,
3541                                 back->full_backref ? "parent" : "root",
3542                                 back->full_backref ?
3543                                 (unsigned long long)tback->parent :
3544                                 (unsigned long long)tback->root, back);
3545                 }
3546                 if (back->is_data) {
3547                         dback = to_data_backref(back);
3548                         if (dback->found_ref != dback->num_refs) {
3549                                 err = 1;
3550                                 if (!print_errs)
3551                                         goto out;
3552                                 fprintf(stderr,
3553 "incorrect local backref count on %llu %s %llu owner %llu offset %llu found %u wanted %u back %p\n",
3554                                         (unsigned long long)rec->start,
3555                                         back->full_backref ?
3556                                         "parent" : "root",
3557                                         back->full_backref ?
3558                                         (unsigned long long)dback->parent :
3559                                         (unsigned long long)dback->root,
3560                                         (unsigned long long)dback->owner,
3561                                         (unsigned long long)dback->offset,
3562                                         dback->found_ref, dback->num_refs,
3563                                         back);
3564                         }
3565                         if (dback->disk_bytenr != rec->start) {
3566                                 err = 1;
3567                                 if (!print_errs)
3568                                         goto out;
3569                                 fprintf(stderr,
3570 "backref disk bytenr does not match extent record, bytenr=%llu, ref bytenr=%llu\n",
3571                                         (unsigned long long)rec->start,
3572                                         (unsigned long long)dback->disk_bytenr);
3573                         }
3574
3575                         if (dback->bytes != rec->nr) {
3576                                 err = 1;
3577                                 if (!print_errs)
3578                                         goto out;
3579                                 fprintf(stderr,
3580 "backref bytes do not match extent backref, bytenr=%llu, ref bytes=%llu, backref bytes=%llu\n",
3581                                         (unsigned long long)rec->start,
3582                                         (unsigned long long)rec->nr,
3583                                         (unsigned long long)dback->bytes);
3584                         }
3585                 }
3586                 if (!back->is_data) {
3587                         found += 1;
3588                 } else {
3589                         dback = to_data_backref(back);
3590                         found += dback->found_ref;
3591                 }
3592         }
3593         if (found != rec->refs) {
3594                 err = 1;
3595                 if (!print_errs)
3596                         goto out;
3597                 fprintf(stderr,
3598         "incorrect global backref count on %llu found %llu wanted %llu\n",
3599                         (unsigned long long)rec->start,
3600                         (unsigned long long)found,
3601                         (unsigned long long)rec->refs);
3602         }
3603 out:
3604         return err;
3605 }
3606
3607 static void __free_one_backref(struct rb_node *node)
3608 {
3609         struct extent_backref *back = rb_node_to_extent_backref(node);
3610
3611         free(back);
3612 }
3613
3614 static void free_all_extent_backrefs(struct extent_record *rec)
3615 {
3616         rb_free_nodes(&rec->backref_tree, __free_one_backref);
3617 }
3618
3619 static void free_extent_record_cache(struct cache_tree *extent_cache)
3620 {
3621         struct cache_extent *cache;
3622         struct extent_record *rec;
3623
3624         while (1) {
3625                 cache = first_cache_extent(extent_cache);
3626                 if (!cache)
3627                         break;
3628                 rec = container_of(cache, struct extent_record, cache);
3629                 remove_cache_extent(extent_cache, cache);
3630                 free_all_extent_backrefs(rec);
3631                 free(rec);
3632         }
3633 }
3634
3635 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3636                                  struct extent_record *rec)
3637 {
3638         if (rec->content_checked && rec->owner_ref_checked &&
3639             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3640             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3641             !rec->bad_full_backref && !rec->crossing_stripes &&
3642             !rec->wrong_chunk_type) {
3643                 remove_cache_extent(extent_cache, &rec->cache);
3644                 free_all_extent_backrefs(rec);
3645                 list_del_init(&rec->list);
3646                 free(rec);
3647         }
3648         return 0;
3649 }
3650
3651 static int check_owner_ref(struct btrfs_root *root,
3652                             struct extent_record *rec,
3653                             struct extent_buffer *buf)
3654 {
3655         struct extent_backref *node, *tmp;
3656         struct tree_backref *back;
3657         struct btrfs_root *ref_root;
3658         struct btrfs_key key;
3659         struct btrfs_path path;
3660         struct extent_buffer *parent;
3661         int level;
3662         int found = 0;
3663         int ret;
3664
3665         rbtree_postorder_for_each_entry_safe(node, tmp,
3666                                              &rec->backref_tree, node) {
3667                 if (node->is_data)
3668                         continue;
3669                 if (!node->found_ref)
3670                         continue;
3671                 if (node->full_backref)
3672                         continue;
3673                 back = to_tree_backref(node);
3674                 if (btrfs_header_owner(buf) == back->root)
3675                         return 0;
3676         }
3677         BUG_ON(rec->is_root);
3678
3679         /* try to find the block by search corresponding fs tree */
3680         key.objectid = btrfs_header_owner(buf);
3681         key.type = BTRFS_ROOT_ITEM_KEY;
3682         key.offset = (u64)-1;
3683
3684         ref_root = btrfs_read_fs_root(root->fs_info, &key);
3685         if (IS_ERR(ref_root))
3686                 return 1;
3687
3688         level = btrfs_header_level(buf);
3689         if (level == 0)
3690                 btrfs_item_key_to_cpu(buf, &key, 0);
3691         else
3692                 btrfs_node_key_to_cpu(buf, &key, 0);
3693
3694         btrfs_init_path(&path);
3695         path.lowest_level = level + 1;
3696         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3697         if (ret < 0)
3698                 return 0;
3699
3700         parent = path.nodes[level + 1];
3701         if (parent && buf->start == btrfs_node_blockptr(parent,
3702                                                         path.slots[level + 1]))
3703                 found = 1;
3704
3705         btrfs_release_path(&path);
3706         return found ? 0 : 1;
3707 }
3708
3709 static int is_extent_tree_record(struct extent_record *rec)
3710 {
3711         struct extent_backref *node, *tmp;
3712         struct tree_backref *back;
3713         int is_extent = 0;
3714
3715         rbtree_postorder_for_each_entry_safe(node, tmp,
3716                                              &rec->backref_tree, node) {
3717                 if (node->is_data)
3718                         return 0;
3719                 back = to_tree_backref(node);
3720                 if (node->full_backref)
3721                         return 0;
3722                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3723                         is_extent = 1;
3724         }
3725         return is_extent;
3726 }
3727
3728
3729 static int record_bad_block_io(struct btrfs_fs_info *info,
3730                                struct cache_tree *extent_cache,
3731                                u64 start, u64 len)
3732 {
3733         struct extent_record *rec;
3734         struct cache_extent *cache;
3735         struct btrfs_key key;
3736
3737         cache = lookup_cache_extent(extent_cache, start, len);
3738         if (!cache)
3739                 return 0;
3740
3741         rec = container_of(cache, struct extent_record, cache);
3742         if (!is_extent_tree_record(rec))
3743                 return 0;
3744
3745         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3746         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
3747 }
3748
3749 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3750                        struct extent_buffer *buf, int slot)
3751 {
3752         if (btrfs_header_level(buf)) {
3753                 struct btrfs_key_ptr ptr1, ptr2;
3754
3755                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3756                                    sizeof(struct btrfs_key_ptr));
3757                 read_extent_buffer(buf, &ptr2,
3758                                    btrfs_node_key_ptr_offset(slot + 1),
3759                                    sizeof(struct btrfs_key_ptr));
3760                 write_extent_buffer(buf, &ptr1,
3761                                     btrfs_node_key_ptr_offset(slot + 1),
3762                                     sizeof(struct btrfs_key_ptr));
3763                 write_extent_buffer(buf, &ptr2,
3764                                     btrfs_node_key_ptr_offset(slot),
3765                                     sizeof(struct btrfs_key_ptr));
3766                 if (slot == 0) {
3767                         struct btrfs_disk_key key;
3768
3769                         btrfs_node_key(buf, &key, 0);
3770                         btrfs_fixup_low_keys(root, path, &key,
3771                                              btrfs_header_level(buf) + 1);
3772                 }
3773         } else {
3774                 struct btrfs_item *item1, *item2;
3775                 struct btrfs_key k1, k2;
3776                 char *item1_data, *item2_data;
3777                 u32 item1_offset, item2_offset, item1_size, item2_size;
3778
3779                 item1 = btrfs_item_nr(slot);
3780                 item2 = btrfs_item_nr(slot + 1);
3781                 btrfs_item_key_to_cpu(buf, &k1, slot);
3782                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
3783                 item1_offset = btrfs_item_offset(buf, item1);
3784                 item2_offset = btrfs_item_offset(buf, item2);
3785                 item1_size = btrfs_item_size(buf, item1);
3786                 item2_size = btrfs_item_size(buf, item2);
3787
3788                 item1_data = malloc(item1_size);
3789                 if (!item1_data)
3790                         return -ENOMEM;
3791                 item2_data = malloc(item2_size);
3792                 if (!item2_data) {
3793                         free(item1_data);
3794                         return -ENOMEM;
3795                 }
3796
3797                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
3798                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
3799
3800                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
3801                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
3802                 free(item1_data);
3803                 free(item2_data);
3804
3805                 btrfs_set_item_offset(buf, item1, item2_offset);
3806                 btrfs_set_item_offset(buf, item2, item1_offset);
3807                 btrfs_set_item_size(buf, item1, item2_size);
3808                 btrfs_set_item_size(buf, item2, item1_size);
3809
3810                 path->slots[0] = slot;
3811                 btrfs_set_item_key_unsafe(root, path, &k2);
3812                 path->slots[0] = slot + 1;
3813                 btrfs_set_item_key_unsafe(root, path, &k1);
3814         }
3815         return 0;
3816 }
3817
3818 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
3819 {
3820         struct extent_buffer *buf;
3821         struct btrfs_key k1, k2;
3822         int i;
3823         int level = path->lowest_level;
3824         int ret = -EIO;
3825
3826         buf = path->nodes[level];
3827         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
3828                 if (level) {
3829                         btrfs_node_key_to_cpu(buf, &k1, i);
3830                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
3831                 } else {
3832                         btrfs_item_key_to_cpu(buf, &k1, i);
3833                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
3834                 }
3835                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
3836                         continue;
3837                 ret = swap_values(root, path, buf, i);
3838                 if (ret)
3839                         break;
3840                 btrfs_mark_buffer_dirty(buf);
3841                 i = 0;
3842         }
3843         return ret;
3844 }
3845
3846 static int delete_bogus_item(struct btrfs_root *root,
3847                              struct btrfs_path *path,
3848                              struct extent_buffer *buf, int slot)
3849 {
3850         struct btrfs_key key;
3851         int nritems = btrfs_header_nritems(buf);
3852
3853         btrfs_item_key_to_cpu(buf, &key, slot);
3854
3855         /* These are all the keys we can deal with missing. */
3856         if (key.type != BTRFS_DIR_INDEX_KEY &&
3857             key.type != BTRFS_EXTENT_ITEM_KEY &&
3858             key.type != BTRFS_METADATA_ITEM_KEY &&
3859             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
3860             key.type != BTRFS_EXTENT_DATA_REF_KEY)
3861                 return -1;
3862
3863         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
3864                (unsigned long long)key.objectid, key.type,
3865                (unsigned long long)key.offset, slot, buf->start);
3866         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
3867                               btrfs_item_nr_offset(slot + 1),
3868                               sizeof(struct btrfs_item) *
3869                               (nritems - slot - 1));
3870         btrfs_set_header_nritems(buf, nritems - 1);
3871         if (slot == 0) {
3872                 struct btrfs_disk_key disk_key;
3873
3874                 btrfs_item_key(buf, &disk_key, 0);
3875                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
3876         }
3877         btrfs_mark_buffer_dirty(buf);
3878         return 0;
3879 }
3880
3881 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
3882 {
3883         struct extent_buffer *buf;
3884         int i;
3885         int ret = 0;
3886
3887         /* We should only get this for leaves */
3888         BUG_ON(path->lowest_level);
3889         buf = path->nodes[0];
3890 again:
3891         for (i = 0; i < btrfs_header_nritems(buf); i++) {
3892                 unsigned int shift = 0, offset;
3893
3894                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
3895                     BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
3896                         if (btrfs_item_end_nr(buf, i) >
3897                             BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
3898                                 ret = delete_bogus_item(root, path, buf, i);
3899                                 if (!ret)
3900                                         goto again;
3901                                 fprintf(stderr,
3902                                 "item is off the end of the leaf, can't fix\n");
3903                                 ret = -EIO;
3904                                 break;
3905                         }
3906                         shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
3907                                 btrfs_item_end_nr(buf, i);
3908                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
3909                            btrfs_item_offset_nr(buf, i - 1)) {
3910                         if (btrfs_item_end_nr(buf, i) >
3911                             btrfs_item_offset_nr(buf, i - 1)) {
3912                                 ret = delete_bogus_item(root, path, buf, i);
3913                                 if (!ret)
3914                                         goto again;
3915                                 fprintf(stderr, "items overlap, can't fix\n");
3916                                 ret = -EIO;
3917                                 break;
3918                         }
3919                         shift = btrfs_item_offset_nr(buf, i - 1) -
3920                                 btrfs_item_end_nr(buf, i);
3921                 }
3922                 if (!shift)
3923                         continue;
3924
3925                 printf("Shifting item nr %d by %u bytes in block %llu\n",
3926                        i, shift, (unsigned long long)buf->start);
3927                 offset = btrfs_item_offset_nr(buf, i);
3928                 memmove_extent_buffer(buf,
3929                                       btrfs_leaf_data(buf) + offset + shift,
3930                                       btrfs_leaf_data(buf) + offset,
3931                                       btrfs_item_size_nr(buf, i));
3932                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
3933                                       offset + shift);
3934                 btrfs_mark_buffer_dirty(buf);
3935         }
3936
3937         /*
3938          * We may have moved things, in which case we want to exit so we don't
3939          * write those changes out.  Once we have proper abort functionality in
3940          * progs this can be changed to something nicer.
3941          */
3942         BUG_ON(ret);
3943         return ret;
3944 }
3945
3946 /*
3947  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
3948  * then just return -EIO.
3949  */
3950 static int try_to_fix_bad_block(struct btrfs_root *root,
3951                                 struct extent_buffer *buf,
3952                                 enum btrfs_tree_block_status status)
3953 {
3954         struct btrfs_trans_handle *trans;
3955         struct ulist *roots;
3956         struct ulist_node *node;
3957         struct btrfs_root *search_root;
3958         struct btrfs_path path;
3959         struct ulist_iterator iter;
3960         struct btrfs_key root_key, key;
3961         int ret;
3962
3963         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
3964             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
3965                 return -EIO;
3966
3967         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
3968         if (ret)
3969                 return -EIO;
3970
3971         btrfs_init_path(&path);
3972         ULIST_ITER_INIT(&iter);
3973         while ((node = ulist_next(roots, &iter))) {
3974                 root_key.objectid = node->val;
3975                 root_key.type = BTRFS_ROOT_ITEM_KEY;
3976                 root_key.offset = (u64)-1;
3977
3978                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
3979                 if (IS_ERR(root)) {
3980                         ret = -EIO;
3981                         break;
3982                 }
3983
3984
3985                 trans = btrfs_start_transaction(search_root, 0);
3986                 if (IS_ERR(trans)) {
3987                         ret = PTR_ERR(trans);
3988                         break;
3989                 }
3990
3991                 path.lowest_level = btrfs_header_level(buf);
3992                 path.skip_check_block = 1;
3993                 if (path.lowest_level)
3994                         btrfs_node_key_to_cpu(buf, &key, 0);
3995                 else
3996                         btrfs_item_key_to_cpu(buf, &key, 0);
3997                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
3998                 if (ret) {
3999                         ret = -EIO;
4000                         btrfs_commit_transaction(trans, search_root);
4001                         break;
4002                 }
4003                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4004                         ret = fix_key_order(search_root, &path);
4005                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4006                         ret = fix_item_offset(search_root, &path);
4007                 if (ret) {
4008                         btrfs_commit_transaction(trans, search_root);
4009                         break;
4010                 }
4011                 btrfs_release_path(&path);
4012                 btrfs_commit_transaction(trans, search_root);
4013         }
4014         ulist_free(roots);
4015         btrfs_release_path(&path);
4016         return ret;
4017 }
4018
4019 static int check_block(struct btrfs_root *root,
4020                        struct cache_tree *extent_cache,
4021                        struct extent_buffer *buf, u64 flags)
4022 {
4023         struct extent_record *rec;
4024         struct cache_extent *cache;
4025         struct btrfs_key key;
4026         enum btrfs_tree_block_status status;
4027         int ret = 0;
4028         int level;
4029
4030         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4031         if (!cache)
4032                 return 1;
4033         rec = container_of(cache, struct extent_record, cache);
4034         rec->generation = btrfs_header_generation(buf);
4035
4036         level = btrfs_header_level(buf);
4037         if (btrfs_header_nritems(buf) > 0) {
4038
4039                 if (level == 0)
4040                         btrfs_item_key_to_cpu(buf, &key, 0);
4041                 else
4042                         btrfs_node_key_to_cpu(buf, &key, 0);
4043
4044                 rec->info_objectid = key.objectid;
4045         }
4046         rec->info_level = level;
4047
4048         if (btrfs_is_leaf(buf))
4049                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4050         else
4051                 status = btrfs_check_node(root, &rec->parent_key, buf);
4052
4053         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4054                 if (repair)
4055                         status = try_to_fix_bad_block(root, buf, status);
4056                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4057                         ret = -EIO;
4058                         fprintf(stderr, "bad block %llu\n",
4059                                 (unsigned long long)buf->start);
4060                 } else {
4061                         /*
4062                          * Signal to callers we need to start the scan over
4063                          * again since we'll have cowed blocks.
4064                          */
4065                         ret = -EAGAIN;
4066                 }
4067         } else {
4068                 rec->content_checked = 1;
4069                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4070                         rec->owner_ref_checked = 1;
4071                 else {
4072                         ret = check_owner_ref(root, rec, buf);
4073                         if (!ret)
4074                                 rec->owner_ref_checked = 1;
4075                 }
4076         }
4077         if (!ret)
4078                 maybe_free_extent_rec(extent_cache, rec);
4079         return ret;
4080 }
4081
4082 #if 0
4083 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4084                                                 u64 parent, u64 root)
4085 {
4086         struct list_head *cur = rec->backrefs.next;
4087         struct extent_backref *node;
4088         struct tree_backref *back;
4089
4090         while (cur != &rec->backrefs) {
4091                 node = to_extent_backref(cur);
4092                 cur = cur->next;
4093                 if (node->is_data)
4094                         continue;
4095                 back = to_tree_backref(node);
4096                 if (parent > 0) {
4097                         if (!node->full_backref)
4098                                 continue;
4099                         if (parent == back->parent)
4100                                 return back;
4101                 } else {
4102                         if (node->full_backref)
4103                                 continue;
4104                         if (back->root == root)
4105                                 return back;
4106                 }
4107         }
4108         return NULL;
4109 }
4110 #endif
4111
4112 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4113                                                 u64 parent, u64 root)
4114 {
4115         struct tree_backref *ref = malloc(sizeof(*ref));
4116
4117         if (!ref)
4118                 return NULL;
4119         memset(&ref->node, 0, sizeof(ref->node));
4120         if (parent > 0) {
4121                 ref->parent = parent;
4122                 ref->node.full_backref = 1;
4123         } else {
4124                 ref->root = root;
4125                 ref->node.full_backref = 0;
4126         }
4127
4128         return ref;
4129 }
4130
4131 #if 0
4132 static struct data_backref *find_data_backref(struct extent_record *rec,
4133                                                 u64 parent, u64 root,
4134                                                 u64 owner, u64 offset,
4135                                                 int found_ref,
4136                                                 u64 disk_bytenr, u64 bytes)
4137 {
4138         struct list_head *cur = rec->backrefs.next;
4139         struct extent_backref *node;
4140         struct data_backref *back;
4141
4142         while (cur != &rec->backrefs) {
4143                 node = to_extent_backref(cur);
4144                 cur = cur->next;
4145                 if (!node->is_data)
4146                         continue;
4147                 back = to_data_backref(node);
4148                 if (parent > 0) {
4149                         if (!node->full_backref)
4150                                 continue;
4151                         if (parent == back->parent)
4152                                 return back;
4153                 } else {
4154                         if (node->full_backref)
4155                                 continue;
4156                         if (back->root == root && back->owner == owner &&
4157                             back->offset == offset) {
4158                                 if (found_ref && node->found_ref &&
4159                                     (back->bytes != bytes ||
4160                                     back->disk_bytenr != disk_bytenr))
4161                                         continue;
4162                                 return back;
4163                         }
4164                 }
4165         }
4166         return NULL;
4167 }
4168 #endif
4169
4170 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4171                                                 u64 parent, u64 root,
4172                                                 u64 owner, u64 offset,
4173                                                 u64 max_size)
4174 {
4175         struct data_backref *ref = malloc(sizeof(*ref));
4176
4177         if (!ref)
4178                 return NULL;
4179         memset(&ref->node, 0, sizeof(ref->node));
4180         ref->node.is_data = 1;
4181
4182         if (parent > 0) {
4183                 ref->parent = parent;
4184                 ref->owner = 0;
4185                 ref->offset = 0;
4186                 ref->node.full_backref = 1;
4187         } else {
4188                 ref->root = root;
4189                 ref->owner = owner;
4190                 ref->offset = offset;
4191                 ref->node.full_backref = 0;
4192         }
4193         ref->bytes = max_size;
4194         ref->found_ref = 0;
4195         ref->num_refs = 0;
4196         if (max_size > rec->max_size)
4197                 rec->max_size = max_size;
4198         return ref;
4199 }
4200
4201 /* Check if the type of extent matches with its chunk */
4202 static void check_extent_type(struct extent_record *rec)
4203 {
4204         struct btrfs_block_group_cache *bg_cache;
4205
4206         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4207         if (!bg_cache)
4208                 return;
4209
4210         /* data extent, check chunk directly*/
4211         if (!rec->metadata) {
4212                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4213                         rec->wrong_chunk_type = 1;
4214                 return;
4215         }
4216
4217         /* metadata extent, check the obvious case first */
4218         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4219                                  BTRFS_BLOCK_GROUP_METADATA))) {
4220                 rec->wrong_chunk_type = 1;
4221                 return;
4222         }
4223
4224         /*
4225          * Check SYSTEM extent, as it's also marked as metadata, we can only
4226          * make sure it's a SYSTEM extent by its backref
4227          */
4228         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4229                 struct extent_backref *node;
4230                 struct tree_backref *tback;
4231                 u64 bg_type;
4232
4233                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4234                 if (node->is_data) {
4235                         /* tree block shouldn't have data backref */
4236                         rec->wrong_chunk_type = 1;
4237                         return;
4238                 }
4239                 tback = container_of(node, struct tree_backref, node);
4240
4241                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4242                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4243                 else
4244                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4245                 if (!(bg_cache->flags & bg_type))
4246                         rec->wrong_chunk_type = 1;
4247         }
4248 }
4249
4250 /*
4251  * Allocate a new extent record, fill default values from @tmpl and insert int
4252  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4253  * the cache, otherwise it fails.
4254  */
4255 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4256                 struct extent_record *tmpl)
4257 {
4258         struct extent_record *rec;
4259         int ret = 0;
4260
4261         BUG_ON(tmpl->max_size == 0);
4262         rec = malloc(sizeof(*rec));
4263         if (!rec)
4264                 return -ENOMEM;
4265         rec->start = tmpl->start;
4266         rec->max_size = tmpl->max_size;
4267         rec->nr = max(tmpl->nr, tmpl->max_size);
4268         rec->found_rec = tmpl->found_rec;
4269         rec->content_checked = tmpl->content_checked;
4270         rec->owner_ref_checked = tmpl->owner_ref_checked;
4271         rec->num_duplicates = 0;
4272         rec->metadata = tmpl->metadata;
4273         rec->flag_block_full_backref = FLAG_UNSET;
4274         rec->bad_full_backref = 0;
4275         rec->crossing_stripes = 0;
4276         rec->wrong_chunk_type = 0;
4277         rec->is_root = tmpl->is_root;
4278         rec->refs = tmpl->refs;
4279         rec->extent_item_refs = tmpl->extent_item_refs;
4280         rec->parent_generation = tmpl->parent_generation;
4281         INIT_LIST_HEAD(&rec->backrefs);
4282         INIT_LIST_HEAD(&rec->dups);
4283         INIT_LIST_HEAD(&rec->list);
4284         rec->backref_tree = RB_ROOT;
4285         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4286         rec->cache.start = tmpl->start;
4287         rec->cache.size = tmpl->nr;
4288         ret = insert_cache_extent(extent_cache, &rec->cache);
4289         if (ret) {
4290                 free(rec);
4291                 return ret;
4292         }
4293         bytes_used += rec->nr;
4294
4295         if (tmpl->metadata)
4296                 rec->crossing_stripes = check_crossing_stripes(global_info,
4297                                 rec->start, global_info->nodesize);
4298         check_extent_type(rec);
4299         return ret;
4300 }
4301
4302 /*
4303  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4304  * some are hints:
4305  * - refs              - if found, increase refs
4306  * - is_root           - if found, set
4307  * - content_checked   - if found, set
4308  * - owner_ref_checked - if found, set
4309  *
4310  * If not found, create a new one, initialize and insert.
4311  */
4312 static int add_extent_rec(struct cache_tree *extent_cache,
4313                 struct extent_record *tmpl)
4314 {
4315         struct extent_record *rec;
4316         struct cache_extent *cache;
4317         int ret = 0;
4318         int dup = 0;
4319
4320         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4321         if (cache) {
4322                 rec = container_of(cache, struct extent_record, cache);
4323                 if (tmpl->refs)
4324                         rec->refs++;
4325                 if (rec->nr == 1)
4326                         rec->nr = max(tmpl->nr, tmpl->max_size);
4327
4328                 /*
4329                  * We need to make sure to reset nr to whatever the extent
4330                  * record says was the real size, this way we can compare it to
4331                  * the backrefs.
4332                  */
4333                 if (tmpl->found_rec) {
4334                         if (tmpl->start != rec->start || rec->found_rec) {
4335                                 struct extent_record *tmp;
4336
4337                                 dup = 1;
4338                                 if (list_empty(&rec->list))
4339                                         list_add_tail(&rec->list,
4340                                                       &duplicate_extents);
4341
4342                                 /*
4343                                  * We have to do this song and dance in case we
4344                                  * find an extent record that falls inside of
4345                                  * our current extent record but does not have
4346                                  * the same objectid.
4347                                  */
4348                                 tmp = malloc(sizeof(*tmp));
4349                                 if (!tmp)
4350                                         return -ENOMEM;
4351                                 tmp->start = tmpl->start;
4352                                 tmp->max_size = tmpl->max_size;
4353                                 tmp->nr = tmpl->nr;
4354                                 tmp->found_rec = 1;
4355                                 tmp->metadata = tmpl->metadata;
4356                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4357                                 INIT_LIST_HEAD(&tmp->list);
4358                                 list_add_tail(&tmp->list, &rec->dups);
4359                                 rec->num_duplicates++;
4360                         } else {
4361                                 rec->nr = tmpl->nr;
4362                                 rec->found_rec = 1;
4363                         }
4364                 }
4365
4366                 if (tmpl->extent_item_refs && !dup) {
4367                         if (rec->extent_item_refs) {
4368                                 fprintf(stderr,
4369                         "block %llu rec extent_item_refs %llu, passed %llu\n",
4370                                         (unsigned long long)tmpl->start,
4371                                         (unsigned long long)
4372                                                         rec->extent_item_refs,
4373                                         (unsigned long long)
4374                                                         tmpl->extent_item_refs);
4375                         }
4376                         rec->extent_item_refs = tmpl->extent_item_refs;
4377                 }
4378                 if (tmpl->is_root)
4379                         rec->is_root = 1;
4380                 if (tmpl->content_checked)
4381                         rec->content_checked = 1;
4382                 if (tmpl->owner_ref_checked)
4383                         rec->owner_ref_checked = 1;
4384                 memcpy(&rec->parent_key, &tmpl->parent_key,
4385                                 sizeof(tmpl->parent_key));
4386                 if (tmpl->parent_generation)
4387                         rec->parent_generation = tmpl->parent_generation;
4388                 if (rec->max_size < tmpl->max_size)
4389                         rec->max_size = tmpl->max_size;
4390
4391                 /*
4392                  * A metadata extent can't cross stripe_len boundary, otherwise
4393                  * kernel scrub won't be able to handle it.
4394                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4395                  * it.
4396                  */
4397                 if (tmpl->metadata)
4398                         rec->crossing_stripes = check_crossing_stripes(
4399                                         global_info, rec->start,
4400                                         global_info->nodesize);
4401                 check_extent_type(rec);
4402                 maybe_free_extent_rec(extent_cache, rec);
4403                 return ret;
4404         }
4405
4406         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4407
4408         return ret;
4409 }
4410
4411 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4412                             u64 parent, u64 root, int found_ref)
4413 {
4414         struct extent_record *rec;
4415         struct tree_backref *back;
4416         struct cache_extent *cache;
4417         int ret;
4418         bool insert = false;
4419
4420         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4421         if (!cache) {
4422                 struct extent_record tmpl;
4423
4424                 memset(&tmpl, 0, sizeof(tmpl));
4425                 tmpl.start = bytenr;
4426                 tmpl.nr = 1;
4427                 tmpl.metadata = 1;
4428                 tmpl.max_size = 1;
4429
4430                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4431                 if (ret)
4432                         return ret;
4433
4434                 /* really a bug in cache_extent implement now */
4435                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4436                 if (!cache)
4437                         return -ENOENT;
4438         }
4439
4440         rec = container_of(cache, struct extent_record, cache);
4441         if (rec->start != bytenr) {
4442                 /*
4443                  * Several cause, from unaligned bytenr to over lapping extents
4444                  */
4445                 return -EEXIST;
4446         }
4447
4448         back = find_tree_backref(rec, parent, root);
4449         if (!back) {
4450                 back = alloc_tree_backref(rec, parent, root);
4451                 if (!back)
4452                         return -ENOMEM;
4453                 insert = true;
4454         }
4455
4456         if (found_ref) {
4457                 if (back->node.found_ref) {
4458                         fprintf(stderr,
4459         "Extent back ref already exists for %llu parent %llu root %llu\n",
4460                                 (unsigned long long)bytenr,
4461                                 (unsigned long long)parent,
4462                                 (unsigned long long)root);
4463                 }
4464                 back->node.found_ref = 1;
4465         } else {
4466                 if (back->node.found_extent_tree) {
4467                         fprintf(stderr,
4468         "extent back ref already exists for %llu parent %llu root %llu\n",
4469                                 (unsigned long long)bytenr,
4470                                 (unsigned long long)parent,
4471                                 (unsigned long long)root);
4472                 }
4473                 back->node.found_extent_tree = 1;
4474         }
4475         if (insert)
4476                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
4477                         compare_extent_backref));
4478         check_extent_type(rec);
4479         maybe_free_extent_rec(extent_cache, rec);
4480         return 0;
4481 }
4482
4483 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4484                             u64 parent, u64 root, u64 owner, u64 offset,
4485                             u32 num_refs, int found_ref, u64 max_size)
4486 {
4487         struct extent_record *rec;
4488         struct data_backref *back;
4489         struct cache_extent *cache;
4490         int ret;
4491         bool insert = false;
4492
4493         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4494         if (!cache) {
4495                 struct extent_record tmpl;
4496
4497                 memset(&tmpl, 0, sizeof(tmpl));
4498                 tmpl.start = bytenr;
4499                 tmpl.nr = 1;
4500                 tmpl.max_size = max_size;
4501
4502                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4503                 if (ret)
4504                         return ret;
4505
4506                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4507                 if (!cache)
4508                         abort();
4509         }
4510
4511         rec = container_of(cache, struct extent_record, cache);
4512         if (rec->max_size < max_size)
4513                 rec->max_size = max_size;
4514
4515         /*
4516          * If found_ref is set then max_size is the real size and must match the
4517          * existing refs.  So if we have already found a ref then we need to
4518          * make sure that this ref matches the existing one, otherwise we need
4519          * to add a new backref so we can notice that the backrefs don't match
4520          * and we need to figure out who is telling the truth.  This is to
4521          * account for that awful fsync bug I introduced where we'd end up with
4522          * a btrfs_file_extent_item that would have its length include multiple
4523          * prealloc extents or point inside of a prealloc extent.
4524          */
4525         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4526                                  bytenr, max_size);
4527         if (!back) {
4528                 back = alloc_data_backref(rec, parent, root, owner, offset,
4529                                           max_size);
4530                 BUG_ON(!back);
4531                 insert = true;
4532         }
4533
4534         if (found_ref) {
4535                 BUG_ON(num_refs != 1);
4536                 if (back->node.found_ref)
4537                         BUG_ON(back->bytes != max_size);
4538                 back->node.found_ref = 1;
4539                 back->found_ref += 1;
4540                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
4541                         back->bytes = max_size;
4542                         back->disk_bytenr = bytenr;
4543
4544                         /* Need to reinsert if not already in the tree */
4545                         if (!insert) {
4546                                 rb_erase(&back->node.node, &rec->backref_tree);
4547                                 insert = true;
4548                         }
4549                 }
4550                 rec->refs += 1;
4551                 rec->content_checked = 1;
4552                 rec->owner_ref_checked = 1;
4553         } else {
4554                 if (back->node.found_extent_tree) {
4555                         fprintf(stderr,
4556 "Extent back ref already exists for %llu parent %llu root %llu owner %llu offset %llu num_refs %lu\n",
4557                                 (unsigned long long)bytenr,
4558                                 (unsigned long long)parent,
4559                                 (unsigned long long)root,
4560                                 (unsigned long long)owner,
4561                                 (unsigned long long)offset,
4562                                 (unsigned long)num_refs);
4563                 }
4564                 back->num_refs = num_refs;
4565                 back->node.found_extent_tree = 1;
4566         }
4567         if (insert)
4568                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
4569                         compare_extent_backref));
4570
4571         maybe_free_extent_rec(extent_cache, rec);
4572         return 0;
4573 }
4574
4575 static int add_pending(struct cache_tree *pending,
4576                        struct cache_tree *seen, u64 bytenr, u32 size)
4577 {
4578         int ret;
4579
4580         ret = add_cache_extent(seen, bytenr, size);
4581         if (ret)
4582                 return ret;
4583         add_cache_extent(pending, bytenr, size);
4584         return 0;
4585 }
4586
4587 static int pick_next_pending(struct cache_tree *pending,
4588                         struct cache_tree *reada,
4589                         struct cache_tree *nodes,
4590                         u64 last, struct block_info *bits, int bits_nr,
4591                         int *reada_bits)
4592 {
4593         unsigned long node_start = last;
4594         struct cache_extent *cache;
4595         int ret;
4596
4597         cache = search_cache_extent(reada, 0);
4598         if (cache) {
4599                 bits[0].start = cache->start;
4600                 bits[0].size = cache->size;
4601                 *reada_bits = 1;
4602                 return 1;
4603         }
4604         *reada_bits = 0;
4605         if (node_start > 32768)
4606                 node_start -= 32768;
4607
4608         cache = search_cache_extent(nodes, node_start);
4609         if (!cache)
4610                 cache = search_cache_extent(nodes, 0);
4611
4612         if (!cache) {
4613                 cache = search_cache_extent(pending, 0);
4614                 if (!cache)
4615                         return 0;
4616                 ret = 0;
4617                 do {
4618                         bits[ret].start = cache->start;
4619                         bits[ret].size = cache->size;
4620                         cache = next_cache_extent(cache);
4621                         ret++;
4622                 } while (cache && ret < bits_nr);
4623                 return ret;
4624         }
4625
4626         ret = 0;
4627         do {
4628                 bits[ret].start = cache->start;
4629                 bits[ret].size = cache->size;
4630                 cache = next_cache_extent(cache);
4631                 ret++;
4632         } while (cache && ret < bits_nr);
4633
4634         if (bits_nr - ret > 8) {
4635                 u64 lookup = bits[0].start + bits[0].size;
4636                 struct cache_extent *next;
4637
4638                 next = search_cache_extent(pending, lookup);
4639                 while (next) {
4640                         if (next->start - lookup > 32768)
4641                                 break;
4642                         bits[ret].start = next->start;
4643                         bits[ret].size = next->size;
4644                         lookup = next->start + next->size;
4645                         ret++;
4646                         if (ret == bits_nr)
4647                                 break;
4648                         next = next_cache_extent(next);
4649                         if (!next)
4650                                 break;
4651                 }
4652         }
4653         return ret;
4654 }
4655
4656 static void free_chunk_record(struct cache_extent *cache)
4657 {
4658         struct chunk_record *rec;
4659
4660         rec = container_of(cache, struct chunk_record, cache);
4661         list_del_init(&rec->list);
4662         list_del_init(&rec->dextents);
4663         free(rec);
4664 }
4665
4666 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4667 {
4668         cache_tree_free_extents(chunk_cache, free_chunk_record);
4669 }
4670
4671 static void free_device_record(struct rb_node *node)
4672 {
4673         struct device_record *rec;
4674
4675         rec = container_of(node, struct device_record, node);
4676         free(rec);
4677 }
4678
4679 FREE_RB_BASED_TREE(device_cache, free_device_record);
4680
4681 int insert_block_group_record(struct block_group_tree *tree,
4682                               struct block_group_record *bg_rec)
4683 {
4684         int ret;
4685
4686         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4687         if (ret)
4688                 return ret;
4689
4690         list_add_tail(&bg_rec->list, &tree->block_groups);
4691         return 0;
4692 }
4693
4694 static void free_block_group_record(struct cache_extent *cache)
4695 {
4696         struct block_group_record *rec;
4697
4698         rec = container_of(cache, struct block_group_record, cache);
4699         list_del_init(&rec->list);
4700         free(rec);
4701 }
4702
4703 void free_block_group_tree(struct block_group_tree *tree)
4704 {
4705         cache_tree_free_extents(&tree->tree, free_block_group_record);
4706 }
4707
4708 int insert_device_extent_record(struct device_extent_tree *tree,
4709                                 struct device_extent_record *de_rec)
4710 {
4711         int ret;
4712
4713         /*
4714          * Device extent is a bit different from the other extents, because
4715          * the extents which belong to the different devices may have the
4716          * same start and size, so we need use the special extent cache
4717          * search/insert functions.
4718          */
4719         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4720         if (ret)
4721                 return ret;
4722
4723         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4724         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4725         return 0;
4726 }
4727
4728 static void free_device_extent_record(struct cache_extent *cache)
4729 {
4730         struct device_extent_record *rec;
4731
4732         rec = container_of(cache, struct device_extent_record, cache);
4733         if (!list_empty(&rec->chunk_list))
4734                 list_del_init(&rec->chunk_list);
4735         if (!list_empty(&rec->device_list))
4736                 list_del_init(&rec->device_list);
4737         free(rec);
4738 }
4739
4740 void free_device_extent_tree(struct device_extent_tree *tree)
4741 {
4742         cache_tree_free_extents(&tree->tree, free_device_extent_record);
4743 }
4744
4745 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4746 static int process_extent_ref_v0(struct cache_tree *extent_cache,
4747                                  struct extent_buffer *leaf, int slot)
4748 {
4749         struct btrfs_extent_ref_v0 *ref0;
4750         struct btrfs_key key;
4751         int ret;
4752
4753         btrfs_item_key_to_cpu(leaf, &key, slot);
4754         ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
4755         if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
4756                 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
4757                                 0, 0);
4758         } else {
4759                 ret = add_data_backref(extent_cache, key.objectid, key.offset,
4760                                 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
4761         }
4762         return ret;
4763 }
4764 #endif
4765
4766 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4767                                             struct btrfs_key *key,
4768                                             int slot)
4769 {
4770         struct btrfs_chunk *ptr;
4771         struct chunk_record *rec;
4772         int num_stripes, i;
4773
4774         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4775         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4776
4777         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4778         if (!rec) {
4779                 fprintf(stderr, "memory allocation failed\n");
4780                 exit(-1);
4781         }
4782
4783         INIT_LIST_HEAD(&rec->list);
4784         INIT_LIST_HEAD(&rec->dextents);
4785         rec->bg_rec = NULL;
4786
4787         rec->cache.start = key->offset;
4788         rec->cache.size = btrfs_chunk_length(leaf, ptr);
4789
4790         rec->generation = btrfs_header_generation(leaf);
4791
4792         rec->objectid = key->objectid;
4793         rec->type = key->type;
4794         rec->offset = key->offset;
4795
4796         rec->length = rec->cache.size;
4797         rec->owner = btrfs_chunk_owner(leaf, ptr);
4798         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4799         rec->type_flags = btrfs_chunk_type(leaf, ptr);
4800         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4801         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4802         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4803         rec->num_stripes = num_stripes;
4804         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4805
4806         for (i = 0; i < rec->num_stripes; ++i) {
4807                 rec->stripes[i].devid =
4808                         btrfs_stripe_devid_nr(leaf, ptr, i);
4809                 rec->stripes[i].offset =
4810                         btrfs_stripe_offset_nr(leaf, ptr, i);
4811                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4812                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
4813                                 BTRFS_UUID_SIZE);
4814         }
4815
4816         return rec;
4817 }
4818
4819 static int process_chunk_item(struct cache_tree *chunk_cache,
4820                               struct btrfs_key *key, struct extent_buffer *eb,
4821                               int slot)
4822 {
4823         struct chunk_record *rec;
4824         struct btrfs_chunk *chunk;
4825         int ret = 0;
4826
4827         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
4828         /*
4829          * Do extra check for this chunk item,
4830          *
4831          * It's still possible one can craft a leaf with CHUNK_ITEM, with
4832          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
4833          * and owner<->key_type check.
4834          */
4835         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
4836                                       key->offset);
4837         if (ret < 0) {
4838                 error("chunk(%llu, %llu) is not valid, ignore it",
4839                       key->offset, btrfs_chunk_length(eb, chunk));
4840                 return 0;
4841         }
4842         rec = btrfs_new_chunk_record(eb, key, slot);
4843         ret = insert_cache_extent(chunk_cache, &rec->cache);
4844         if (ret) {
4845                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
4846                         rec->offset, rec->length);
4847                 free(rec);
4848         }
4849
4850         return ret;
4851 }
4852
4853 static int process_device_item(struct rb_root *dev_cache,
4854                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
4855 {
4856         struct btrfs_dev_item *ptr;
4857         struct device_record *rec;
4858         int ret = 0;
4859
4860         ptr = btrfs_item_ptr(eb,
4861                 slot, struct btrfs_dev_item);
4862
4863         rec = malloc(sizeof(*rec));
4864         if (!rec) {
4865                 fprintf(stderr, "memory allocation failed\n");
4866                 return -ENOMEM;
4867         }
4868
4869         rec->devid = key->offset;
4870         rec->generation = btrfs_header_generation(eb);
4871
4872         rec->objectid = key->objectid;
4873         rec->type = key->type;
4874         rec->offset = key->offset;
4875
4876         rec->devid = btrfs_device_id(eb, ptr);
4877         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
4878         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
4879
4880         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
4881         if (ret) {
4882                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
4883                 free(rec);
4884         }
4885
4886         return ret;
4887 }
4888
4889 struct block_group_record *
4890 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
4891                              int slot)
4892 {
4893         struct btrfs_block_group_item *ptr;
4894         struct block_group_record *rec;
4895
4896         rec = calloc(1, sizeof(*rec));
4897         if (!rec) {
4898                 fprintf(stderr, "memory allocation failed\n");
4899                 exit(-1);
4900         }
4901
4902         rec->cache.start = key->objectid;
4903         rec->cache.size = key->offset;
4904
4905         rec->generation = btrfs_header_generation(leaf);
4906
4907         rec->objectid = key->objectid;
4908         rec->type = key->type;
4909         rec->offset = key->offset;
4910
4911         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
4912         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
4913
4914         INIT_LIST_HEAD(&rec->list);
4915
4916         return rec;
4917 }
4918
4919 static int process_block_group_item(struct block_group_tree *block_group_cache,
4920                                     struct btrfs_key *key,
4921                                     struct extent_buffer *eb, int slot)
4922 {
4923         struct block_group_record *rec;
4924         int ret = 0;
4925
4926         rec = btrfs_new_block_group_record(eb, key, slot);
4927         ret = insert_block_group_record(block_group_cache, rec);
4928         if (ret) {
4929                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
4930                         rec->objectid, rec->offset);
4931                 free(rec);
4932         }
4933
4934         return ret;
4935 }
4936
4937 struct device_extent_record *
4938 btrfs_new_device_extent_record(struct extent_buffer *leaf,
4939                                struct btrfs_key *key, int slot)
4940 {
4941         struct device_extent_record *rec;
4942         struct btrfs_dev_extent *ptr;
4943
4944         rec = calloc(1, sizeof(*rec));
4945         if (!rec) {
4946                 fprintf(stderr, "memory allocation failed\n");
4947                 exit(-1);
4948         }
4949
4950         rec->cache.objectid = key->objectid;
4951         rec->cache.start = key->offset;
4952
4953         rec->generation = btrfs_header_generation(leaf);
4954
4955         rec->objectid = key->objectid;
4956         rec->type = key->type;
4957         rec->offset = key->offset;
4958
4959         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
4960         rec->chunk_objecteid =
4961                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
4962         rec->chunk_offset =
4963                 btrfs_dev_extent_chunk_offset(leaf, ptr);
4964         rec->length = btrfs_dev_extent_length(leaf, ptr);
4965         rec->cache.size = rec->length;
4966
4967         INIT_LIST_HEAD(&rec->chunk_list);
4968         INIT_LIST_HEAD(&rec->device_list);
4969
4970         return rec;
4971 }
4972
4973 static int
4974 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
4975                            struct btrfs_key *key, struct extent_buffer *eb,
4976                            int slot)
4977 {
4978         struct device_extent_record *rec;
4979         int ret;
4980
4981         rec = btrfs_new_device_extent_record(eb, key, slot);
4982         ret = insert_device_extent_record(dev_extent_cache, rec);
4983         if (ret) {
4984                 fprintf(stderr,
4985                         "Device extent[%llu, %llu, %llu] existed.\n",
4986                         rec->objectid, rec->offset, rec->length);
4987                 free(rec);
4988         }
4989
4990         return ret;
4991 }
4992
4993 static int process_extent_item(struct btrfs_root *root,
4994                                struct cache_tree *extent_cache,
4995                                struct extent_buffer *eb, int slot)
4996 {
4997         struct btrfs_extent_item *ei;
4998         struct btrfs_extent_inline_ref *iref;
4999         struct btrfs_extent_data_ref *dref;
5000         struct btrfs_shared_data_ref *sref;
5001         struct btrfs_key key;
5002         struct extent_record tmpl;
5003         unsigned long end;
5004         unsigned long ptr;
5005         int ret;
5006         int type;
5007         u32 item_size = btrfs_item_size_nr(eb, slot);
5008         u64 refs = 0;
5009         u64 offset;
5010         u64 num_bytes;
5011         int metadata = 0;
5012
5013         btrfs_item_key_to_cpu(eb, &key, slot);
5014
5015         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5016                 metadata = 1;
5017                 num_bytes = root->fs_info->nodesize;
5018         } else {
5019                 num_bytes = key.offset;
5020         }
5021
5022         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
5023                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5024                       key.objectid, root->fs_info->sectorsize);
5025                 return -EIO;
5026         }
5027         if (item_size < sizeof(*ei)) {
5028 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5029                 struct btrfs_extent_item_v0 *ei0;
5030
5031                 if (item_size != sizeof(*ei0)) {
5032                         error(
5033         "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
5034                                 key.objectid, key.type, key.offset,
5035                                 btrfs_header_bytenr(eb), slot);
5036                         BUG();
5037                 }
5038                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5039                 refs = btrfs_extent_refs_v0(eb, ei0);
5040 #else
5041                 BUG();
5042 #endif
5043                 memset(&tmpl, 0, sizeof(tmpl));
5044                 tmpl.start = key.objectid;
5045                 tmpl.nr = num_bytes;
5046                 tmpl.extent_item_refs = refs;
5047                 tmpl.metadata = metadata;
5048                 tmpl.found_rec = 1;
5049                 tmpl.max_size = num_bytes;
5050
5051                 return add_extent_rec(extent_cache, &tmpl);
5052         }
5053
5054         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5055         refs = btrfs_extent_refs(eb, ei);
5056         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5057                 metadata = 1;
5058         else
5059                 metadata = 0;
5060         if (metadata && num_bytes != root->fs_info->nodesize) {
5061                 error("ignore invalid metadata extent, length %llu does not equal to %u",
5062                       num_bytes, root->fs_info->nodesize);
5063                 return -EIO;
5064         }
5065         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
5066                 error("ignore invalid data extent, length %llu is not aligned to %u",
5067                       num_bytes, root->fs_info->sectorsize);
5068                 return -EIO;
5069         }
5070
5071         memset(&tmpl, 0, sizeof(tmpl));
5072         tmpl.start = key.objectid;
5073         tmpl.nr = num_bytes;
5074         tmpl.extent_item_refs = refs;
5075         tmpl.metadata = metadata;
5076         tmpl.found_rec = 1;
5077         tmpl.max_size = num_bytes;
5078         add_extent_rec(extent_cache, &tmpl);
5079
5080         ptr = (unsigned long)(ei + 1);
5081         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5082             key.type == BTRFS_EXTENT_ITEM_KEY)
5083                 ptr += sizeof(struct btrfs_tree_block_info);
5084
5085         end = (unsigned long)ei + item_size;
5086         while (ptr < end) {
5087                 iref = (struct btrfs_extent_inline_ref *)ptr;
5088                 type = btrfs_extent_inline_ref_type(eb, iref);
5089                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5090                 switch (type) {
5091                 case BTRFS_TREE_BLOCK_REF_KEY:
5092                         ret = add_tree_backref(extent_cache, key.objectid,
5093                                         0, offset, 0);
5094                         if (ret < 0)
5095                                 error(
5096                         "add_tree_backref failed (extent items tree block): %s",
5097                                       strerror(-ret));
5098                         break;
5099                 case BTRFS_SHARED_BLOCK_REF_KEY:
5100                         ret = add_tree_backref(extent_cache, key.objectid,
5101                                         offset, 0, 0);
5102                         if (ret < 0)
5103                                 error(
5104                         "add_tree_backref failed (extent items shared block): %s",
5105                                       strerror(-ret));
5106                         break;
5107                 case BTRFS_EXTENT_DATA_REF_KEY:
5108                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5109                         add_data_backref(extent_cache, key.objectid, 0,
5110                                         btrfs_extent_data_ref_root(eb, dref),
5111                                         btrfs_extent_data_ref_objectid(eb,
5112                                                                        dref),
5113                                         btrfs_extent_data_ref_offset(eb, dref),
5114                                         btrfs_extent_data_ref_count(eb, dref),
5115                                         0, num_bytes);
5116                         break;
5117                 case BTRFS_SHARED_DATA_REF_KEY:
5118                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5119                         add_data_backref(extent_cache, key.objectid, offset,
5120                                         0, 0, 0,
5121                                         btrfs_shared_data_ref_count(eb, sref),
5122                                         0, num_bytes);
5123                         break;
5124                 default:
5125                         fprintf(stderr,
5126                                 "corrupt extent record: key [%llu,%u,%llu]\n",
5127                                 key.objectid, key.type, num_bytes);
5128                         goto out;
5129                 }
5130                 ptr += btrfs_extent_inline_ref_size(type);
5131         }
5132         WARN_ON(ptr > end);
5133 out:
5134         return 0;
5135 }
5136
5137 static int check_cache_range(struct btrfs_root *root,
5138                              struct btrfs_block_group_cache *cache,
5139                              u64 offset, u64 bytes)
5140 {
5141         struct btrfs_free_space *entry;
5142         u64 *logical;
5143         u64 bytenr;
5144         int stripe_len;
5145         int i, nr, ret;
5146
5147         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5148                 bytenr = btrfs_sb_offset(i);
5149                 ret = btrfs_rmap_block(root->fs_info,
5150                                        cache->key.objectid, bytenr, 0,
5151                                        &logical, &nr, &stripe_len);
5152                 if (ret)
5153                         return ret;
5154
5155                 while (nr--) {
5156                         if (logical[nr] + stripe_len <= offset)
5157                                 continue;
5158                         if (offset + bytes <= logical[nr])
5159                                 continue;
5160                         if (logical[nr] == offset) {
5161                                 if (stripe_len >= bytes) {
5162                                         free(logical);
5163                                         return 0;
5164                                 }
5165                                 bytes -= stripe_len;
5166                                 offset += stripe_len;
5167                         } else if (logical[nr] < offset) {
5168                                 if (logical[nr] + stripe_len >=
5169                                     offset + bytes) {
5170                                         free(logical);
5171                                         return 0;
5172                                 }
5173                                 bytes = (offset + bytes) -
5174                                         (logical[nr] + stripe_len);
5175                                 offset = logical[nr] + stripe_len;
5176                         } else {
5177                                 /*
5178                                  * Could be tricky, the super may land in the
5179                                  * middle of the area we're checking.  First
5180                                  * check the easiest case, it's at the end.
5181                                  */
5182                                 if (logical[nr] + stripe_len >=
5183                                     bytes + offset) {
5184                                         bytes = logical[nr] - offset;
5185                                         continue;
5186                                 }
5187
5188                                 /* Check the left side */
5189                                 ret = check_cache_range(root, cache,
5190                                                         offset,
5191                                                         logical[nr] - offset);
5192                                 if (ret) {
5193                                         free(logical);
5194                                         return ret;
5195                                 }
5196
5197                                 /* Now we continue with the right side */
5198                                 bytes = (offset + bytes) -
5199                                         (logical[nr] + stripe_len);
5200                                 offset = logical[nr] + stripe_len;
5201                         }
5202                 }
5203
5204                 free(logical);
5205         }
5206
5207         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5208         if (!entry) {
5209                 fprintf(stderr, "there is no free space entry for %llu-%llu\n",
5210                         offset, offset+bytes);
5211                 return -EINVAL;
5212         }
5213
5214         if (entry->offset != offset) {
5215                 fprintf(stderr, "wanted offset %llu, found %llu\n", offset,
5216                         entry->offset);
5217                 return -EINVAL;
5218         }
5219
5220         if (entry->bytes != bytes) {
5221                 fprintf(stderr, "wanted bytes %llu, found %llu for off %llu\n",
5222                         bytes, entry->bytes, offset);
5223                 return -EINVAL;
5224         }
5225
5226         unlink_free_space(cache->free_space_ctl, entry);
5227         free(entry);
5228         return 0;
5229 }
5230
5231 static int verify_space_cache(struct btrfs_root *root,
5232                               struct btrfs_block_group_cache *cache)
5233 {
5234         struct btrfs_path path;
5235         struct extent_buffer *leaf;
5236         struct btrfs_key key;
5237         u64 last;
5238         int ret = 0;
5239
5240         root = root->fs_info->extent_root;
5241
5242         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5243
5244         btrfs_init_path(&path);
5245         key.objectid = last;
5246         key.offset = 0;
5247         key.type = BTRFS_EXTENT_ITEM_KEY;
5248         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5249         if (ret < 0)
5250                 goto out;
5251         ret = 0;
5252         while (1) {
5253                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
5254                         ret = btrfs_next_leaf(root, &path);
5255                         if (ret < 0)
5256                                 goto out;
5257                         if (ret > 0) {
5258                                 ret = 0;
5259                                 break;
5260                         }
5261                 }
5262                 leaf = path.nodes[0];
5263                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
5264                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5265                         break;
5266                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5267                     key.type != BTRFS_METADATA_ITEM_KEY) {
5268                         path.slots[0]++;
5269                         continue;
5270                 }
5271
5272                 if (last == key.objectid) {
5273                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5274                                 last = key.objectid + key.offset;
5275                         else
5276                                 last = key.objectid + root->fs_info->nodesize;
5277                         path.slots[0]++;
5278                         continue;
5279                 }
5280
5281                 ret = check_cache_range(root, cache, last,
5282                                         key.objectid - last);
5283                 if (ret)
5284                         break;
5285                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5286                         last = key.objectid + key.offset;
5287                 else
5288                         last = key.objectid + root->fs_info->nodesize;
5289                 path.slots[0]++;
5290         }
5291
5292         if (last < cache->key.objectid + cache->key.offset)
5293                 ret = check_cache_range(root, cache, last,
5294                                         cache->key.objectid +
5295                                         cache->key.offset - last);
5296
5297 out:
5298         btrfs_release_path(&path);
5299
5300         if (!ret &&
5301             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5302                 fprintf(stderr, "There are still entries left in the space "
5303                         "cache\n");
5304                 ret = -EINVAL;
5305         }
5306
5307         return ret;
5308 }
5309
5310 static int check_space_cache(struct btrfs_root *root)
5311 {
5312         struct btrfs_block_group_cache *cache;
5313         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5314         int ret;
5315         int error = 0;
5316
5317         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5318             btrfs_super_generation(root->fs_info->super_copy) !=
5319             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5320                 printf("cache and super generation don't match, space cache "
5321                        "will be invalidated\n");
5322                 return 0;
5323         }
5324
5325         if (ctx.progress_enabled) {
5326                 ctx.tp = TASK_FREE_SPACE;
5327                 task_start(ctx.info);
5328         }
5329
5330         while (1) {
5331                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5332                 if (!cache)
5333                         break;
5334
5335                 start = cache->key.objectid + cache->key.offset;
5336                 if (!cache->free_space_ctl) {
5337                         if (btrfs_init_free_space_ctl(cache,
5338                                                 root->fs_info->sectorsize)) {
5339                                 ret = -ENOMEM;
5340                                 break;
5341                         }
5342                 } else {
5343                         btrfs_remove_free_space_cache(cache);
5344                 }
5345
5346                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
5347                         ret = exclude_super_stripes(root, cache);
5348                         if (ret) {
5349                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5350                                         strerror(-ret));
5351                                 error++;
5352                                 continue;
5353                         }
5354                         ret = load_free_space_tree(root->fs_info, cache);
5355                         free_excluded_extents(root, cache);
5356                         if (ret < 0) {
5357                                 fprintf(stderr, "could not load free space tree: %s\n",
5358                                         strerror(-ret));
5359                                 error++;
5360                                 continue;
5361                         }
5362                         error += ret;
5363                 } else {
5364                         ret = load_free_space_cache(root->fs_info, cache);
5365                         if (ret < 0)
5366                                 error++;
5367                         if (ret <= 0)
5368                                 continue;
5369                 }
5370
5371                 ret = verify_space_cache(root, cache);
5372                 if (ret) {
5373                         fprintf(stderr, "cache appears valid but isn't %llu\n",
5374                                 cache->key.objectid);
5375                         error++;
5376                 }
5377         }
5378
5379         task_stop(ctx.info);
5380
5381         return error ? -EINVAL : 0;
5382 }
5383
5384 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5385                         u64 num_bytes, unsigned long leaf_offset,
5386                         struct extent_buffer *eb)
5387 {
5388         struct btrfs_fs_info *fs_info = root->fs_info;
5389         u64 offset = 0;
5390         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
5391         char *data;
5392         unsigned long csum_offset;
5393         u32 csum;
5394         u32 csum_expected;
5395         u64 read_len;
5396         u64 data_checked = 0;
5397         u64 tmp;
5398         int ret = 0;
5399         int mirror;
5400         int num_copies;
5401
5402         if (num_bytes % fs_info->sectorsize)
5403                 return -EINVAL;
5404
5405         data = malloc(num_bytes);
5406         if (!data)
5407                 return -ENOMEM;
5408
5409         while (offset < num_bytes) {
5410                 mirror = 0;
5411 again:
5412                 read_len = num_bytes - offset;
5413                 /* read as much space once a time */
5414                 ret = read_extent_data(fs_info, data + offset,
5415                                 bytenr + offset, &read_len, mirror);
5416                 if (ret)
5417                         goto out;
5418                 data_checked = 0;
5419                 /* verify every 4k data's checksum */
5420                 while (data_checked < read_len) {
5421                         csum = ~(u32)0;
5422                         tmp = offset + data_checked;
5423
5424                         csum = btrfs_csum_data((char *)data + tmp,
5425                                                csum, fs_info->sectorsize);
5426                         btrfs_csum_final(csum, (u8 *)&csum);
5427
5428                         csum_offset = leaf_offset +
5429                                  tmp / fs_info->sectorsize * csum_size;
5430                         read_extent_buffer(eb, (char *)&csum_expected,
5431                                            csum_offset, csum_size);
5432                         /* try another mirror */
5433                         if (csum != csum_expected) {
5434                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5435                                                 mirror, bytenr + tmp,
5436                                                 csum, csum_expected);
5437                                 num_copies = btrfs_num_copies(root->fs_info,
5438                                                 bytenr, num_bytes);
5439                                 if (mirror < num_copies - 1) {
5440                                         mirror += 1;
5441                                         goto again;
5442                                 }
5443                         }
5444                         data_checked += fs_info->sectorsize;
5445                 }
5446                 offset += read_len;
5447         }
5448 out:
5449         free(data);
5450         return ret;
5451 }
5452
5453 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5454                                u64 num_bytes)
5455 {
5456         struct btrfs_path path;
5457         struct extent_buffer *leaf;
5458         struct btrfs_key key;
5459         int ret;
5460
5461         btrfs_init_path(&path);
5462         key.objectid = bytenr;
5463         key.type = BTRFS_EXTENT_ITEM_KEY;
5464         key.offset = (u64)-1;
5465
5466 again:
5467         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
5468                                 0, 0);
5469         if (ret < 0) {
5470                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5471                 btrfs_release_path(&path);
5472                 return ret;
5473         } else if (ret) {
5474                 if (path.slots[0] > 0) {
5475                         path.slots[0]--;
5476                 } else {
5477                         ret = btrfs_prev_leaf(root, &path);
5478                         if (ret < 0) {
5479                                 goto out;
5480                         } else if (ret > 0) {
5481                                 ret = 0;
5482                                 goto out;
5483                         }
5484                 }
5485         }
5486
5487         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
5488
5489         /*
5490          * Block group items come before extent items if they have the same
5491          * bytenr, so walk back one more just in case.  Dear future traveller,
5492          * first congrats on mastering time travel.  Now if it's not too much
5493          * trouble could you go back to 2006 and tell Chris to make the
5494          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5495          * EXTENT_ITEM_KEY please?
5496          */
5497         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5498                 if (path.slots[0] > 0) {
5499                         path.slots[0]--;
5500                 } else {
5501                         ret = btrfs_prev_leaf(root, &path);
5502                         if (ret < 0) {
5503                                 goto out;
5504                         } else if (ret > 0) {
5505                                 ret = 0;
5506                                 goto out;
5507                         }
5508                 }
5509                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
5510         }
5511
5512         while (num_bytes) {
5513                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
5514                         ret = btrfs_next_leaf(root, &path);
5515                         if (ret < 0) {
5516                                 fprintf(stderr, "Error going to next leaf "
5517                                         "%d\n", ret);
5518                                 btrfs_release_path(&path);
5519                                 return ret;
5520                         } else if (ret) {
5521                                 break;
5522                         }
5523                 }
5524                 leaf = path.nodes[0];
5525                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
5526                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5527                         path.slots[0]++;
5528                         continue;
5529                 }
5530                 if (key.objectid + key.offset < bytenr) {
5531                         path.slots[0]++;
5532                         continue;
5533                 }
5534                 if (key.objectid > bytenr + num_bytes)
5535                         break;
5536
5537                 if (key.objectid == bytenr) {
5538                         if (key.offset >= num_bytes) {
5539                                 num_bytes = 0;
5540                                 break;
5541                         }
5542                         num_bytes -= key.offset;
5543                         bytenr += key.offset;
5544                 } else if (key.objectid < bytenr) {
5545                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5546                                 num_bytes = 0;
5547                                 break;
5548                         }
5549                         num_bytes = (bytenr + num_bytes) -
5550                                 (key.objectid + key.offset);
5551                         bytenr = key.objectid + key.offset;
5552                 } else {
5553                         if (key.objectid + key.offset < bytenr + num_bytes) {
5554                                 u64 new_start = key.objectid + key.offset;
5555                                 u64 new_bytes = bytenr + num_bytes - new_start;
5556
5557                                 /*
5558                                  * Weird case, the extent is in the middle of
5559                                  * our range, we'll have to search one side
5560                                  * and then the other.  Not sure if this happens
5561                                  * in real life, but no harm in coding it up
5562                                  * anyway just in case.
5563                                  */
5564                                 btrfs_release_path(&path);
5565                                 ret = check_extent_exists(root, new_start,
5566                                                           new_bytes);
5567                                 if (ret) {
5568                                         fprintf(stderr, "Right section didn't "
5569                                                 "have a record\n");
5570                                         break;
5571                                 }
5572                                 num_bytes = key.objectid - bytenr;
5573                                 goto again;
5574                         }
5575                         num_bytes = key.objectid - bytenr;
5576                 }
5577                 path.slots[0]++;
5578         }
5579         ret = 0;
5580
5581 out:
5582         if (num_bytes && !ret) {
5583                 fprintf(stderr,
5584                         "there are no extents for csum range %llu-%llu\n",
5585                         bytenr, bytenr+num_bytes);
5586                 ret = 1;
5587         }
5588
5589         btrfs_release_path(&path);
5590         return ret;
5591 }
5592
5593 static int check_csums(struct btrfs_root *root)
5594 {
5595         struct btrfs_path path;
5596         struct extent_buffer *leaf;
5597         struct btrfs_key key;
5598         u64 offset = 0, num_bytes = 0;
5599         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5600         int errors = 0;
5601         int ret;
5602         u64 data_len;
5603         unsigned long leaf_offset;
5604
5605         root = root->fs_info->csum_root;
5606         if (!extent_buffer_uptodate(root->node)) {
5607                 fprintf(stderr, "No valid csum tree found\n");
5608                 return -ENOENT;
5609         }
5610
5611         btrfs_init_path(&path);
5612         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5613         key.type = BTRFS_EXTENT_CSUM_KEY;
5614         key.offset = 0;
5615         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5616         if (ret < 0) {
5617                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5618                 btrfs_release_path(&path);
5619                 return ret;
5620         }
5621
5622         if (ret > 0 && path.slots[0])
5623                 path.slots[0]--;
5624         ret = 0;
5625
5626         while (1) {
5627                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
5628                         ret = btrfs_next_leaf(root, &path);
5629                         if (ret < 0) {
5630                                 fprintf(stderr, "Error going to next leaf "
5631                                         "%d\n", ret);
5632                                 break;
5633                         }
5634                         if (ret)
5635                                 break;
5636                 }
5637                 leaf = path.nodes[0];
5638
5639                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
5640                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5641                         path.slots[0]++;
5642                         continue;
5643                 }
5644
5645                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
5646                               csum_size) * root->fs_info->sectorsize;
5647                 if (!check_data_csum)
5648                         goto skip_csum_check;
5649                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
5650                 ret = check_extent_csums(root, key.offset, data_len,
5651                                          leaf_offset, leaf);
5652                 if (ret)
5653                         break;
5654 skip_csum_check:
5655                 if (!num_bytes) {
5656                         offset = key.offset;
5657                 } else if (key.offset != offset + num_bytes) {
5658                         ret = check_extent_exists(root, offset, num_bytes);
5659                         if (ret) {
5660                                 fprintf(stderr,
5661                 "csum exists for %llu-%llu but there is no extent record\n",
5662                                         offset, offset+num_bytes);
5663                                 errors++;
5664                         }
5665                         offset = key.offset;
5666                         num_bytes = 0;
5667                 }
5668                 num_bytes += data_len;
5669                 path.slots[0]++;
5670         }
5671
5672         btrfs_release_path(&path);
5673         return errors;
5674 }
5675
5676 static int is_dropped_key(struct btrfs_key *key,
5677                           struct btrfs_key *drop_key)
5678 {
5679         if (key->objectid < drop_key->objectid)
5680                 return 1;
5681         else if (key->objectid == drop_key->objectid) {
5682                 if (key->type < drop_key->type)
5683                         return 1;
5684                 else if (key->type == drop_key->type) {
5685                         if (key->offset < drop_key->offset)
5686                                 return 1;
5687                 }
5688         }
5689         return 0;
5690 }
5691
5692 /*
5693  * Here are the rules for FULL_BACKREF.
5694  *
5695  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5696  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5697  *      FULL_BACKREF set.
5698  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
5699  *    if it happened after the relocation occurred since we'll have dropped the
5700  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5701  *    have no real way to know for sure.
5702  *
5703  * We process the blocks one root at a time, and we start from the lowest root
5704  * objectid and go to the highest.  So we can just lookup the owner backref for
5705  * the record and if we don't find it then we know it doesn't exist and we have
5706  * a FULL BACKREF.
5707  *
5708  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5709  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5710  * be set or not and then we can check later once we've gathered all the refs.
5711  */
5712 static int calc_extent_flag(struct cache_tree *extent_cache,
5713                            struct extent_buffer *buf,
5714                            struct root_item_record *ri,
5715                            u64 *flags)
5716 {
5717         struct extent_record *rec;
5718         struct cache_extent *cache;
5719         struct tree_backref *tback;
5720         u64 owner = 0;
5721
5722         cache = lookup_cache_extent(extent_cache, buf->start, 1);
5723         /* we have added this extent before */
5724         if (!cache)
5725                 return -ENOENT;
5726
5727         rec = container_of(cache, struct extent_record, cache);
5728
5729         /*
5730          * Except file/reloc tree, we can not have
5731          * FULL BACKREF MODE
5732          */
5733         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5734                 goto normal;
5735         /*
5736          * root node
5737          */
5738         if (buf->start == ri->bytenr)
5739                 goto normal;
5740
5741         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5742                 goto full_backref;
5743
5744         owner = btrfs_header_owner(buf);
5745         if (owner == ri->objectid)
5746                 goto normal;
5747
5748         tback = find_tree_backref(rec, 0, owner);
5749         if (!tback)
5750                 goto full_backref;
5751 normal:
5752         *flags = 0;
5753         if (rec->flag_block_full_backref != FLAG_UNSET &&
5754             rec->flag_block_full_backref != 0)
5755                 rec->bad_full_backref = 1;
5756         return 0;
5757 full_backref:
5758         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5759         if (rec->flag_block_full_backref != FLAG_UNSET &&
5760             rec->flag_block_full_backref != 1)
5761                 rec->bad_full_backref = 1;
5762         return 0;
5763 }
5764
5765 static void report_mismatch_key_root(u8 key_type, u64 rootid)
5766 {
5767         fprintf(stderr, "Invalid key type(");
5768         print_key_type(stderr, 0, key_type);
5769         fprintf(stderr, ") found in root(");
5770         print_objectid(stderr, rootid, 0);
5771         fprintf(stderr, ")\n");
5772 }
5773
5774 /*
5775  * Check if the key is valid with its extent buffer.
5776  *
5777  * This is a early check in case invalid key exists in a extent buffer
5778  * This is not comprehensive yet, but should prevent wrong key/item passed
5779  * further
5780  */
5781 static int check_type_with_root(u64 rootid, u8 key_type)
5782 {
5783         switch (key_type) {
5784         /* Only valid in chunk tree */
5785         case BTRFS_DEV_ITEM_KEY:
5786         case BTRFS_CHUNK_ITEM_KEY:
5787                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
5788                         goto err;
5789                 break;
5790         /* valid in csum and log tree */
5791         case BTRFS_CSUM_TREE_OBJECTID:
5792                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
5793                       is_fstree(rootid)))
5794                         goto err;
5795                 break;
5796         case BTRFS_EXTENT_ITEM_KEY:
5797         case BTRFS_METADATA_ITEM_KEY:
5798         case BTRFS_BLOCK_GROUP_ITEM_KEY:
5799                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
5800                         goto err;
5801                 break;
5802         case BTRFS_ROOT_ITEM_KEY:
5803                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
5804                         goto err;
5805                 break;
5806         case BTRFS_DEV_EXTENT_KEY:
5807                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
5808                         goto err;
5809                 break;
5810         }
5811         return 0;
5812 err:
5813         report_mismatch_key_root(key_type, rootid);
5814         return -EINVAL;
5815 }
5816
5817 static int run_next_block(struct btrfs_root *root,
5818                           struct block_info *bits,
5819                           int bits_nr,
5820                           u64 *last,
5821                           struct cache_tree *pending,
5822                           struct cache_tree *seen,
5823                           struct cache_tree *reada,
5824                           struct cache_tree *nodes,
5825                           struct cache_tree *extent_cache,
5826                           struct cache_tree *chunk_cache,
5827                           struct rb_root *dev_cache,
5828                           struct block_group_tree *block_group_cache,
5829                           struct device_extent_tree *dev_extent_cache,
5830                           struct root_item_record *ri)
5831 {
5832         struct btrfs_fs_info *fs_info = root->fs_info;
5833         struct extent_buffer *buf;
5834         struct extent_record *rec = NULL;
5835         u64 bytenr;
5836         u32 size;
5837         u64 parent;
5838         u64 owner;
5839         u64 flags;
5840         u64 ptr;
5841         u64 gen = 0;
5842         int ret = 0;
5843         int i;
5844         int nritems;
5845         struct btrfs_key key;
5846         struct cache_extent *cache;
5847         int reada_bits;
5848
5849         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5850                                     bits_nr, &reada_bits);
5851         if (nritems == 0)
5852                 return 1;
5853
5854         if (!reada_bits) {
5855                 for (i = 0; i < nritems; i++) {
5856                         ret = add_cache_extent(reada, bits[i].start,
5857                                                bits[i].size);
5858                         if (ret == -EEXIST)
5859                                 continue;
5860
5861                         /* fixme, get the parent transid */
5862                         readahead_tree_block(fs_info, bits[i].start, 0);
5863                 }
5864         }
5865         *last = bits[0].start;
5866         bytenr = bits[0].start;
5867         size = bits[0].size;
5868
5869         cache = lookup_cache_extent(pending, bytenr, size);
5870         if (cache) {
5871                 remove_cache_extent(pending, cache);
5872                 free(cache);
5873         }
5874         cache = lookup_cache_extent(reada, bytenr, size);
5875         if (cache) {
5876                 remove_cache_extent(reada, cache);
5877                 free(cache);
5878         }
5879         cache = lookup_cache_extent(nodes, bytenr, size);
5880         if (cache) {
5881                 remove_cache_extent(nodes, cache);
5882                 free(cache);
5883         }
5884         cache = lookup_cache_extent(extent_cache, bytenr, size);
5885         if (cache) {
5886                 rec = container_of(cache, struct extent_record, cache);
5887                 gen = rec->parent_generation;
5888         }
5889
5890         /* fixme, get the real parent transid */
5891         buf = read_tree_block(root->fs_info, bytenr, gen);
5892         if (!extent_buffer_uptodate(buf)) {
5893                 record_bad_block_io(root->fs_info,
5894                                     extent_cache, bytenr, size);
5895                 goto out;
5896         }
5897
5898         nritems = btrfs_header_nritems(buf);
5899
5900         flags = 0;
5901         if (!init_extent_tree) {
5902                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5903                                        btrfs_header_level(buf), 1, NULL,
5904                                        &flags);
5905                 if (ret < 0) {
5906                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
5907                         if (ret < 0) {
5908                                 fprintf(stderr, "Couldn't calc extent flags\n");
5909                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5910                         }
5911                 }
5912         } else {
5913                 flags = 0;
5914                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
5915                 if (ret < 0) {
5916                         fprintf(stderr, "Couldn't calc extent flags\n");
5917                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5918                 }
5919         }
5920
5921         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5922                 if (ri != NULL &&
5923                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5924                     ri->objectid == btrfs_header_owner(buf)) {
5925                         /*
5926                          * Ok we got to this block from it's original owner and
5927                          * we have FULL_BACKREF set.  Relocation can leave
5928                          * converted blocks over so this is altogether possible,
5929                          * however it's not possible if the generation > the
5930                          * last snapshot, so check for this case.
5931                          */
5932                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5933                             btrfs_header_generation(buf) > ri->last_snapshot) {
5934                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5935                                 rec->bad_full_backref = 1;
5936                         }
5937                 }
5938         } else {
5939                 if (ri != NULL &&
5940                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
5941                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
5942                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5943                         rec->bad_full_backref = 1;
5944                 }
5945         }
5946
5947         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5948                 rec->flag_block_full_backref = 1;
5949                 parent = bytenr;
5950                 owner = 0;
5951         } else {
5952                 rec->flag_block_full_backref = 0;
5953                 parent = 0;
5954                 owner = btrfs_header_owner(buf);
5955         }
5956
5957         ret = check_block(root, extent_cache, buf, flags);
5958         if (ret)
5959                 goto out;
5960
5961         if (btrfs_is_leaf(buf)) {
5962                 btree_space_waste += btrfs_leaf_free_space(root, buf);
5963                 for (i = 0; i < nritems; i++) {
5964                         struct btrfs_file_extent_item *fi;
5965
5966                         btrfs_item_key_to_cpu(buf, &key, i);
5967                         /*
5968                          * Check key type against the leaf owner.
5969                          * Could filter quite a lot of early error if
5970                          * owner is correct
5971                          */
5972                         if (check_type_with_root(btrfs_header_owner(buf),
5973                                                  key.type)) {
5974                                 fprintf(stderr, "ignoring invalid key\n");
5975                                 continue;
5976                         }
5977                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
5978                                 process_extent_item(root, extent_cache, buf,
5979                                                     i);
5980                                 continue;
5981                         }
5982                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5983                                 process_extent_item(root, extent_cache, buf,
5984                                                     i);
5985                                 continue;
5986                         }
5987                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
5988                                 total_csum_bytes +=
5989                                         btrfs_item_size_nr(buf, i);
5990                                 continue;
5991                         }
5992                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
5993                                 process_chunk_item(chunk_cache, &key, buf, i);
5994                                 continue;
5995                         }
5996                         if (key.type == BTRFS_DEV_ITEM_KEY) {
5997                                 process_device_item(dev_cache, &key, buf, i);
5998                                 continue;
5999                         }
6000                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6001                                 process_block_group_item(block_group_cache,
6002                                         &key, buf, i);
6003                                 continue;
6004                         }
6005                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6006                                 process_device_extent_item(dev_extent_cache,
6007                                         &key, buf, i);
6008                                 continue;
6009
6010                         }
6011                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6012 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6013                                 process_extent_ref_v0(extent_cache, buf, i);
6014 #else
6015                                 BUG();
6016 #endif
6017                                 continue;
6018                         }
6019
6020                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6021                                 ret = add_tree_backref(extent_cache,
6022                                                 key.objectid, 0, key.offset, 0);
6023                                 if (ret < 0)
6024                                         error(
6025                                 "add_tree_backref failed (leaf tree block): %s",
6026                                               strerror(-ret));
6027                                 continue;
6028                         }
6029                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6030                                 ret = add_tree_backref(extent_cache,
6031                                                 key.objectid, key.offset, 0, 0);
6032                                 if (ret < 0)
6033                                         error(
6034                                 "add_tree_backref failed (leaf shared block): %s",
6035                                               strerror(-ret));
6036                                 continue;
6037                         }
6038                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6039                                 struct btrfs_extent_data_ref *ref;
6040
6041                                 ref = btrfs_item_ptr(buf, i,
6042                                                 struct btrfs_extent_data_ref);
6043                                 add_data_backref(extent_cache,
6044                                         key.objectid, 0,
6045                                         btrfs_extent_data_ref_root(buf, ref),
6046                                         btrfs_extent_data_ref_objectid(buf,
6047                                                                        ref),
6048                                         btrfs_extent_data_ref_offset(buf, ref),
6049                                         btrfs_extent_data_ref_count(buf, ref),
6050                                         0, root->fs_info->sectorsize);
6051                                 continue;
6052                         }
6053                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6054                                 struct btrfs_shared_data_ref *ref;
6055
6056                                 ref = btrfs_item_ptr(buf, i,
6057                                                 struct btrfs_shared_data_ref);
6058                                 add_data_backref(extent_cache,
6059                                         key.objectid, key.offset, 0, 0, 0,
6060                                         btrfs_shared_data_ref_count(buf, ref),
6061                                         0, root->fs_info->sectorsize);
6062                                 continue;
6063                         }
6064                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6065                                 struct bad_item *bad;
6066
6067                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6068                                         continue;
6069                                 if (!owner)
6070                                         continue;
6071                                 bad = malloc(sizeof(struct bad_item));
6072                                 if (!bad)
6073                                         continue;
6074                                 INIT_LIST_HEAD(&bad->list);
6075                                 memcpy(&bad->key, &key,
6076                                        sizeof(struct btrfs_key));
6077                                 bad->root_id = owner;
6078                                 list_add_tail(&bad->list, &delete_items);
6079                                 continue;
6080                         }
6081                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6082                                 continue;
6083                         fi = btrfs_item_ptr(buf, i,
6084                                             struct btrfs_file_extent_item);
6085                         if (btrfs_file_extent_type(buf, fi) ==
6086                             BTRFS_FILE_EXTENT_INLINE)
6087                                 continue;
6088                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6089                                 continue;
6090
6091                         data_bytes_allocated +=
6092                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6093                         if (data_bytes_allocated < root->fs_info->sectorsize)
6094                                 abort();
6095
6096                         data_bytes_referenced +=
6097                                 btrfs_file_extent_num_bytes(buf, fi);
6098                         add_data_backref(extent_cache,
6099                                 btrfs_file_extent_disk_bytenr(buf, fi),
6100                                 parent, owner, key.objectid, key.offset -
6101                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6102                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6103                 }
6104         } else {
6105                 int level;
6106                 struct btrfs_key first_key;
6107
6108                 first_key.objectid = 0;
6109
6110                 if (nritems > 0)
6111                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6112                 level = btrfs_header_level(buf);
6113                 for (i = 0; i < nritems; i++) {
6114                         struct extent_record tmpl;
6115
6116                         ptr = btrfs_node_blockptr(buf, i);
6117                         size = root->fs_info->nodesize;
6118                         btrfs_node_key_to_cpu(buf, &key, i);
6119                         if (ri != NULL) {
6120                                 if ((level == ri->drop_level)
6121                                     && is_dropped_key(&key, &ri->drop_key)) {
6122                                         continue;
6123                                 }
6124                         }
6125
6126                         memset(&tmpl, 0, sizeof(tmpl));
6127                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6128                         tmpl.parent_generation =
6129                                 btrfs_node_ptr_generation(buf, i);
6130                         tmpl.start = ptr;
6131                         tmpl.nr = size;
6132                         tmpl.refs = 1;
6133                         tmpl.metadata = 1;
6134                         tmpl.max_size = size;
6135                         ret = add_extent_rec(extent_cache, &tmpl);
6136                         if (ret < 0)
6137                                 goto out;
6138
6139                         ret = add_tree_backref(extent_cache, ptr, parent,
6140                                         owner, 1);
6141                         if (ret < 0) {
6142                                 error(
6143                                 "add_tree_backref failed (non-leaf block): %s",
6144                                       strerror(-ret));
6145                                 continue;
6146                         }
6147
6148                         if (level > 1)
6149                                 add_pending(nodes, seen, ptr, size);
6150                         else
6151                                 add_pending(pending, seen, ptr, size);
6152                 }
6153                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
6154                                       nritems) * sizeof(struct btrfs_key_ptr);
6155         }
6156         total_btree_bytes += buf->len;
6157         if (fs_root_objectid(btrfs_header_owner(buf)))
6158                 total_fs_tree_bytes += buf->len;
6159         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6160                 total_extent_tree_bytes += buf->len;
6161 out:
6162         free_extent_buffer(buf);
6163         return ret;
6164 }
6165
6166 static int add_root_to_pending(struct extent_buffer *buf,
6167                                struct cache_tree *extent_cache,
6168                                struct cache_tree *pending,
6169                                struct cache_tree *seen,
6170                                struct cache_tree *nodes,
6171                                u64 objectid)
6172 {
6173         struct extent_record tmpl;
6174         int ret;
6175
6176         if (btrfs_header_level(buf) > 0)
6177                 add_pending(nodes, seen, buf->start, buf->len);
6178         else
6179                 add_pending(pending, seen, buf->start, buf->len);
6180
6181         memset(&tmpl, 0, sizeof(tmpl));
6182         tmpl.start = buf->start;
6183         tmpl.nr = buf->len;
6184         tmpl.is_root = 1;
6185         tmpl.refs = 1;
6186         tmpl.metadata = 1;
6187         tmpl.max_size = buf->len;
6188         add_extent_rec(extent_cache, &tmpl);
6189
6190         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6191             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6192                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6193                                 0, 1);
6194         else
6195                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6196                                 1);
6197         return ret;
6198 }
6199
6200 /* as we fix the tree, we might be deleting blocks that
6201  * we're tracking for repair.  This hook makes sure we
6202  * remove any backrefs for blocks as we are fixing them.
6203  */
6204 static int free_extent_hook(struct btrfs_trans_handle *trans,
6205                             struct btrfs_root *root,
6206                             u64 bytenr, u64 num_bytes, u64 parent,
6207                             u64 root_objectid, u64 owner, u64 offset,
6208                             int refs_to_drop)
6209 {
6210         struct extent_record *rec;
6211         struct cache_extent *cache;
6212         int is_data;
6213         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6214
6215         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6216         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6217         if (!cache)
6218                 return 0;
6219
6220         rec = container_of(cache, struct extent_record, cache);
6221         if (is_data) {
6222                 struct data_backref *back;
6223
6224                 back = find_data_backref(rec, parent, root_objectid, owner,
6225                                          offset, 1, bytenr, num_bytes);
6226                 if (!back)
6227                         goto out;
6228                 if (back->node.found_ref) {
6229                         back->found_ref -= refs_to_drop;
6230                         if (rec->refs)
6231                                 rec->refs -= refs_to_drop;
6232                 }
6233                 if (back->node.found_extent_tree) {
6234                         back->num_refs -= refs_to_drop;
6235                         if (rec->extent_item_refs)
6236                                 rec->extent_item_refs -= refs_to_drop;
6237                 }
6238                 if (back->found_ref == 0)
6239                         back->node.found_ref = 0;
6240                 if (back->num_refs == 0)
6241                         back->node.found_extent_tree = 0;
6242
6243                 if (!back->node.found_extent_tree && back->node.found_ref) {
6244                         rb_erase(&back->node.node, &rec->backref_tree);
6245                         free(back);
6246                 }
6247         } else {
6248                 struct tree_backref *back;
6249
6250                 back = find_tree_backref(rec, parent, root_objectid);
6251                 if (!back)
6252                         goto out;
6253                 if (back->node.found_ref) {
6254                         if (rec->refs)
6255                                 rec->refs--;
6256                         back->node.found_ref = 0;
6257                 }
6258                 if (back->node.found_extent_tree) {
6259                         if (rec->extent_item_refs)
6260                                 rec->extent_item_refs--;
6261                         back->node.found_extent_tree = 0;
6262                 }
6263                 if (!back->node.found_extent_tree && back->node.found_ref) {
6264                         rb_erase(&back->node.node, &rec->backref_tree);
6265                         free(back);
6266                 }
6267         }
6268         maybe_free_extent_rec(extent_cache, rec);
6269 out:
6270         return 0;
6271 }
6272
6273 static int delete_extent_records(struct btrfs_trans_handle *trans,
6274                                  struct btrfs_root *root,
6275                                  struct btrfs_path *path,
6276                                  u64 bytenr)
6277 {
6278         struct btrfs_key key;
6279         struct btrfs_key found_key;
6280         struct extent_buffer *leaf;
6281         int ret;
6282         int slot;
6283
6284
6285         key.objectid = bytenr;
6286         key.type = (u8)-1;
6287         key.offset = (u64)-1;
6288
6289         while (1) {
6290                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6291                                         &key, path, 0, 1);
6292                 if (ret < 0)
6293                         break;
6294
6295                 if (ret > 0) {
6296                         ret = 0;
6297                         if (path->slots[0] == 0)
6298                                 break;
6299                         path->slots[0]--;
6300                 }
6301                 ret = 0;
6302
6303                 leaf = path->nodes[0];
6304                 slot = path->slots[0];
6305
6306                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6307                 if (found_key.objectid != bytenr)
6308                         break;
6309
6310                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6311                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6312                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6313                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6314                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6315                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6316                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6317                         btrfs_release_path(path);
6318                         if (found_key.type == 0) {
6319                                 if (found_key.offset == 0)
6320                                         break;
6321                                 key.offset = found_key.offset - 1;
6322                                 key.type = found_key.type;
6323                         }
6324                         key.type = found_key.type - 1;
6325                         key.offset = (u64)-1;
6326                         continue;
6327                 }
6328
6329                 fprintf(stderr,
6330                         "repair deleting extent record: key [%llu,%u,%llu]\n",
6331                         found_key.objectid, found_key.type, found_key.offset);
6332
6333                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6334                 if (ret)
6335                         break;
6336                 btrfs_release_path(path);
6337
6338                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6339                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6340                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6341                                 found_key.offset : root->fs_info->nodesize;
6342
6343                         ret = btrfs_update_block_group(root, bytenr,
6344                                                        bytes, 0, 0);
6345                         if (ret)
6346                                 break;
6347                 }
6348         }
6349
6350         btrfs_release_path(path);
6351         return ret;
6352 }
6353
6354 /*
6355  * for a single backref, this will allocate a new extent
6356  * and add the backref to it.
6357  */
6358 static int record_extent(struct btrfs_trans_handle *trans,
6359                          struct btrfs_fs_info *info,
6360                          struct btrfs_path *path,
6361                          struct extent_record *rec,
6362                          struct extent_backref *back,
6363                          int allocated, u64 flags)
6364 {
6365         int ret = 0;
6366         struct btrfs_root *extent_root = info->extent_root;
6367         struct extent_buffer *leaf;
6368         struct btrfs_key ins_key;
6369         struct btrfs_extent_item *ei;
6370         struct data_backref *dback;
6371         struct btrfs_tree_block_info *bi;
6372
6373         if (!back->is_data)
6374                 rec->max_size = max_t(u64, rec->max_size,
6375                                     info->nodesize);
6376
6377         if (!allocated) {
6378                 u32 item_size = sizeof(*ei);
6379
6380                 if (!back->is_data)
6381                         item_size += sizeof(*bi);
6382
6383                 ins_key.objectid = rec->start;
6384                 ins_key.offset = rec->max_size;
6385                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6386
6387                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6388                                         &ins_key, item_size);
6389                 if (ret)
6390                         goto fail;
6391
6392                 leaf = path->nodes[0];
6393                 ei = btrfs_item_ptr(leaf, path->slots[0],
6394                                     struct btrfs_extent_item);
6395
6396                 btrfs_set_extent_refs(leaf, ei, 0);
6397                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6398
6399                 if (back->is_data) {
6400                         btrfs_set_extent_flags(leaf, ei,
6401                                                BTRFS_EXTENT_FLAG_DATA);
6402                 } else {
6403                         struct btrfs_disk_key copy_key;
6404
6405                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6406                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6407                                              sizeof(*bi));
6408
6409                         btrfs_set_disk_key_objectid(&copy_key,
6410                                                     rec->info_objectid);
6411                         btrfs_set_disk_key_type(&copy_key, 0);
6412                         btrfs_set_disk_key_offset(&copy_key, 0);
6413
6414                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6415                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6416
6417                         btrfs_set_extent_flags(leaf, ei,
6418                                         flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
6419                 }
6420
6421                 btrfs_mark_buffer_dirty(leaf);
6422                 ret = btrfs_update_block_group(extent_root, rec->start,
6423                                                rec->max_size, 1, 0);
6424                 if (ret)
6425                         goto fail;
6426                 btrfs_release_path(path);
6427         }
6428
6429         if (back->is_data) {
6430                 u64 parent;
6431                 int i;
6432
6433                 dback = to_data_backref(back);
6434                 if (back->full_backref)
6435                         parent = dback->parent;
6436                 else
6437                         parent = 0;
6438
6439                 for (i = 0; i < dback->found_ref; i++) {
6440                         /* if parent != 0, we're doing a full backref
6441                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6442                          * just makes the backref allocator create a data
6443                          * backref
6444                          */
6445                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6446                                                    rec->start, rec->max_size,
6447                                                    parent,
6448                                                    dback->root,
6449                                                    parent ?
6450                                                    BTRFS_FIRST_FREE_OBJECTID :
6451                                                    dback->owner,
6452                                                    dback->offset);
6453                         if (ret)
6454                                 break;
6455                 }
6456                 fprintf(stderr,
6457 "adding new data backref on %llu %s %llu owner %llu offset %llu found %d\n",
6458                         (unsigned long long)rec->start,
6459                         back->full_backref ? "parent" : "root",
6460                         back->full_backref ? (unsigned long long)parent :
6461                                              (unsigned long long)dback->root,
6462                         (unsigned long long)dback->owner,
6463                         (unsigned long long)dback->offset, dback->found_ref);
6464         } else {
6465                 u64 parent;
6466                 struct tree_backref *tback;
6467
6468                 tback = to_tree_backref(back);
6469                 if (back->full_backref)
6470                         parent = tback->parent;
6471                 else
6472                         parent = 0;
6473
6474                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6475                                            rec->start, rec->max_size,
6476                                            parent, tback->root, 0, 0);
6477                 fprintf(stderr,
6478 "adding new tree backref on start %llu len %llu parent %llu root %llu\n",
6479                         rec->start, rec->max_size, parent, tback->root);
6480         }
6481 fail:
6482         btrfs_release_path(path);
6483         return ret;
6484 }
6485
6486 static struct extent_entry *find_entry(struct list_head *entries,
6487                                        u64 bytenr, u64 bytes)
6488 {
6489         struct extent_entry *entry = NULL;
6490
6491         list_for_each_entry(entry, entries, list) {
6492                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6493                         return entry;
6494         }
6495
6496         return NULL;
6497 }
6498
6499 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6500 {
6501         struct extent_entry *entry, *best = NULL, *prev = NULL;
6502
6503         list_for_each_entry(entry, entries, list) {
6504                 /*
6505                  * If there are as many broken entries as entries then we know
6506                  * not to trust this particular entry.
6507                  */
6508                 if (entry->broken == entry->count)
6509                         continue;
6510
6511                 /*
6512                  * Special case, when there are only two entries and 'best' is
6513                  * the first one
6514                  */
6515                 if (!prev) {
6516                         best = entry;
6517                         prev = entry;
6518                         continue;
6519                 }
6520
6521                 /*
6522                  * If our current entry == best then we can't be sure our best
6523                  * is really the best, so we need to keep searching.
6524                  */
6525                 if (best && best->count == entry->count) {
6526                         prev = entry;
6527                         best = NULL;
6528                         continue;
6529                 }
6530
6531                 /* Prev == entry, not good enough, have to keep searching */
6532                 if (!prev->broken && prev->count == entry->count)
6533                         continue;
6534
6535                 if (!best)
6536                         best = (prev->count > entry->count) ? prev : entry;
6537                 else if (best->count < entry->count)
6538                         best = entry;
6539                 prev = entry;
6540         }
6541
6542         return best;
6543 }
6544
6545 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6546                       struct data_backref *dback, struct extent_entry *entry)
6547 {
6548         struct btrfs_trans_handle *trans;
6549         struct btrfs_root *root;
6550         struct btrfs_file_extent_item *fi;
6551         struct extent_buffer *leaf;
6552         struct btrfs_key key;
6553         u64 bytenr, bytes;
6554         int ret, err;
6555
6556         key.objectid = dback->root;
6557         key.type = BTRFS_ROOT_ITEM_KEY;
6558         key.offset = (u64)-1;
6559         root = btrfs_read_fs_root(info, &key);
6560         if (IS_ERR(root)) {
6561                 fprintf(stderr, "Couldn't find root for our ref\n");
6562                 return -EINVAL;
6563         }
6564
6565         /*
6566          * The backref points to the original offset of the extent if it was
6567          * split, so we need to search down to the offset we have and then walk
6568          * forward until we find the backref we're looking for.
6569          */
6570         key.objectid = dback->owner;
6571         key.type = BTRFS_EXTENT_DATA_KEY;
6572         key.offset = dback->offset;
6573         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6574         if (ret < 0) {
6575                 fprintf(stderr, "Error looking up ref %d\n", ret);
6576                 return ret;
6577         }
6578
6579         while (1) {
6580                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6581                         ret = btrfs_next_leaf(root, path);
6582                         if (ret) {
6583                                 fprintf(stderr, "Couldn't find our ref, next\n");
6584                                 return -EINVAL;
6585                         }
6586                 }
6587                 leaf = path->nodes[0];
6588                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6589                 if (key.objectid != dback->owner ||
6590                     key.type != BTRFS_EXTENT_DATA_KEY) {
6591                         fprintf(stderr, "Couldn't find our ref, search\n");
6592                         return -EINVAL;
6593                 }
6594                 fi = btrfs_item_ptr(leaf, path->slots[0],
6595                                     struct btrfs_file_extent_item);
6596                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6597                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6598
6599                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6600                         break;
6601                 path->slots[0]++;
6602         }
6603
6604         btrfs_release_path(path);
6605
6606         trans = btrfs_start_transaction(root, 1);
6607         if (IS_ERR(trans))
6608                 return PTR_ERR(trans);
6609
6610         /*
6611          * Ok we have the key of the file extent we want to fix, now we can cow
6612          * down to the thing and fix it.
6613          */
6614         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6615         if (ret < 0) {
6616                 fprintf(stderr, "error cowing down to ref [%llu,%u,%llu]: %d\n",
6617                         key.objectid, key.type, key.offset, ret);
6618                 goto out;
6619         }
6620         if (ret > 0) {
6621                 fprintf(stderr,
6622                 "well that's odd, we just found this key [%llu,%u,%llu]\n",
6623                         key.objectid, key.type, key.offset);
6624                 ret = -EINVAL;
6625                 goto out;
6626         }
6627         leaf = path->nodes[0];
6628         fi = btrfs_item_ptr(leaf, path->slots[0],
6629                             struct btrfs_file_extent_item);
6630
6631         if (btrfs_file_extent_compression(leaf, fi) &&
6632             dback->disk_bytenr != entry->bytenr) {
6633                 fprintf(stderr,
6634 "ref doesn't match the record start and is compressed, please take a btrfs-image of this file system and send it to a btrfs developer so they can complete this functionality for bytenr %llu\n",
6635                         dback->disk_bytenr);
6636                 ret = -EINVAL;
6637                 goto out;
6638         }
6639
6640         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6641                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6642         } else if (dback->disk_bytenr > entry->bytenr) {
6643                 u64 off_diff, offset;
6644
6645                 off_diff = dback->disk_bytenr - entry->bytenr;
6646                 offset = btrfs_file_extent_offset(leaf, fi);
6647                 if (dback->disk_bytenr + offset +
6648                     btrfs_file_extent_num_bytes(leaf, fi) >
6649                     entry->bytenr + entry->bytes) {
6650                         fprintf(stderr,
6651 "ref is past the entry end, please take a btrfs-image of this file system and send it to a btrfs developer, ref %llu\n",
6652                                 dback->disk_bytenr);
6653                         ret = -EINVAL;
6654                         goto out;
6655                 }
6656                 offset += off_diff;
6657                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6658                 btrfs_set_file_extent_offset(leaf, fi, offset);
6659         } else if (dback->disk_bytenr < entry->bytenr) {
6660                 u64 offset;
6661
6662                 offset = btrfs_file_extent_offset(leaf, fi);
6663                 if (dback->disk_bytenr + offset < entry->bytenr) {
6664                         fprintf(stderr,
6665 "ref is before the entry start, please take a btrfs-image of this file system and send it to a btrfs developer, ref %llu\n",
6666                                 dback->disk_bytenr);
6667                         ret = -EINVAL;
6668                         goto out;
6669                 }
6670
6671                 offset += dback->disk_bytenr;
6672                 offset -= entry->bytenr;
6673                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6674                 btrfs_set_file_extent_offset(leaf, fi, offset);
6675         }
6676
6677         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6678
6679         /*
6680          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6681          * only do this if we aren't using compression, otherwise it's a
6682          * trickier case.
6683          */
6684         if (!btrfs_file_extent_compression(leaf, fi))
6685                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6686         else
6687                 printf("ram bytes may be wrong?\n");
6688         btrfs_mark_buffer_dirty(leaf);
6689 out:
6690         err = btrfs_commit_transaction(trans, root);
6691         btrfs_release_path(path);
6692         return ret ? ret : err;
6693 }
6694
6695 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6696                            struct extent_record *rec)
6697 {
6698         struct extent_backref *back, *tmp;
6699         struct data_backref *dback;
6700         struct extent_entry *entry, *best = NULL;
6701         LIST_HEAD(entries);
6702         int nr_entries = 0;
6703         int broken_entries = 0;
6704         int ret = 0;
6705         short mismatch = 0;
6706
6707         /*
6708          * Metadata is easy and the backrefs should always agree on bytenr and
6709          * size, if not we've got bigger issues.
6710          */
6711         if (rec->metadata)
6712                 return 0;
6713
6714         rbtree_postorder_for_each_entry_safe(back, tmp,
6715                                              &rec->backref_tree, node) {
6716                 if (back->full_backref || !back->is_data)
6717                         continue;
6718
6719                 dback = to_data_backref(back);
6720
6721                 /*
6722                  * We only pay attention to backrefs that we found a real
6723                  * backref for.
6724                  */
6725                 if (dback->found_ref == 0)
6726                         continue;
6727
6728                 /*
6729                  * For now we only catch when the bytes don't match, not the
6730                  * bytenr.  We can easily do this at the same time, but I want
6731                  * to have a fs image to test on before we just add repair
6732                  * functionality willy-nilly so we know we won't screw up the
6733                  * repair.
6734                  */
6735
6736                 entry = find_entry(&entries, dback->disk_bytenr,
6737                                    dback->bytes);
6738                 if (!entry) {
6739                         entry = malloc(sizeof(struct extent_entry));
6740                         if (!entry) {
6741                                 ret = -ENOMEM;
6742                                 goto out;
6743                         }
6744                         memset(entry, 0, sizeof(*entry));
6745                         entry->bytenr = dback->disk_bytenr;
6746                         entry->bytes = dback->bytes;
6747                         list_add_tail(&entry->list, &entries);
6748                         nr_entries++;
6749                 }
6750
6751                 /*
6752                  * If we only have on entry we may think the entries agree when
6753                  * in reality they don't so we have to do some extra checking.
6754                  */
6755                 if (dback->disk_bytenr != rec->start ||
6756                     dback->bytes != rec->nr || back->broken)
6757                         mismatch = 1;
6758
6759                 if (back->broken) {
6760                         entry->broken++;
6761                         broken_entries++;
6762                 }
6763
6764                 entry->count++;
6765         }
6766
6767         /* Yay all the backrefs agree, carry on good sir */
6768         if (nr_entries <= 1 && !mismatch)
6769                 goto out;
6770
6771         fprintf(stderr,
6772                 "attempting to repair backref discrepency for bytenr %llu\n",
6773                 rec->start);
6774
6775         /*
6776          * First we want to see if the backrefs can agree amongst themselves who
6777          * is right, so figure out which one of the entries has the highest
6778          * count.
6779          */
6780         best = find_most_right_entry(&entries);
6781
6782         /*
6783          * Ok so we may have an even split between what the backrefs think, so
6784          * this is where we use the extent ref to see what it thinks.
6785          */
6786         if (!best) {
6787                 entry = find_entry(&entries, rec->start, rec->nr);
6788                 if (!entry && (!broken_entries || !rec->found_rec)) {
6789                         fprintf(stderr,
6790 "backrefs don't agree with each other and extent record doesn't agree with anybody, so we can't fix bytenr %llu bytes %llu\n",
6791                                 rec->start, rec->nr);
6792                         ret = -EINVAL;
6793                         goto out;
6794                 } else if (!entry) {
6795                         /*
6796                          * Ok our backrefs were broken, we'll assume this is the
6797                          * correct value and add an entry for this range.
6798                          */
6799                         entry = malloc(sizeof(struct extent_entry));
6800                         if (!entry) {
6801                                 ret = -ENOMEM;
6802                                 goto out;
6803                         }
6804                         memset(entry, 0, sizeof(*entry));
6805                         entry->bytenr = rec->start;
6806                         entry->bytes = rec->nr;
6807                         list_add_tail(&entry->list, &entries);
6808                         nr_entries++;
6809                 }
6810                 entry->count++;
6811                 best = find_most_right_entry(&entries);
6812                 if (!best) {
6813                         fprintf(stderr,
6814 "backrefs and extent record evenly split on who is right, this is going to require user input to fix bytenr %llu bytes %llu\n",
6815                                 rec->start, rec->nr);
6816                         ret = -EINVAL;
6817                         goto out;
6818                 }
6819         }
6820
6821         /*
6822          * I don't think this can happen currently as we'll abort() if we catch
6823          * this case higher up, but in case somebody removes that we still can't
6824          * deal with it properly here yet, so just bail out of that's the case.
6825          */
6826         if (best->bytenr != rec->start) {
6827                 fprintf(stderr,
6828 "extent start and backref starts don't match, please use btrfs-image on this file system and send it to a btrfs developer so they can make fsck fix this particular case.  bytenr is %llu, bytes is %llu\n",
6829                         rec->start, rec->nr);
6830                 ret = -EINVAL;
6831                 goto out;
6832         }
6833
6834         /*
6835          * Ok great we all agreed on an extent record, let's go find the real
6836          * references and fix up the ones that don't match.
6837          */
6838         rbtree_postorder_for_each_entry_safe(back, tmp,
6839                                              &rec->backref_tree, node) {
6840                 if (back->full_backref || !back->is_data)
6841                         continue;
6842
6843                 dback = to_data_backref(back);
6844
6845                 /*
6846                  * Still ignoring backrefs that don't have a real ref attached
6847                  * to them.
6848                  */
6849                 if (dback->found_ref == 0)
6850                         continue;
6851
6852                 if (dback->bytes == best->bytes &&
6853                     dback->disk_bytenr == best->bytenr)
6854                         continue;
6855
6856                 ret = repair_ref(info, path, dback, best);
6857                 if (ret)
6858                         goto out;
6859         }
6860
6861         /*
6862          * Ok we messed with the actual refs, which means we need to drop our
6863          * entire cache and go back and rescan.  I know this is a huge pain and
6864          * adds a lot of extra work, but it's the only way to be safe.  Once all
6865          * the backrefs agree we may not need to do anything to the extent
6866          * record itself.
6867          */
6868         ret = -EAGAIN;
6869 out:
6870         while (!list_empty(&entries)) {
6871                 entry = list_entry(entries.next, struct extent_entry, list);
6872                 list_del_init(&entry->list);
6873                 free(entry);
6874         }
6875         return ret;
6876 }
6877
6878 static int process_duplicates(struct cache_tree *extent_cache,
6879                               struct extent_record *rec)
6880 {
6881         struct extent_record *good, *tmp;
6882         struct cache_extent *cache;
6883         int ret;
6884
6885         /*
6886          * If we found a extent record for this extent then return, or if we
6887          * have more than one duplicate we are likely going to need to delete
6888          * something.
6889          */
6890         if (rec->found_rec || rec->num_duplicates > 1)
6891                 return 0;
6892
6893         /* Shouldn't happen but just in case */
6894         BUG_ON(!rec->num_duplicates);
6895
6896         /*
6897          * So this happens if we end up with a backref that doesn't match the
6898          * actual extent entry.  So either the backref is bad or the extent
6899          * entry is bad.  Either way we want to have the extent_record actually
6900          * reflect what we found in the extent_tree, so we need to take the
6901          * duplicate out and use that as the extent_record since the only way we
6902          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6903          */
6904         remove_cache_extent(extent_cache, &rec->cache);
6905
6906         good = to_extent_record(rec->dups.next);
6907         list_del_init(&good->list);
6908         INIT_LIST_HEAD(&good->backrefs);
6909         INIT_LIST_HEAD(&good->dups);
6910         good->cache.start = good->start;
6911         good->cache.size = good->nr;
6912         good->content_checked = 0;
6913         good->owner_ref_checked = 0;
6914         good->num_duplicates = 0;
6915         good->refs = rec->refs;
6916         list_splice_init(&rec->backrefs, &good->backrefs);
6917         while (1) {
6918                 cache = lookup_cache_extent(extent_cache, good->start,
6919                                             good->nr);
6920                 if (!cache)
6921                         break;
6922                 tmp = container_of(cache, struct extent_record, cache);
6923
6924                 /*
6925                  * If we find another overlapping extent and it's found_rec is
6926                  * set then it's a duplicate and we need to try and delete
6927                  * something.
6928                  */
6929                 if (tmp->found_rec || tmp->num_duplicates > 0) {
6930                         if (list_empty(&good->list))
6931                                 list_add_tail(&good->list,
6932                                               &duplicate_extents);
6933                         good->num_duplicates += tmp->num_duplicates + 1;
6934                         list_splice_init(&tmp->dups, &good->dups);
6935                         list_del_init(&tmp->list);
6936                         list_add_tail(&tmp->list, &good->dups);
6937                         remove_cache_extent(extent_cache, &tmp->cache);
6938                         continue;
6939                 }
6940
6941                 /*
6942                  * Ok we have another non extent item backed extent rec, so lets
6943                  * just add it to this extent and carry on like we did above.
6944                  */
6945                 good->refs += tmp->refs;
6946                 list_splice_init(&tmp->backrefs, &good->backrefs);
6947                 remove_cache_extent(extent_cache, &tmp->cache);
6948                 free(tmp);
6949         }
6950         ret = insert_cache_extent(extent_cache, &good->cache);
6951         BUG_ON(ret);
6952         free(rec);
6953         return good->num_duplicates ? 0 : 1;
6954 }
6955
6956 static int delete_duplicate_records(struct btrfs_root *root,
6957                                     struct extent_record *rec)
6958 {
6959         struct btrfs_trans_handle *trans;
6960         LIST_HEAD(delete_list);
6961         struct btrfs_path path;
6962         struct extent_record *tmp, *good, *n;
6963         int nr_del = 0;
6964         int ret = 0, err;
6965         struct btrfs_key key;
6966
6967         btrfs_init_path(&path);
6968
6969         good = rec;
6970         /* Find the record that covers all of the duplicates. */
6971         list_for_each_entry(tmp, &rec->dups, list) {
6972                 if (good->start < tmp->start)
6973                         continue;
6974                 if (good->nr > tmp->nr)
6975                         continue;
6976
6977                 if (tmp->start + tmp->nr < good->start + good->nr) {
6978                         fprintf(stderr,
6979 "Ok we have overlapping extents that aren't completely covered by each other, this is going to require more careful thought. The extents are [%llu-%llu] and [%llu-%llu]\n",
6980                                 tmp->start, tmp->nr, good->start, good->nr);
6981                         abort();
6982                 }
6983                 good = tmp;
6984         }
6985
6986         if (good != rec)
6987                 list_add_tail(&rec->list, &delete_list);
6988
6989         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
6990                 if (tmp == good)
6991                         continue;
6992                 list_move_tail(&tmp->list, &delete_list);
6993         }
6994
6995         root = root->fs_info->extent_root;
6996         trans = btrfs_start_transaction(root, 1);
6997         if (IS_ERR(trans)) {
6998                 ret = PTR_ERR(trans);
6999                 goto out;
7000         }
7001
7002         list_for_each_entry(tmp, &delete_list, list) {
7003                 if (tmp->found_rec == 0)
7004                         continue;
7005                 key.objectid = tmp->start;
7006                 key.type = BTRFS_EXTENT_ITEM_KEY;
7007                 key.offset = tmp->nr;
7008
7009                 /* Shouldn't happen but just in case */
7010                 if (tmp->metadata) {
7011                         fprintf(stderr,
7012 "well this shouldn't happen, extent record overlaps but is metadata? [%llu, %llu]\n",
7013                                 tmp->start, tmp->nr);
7014                         abort();
7015                 }
7016
7017                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
7018                 if (ret) {
7019                         if (ret > 0)
7020                                 ret = -EINVAL;
7021                         break;
7022                 }
7023                 ret = btrfs_del_item(trans, root, &path);
7024                 if (ret)
7025                         break;
7026                 btrfs_release_path(&path);
7027                 nr_del++;
7028         }
7029         err = btrfs_commit_transaction(trans, root);
7030         if (err && !ret)
7031                 ret = err;
7032 out:
7033         while (!list_empty(&delete_list)) {
7034                 tmp = to_extent_record(delete_list.next);
7035                 list_del_init(&tmp->list);
7036                 if (tmp == rec)
7037                         continue;
7038                 free(tmp);
7039         }
7040
7041         while (!list_empty(&rec->dups)) {
7042                 tmp = to_extent_record(rec->dups.next);
7043                 list_del_init(&tmp->list);
7044                 free(tmp);
7045         }
7046
7047         btrfs_release_path(&path);
7048
7049         if (!ret && !nr_del)
7050                 rec->num_duplicates = 0;
7051
7052         return ret ? ret : nr_del;
7053 }
7054
7055 static int find_possible_backrefs(struct btrfs_fs_info *info,
7056                                   struct btrfs_path *path,
7057                                   struct cache_tree *extent_cache,
7058                                   struct extent_record *rec)
7059 {
7060         struct btrfs_root *root;
7061         struct extent_backref *back, *tmp;
7062         struct data_backref *dback;
7063         struct cache_extent *cache;
7064         struct btrfs_file_extent_item *fi;
7065         struct btrfs_key key;
7066         u64 bytenr, bytes;
7067         int ret;
7068
7069         rbtree_postorder_for_each_entry_safe(back, tmp,
7070                                              &rec->backref_tree, node) {
7071                 /* Don't care about full backrefs (poor unloved backrefs) */
7072                 if (back->full_backref || !back->is_data)
7073                         continue;
7074
7075                 dback = to_data_backref(back);
7076
7077                 /* We found this one, we don't need to do a lookup */
7078                 if (dback->found_ref)
7079                         continue;
7080
7081                 key.objectid = dback->root;
7082                 key.type = BTRFS_ROOT_ITEM_KEY;
7083                 key.offset = (u64)-1;
7084
7085                 root = btrfs_read_fs_root(info, &key);
7086
7087                 /* No root, definitely a bad ref, skip */
7088                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7089                         continue;
7090                 /* Other err, exit */
7091                 if (IS_ERR(root))
7092                         return PTR_ERR(root);
7093
7094                 key.objectid = dback->owner;
7095                 key.type = BTRFS_EXTENT_DATA_KEY;
7096                 key.offset = dback->offset;
7097                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7098                 if (ret) {
7099                         btrfs_release_path(path);
7100                         if (ret < 0)
7101                                 return ret;
7102                         /* Didn't find it, we can carry on */
7103                         ret = 0;
7104                         continue;
7105                 }
7106
7107                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7108                                     struct btrfs_file_extent_item);
7109                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7110                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7111                 btrfs_release_path(path);
7112                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7113                 if (cache) {
7114                         struct extent_record *tmp;
7115
7116                         tmp = container_of(cache, struct extent_record, cache);
7117
7118                         /*
7119                          * If we found an extent record for the bytenr for this
7120                          * particular backref then we can't add it to our
7121                          * current extent record.  We only want to add backrefs
7122                          * that don't have a corresponding extent item in the
7123                          * extent tree since they likely belong to this record
7124                          * and we need to fix it if it doesn't match bytenrs.
7125                          */
7126                         if  (tmp->found_rec)
7127                                 continue;
7128                 }
7129
7130                 dback->found_ref += 1;
7131                 dback->disk_bytenr = bytenr;
7132                 dback->bytes = bytes;
7133
7134                 /*
7135                  * Set this so the verify backref code knows not to trust the
7136                  * values in this backref.
7137                  */
7138                 back->broken = 1;
7139         }
7140
7141         return 0;
7142 }
7143
7144 /*
7145  * Record orphan data ref into corresponding root.
7146  *
7147  * Return 0 if the extent item contains data ref and recorded.
7148  * Return 1 if the extent item contains no useful data ref
7149  *   On that case, it may contains only shared_dataref or metadata backref
7150  *   or the file extent exists(this should be handled by the extent bytenr
7151  *   recovery routine)
7152  * Return <0 if something goes wrong.
7153  */
7154 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7155                                       struct extent_record *rec)
7156 {
7157         struct btrfs_key key;
7158         struct btrfs_root *dest_root;
7159         struct extent_backref *back, *tmp;
7160         struct data_backref *dback;
7161         struct orphan_data_extent *orphan;
7162         struct btrfs_path path;
7163         int recorded_data_ref = 0;
7164         int ret = 0;
7165
7166         if (rec->metadata)
7167                 return 1;
7168         btrfs_init_path(&path);
7169         rbtree_postorder_for_each_entry_safe(back, tmp,
7170                                              &rec->backref_tree, node) {
7171                 if (back->full_backref || !back->is_data ||
7172                     !back->found_extent_tree)
7173                         continue;
7174                 dback = to_data_backref(back);
7175                 if (dback->found_ref)
7176                         continue;
7177                 key.objectid = dback->root;
7178                 key.type = BTRFS_ROOT_ITEM_KEY;
7179                 key.offset = (u64)-1;
7180
7181                 dest_root = btrfs_read_fs_root(fs_info, &key);
7182
7183                 /* For non-exist root we just skip it */
7184                 if (IS_ERR(dest_root) || !dest_root)
7185                         continue;
7186
7187                 key.objectid = dback->owner;
7188                 key.type = BTRFS_EXTENT_DATA_KEY;
7189                 key.offset = dback->offset;
7190
7191                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
7192                 btrfs_release_path(&path);
7193                 /*
7194                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7195                  * we need to record it for inode/file extent rebuild.
7196                  * For ret > 0, we record it only for file extent rebuild.
7197                  * For ret == 0, the file extent exists but only bytenr
7198                  * mismatch, let the original bytenr fix routine to handle,
7199                  * don't record it.
7200                  */
7201                 if (ret == 0)
7202                         continue;
7203                 ret = 0;
7204                 orphan = malloc(sizeof(*orphan));
7205                 if (!orphan) {
7206                         ret = -ENOMEM;
7207                         goto out;
7208                 }
7209                 INIT_LIST_HEAD(&orphan->list);
7210                 orphan->root = dback->root;
7211                 orphan->objectid = dback->owner;
7212                 orphan->offset = dback->offset;
7213                 orphan->disk_bytenr = rec->cache.start;
7214                 orphan->disk_len = rec->cache.size;
7215                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7216                 recorded_data_ref = 1;
7217         }
7218 out:
7219         btrfs_release_path(&path);
7220         if (!ret)
7221                 return !recorded_data_ref;
7222         else
7223                 return ret;
7224 }
7225
7226 /*
7227  * when an incorrect extent item is found, this will delete
7228  * all of the existing entries for it and recreate them
7229  * based on what the tree scan found.
7230  */
7231 static int fixup_extent_refs(struct btrfs_fs_info *info,
7232                              struct cache_tree *extent_cache,
7233                              struct extent_record *rec)
7234 {
7235         struct btrfs_trans_handle *trans = NULL;
7236         int ret;
7237         struct btrfs_path path;
7238         struct cache_extent *cache;
7239         struct extent_backref *back, *tmp;
7240         int allocated = 0;
7241         u64 flags = 0;
7242
7243         if (rec->flag_block_full_backref)
7244                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7245
7246         btrfs_init_path(&path);
7247         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7248                 /*
7249                  * Sometimes the backrefs themselves are so broken they don't
7250                  * get attached to any meaningful rec, so first go back and
7251                  * check any of our backrefs that we couldn't find and throw
7252                  * them into the list if we find the backref so that
7253                  * verify_backrefs can figure out what to do.
7254                  */
7255                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
7256                 if (ret < 0)
7257                         goto out;
7258         }
7259
7260         /* step one, make sure all of the backrefs agree */
7261         ret = verify_backrefs(info, &path, rec);
7262         if (ret < 0)
7263                 goto out;
7264
7265         trans = btrfs_start_transaction(info->extent_root, 1);
7266         if (IS_ERR(trans)) {
7267                 ret = PTR_ERR(trans);
7268                 goto out;
7269         }
7270
7271         /* step two, delete all the existing records */
7272         ret = delete_extent_records(trans, info->extent_root, &path,
7273                                     rec->start);
7274
7275         if (ret < 0)
7276                 goto out;
7277
7278         /* was this block corrupt?  If so, don't add references to it */
7279         cache = lookup_cache_extent(info->corrupt_blocks,
7280                                     rec->start, rec->max_size);
7281         if (cache) {
7282                 ret = 0;
7283                 goto out;
7284         }
7285
7286         /* step three, recreate all the refs we did find */
7287         rbtree_postorder_for_each_entry_safe(back, tmp,
7288                                              &rec->backref_tree, node) {
7289                 /*
7290                  * if we didn't find any references, don't create a
7291                  * new extent record
7292                  */
7293                 if (!back->found_ref)
7294                         continue;
7295
7296                 rec->bad_full_backref = 0;
7297                 ret = record_extent(trans, info, &path, rec, back, allocated,
7298                                     flags);
7299                 allocated = 1;
7300
7301                 if (ret)
7302                         goto out;
7303         }
7304 out:
7305         if (trans) {
7306                 int err = btrfs_commit_transaction(trans, info->extent_root);
7307
7308                 if (!ret)
7309                         ret = err;
7310         }
7311
7312         if (!ret)
7313                 fprintf(stderr, "Repaired extent references for %llu\n",
7314                                 (unsigned long long)rec->start);
7315
7316         btrfs_release_path(&path);
7317         return ret;
7318 }
7319
7320 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7321                               struct extent_record *rec)
7322 {
7323         struct btrfs_trans_handle *trans;
7324         struct btrfs_root *root = fs_info->extent_root;
7325         struct btrfs_path path;
7326         struct btrfs_extent_item *ei;
7327         struct btrfs_key key;
7328         u64 flags;
7329         int ret = 0;
7330
7331         key.objectid = rec->start;
7332         if (rec->metadata) {
7333                 key.type = BTRFS_METADATA_ITEM_KEY;
7334                 key.offset = rec->info_level;
7335         } else {
7336                 key.type = BTRFS_EXTENT_ITEM_KEY;
7337                 key.offset = rec->max_size;
7338         }
7339
7340         trans = btrfs_start_transaction(root, 0);
7341         if (IS_ERR(trans))
7342                 return PTR_ERR(trans);
7343
7344         btrfs_init_path(&path);
7345         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
7346         if (ret < 0) {
7347                 btrfs_release_path(&path);
7348                 btrfs_commit_transaction(trans, root);
7349                 return ret;
7350         } else if (ret) {
7351                 fprintf(stderr, "Didn't find extent for %llu\n",
7352                         (unsigned long long)rec->start);
7353                 btrfs_release_path(&path);
7354                 btrfs_commit_transaction(trans, root);
7355                 return -ENOENT;
7356         }
7357
7358         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
7359                             struct btrfs_extent_item);
7360         flags = btrfs_extent_flags(path.nodes[0], ei);
7361         if (rec->flag_block_full_backref) {
7362                 fprintf(stderr, "setting full backref on %llu\n",
7363                         (unsigned long long)key.objectid);
7364                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7365         } else {
7366                 fprintf(stderr, "clearing full backref on %llu\n",
7367                         (unsigned long long)key.objectid);
7368                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7369         }
7370         btrfs_set_extent_flags(path.nodes[0], ei, flags);
7371         btrfs_mark_buffer_dirty(path.nodes[0]);
7372         btrfs_release_path(&path);
7373         ret = btrfs_commit_transaction(trans, root);
7374         if (!ret)
7375                 fprintf(stderr, "Repaired extent flags for %llu\n",
7376                                 (unsigned long long)rec->start);
7377
7378         return ret;
7379 }
7380
7381 /* right now we only prune from the extent allocation tree */
7382 static int prune_one_block(struct btrfs_trans_handle *trans,
7383                            struct btrfs_fs_info *info,
7384                            struct btrfs_corrupt_block *corrupt)
7385 {
7386         int ret;
7387         struct btrfs_path path;
7388         struct extent_buffer *eb;
7389         u64 found;
7390         int slot;
7391         int nritems;
7392         int level = corrupt->level + 1;
7393
7394         btrfs_init_path(&path);
7395 again:
7396         /* we want to stop at the parent to our busted block */
7397         path.lowest_level = level;
7398
7399         ret = btrfs_search_slot(trans, info->extent_root,
7400                                 &corrupt->key, &path, -1, 1);
7401
7402         if (ret < 0)
7403                 goto out;
7404
7405         eb = path.nodes[level];
7406         if (!eb) {
7407                 ret = -ENOENT;
7408                 goto out;
7409         }
7410
7411         /*
7412          * hopefully the search gave us the block we want to prune,
7413          * lets try that first
7414          */
7415         slot = path.slots[level];
7416         found =  btrfs_node_blockptr(eb, slot);
7417         if (found == corrupt->cache.start)
7418                 goto del_ptr;
7419
7420         nritems = btrfs_header_nritems(eb);
7421
7422         /* the search failed, lets scan this node and hope we find it */
7423         for (slot = 0; slot < nritems; slot++) {
7424                 found =  btrfs_node_blockptr(eb, slot);
7425                 if (found == corrupt->cache.start)
7426                         goto del_ptr;
7427         }
7428         /*
7429          * We couldn't find the bad block.
7430          * TODO: search all the nodes for pointers to this block
7431          */
7432         if (eb == info->extent_root->node) {
7433                 ret = -ENOENT;
7434                 goto out;
7435         } else {
7436                 level++;
7437                 btrfs_release_path(&path);
7438                 goto again;
7439         }
7440
7441 del_ptr:
7442         printk("deleting pointer to block %llu\n", corrupt->cache.start);
7443         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
7444
7445 out:
7446         btrfs_release_path(&path);
7447         return ret;
7448 }
7449
7450 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7451 {
7452         struct btrfs_trans_handle *trans = NULL;
7453         struct cache_extent *cache;
7454         struct btrfs_corrupt_block *corrupt;
7455
7456         while (1) {
7457                 cache = search_cache_extent(info->corrupt_blocks, 0);
7458                 if (!cache)
7459                         break;
7460                 if (!trans) {
7461                         trans = btrfs_start_transaction(info->extent_root, 1);
7462                         if (IS_ERR(trans))
7463                                 return PTR_ERR(trans);
7464                 }
7465                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7466                 prune_one_block(trans, info, corrupt);
7467                 remove_cache_extent(info->corrupt_blocks, cache);
7468         }
7469         if (trans)
7470                 return btrfs_commit_transaction(trans, info->extent_root);
7471         return 0;
7472 }
7473
7474 static int check_extent_refs(struct btrfs_root *root,
7475                              struct cache_tree *extent_cache)
7476 {
7477         struct extent_record *rec;
7478         struct cache_extent *cache;
7479         int ret = 0;
7480         int had_dups = 0;
7481         int err = 0;
7482
7483         if (repair) {
7484                 /*
7485                  * if we're doing a repair, we have to make sure
7486                  * we don't allocate from the problem extents.
7487                  * In the worst case, this will be all the
7488                  * extents in the FS
7489                  */
7490                 cache = search_cache_extent(extent_cache, 0);
7491                 while (cache) {
7492                         rec = container_of(cache, struct extent_record, cache);
7493                         set_extent_dirty(root->fs_info->excluded_extents,
7494                                          rec->start,
7495                                          rec->start + rec->max_size - 1);
7496                         cache = next_cache_extent(cache);
7497                 }
7498
7499                 /* pin down all the corrupted blocks too */
7500                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7501                 while (cache) {
7502                         set_extent_dirty(root->fs_info->excluded_extents,
7503                                          cache->start,
7504                                          cache->start + cache->size - 1);
7505                         cache = next_cache_extent(cache);
7506                 }
7507                 prune_corrupt_blocks(root->fs_info);
7508                 reset_cached_block_groups(root->fs_info);
7509         }
7510
7511         reset_cached_block_groups(root->fs_info);
7512
7513         /*
7514          * We need to delete any duplicate entries we find first otherwise we
7515          * could mess up the extent tree when we have backrefs that actually
7516          * belong to a different extent item and not the weird duplicate one.
7517          */
7518         while (repair && !list_empty(&duplicate_extents)) {
7519                 rec = to_extent_record(duplicate_extents.next);
7520                 list_del_init(&rec->list);
7521
7522                 /* Sometimes we can find a backref before we find an actual
7523                  * extent, so we need to process it a little bit to see if there
7524                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7525                  * if this is a backref screwup.  If we need to delete stuff
7526                  * process_duplicates() will return 0, otherwise it will return
7527                  * 1 and we
7528                  */
7529                 if (process_duplicates(extent_cache, rec))
7530                         continue;
7531                 ret = delete_duplicate_records(root, rec);
7532                 if (ret < 0)
7533                         return ret;
7534                 /*
7535                  * delete_duplicate_records will return the number of entries
7536                  * deleted, so if it's greater than 0 then we know we actually
7537                  * did something and we need to remove.
7538                  */
7539                 if (ret)
7540                         had_dups = 1;
7541         }
7542
7543         if (had_dups)
7544                 return -EAGAIN;
7545
7546         while (1) {
7547                 int cur_err = 0;
7548                 int fix = 0;
7549
7550                 cache = search_cache_extent(extent_cache, 0);
7551                 if (!cache)
7552                         break;
7553                 rec = container_of(cache, struct extent_record, cache);
7554                 if (rec->num_duplicates) {
7555                         fprintf(stderr,
7556                                 "extent item %llu has multiple extent items\n",
7557                                 (unsigned long long)rec->start);
7558                         cur_err = 1;
7559                 }
7560
7561                 if (rec->refs != rec->extent_item_refs) {
7562                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7563                                 (unsigned long long)rec->start,
7564                                 (unsigned long long)rec->nr);
7565                         fprintf(stderr, "extent item %llu, found %llu\n",
7566                                 (unsigned long long)rec->extent_item_refs,
7567                                 (unsigned long long)rec->refs);
7568                         ret = record_orphan_data_extents(root->fs_info, rec);
7569                         if (ret < 0)
7570                                 goto repair_abort;
7571                         fix = ret;
7572                         cur_err = 1;
7573                 }
7574                 if (all_backpointers_checked(rec, 1)) {
7575                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7576                                 (unsigned long long)rec->start,
7577                                 (unsigned long long)rec->nr);
7578                         fix = 1;
7579                         cur_err = 1;
7580                 }
7581                 if (!rec->owner_ref_checked) {
7582                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7583                                 (unsigned long long)rec->start,
7584                                 (unsigned long long)rec->nr);
7585                         fix = 1;
7586                         cur_err = 1;
7587                 }
7588
7589                 if (repair && fix) {
7590                         ret = fixup_extent_refs(root->fs_info, extent_cache,
7591                                                 rec);
7592                         if (ret)
7593                                 goto repair_abort;
7594                 }
7595
7596
7597                 if (rec->bad_full_backref) {
7598                         fprintf(stderr, "bad full backref, on [%llu]\n",
7599                                 (unsigned long long)rec->start);
7600                         if (repair) {
7601                                 ret = fixup_extent_flags(root->fs_info, rec);
7602                                 if (ret)
7603                                         goto repair_abort;
7604                                 fix = 1;
7605                         }
7606                         cur_err = 1;
7607                 }
7608                 /*
7609                  * Although it's not a extent ref's problem, we reuse this
7610                  * routine for error reporting.
7611                  * No repair function yet.
7612                  */
7613                 if (rec->crossing_stripes) {
7614                         fprintf(stderr,
7615                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7616                                 rec->start, rec->start + rec->max_size);
7617                         cur_err = 1;
7618                 }
7619
7620                 if (rec->wrong_chunk_type) {
7621                         fprintf(stderr,
7622                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7623                                 rec->start, rec->start + rec->max_size);
7624                         cur_err = 1;
7625                 }
7626
7627                 err = cur_err;
7628                 remove_cache_extent(extent_cache, cache);
7629                 free_all_extent_backrefs(rec);
7630                 if (!init_extent_tree && repair && (!cur_err || fix))
7631                         clear_extent_dirty(root->fs_info->excluded_extents,
7632                                            rec->start,
7633                                            rec->start + rec->max_size - 1);
7634                 free(rec);
7635         }
7636 repair_abort:
7637         if (repair) {
7638                 if (ret && ret != -EAGAIN) {
7639                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7640                         exit(1);
7641                 } else if (!ret) {
7642                         struct btrfs_trans_handle *trans;
7643
7644                         root = root->fs_info->extent_root;
7645                         trans = btrfs_start_transaction(root, 1);
7646                         if (IS_ERR(trans)) {
7647                                 ret = PTR_ERR(trans);
7648                                 goto repair_abort;
7649                         }
7650
7651                         ret = btrfs_fix_block_accounting(trans, root);
7652                         if (ret)
7653                                 goto repair_abort;
7654                         ret = btrfs_commit_transaction(trans, root);
7655                         if (ret)
7656                                 goto repair_abort;
7657                 }
7658                 return ret;
7659         }
7660
7661         if (err)
7662                 err = -EIO;
7663         return err;
7664 }
7665
7666 /*
7667  * Check the chunk with its block group/dev list ref:
7668  * Return 0 if all refs seems valid.
7669  * Return 1 if part of refs seems valid, need later check for rebuild ref
7670  * like missing block group and needs to search extent tree to rebuild them.
7671  * Return -1 if essential refs are missing and unable to rebuild.
7672  */
7673 static int check_chunk_refs(struct chunk_record *chunk_rec,
7674                             struct block_group_tree *block_group_cache,
7675                             struct device_extent_tree *dev_extent_cache,
7676                             int silent)
7677 {
7678         struct cache_extent *block_group_item;
7679         struct block_group_record *block_group_rec;
7680         struct cache_extent *dev_extent_item;
7681         struct device_extent_record *dev_extent_rec;
7682         u64 devid;
7683         u64 offset;
7684         u64 length;
7685         int metadump_v2 = 0;
7686         int i;
7687         int ret = 0;
7688
7689         block_group_item = lookup_cache_extent(&block_group_cache->tree,
7690                                                chunk_rec->offset,
7691                                                chunk_rec->length);
7692         if (block_group_item) {
7693                 block_group_rec = container_of(block_group_item,
7694                                                struct block_group_record,
7695                                                cache);
7696                 if (chunk_rec->length != block_group_rec->offset ||
7697                     chunk_rec->offset != block_group_rec->objectid ||
7698                     (!metadump_v2 &&
7699                      chunk_rec->type_flags != block_group_rec->flags)) {
7700                         if (!silent)
7701                                 fprintf(stderr,
7702                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7703                                         chunk_rec->objectid,
7704                                         chunk_rec->type,
7705                                         chunk_rec->offset,
7706                                         chunk_rec->length,
7707                                         chunk_rec->offset,
7708                                         chunk_rec->type_flags,
7709                                         block_group_rec->objectid,
7710                                         block_group_rec->type,
7711                                         block_group_rec->offset,
7712                                         block_group_rec->offset,
7713                                         block_group_rec->objectid,
7714                                         block_group_rec->flags);
7715                         ret = -1;
7716                 } else {
7717                         list_del_init(&block_group_rec->list);
7718                         chunk_rec->bg_rec = block_group_rec;
7719                 }
7720         } else {
7721                 if (!silent)
7722                         fprintf(stderr,
7723                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7724                                 chunk_rec->objectid,
7725                                 chunk_rec->type,
7726                                 chunk_rec->offset,
7727                                 chunk_rec->length,
7728                                 chunk_rec->offset,
7729                                 chunk_rec->type_flags);
7730                 ret = 1;
7731         }
7732
7733         if (metadump_v2)
7734                 return ret;
7735
7736         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7737                                     chunk_rec->num_stripes);
7738         for (i = 0; i < chunk_rec->num_stripes; ++i) {
7739                 devid = chunk_rec->stripes[i].devid;
7740                 offset = chunk_rec->stripes[i].offset;
7741                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7742                                                        devid, offset, length);
7743                 if (dev_extent_item) {
7744                         dev_extent_rec = container_of(dev_extent_item,
7745                                                 struct device_extent_record,
7746                                                 cache);
7747                         if (dev_extent_rec->objectid != devid ||
7748                             dev_extent_rec->offset != offset ||
7749                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
7750                             dev_extent_rec->length != length) {
7751                                 if (!silent)
7752                                         fprintf(stderr,
7753                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7754                                                 chunk_rec->objectid,
7755                                                 chunk_rec->type,
7756                                                 chunk_rec->offset,
7757                                                 chunk_rec->stripes[i].devid,
7758                                                 chunk_rec->stripes[i].offset,
7759                                                 dev_extent_rec->objectid,
7760                                                 dev_extent_rec->offset,
7761                                                 dev_extent_rec->length);
7762                                 ret = -1;
7763                         } else {
7764                                 list_move(&dev_extent_rec->chunk_list,
7765                                           &chunk_rec->dextents);
7766                         }
7767                 } else {
7768                         if (!silent)
7769                                 fprintf(stderr,
7770                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7771                                         chunk_rec->objectid,
7772                                         chunk_rec->type,
7773                                         chunk_rec->offset,
7774                                         chunk_rec->stripes[i].devid,
7775                                         chunk_rec->stripes[i].offset);
7776                         ret = -1;
7777                 }
7778         }
7779         return ret;
7780 }
7781
7782 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7783 int check_chunks(struct cache_tree *chunk_cache,
7784                  struct block_group_tree *block_group_cache,
7785                  struct device_extent_tree *dev_extent_cache,
7786                  struct list_head *good, struct list_head *bad,
7787                  struct list_head *rebuild, int silent)
7788 {
7789         struct cache_extent *chunk_item;
7790         struct chunk_record *chunk_rec;
7791         struct block_group_record *bg_rec;
7792         struct device_extent_record *dext_rec;
7793         int err;
7794         int ret = 0;
7795
7796         chunk_item = first_cache_extent(chunk_cache);
7797         while (chunk_item) {
7798                 chunk_rec = container_of(chunk_item, struct chunk_record,
7799                                          cache);
7800                 err = check_chunk_refs(chunk_rec, block_group_cache,
7801                                        dev_extent_cache, silent);
7802                 if (err < 0)
7803                         ret = err;
7804                 if (err == 0 && good)
7805                         list_add_tail(&chunk_rec->list, good);
7806                 if (err > 0 && rebuild)
7807                         list_add_tail(&chunk_rec->list, rebuild);
7808                 if (err < 0 && bad)
7809                         list_add_tail(&chunk_rec->list, bad);
7810                 chunk_item = next_cache_extent(chunk_item);
7811         }
7812
7813         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7814                 if (!silent)
7815                         fprintf(stderr,
7816                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7817                                 bg_rec->objectid,
7818                                 bg_rec->offset,
7819                                 bg_rec->flags);
7820                 if (!ret)
7821                         ret = 1;
7822         }
7823
7824         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7825                             chunk_list) {
7826                 if (!silent)
7827                         fprintf(stderr,
7828                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7829                                 dext_rec->objectid,
7830                                 dext_rec->offset,
7831                                 dext_rec->length);
7832                 if (!ret)
7833                         ret = 1;
7834         }
7835         return ret;
7836 }
7837
7838
7839 static int check_device_used(struct device_record *dev_rec,
7840                              struct device_extent_tree *dext_cache)
7841 {
7842         struct cache_extent *cache;
7843         struct device_extent_record *dev_extent_rec;
7844         u64 total_byte = 0;
7845
7846         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7847         while (cache) {
7848                 dev_extent_rec = container_of(cache,
7849                                               struct device_extent_record,
7850                                               cache);
7851                 if (dev_extent_rec->objectid != dev_rec->devid)
7852                         break;
7853
7854                 list_del_init(&dev_extent_rec->device_list);
7855                 total_byte += dev_extent_rec->length;
7856                 cache = next_cache_extent(cache);
7857         }
7858
7859         if (total_byte != dev_rec->byte_used) {
7860                 fprintf(stderr,
7861                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7862                         total_byte, dev_rec->byte_used, dev_rec->objectid,
7863                         dev_rec->type, dev_rec->offset);
7864                 return -1;
7865         } else {
7866                 return 0;
7867         }
7868 }
7869
7870 /*
7871  * Unlike device size alignment check above, some super total_bytes check
7872  * failure can lead to mount failure for newer kernel.
7873  *
7874  * So this function will return the error for a fatal super total_bytes problem.
7875  */
7876 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
7877 {
7878         struct btrfs_device *dev;
7879         struct list_head *dev_list = &fs_info->fs_devices->devices;
7880         u64 total_bytes = 0;
7881         u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
7882
7883         list_for_each_entry(dev, dev_list, dev_list)
7884                 total_bytes += dev->total_bytes;
7885
7886         /* Important check, which can cause unmountable fs */
7887         if (super_bytes < total_bytes) {
7888                 error("super total bytes %llu smaller than real device(s) size %llu",
7889                         super_bytes, total_bytes);
7890                 error("mounting this fs may fail for newer kernels");
7891                 error("this can be fixed by 'btrfs rescue fix-device-size'");
7892                 return false;
7893         }
7894
7895         /*
7896          * Optional check, just to make everything aligned and match with each
7897          * other.
7898          *
7899          * For a btrfs-image restored fs, we don't need to check it anyway.
7900          */
7901         if (btrfs_super_flags(fs_info->super_copy) &
7902             (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
7903                 return true;
7904         if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
7905             !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
7906             super_bytes != total_bytes) {
7907                 warning("minor unaligned/mismatch device size detected");
7908                 warning(
7909                 "recommended to use 'btrfs rescue fix-device-size' to fix it");
7910         }
7911         return true;
7912 }
7913
7914 /* check btrfs_dev_item -> btrfs_dev_extent */
7915 static int check_devices(struct rb_root *dev_cache,
7916                          struct device_extent_tree *dev_extent_cache)
7917 {
7918         struct rb_node *dev_node;
7919         struct device_record *dev_rec;
7920         struct device_extent_record *dext_rec;
7921         int err;
7922         int ret = 0;
7923
7924         dev_node = rb_first(dev_cache);
7925         while (dev_node) {
7926                 dev_rec = container_of(dev_node, struct device_record, node);
7927                 err = check_device_used(dev_rec, dev_extent_cache);
7928                 if (err)
7929                         ret = err;
7930
7931                 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
7932                                          global_info->sectorsize);
7933                 dev_node = rb_next(dev_node);
7934         }
7935         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7936                             device_list) {
7937                 fprintf(stderr,
7938                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7939                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
7940                 if (!ret)
7941                         ret = 1;
7942         }
7943         return ret;
7944 }
7945
7946 static int add_root_item_to_list(struct list_head *head,
7947                                   u64 objectid, u64 bytenr, u64 last_snapshot,
7948                                   u8 level, u8 drop_level,
7949                                   struct btrfs_key *drop_key)
7950 {
7951         struct root_item_record *ri_rec;
7952
7953         ri_rec = malloc(sizeof(*ri_rec));
7954         if (!ri_rec)
7955                 return -ENOMEM;
7956         ri_rec->bytenr = bytenr;
7957         ri_rec->objectid = objectid;
7958         ri_rec->level = level;
7959         ri_rec->drop_level = drop_level;
7960         ri_rec->last_snapshot = last_snapshot;
7961         if (drop_key)
7962                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
7963         list_add_tail(&ri_rec->list, head);
7964
7965         return 0;
7966 }
7967
7968 static void free_root_item_list(struct list_head *list)
7969 {
7970         struct root_item_record *ri_rec;
7971
7972         while (!list_empty(list)) {
7973                 ri_rec = list_first_entry(list, struct root_item_record,
7974                                           list);
7975                 list_del_init(&ri_rec->list);
7976                 free(ri_rec);
7977         }
7978 }
7979
7980 static int deal_root_from_list(struct list_head *list,
7981                                struct btrfs_root *root,
7982                                struct block_info *bits,
7983                                int bits_nr,
7984                                struct cache_tree *pending,
7985                                struct cache_tree *seen,
7986                                struct cache_tree *reada,
7987                                struct cache_tree *nodes,
7988                                struct cache_tree *extent_cache,
7989                                struct cache_tree *chunk_cache,
7990                                struct rb_root *dev_cache,
7991                                struct block_group_tree *block_group_cache,
7992                                struct device_extent_tree *dev_extent_cache)
7993 {
7994         int ret = 0;
7995         u64 last;
7996
7997         while (!list_empty(list)) {
7998                 struct root_item_record *rec;
7999                 struct extent_buffer *buf;
8000
8001                 rec = list_entry(list->next,
8002                                  struct root_item_record, list);
8003                 last = 0;
8004                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
8005                 if (!extent_buffer_uptodate(buf)) {
8006                         free_extent_buffer(buf);
8007                         ret = -EIO;
8008                         break;
8009                 }
8010                 ret = add_root_to_pending(buf, extent_cache, pending,
8011                                     seen, nodes, rec->objectid);
8012                 if (ret < 0)
8013                         break;
8014                 /*
8015                  * To rebuild extent tree, we need deal with snapshot
8016                  * one by one, otherwise we deal with node firstly which
8017                  * can maximize readahead.
8018                  */
8019                 while (1) {
8020                         ret = run_next_block(root, bits, bits_nr, &last,
8021                                              pending, seen, reada, nodes,
8022                                              extent_cache, chunk_cache,
8023                                              dev_cache, block_group_cache,
8024                                              dev_extent_cache, rec);
8025                         if (ret != 0)
8026                                 break;
8027                 }
8028                 free_extent_buffer(buf);
8029                 list_del(&rec->list);
8030                 free(rec);
8031                 if (ret < 0)
8032                         break;
8033         }
8034         while (ret >= 0) {
8035                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8036                                      reada, nodes, extent_cache, chunk_cache,
8037                                      dev_cache, block_group_cache,
8038                                      dev_extent_cache, NULL);
8039                 if (ret != 0) {
8040                         if (ret > 0)
8041                                 ret = 0;
8042                         break;
8043                 }
8044         }
8045         return ret;
8046 }
8047
8048 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
8049 {
8050         struct rb_root dev_cache;
8051         struct cache_tree chunk_cache;
8052         struct block_group_tree block_group_cache;
8053         struct device_extent_tree dev_extent_cache;
8054         struct cache_tree extent_cache;
8055         struct cache_tree seen;
8056         struct cache_tree pending;
8057         struct cache_tree reada;
8058         struct cache_tree nodes;
8059         struct extent_io_tree excluded_extents;
8060         struct cache_tree corrupt_blocks;
8061         struct btrfs_path path;
8062         struct btrfs_key key;
8063         struct btrfs_key found_key;
8064         int ret, err = 0;
8065         struct block_info *bits;
8066         int bits_nr;
8067         struct extent_buffer *leaf;
8068         int slot;
8069         struct btrfs_root_item ri;
8070         struct list_head dropping_trees;
8071         struct list_head normal_trees;
8072         struct btrfs_root *root1;
8073         struct btrfs_root *root;
8074         u64 objectid;
8075         u8 level;
8076
8077         root = fs_info->fs_root;
8078         dev_cache = RB_ROOT;
8079         cache_tree_init(&chunk_cache);
8080         block_group_tree_init(&block_group_cache);
8081         device_extent_tree_init(&dev_extent_cache);
8082
8083         cache_tree_init(&extent_cache);
8084         cache_tree_init(&seen);
8085         cache_tree_init(&pending);
8086         cache_tree_init(&nodes);
8087         cache_tree_init(&reada);
8088         cache_tree_init(&corrupt_blocks);
8089         extent_io_tree_init(&excluded_extents);
8090         INIT_LIST_HEAD(&dropping_trees);
8091         INIT_LIST_HEAD(&normal_trees);
8092
8093         if (repair) {
8094                 fs_info->excluded_extents = &excluded_extents;
8095                 fs_info->fsck_extent_cache = &extent_cache;
8096                 fs_info->free_extent_hook = free_extent_hook;
8097                 fs_info->corrupt_blocks = &corrupt_blocks;
8098         }
8099
8100         bits_nr = 1024;
8101         bits = malloc(bits_nr * sizeof(struct block_info));
8102         if (!bits) {
8103                 perror("malloc");
8104                 exit(1);
8105         }
8106
8107         if (ctx.progress_enabled) {
8108                 ctx.tp = TASK_EXTENTS;
8109                 task_start(ctx.info);
8110         }
8111
8112 again:
8113         root1 = fs_info->tree_root;
8114         level = btrfs_header_level(root1->node);
8115         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8116                                     root1->node->start, 0, level, 0, NULL);
8117         if (ret < 0)
8118                 goto out;
8119         root1 = fs_info->chunk_root;
8120         level = btrfs_header_level(root1->node);
8121         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8122                                     root1->node->start, 0, level, 0, NULL);
8123         if (ret < 0)
8124                 goto out;
8125         btrfs_init_path(&path);
8126         key.offset = 0;
8127         key.objectid = 0;
8128         key.type = BTRFS_ROOT_ITEM_KEY;
8129         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
8130         if (ret < 0)
8131                 goto out;
8132         while (1) {
8133                 leaf = path.nodes[0];
8134                 slot = path.slots[0];
8135                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8136                         ret = btrfs_next_leaf(root, &path);
8137                         if (ret != 0)
8138                                 break;
8139                         leaf = path.nodes[0];
8140                         slot = path.slots[0];
8141                 }
8142                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8143                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8144                         unsigned long offset;
8145                         u64 last_snapshot;
8146
8147                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8148                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8149                         last_snapshot = btrfs_root_last_snapshot(&ri);
8150                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8151                                 level = btrfs_root_level(&ri);
8152                                 ret = add_root_item_to_list(&normal_trees,
8153                                                 found_key.objectid,
8154                                                 btrfs_root_bytenr(&ri),
8155                                                 last_snapshot, level,
8156                                                 0, NULL);
8157                                 if (ret < 0)
8158                                         goto out;
8159                         } else {
8160                                 level = btrfs_root_level(&ri);
8161                                 objectid = found_key.objectid;
8162                                 btrfs_disk_key_to_cpu(&found_key,
8163                                                       &ri.drop_progress);
8164                                 ret = add_root_item_to_list(&dropping_trees,
8165                                                 objectid,
8166                                                 btrfs_root_bytenr(&ri),
8167                                                 last_snapshot, level,
8168                                                 ri.drop_level, &found_key);
8169                                 if (ret < 0)
8170                                         goto out;
8171                         }
8172                 }
8173                 path.slots[0]++;
8174         }
8175         btrfs_release_path(&path);
8176
8177         /*
8178          * check_block can return -EAGAIN if it fixes something, please keep
8179          * this in mind when dealing with return values from these functions, if
8180          * we get -EAGAIN we want to fall through and restart the loop.
8181          */
8182         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8183                                   &seen, &reada, &nodes, &extent_cache,
8184                                   &chunk_cache, &dev_cache, &block_group_cache,
8185                                   &dev_extent_cache);
8186         if (ret < 0) {
8187                 if (ret == -EAGAIN)
8188                         goto loop;
8189                 goto out;
8190         }
8191         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8192                                   &pending, &seen, &reada, &nodes,
8193                                   &extent_cache, &chunk_cache, &dev_cache,
8194                                   &block_group_cache, &dev_extent_cache);
8195         if (ret < 0) {
8196                 if (ret == -EAGAIN)
8197                         goto loop;
8198                 goto out;
8199         }
8200
8201         ret = check_chunks(&chunk_cache, &block_group_cache,
8202                            &dev_extent_cache, NULL, NULL, NULL, 0);
8203         if (ret) {
8204                 if (ret == -EAGAIN)
8205                         goto loop;
8206                 err = ret;
8207         }
8208
8209         ret = check_extent_refs(root, &extent_cache);
8210         if (ret < 0) {
8211                 if (ret == -EAGAIN)
8212                         goto loop;
8213                 goto out;
8214         }
8215
8216         ret = check_devices(&dev_cache, &dev_extent_cache);
8217         if (ret && err)
8218                 ret = err;
8219
8220 out:
8221         task_stop(ctx.info);
8222         if (repair) {
8223                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
8224                 extent_io_tree_cleanup(&excluded_extents);
8225                 fs_info->fsck_extent_cache = NULL;
8226                 fs_info->free_extent_hook = NULL;
8227                 fs_info->corrupt_blocks = NULL;
8228                 fs_info->excluded_extents = NULL;
8229         }
8230         free(bits);
8231         free_chunk_cache_tree(&chunk_cache);
8232         free_device_cache_tree(&dev_cache);
8233         free_block_group_tree(&block_group_cache);
8234         free_device_extent_tree(&dev_extent_cache);
8235         free_extent_cache_tree(&seen);
8236         free_extent_cache_tree(&pending);
8237         free_extent_cache_tree(&reada);
8238         free_extent_cache_tree(&nodes);
8239         free_root_item_list(&normal_trees);
8240         free_root_item_list(&dropping_trees);
8241         return ret;
8242 loop:
8243         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
8244         free_extent_cache_tree(&seen);
8245         free_extent_cache_tree(&pending);
8246         free_extent_cache_tree(&reada);
8247         free_extent_cache_tree(&nodes);
8248         free_chunk_cache_tree(&chunk_cache);
8249         free_block_group_tree(&block_group_cache);
8250         free_device_cache_tree(&dev_cache);
8251         free_device_extent_tree(&dev_extent_cache);
8252         free_extent_record_cache(&extent_cache);
8253         free_root_item_list(&normal_trees);
8254         free_root_item_list(&dropping_trees);
8255         extent_io_tree_cleanup(&excluded_extents);
8256         goto again;
8257 }
8258
8259 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
8260 {
8261         int ret;
8262
8263         if (!ctx.progress_enabled)
8264                 fprintf(stderr, "checking extents\n");
8265         if (check_mode == CHECK_MODE_LOWMEM)
8266                 ret = check_chunks_and_extents_lowmem(fs_info);
8267         else
8268                 ret = check_chunks_and_extents(fs_info);
8269
8270         /* Also repair device size related problems */
8271         if (repair && !ret) {
8272                 ret = btrfs_fix_device_and_super_size(fs_info);
8273                 if (ret > 0)
8274                         ret = 0;
8275         }
8276         return ret;
8277 }
8278
8279 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8280                            struct btrfs_root *root, int overwrite)
8281 {
8282         struct extent_buffer *c;
8283         struct extent_buffer *old = root->node;
8284         int level;
8285         int ret;
8286         struct btrfs_disk_key disk_key = {0,0,0};
8287
8288         level = 0;
8289
8290         if (overwrite) {
8291                 c = old;
8292                 extent_buffer_get(c);
8293                 goto init;
8294         }
8295         c = btrfs_alloc_free_block(trans, root,
8296                                    root->fs_info->nodesize,
8297                                    root->root_key.objectid,
8298                                    &disk_key, level, 0, 0);
8299         if (IS_ERR(c)) {
8300                 c = old;
8301                 extent_buffer_get(c);
8302                 overwrite = 1;
8303         }
8304 init:
8305         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8306         btrfs_set_header_level(c, level);
8307         btrfs_set_header_bytenr(c, c->start);
8308         btrfs_set_header_generation(c, trans->transid);
8309         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8310         btrfs_set_header_owner(c, root->root_key.objectid);
8311
8312         write_extent_buffer(c, root->fs_info->fsid,
8313                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8314
8315         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8316                             btrfs_header_chunk_tree_uuid(c),
8317                             BTRFS_UUID_SIZE);
8318
8319         btrfs_mark_buffer_dirty(c);
8320         /*
8321          * this case can happen in the following case:
8322          *
8323          * 1.overwrite previous root.
8324          *
8325          * 2.reinit reloc data root, this is because we skip pin
8326          * down reloc data tree before which means we can allocate
8327          * same block bytenr here.
8328          */
8329         if (old->start == c->start) {
8330                 btrfs_set_root_generation(&root->root_item,
8331                                           trans->transid);
8332                 root->root_item.level = btrfs_header_level(root->node);
8333                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8334                                         &root->root_key, &root->root_item);
8335                 if (ret) {
8336                         free_extent_buffer(c);
8337                         return ret;
8338                 }
8339         }
8340         free_extent_buffer(old);
8341         root->node = c;
8342         add_root_to_dirty_list(root);
8343         return 0;
8344 }
8345
8346 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8347                                 struct extent_buffer *eb, int tree_root)
8348 {
8349         struct extent_buffer *tmp;
8350         struct btrfs_root_item *ri;
8351         struct btrfs_key key;
8352         u64 bytenr;
8353         int level = btrfs_header_level(eb);
8354         int nritems;
8355         int ret;
8356         int i;
8357
8358         /*
8359          * If we have pinned this block before, don't pin it again.
8360          * This can not only avoid forever loop with broken filesystem
8361          * but also give us some speedups.
8362          */
8363         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8364                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8365                 return 0;
8366
8367         btrfs_pin_extent(fs_info, eb->start, eb->len);
8368
8369         nritems = btrfs_header_nritems(eb);
8370         for (i = 0; i < nritems; i++) {
8371                 if (level == 0) {
8372                         btrfs_item_key_to_cpu(eb, &key, i);
8373                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8374                                 continue;
8375                         /* Skip the extent root and reloc roots */
8376                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8377                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8378                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8379                                 continue;
8380                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8381                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8382
8383                         /*
8384                          * If at any point we start needing the real root we
8385                          * will have to build a stump root for the root we are
8386                          * in, but for now this doesn't actually use the root so
8387                          * just pass in extent_root.
8388                          */
8389                         tmp = read_tree_block(fs_info, bytenr, 0);
8390                         if (!extent_buffer_uptodate(tmp)) {
8391                                 fprintf(stderr, "Error reading root block\n");
8392                                 return -EIO;
8393                         }
8394                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8395                         free_extent_buffer(tmp);
8396                         if (ret)
8397                                 return ret;
8398                 } else {
8399                         bytenr = btrfs_node_blockptr(eb, i);
8400
8401                         /* If we aren't the tree root don't read the block */
8402                         if (level == 1 && !tree_root) {
8403                                 btrfs_pin_extent(fs_info, bytenr,
8404                                                 fs_info->nodesize);
8405                                 continue;
8406                         }
8407
8408                         tmp = read_tree_block(fs_info, bytenr, 0);
8409                         if (!extent_buffer_uptodate(tmp)) {
8410                                 fprintf(stderr, "Error reading tree block\n");
8411                                 return -EIO;
8412                         }
8413                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8414                         free_extent_buffer(tmp);
8415                         if (ret)
8416                                 return ret;
8417                 }
8418         }
8419
8420         return 0;
8421 }
8422
8423 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8424 {
8425         int ret;
8426
8427         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8428         if (ret)
8429                 return ret;
8430
8431         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8432 }
8433
8434 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8435 {
8436         struct btrfs_block_group_cache *cache;
8437         struct btrfs_path path;
8438         struct extent_buffer *leaf;
8439         struct btrfs_chunk *chunk;
8440         struct btrfs_key key;
8441         int ret;
8442         u64 start;
8443
8444         btrfs_init_path(&path);
8445         key.objectid = 0;
8446         key.type = BTRFS_CHUNK_ITEM_KEY;
8447         key.offset = 0;
8448         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
8449         if (ret < 0) {
8450                 btrfs_release_path(&path);
8451                 return ret;
8452         }
8453
8454         /*
8455          * We do this in case the block groups were screwed up and had alloc
8456          * bits that aren't actually set on the chunks.  This happens with
8457          * restored images every time and could happen in real life I guess.
8458          */
8459         fs_info->avail_data_alloc_bits = 0;
8460         fs_info->avail_metadata_alloc_bits = 0;
8461         fs_info->avail_system_alloc_bits = 0;
8462
8463         /* First we need to create the in-memory block groups */
8464         while (1) {
8465                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8466                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
8467                         if (ret < 0) {
8468                                 btrfs_release_path(&path);
8469                                 return ret;
8470                         }
8471                         if (ret) {
8472                                 ret = 0;
8473                                 break;
8474                         }
8475                 }
8476                 leaf = path.nodes[0];
8477                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8478                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8479                         path.slots[0]++;
8480                         continue;
8481                 }
8482
8483                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
8484                 btrfs_add_block_group(fs_info, 0,
8485                                       btrfs_chunk_type(leaf, chunk), key.offset,
8486                                       btrfs_chunk_length(leaf, chunk));
8487                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8488                                  key.offset + btrfs_chunk_length(leaf, chunk));
8489                 path.slots[0]++;
8490         }
8491         start = 0;
8492         while (1) {
8493                 cache = btrfs_lookup_first_block_group(fs_info, start);
8494                 if (!cache)
8495                         break;
8496                 cache->cached = 1;
8497                 start = cache->key.objectid + cache->key.offset;
8498         }
8499
8500         btrfs_release_path(&path);
8501         return 0;
8502 }
8503
8504 static int reset_balance(struct btrfs_trans_handle *trans,
8505                          struct btrfs_fs_info *fs_info)
8506 {
8507         struct btrfs_root *root = fs_info->tree_root;
8508         struct btrfs_path path;
8509         struct extent_buffer *leaf;
8510         struct btrfs_key key;
8511         int del_slot, del_nr = 0;
8512         int ret;
8513         int found = 0;
8514
8515         btrfs_init_path(&path);
8516         key.objectid = BTRFS_BALANCE_OBJECTID;
8517         key.type = BTRFS_BALANCE_ITEM_KEY;
8518         key.offset = 0;
8519         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8520         if (ret) {
8521                 if (ret > 0)
8522                         ret = 0;
8523                 if (!ret)
8524                         goto reinit_data_reloc;
8525                 else
8526                         goto out;
8527         }
8528
8529         ret = btrfs_del_item(trans, root, &path);
8530         if (ret)
8531                 goto out;
8532         btrfs_release_path(&path);
8533
8534         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8535         key.type = BTRFS_ROOT_ITEM_KEY;
8536         key.offset = 0;
8537         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8538         if (ret < 0)
8539                 goto out;
8540         while (1) {
8541                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8542                         if (!found)
8543                                 break;
8544
8545                         if (del_nr) {
8546                                 ret = btrfs_del_items(trans, root, &path,
8547                                                       del_slot, del_nr);
8548                                 del_nr = 0;
8549                                 if (ret)
8550                                         goto out;
8551                         }
8552                         key.offset++;
8553                         btrfs_release_path(&path);
8554
8555                         found = 0;
8556                         ret = btrfs_search_slot(trans, root, &key, &path,
8557                                                 -1, 1);
8558                         if (ret < 0)
8559                                 goto out;
8560                         continue;
8561                 }
8562                 found = 1;
8563                 leaf = path.nodes[0];
8564                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8565                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8566                         break;
8567                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8568                         path.slots[0]++;
8569                         continue;
8570                 }
8571                 if (!del_nr) {
8572                         del_slot = path.slots[0];
8573                         del_nr = 1;
8574                 } else {
8575                         del_nr++;
8576                 }
8577                 path.slots[0]++;
8578         }
8579
8580         if (del_nr) {
8581                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
8582                 if (ret)
8583                         goto out;
8584         }
8585         btrfs_release_path(&path);
8586
8587 reinit_data_reloc:
8588         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8589         key.type = BTRFS_ROOT_ITEM_KEY;
8590         key.offset = (u64)-1;
8591         root = btrfs_read_fs_root(fs_info, &key);
8592         if (IS_ERR(root)) {
8593                 fprintf(stderr, "Error reading data reloc tree\n");
8594                 ret = PTR_ERR(root);
8595                 goto out;
8596         }
8597         record_root_in_trans(trans, root);
8598         ret = btrfs_fsck_reinit_root(trans, root, 0);
8599         if (ret)
8600                 goto out;
8601         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8602 out:
8603         btrfs_release_path(&path);
8604         return ret;
8605 }
8606
8607 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8608                               struct btrfs_fs_info *fs_info)
8609 {
8610         u64 start = 0;
8611         int ret;
8612
8613         /*
8614          * The only reason we don't do this is because right now we're just
8615          * walking the trees we find and pinning down their bytes, we don't look
8616          * at any of the leaves.  In order to do mixed groups we'd have to check
8617          * the leaves of any fs roots and pin down the bytes for any file
8618          * extents we find.  Not hard but why do it if we don't have to?
8619          */
8620         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
8621                 fprintf(stderr, "We don't support re-initing the extent tree "
8622                         "for mixed block groups yet, please notify a btrfs "
8623                         "developer you want to do this so they can add this "
8624                         "functionality.\n");
8625                 return -EINVAL;
8626         }
8627
8628         /*
8629          * first we need to walk all of the trees except the extent tree and pin
8630          * down the bytes that are in use so we don't overwrite any existing
8631          * metadata.
8632          */
8633         ret = pin_metadata_blocks(fs_info);
8634         if (ret) {
8635                 fprintf(stderr, "error pinning down used bytes\n");
8636                 return ret;
8637         }
8638
8639         /*
8640          * Need to drop all the block groups since we're going to recreate all
8641          * of them again.
8642          */
8643         btrfs_free_block_groups(fs_info);
8644         ret = reset_block_groups(fs_info);
8645         if (ret) {
8646                 fprintf(stderr, "error resetting the block groups\n");
8647                 return ret;
8648         }
8649
8650         /* Ok we can allocate now, reinit the extent root */
8651         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8652         if (ret) {
8653                 fprintf(stderr, "extent root initialization failed\n");
8654                 /*
8655                  * When the transaction code is updated we should end the
8656                  * transaction, but for now progs only knows about commit so
8657                  * just return an error.
8658                  */
8659                 return ret;
8660         }
8661
8662         /*
8663          * Now we have all the in-memory block groups setup so we can make
8664          * allocations properly, and the metadata we care about is safe since we
8665          * pinned all of it above.
8666          */
8667         while (1) {
8668                 struct btrfs_block_group_cache *cache;
8669
8670                 cache = btrfs_lookup_first_block_group(fs_info, start);
8671                 if (!cache)
8672                         break;
8673                 start = cache->key.objectid + cache->key.offset;
8674                 ret = btrfs_insert_item(trans, fs_info->extent_root,
8675                                         &cache->key, &cache->item,
8676                                         sizeof(cache->item));
8677                 if (ret) {
8678                         fprintf(stderr, "Error adding block group\n");
8679                         return ret;
8680                 }
8681                 btrfs_extent_post_op(trans, fs_info->extent_root);
8682         }
8683
8684         ret = reset_balance(trans, fs_info);
8685         if (ret)
8686                 fprintf(stderr, "error resetting the pending balance\n");
8687
8688         return ret;
8689 }
8690
8691 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8692 {
8693         struct btrfs_path path;
8694         struct btrfs_trans_handle *trans;
8695         struct btrfs_key key;
8696         int ret;
8697
8698         printf("Recowing metadata block %llu\n", eb->start);
8699         key.objectid = btrfs_header_owner(eb);
8700         key.type = BTRFS_ROOT_ITEM_KEY;
8701         key.offset = (u64)-1;
8702
8703         root = btrfs_read_fs_root(root->fs_info, &key);
8704         if (IS_ERR(root)) {
8705                 fprintf(stderr, "Couldn't find owner root %llu\n",
8706                         key.objectid);
8707                 return PTR_ERR(root);
8708         }
8709
8710         trans = btrfs_start_transaction(root, 1);
8711         if (IS_ERR(trans))
8712                 return PTR_ERR(trans);
8713
8714         btrfs_init_path(&path);
8715         path.lowest_level = btrfs_header_level(eb);
8716         if (path.lowest_level)
8717                 btrfs_node_key_to_cpu(eb, &key, 0);
8718         else
8719                 btrfs_item_key_to_cpu(eb, &key, 0);
8720
8721         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
8722         btrfs_commit_transaction(trans, root);
8723         btrfs_release_path(&path);
8724         return ret;
8725 }
8726
8727 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8728 {
8729         struct btrfs_path path;
8730         struct btrfs_trans_handle *trans;
8731         struct btrfs_key key;
8732         int ret;
8733
8734         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8735                bad->key.type, bad->key.offset);
8736         key.objectid = bad->root_id;
8737         key.type = BTRFS_ROOT_ITEM_KEY;
8738         key.offset = (u64)-1;
8739
8740         root = btrfs_read_fs_root(root->fs_info, &key);
8741         if (IS_ERR(root)) {
8742                 fprintf(stderr, "Couldn't find owner root %llu\n",
8743                         key.objectid);
8744                 return PTR_ERR(root);
8745         }
8746
8747         trans = btrfs_start_transaction(root, 1);
8748         if (IS_ERR(trans))
8749                 return PTR_ERR(trans);
8750
8751         btrfs_init_path(&path);
8752         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
8753         if (ret) {
8754                 if (ret > 0)
8755                         ret = 0;
8756                 goto out;
8757         }
8758         ret = btrfs_del_item(trans, root, &path);
8759 out:
8760         btrfs_commit_transaction(trans, root);
8761         btrfs_release_path(&path);
8762         return ret;
8763 }
8764
8765 static int zero_log_tree(struct btrfs_root *root)
8766 {
8767         struct btrfs_trans_handle *trans;
8768         int ret;
8769
8770         trans = btrfs_start_transaction(root, 1);
8771         if (IS_ERR(trans)) {
8772                 ret = PTR_ERR(trans);
8773                 return ret;
8774         }
8775         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8776         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8777         ret = btrfs_commit_transaction(trans, root);
8778         return ret;
8779 }
8780
8781 static int populate_csum(struct btrfs_trans_handle *trans,
8782                          struct btrfs_root *csum_root, char *buf, u64 start,
8783                          u64 len)
8784 {
8785         struct btrfs_fs_info *fs_info = csum_root->fs_info;
8786         u64 offset = 0;
8787         u64 sectorsize;
8788         int ret = 0;
8789
8790         while (offset < len) {
8791                 sectorsize = fs_info->sectorsize;
8792                 ret = read_extent_data(fs_info, buf, start + offset,
8793                                        &sectorsize, 0);
8794                 if (ret)
8795                         break;
8796                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8797                                             start + offset, buf, sectorsize);
8798                 if (ret)
8799                         break;
8800                 offset += sectorsize;
8801         }
8802         return ret;
8803 }
8804
8805 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8806                                       struct btrfs_root *csum_root,
8807                                       struct btrfs_root *cur_root)
8808 {
8809         struct btrfs_path path;
8810         struct btrfs_key key;
8811         struct extent_buffer *node;
8812         struct btrfs_file_extent_item *fi;
8813         char *buf = NULL;
8814         u64 start = 0;
8815         u64 len = 0;
8816         int slot = 0;
8817         int ret = 0;
8818
8819         buf = malloc(cur_root->fs_info->sectorsize);
8820         if (!buf)
8821                 return -ENOMEM;
8822
8823         btrfs_init_path(&path);
8824         key.objectid = 0;
8825         key.offset = 0;
8826         key.type = 0;
8827         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
8828         if (ret < 0)
8829                 goto out;
8830         /* Iterate all regular file extents and fill its csum */
8831         while (1) {
8832                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8833
8834                 if (key.type != BTRFS_EXTENT_DATA_KEY)
8835                         goto next;
8836                 node = path.nodes[0];
8837                 slot = path.slots[0];
8838                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8839                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8840                         goto next;
8841                 start = btrfs_file_extent_disk_bytenr(node, fi);
8842                 len = btrfs_file_extent_disk_num_bytes(node, fi);
8843
8844                 ret = populate_csum(trans, csum_root, buf, start, len);
8845                 if (ret == -EEXIST)
8846                         ret = 0;
8847                 if (ret < 0)
8848                         goto out;
8849 next:
8850                 /*
8851                  * TODO: if next leaf is corrupted, jump to nearest next valid
8852                  * leaf.
8853                  */
8854                 ret = btrfs_next_item(cur_root, &path);
8855                 if (ret < 0)
8856                         goto out;
8857                 if (ret > 0) {
8858                         ret = 0;
8859                         goto out;
8860                 }
8861         }
8862
8863 out:
8864         btrfs_release_path(&path);
8865         free(buf);
8866         return ret;
8867 }
8868
8869 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8870                                   struct btrfs_root *csum_root)
8871 {
8872         struct btrfs_fs_info *fs_info = csum_root->fs_info;
8873         struct btrfs_path path;
8874         struct btrfs_root *tree_root = fs_info->tree_root;
8875         struct btrfs_root *cur_root;
8876         struct extent_buffer *node;
8877         struct btrfs_key key;
8878         int slot = 0;
8879         int ret = 0;
8880
8881         btrfs_init_path(&path);
8882         key.objectid = BTRFS_FS_TREE_OBJECTID;
8883         key.offset = 0;
8884         key.type = BTRFS_ROOT_ITEM_KEY;
8885         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
8886         if (ret < 0)
8887                 goto out;
8888         if (ret > 0) {
8889                 ret = -ENOENT;
8890                 goto out;
8891         }
8892
8893         while (1) {
8894                 node = path.nodes[0];
8895                 slot = path.slots[0];
8896                 btrfs_item_key_to_cpu(node, &key, slot);
8897                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8898                         goto out;
8899                 if (key.type != BTRFS_ROOT_ITEM_KEY)
8900                         goto next;
8901                 if (!is_fstree(key.objectid))
8902                         goto next;
8903                 key.offset = (u64)-1;
8904
8905                 cur_root = btrfs_read_fs_root(fs_info, &key);
8906                 if (IS_ERR(cur_root) || !cur_root) {
8907                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8908                                 key.objectid);
8909                         goto out;
8910                 }
8911                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8912                                 cur_root);
8913                 if (ret < 0)
8914                         goto out;
8915 next:
8916                 ret = btrfs_next_item(tree_root, &path);
8917                 if (ret > 0) {
8918                         ret = 0;
8919                         goto out;
8920                 }
8921                 if (ret < 0)
8922                         goto out;
8923         }
8924
8925 out:
8926         btrfs_release_path(&path);
8927         return ret;
8928 }
8929
8930 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
8931                                       struct btrfs_root *csum_root)
8932 {
8933         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
8934         struct btrfs_path path;
8935         struct btrfs_extent_item *ei;
8936         struct extent_buffer *leaf;
8937         char *buf;
8938         struct btrfs_key key;
8939         int ret;
8940
8941         btrfs_init_path(&path);
8942         key.objectid = 0;
8943         key.type = BTRFS_EXTENT_ITEM_KEY;
8944         key.offset = 0;
8945         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8946         if (ret < 0) {
8947                 btrfs_release_path(&path);
8948                 return ret;
8949         }
8950
8951         buf = malloc(csum_root->fs_info->sectorsize);
8952         if (!buf) {
8953                 btrfs_release_path(&path);
8954                 return -ENOMEM;
8955         }
8956
8957         while (1) {
8958                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8959                         ret = btrfs_next_leaf(extent_root, &path);
8960                         if (ret < 0)
8961                                 break;
8962                         if (ret) {
8963                                 ret = 0;
8964                                 break;
8965                         }
8966                 }
8967                 leaf = path.nodes[0];
8968
8969                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8970                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8971                         path.slots[0]++;
8972                         continue;
8973                 }
8974
8975                 ei = btrfs_item_ptr(leaf, path.slots[0],
8976                                     struct btrfs_extent_item);
8977                 if (!(btrfs_extent_flags(leaf, ei) &
8978                       BTRFS_EXTENT_FLAG_DATA)) {
8979                         path.slots[0]++;
8980                         continue;
8981                 }
8982
8983                 ret = populate_csum(trans, csum_root, buf, key.objectid,
8984                                     key.offset);
8985                 if (ret)
8986                         break;
8987                 path.slots[0]++;
8988         }
8989
8990         btrfs_release_path(&path);
8991         free(buf);
8992         return ret;
8993 }
8994
8995 /*
8996  * Recalculate the csum and put it into the csum tree.
8997  *
8998  * Extent tree init will wipe out all the extent info, so in that case, we
8999  * can't depend on extent tree, but use fs tree.  If search_fs_tree is set, we
9000  * will use fs/subvol trees to init the csum tree.
9001  */
9002 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9003                           struct btrfs_root *csum_root,
9004                           int search_fs_tree)
9005 {
9006         if (search_fs_tree)
9007                 return fill_csum_tree_from_fs(trans, csum_root);
9008         else
9009                 return fill_csum_tree_from_extent(trans, csum_root);
9010 }
9011
9012 static void free_roots_info_cache(void)
9013 {
9014         if (!roots_info_cache)
9015                 return;
9016
9017         while (!cache_tree_empty(roots_info_cache)) {
9018                 struct cache_extent *entry;
9019                 struct root_item_info *rii;
9020
9021                 entry = first_cache_extent(roots_info_cache);
9022                 if (!entry)
9023                         break;
9024                 remove_cache_extent(roots_info_cache, entry);
9025                 rii = container_of(entry, struct root_item_info, cache_extent);
9026                 free(rii);
9027         }
9028
9029         free(roots_info_cache);
9030         roots_info_cache = NULL;
9031 }
9032
9033 static int build_roots_info_cache(struct btrfs_fs_info *info)
9034 {
9035         int ret = 0;
9036         struct btrfs_key key;
9037         struct extent_buffer *leaf;
9038         struct btrfs_path path;
9039
9040         if (!roots_info_cache) {
9041                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9042                 if (!roots_info_cache)
9043                         return -ENOMEM;
9044                 cache_tree_init(roots_info_cache);
9045         }
9046
9047         btrfs_init_path(&path);
9048         key.objectid = 0;
9049         key.type = BTRFS_EXTENT_ITEM_KEY;
9050         key.offset = 0;
9051         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
9052         if (ret < 0)
9053                 goto out;
9054         leaf = path.nodes[0];
9055
9056         while (1) {
9057                 struct btrfs_key found_key;
9058                 struct btrfs_extent_item *ei;
9059                 struct btrfs_extent_inline_ref *iref;
9060                 unsigned long item_end;
9061                 int slot = path.slots[0];
9062                 int type;
9063                 u64 flags;
9064                 u64 root_id;
9065                 u8 level;
9066                 struct cache_extent *entry;
9067                 struct root_item_info *rii;
9068
9069                 if (slot >= btrfs_header_nritems(leaf)) {
9070                         ret = btrfs_next_leaf(info->extent_root, &path);
9071                         if (ret < 0) {
9072                                 break;
9073                         } else if (ret) {
9074                                 ret = 0;
9075                                 break;
9076                         }
9077                         leaf = path.nodes[0];
9078                         slot = path.slots[0];
9079                 }
9080
9081                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9082
9083                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9084                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9085                         goto next;
9086
9087                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9088                 flags = btrfs_extent_flags(leaf, ei);
9089                 item_end = (unsigned long)ei + btrfs_item_size_nr(leaf, slot);
9090
9091                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9092                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9093                         goto next;
9094
9095                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9096                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9097                         level = found_key.offset;
9098                 } else {
9099                         struct btrfs_tree_block_info *binfo;
9100
9101                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
9102                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
9103                         level = btrfs_tree_block_level(leaf, binfo);
9104                 }
9105
9106                 /*
9107                  * It's a valid extent/metadata item that has no inline ref,
9108                  * but SHARED_BLOCK_REF or other shared references.
9109                  * So we need to do extra check to avoid reading beyond leaf
9110                  * boudnary.
9111                  */
9112                 if ((unsigned long)iref >= item_end)
9113                         goto next;
9114
9115                 /*
9116                  * For a root extent, it must be of the following type and the
9117                  * first (and only one) iref in the item.
9118                  */
9119                 type = btrfs_extent_inline_ref_type(leaf, iref);
9120                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9121                         goto next;
9122
9123                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9124                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9125                 if (!entry) {
9126                         rii = malloc(sizeof(struct root_item_info));
9127                         if (!rii) {
9128                                 ret = -ENOMEM;
9129                                 goto out;
9130                         }
9131                         rii->cache_extent.start = root_id;
9132                         rii->cache_extent.size = 1;
9133                         rii->level = (u8)-1;
9134                         entry = &rii->cache_extent;
9135                         ret = insert_cache_extent(roots_info_cache, entry);
9136                         ASSERT(ret == 0);
9137                 } else {
9138                         rii = container_of(entry, struct root_item_info,
9139                                            cache_extent);
9140                 }
9141
9142                 ASSERT(rii->cache_extent.start == root_id);
9143                 ASSERT(rii->cache_extent.size == 1);
9144
9145                 if (level > rii->level || rii->level == (u8)-1) {
9146                         rii->level = level;
9147                         rii->bytenr = found_key.objectid;
9148                         rii->gen = btrfs_extent_generation(leaf, ei);
9149                         rii->node_count = 1;
9150                 } else if (level == rii->level) {
9151                         rii->node_count++;
9152                 }
9153 next:
9154                 path.slots[0]++;
9155         }
9156
9157 out:
9158         btrfs_release_path(&path);
9159
9160         return ret;
9161 }
9162
9163 static int maybe_repair_root_item(struct btrfs_path *path,
9164                                   const struct btrfs_key *root_key,
9165                                   const int read_only_mode)
9166 {
9167         const u64 root_id = root_key->objectid;
9168         struct cache_extent *entry;
9169         struct root_item_info *rii;
9170         struct btrfs_root_item ri;
9171         unsigned long offset;
9172
9173         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9174         if (!entry) {
9175                 fprintf(stderr,
9176                         "Error: could not find extent items for root %llu\n",
9177                         root_key->objectid);
9178                 return -ENOENT;
9179         }
9180
9181         rii = container_of(entry, struct root_item_info, cache_extent);
9182         ASSERT(rii->cache_extent.start == root_id);
9183         ASSERT(rii->cache_extent.size == 1);
9184
9185         if (rii->node_count != 1) {
9186                 fprintf(stderr,
9187                         "Error: could not find btree root extent for root %llu\n",
9188                         root_id);
9189                 return -ENOENT;
9190         }
9191
9192         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9193         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9194
9195         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9196             btrfs_root_level(&ri) != rii->level ||
9197             btrfs_root_generation(&ri) != rii->gen) {
9198
9199                 /*
9200                  * If we're in repair mode but our caller told us to not update
9201                  * the root item, i.e. just check if it needs to be updated, don't
9202                  * print this message, since the caller will call us again shortly
9203                  * for the same root item without read only mode (the caller will
9204                  * open a transaction first).
9205                  */
9206                 if (!(read_only_mode && repair))
9207                         fprintf(stderr,
9208                                 "%sroot item for root %llu,"
9209                                 " current bytenr %llu, current gen %llu, current level %u,"
9210                                 " new bytenr %llu, new gen %llu, new level %u\n",
9211                                 (read_only_mode ? "" : "fixing "),
9212                                 root_id,
9213                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9214                                 btrfs_root_level(&ri),
9215                                 rii->bytenr, rii->gen, rii->level);
9216
9217                 if (btrfs_root_generation(&ri) > rii->gen) {
9218                         fprintf(stderr,
9219                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9220                                 root_id, btrfs_root_generation(&ri), rii->gen);
9221                         return -EINVAL;
9222                 }
9223
9224                 if (!read_only_mode) {
9225                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9226                         btrfs_set_root_level(&ri, rii->level);
9227                         btrfs_set_root_generation(&ri, rii->gen);
9228                         write_extent_buffer(path->nodes[0], &ri,
9229                                             offset, sizeof(ri));
9230                 }
9231
9232                 return 1;
9233         }
9234
9235         return 0;
9236 }
9237
9238 /*
9239  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9240  * caused read-only snapshots to be corrupted if they were created at a moment
9241  * when the source subvolume/snapshot had orphan items. The issue was that the
9242  * on-disk root items became incorrect, referring to the pre orphan cleanup root
9243  * node instead of the post orphan cleanup root node.
9244  * So this function, and its callees, just detects and fixes those cases. Even
9245  * though the regression was for read-only snapshots, this function applies to
9246  * any snapshot/subvolume root.
9247  * This must be run before any other repair code - not doing it so, makes other
9248  * repair code delete or modify backrefs in the extent tree for example, which
9249  * will result in an inconsistent fs after repairing the root items.
9250  */
9251 static int repair_root_items(struct btrfs_fs_info *info)
9252 {
9253         struct btrfs_path path;
9254         struct btrfs_key key;
9255         struct extent_buffer *leaf;
9256         struct btrfs_trans_handle *trans = NULL;
9257         int ret = 0;
9258         int bad_roots = 0;
9259         int need_trans = 0;
9260
9261         btrfs_init_path(&path);
9262
9263         ret = build_roots_info_cache(info);
9264         if (ret)
9265                 goto out;
9266
9267         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9268         key.type = BTRFS_ROOT_ITEM_KEY;
9269         key.offset = 0;
9270
9271 again:
9272         /*
9273          * Avoid opening and committing transactions if a leaf doesn't have
9274          * any root items that need to be fixed, so that we avoid rotating
9275          * backup roots unnecessarily.
9276          */
9277         if (need_trans) {
9278                 trans = btrfs_start_transaction(info->tree_root, 1);
9279                 if (IS_ERR(trans)) {
9280                         ret = PTR_ERR(trans);
9281                         goto out;
9282                 }
9283         }
9284
9285         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
9286                                 0, trans ? 1 : 0);
9287         if (ret < 0)
9288                 goto out;
9289         leaf = path.nodes[0];
9290
9291         while (1) {
9292                 struct btrfs_key found_key;
9293
9294                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
9295                         int no_more_keys = find_next_key(&path, &key);
9296
9297                         btrfs_release_path(&path);
9298                         if (trans) {
9299                                 ret = btrfs_commit_transaction(trans,
9300                                                                info->tree_root);
9301                                 trans = NULL;
9302                                 if (ret < 0)
9303                                         goto out;
9304                         }
9305                         need_trans = 0;
9306                         if (no_more_keys)
9307                                 break;
9308                         goto again;
9309                 }
9310
9311                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9312
9313                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9314                         goto next;
9315                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9316                         goto next;
9317
9318                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
9319                 if (ret < 0)
9320                         goto out;
9321                 if (ret) {
9322                         if (!trans && repair) {
9323                                 need_trans = 1;
9324                                 key = found_key;
9325                                 btrfs_release_path(&path);
9326                                 goto again;
9327                         }
9328                         bad_roots++;
9329                 }
9330 next:
9331                 path.slots[0]++;
9332         }
9333         ret = 0;
9334 out:
9335         free_roots_info_cache();
9336         btrfs_release_path(&path);
9337         if (trans)
9338                 btrfs_commit_transaction(trans, info->tree_root);
9339         if (ret < 0)
9340                 return ret;
9341
9342         return bad_roots;
9343 }
9344
9345 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
9346 {
9347         struct btrfs_trans_handle *trans;
9348         struct btrfs_block_group_cache *bg_cache;
9349         u64 current = 0;
9350         int ret = 0;
9351
9352         /* Clear all free space cache inodes and its extent data */
9353         while (1) {
9354                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
9355                 if (!bg_cache)
9356                         break;
9357                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
9358                 if (ret < 0)
9359                         return ret;
9360                 current = bg_cache->key.objectid + bg_cache->key.offset;
9361         }
9362
9363         /* Don't forget to set cache_generation to -1 */
9364         trans = btrfs_start_transaction(fs_info->tree_root, 0);
9365         if (IS_ERR(trans)) {
9366                 error("failed to update super block cache generation");
9367                 return PTR_ERR(trans);
9368         }
9369         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
9370         btrfs_commit_transaction(trans, fs_info->tree_root);
9371
9372         return ret;
9373 }
9374
9375 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
9376                 int clear_version)
9377 {
9378         int ret = 0;
9379
9380         if (clear_version == 1) {
9381                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
9382                         error(
9383                 "free space cache v2 detected, use --clear-space-cache v2");
9384                         ret = 1;
9385                         goto close_out;
9386                 }
9387                 printf("Clearing free space cache\n");
9388                 ret = clear_free_space_cache(fs_info);
9389                 if (ret) {
9390                         error("failed to clear free space cache");
9391                         ret = 1;
9392                 } else {
9393                         printf("Free space cache cleared\n");
9394                 }
9395         } else if (clear_version == 2) {
9396                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
9397                         printf("no free space cache v2 to clear\n");
9398                         ret = 0;
9399                         goto close_out;
9400                 }
9401                 printf("Clear free space cache v2\n");
9402                 ret = btrfs_clear_free_space_tree(fs_info);
9403                 if (ret) {
9404                         error("failed to clear free space cache v2: %d", ret);
9405                         ret = 1;
9406                 } else {
9407                         printf("free space cache v2 cleared\n");
9408                 }
9409         }
9410 close_out:
9411         return ret;
9412 }
9413
9414 const char * const cmd_check_usage[] = {
9415         "btrfs check [options] <device>",
9416         "Check structural integrity of a filesystem (unmounted).",
9417         "Check structural integrity of an unmounted filesystem. Verify internal",
9418         "trees' consistency and item connectivity. In the repair mode try to",
9419         "fix the problems found. ",
9420         "WARNING: the repair mode is considered dangerous",
9421         "",
9422         "-s|--super <superblock>     use this superblock copy",
9423         "-b|--backup                 use the first valid backup root copy",
9424         "--force                     skip mount checks, repair is not possible",
9425         "--repair                    try to repair the filesystem",
9426         "--readonly                  run in read-only mode (default)",
9427         "--init-csum-tree            create a new CRC tree",
9428         "--init-extent-tree          create a new extent tree",
9429         "--mode <MODE>               allows choice of memory/IO trade-offs",
9430         "                            where MODE is one of:",
9431         "                            original - read inodes and extents to memory (requires",
9432         "                                       more memory, does less IO)",
9433         "                            lowmem   - try to use less memory but read blocks again",
9434         "                                       when needed",
9435         "--check-data-csum           verify checksums of data blocks",
9436         "-Q|--qgroup-report          print a report on qgroup consistency",
9437         "-E|--subvol-extents <subvolid>",
9438         "                            print subvolume extents and sharing state",
9439         "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
9440         "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
9441         "-p|--progress               indicate progress",
9442         "--clear-space-cache v1|v2   clear space cache for v1 or v2",
9443         NULL
9444 };
9445
9446 int cmd_check(int argc, char **argv)
9447 {
9448         struct cache_tree root_cache;
9449         struct btrfs_root *root;
9450         struct btrfs_fs_info *info;
9451         u64 bytenr = 0;
9452         u64 subvolid = 0;
9453         u64 tree_root_bytenr = 0;
9454         u64 chunk_root_bytenr = 0;
9455         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9456         int ret = 0;
9457         int err = 0;
9458         u64 num;
9459         int init_csum_tree = 0;
9460         int readonly = 0;
9461         int clear_space_cache = 0;
9462         int qgroup_report = 0;
9463         int qgroups_repaired = 0;
9464         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
9465         int force = 0;
9466
9467         while(1) {
9468                 int c;
9469                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
9470                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
9471                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
9472                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
9473                         GETOPT_VAL_FORCE };
9474                 static const struct option long_options[] = {
9475                         { "super", required_argument, NULL, 's' },
9476                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
9477                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
9478                         { "init-csum-tree", no_argument, NULL,
9479                                 GETOPT_VAL_INIT_CSUM },
9480                         { "init-extent-tree", no_argument, NULL,
9481                                 GETOPT_VAL_INIT_EXTENT },
9482                         { "check-data-csum", no_argument, NULL,
9483                                 GETOPT_VAL_CHECK_CSUM },
9484                         { "backup", no_argument, NULL, 'b' },
9485                         { "subvol-extents", required_argument, NULL, 'E' },
9486                         { "qgroup-report", no_argument, NULL, 'Q' },
9487                         { "tree-root", required_argument, NULL, 'r' },
9488                         { "chunk-root", required_argument, NULL,
9489                                 GETOPT_VAL_CHUNK_TREE },
9490                         { "progress", no_argument, NULL, 'p' },
9491                         { "mode", required_argument, NULL,
9492                                 GETOPT_VAL_MODE },
9493                         { "clear-space-cache", required_argument, NULL,
9494                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
9495                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
9496                         { NULL, 0, NULL, 0}
9497                 };
9498
9499                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
9500                 if (c < 0)
9501                         break;
9502                 switch(c) {
9503                         case 'a': /* ignored */ break;
9504                         case 'b':
9505                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9506                                 break;
9507                         case 's':
9508                                 num = arg_strtou64(optarg);
9509                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9510                                         error(
9511                                         "super mirror should be less than %d",
9512                                                 BTRFS_SUPER_MIRROR_MAX);
9513                                         exit(1);
9514                                 }
9515                                 bytenr = btrfs_sb_offset(((int)num));
9516                                 printf("using SB copy %llu, bytenr %llu\n", num,
9517                                        (unsigned long long)bytenr);
9518                                 break;
9519                         case 'Q':
9520                                 qgroup_report = 1;
9521                                 break;
9522                         case 'E':
9523                                 subvolid = arg_strtou64(optarg);
9524                                 break;
9525                         case 'r':
9526                                 tree_root_bytenr = arg_strtou64(optarg);
9527                                 break;
9528                         case GETOPT_VAL_CHUNK_TREE:
9529                                 chunk_root_bytenr = arg_strtou64(optarg);
9530                                 break;
9531                         case 'p':
9532                                 ctx.progress_enabled = true;
9533                                 break;
9534                         case '?':
9535                         case 'h':
9536                                 usage(cmd_check_usage);
9537                         case GETOPT_VAL_REPAIR:
9538                                 printf("enabling repair mode\n");
9539                                 repair = 1;
9540                                 ctree_flags |= OPEN_CTREE_WRITES;
9541                                 break;
9542                         case GETOPT_VAL_READONLY:
9543                                 readonly = 1;
9544                                 break;
9545                         case GETOPT_VAL_INIT_CSUM:
9546                                 printf("Creating a new CRC tree\n");
9547                                 init_csum_tree = 1;
9548                                 repair = 1;
9549                                 ctree_flags |= OPEN_CTREE_WRITES;
9550                                 break;
9551                         case GETOPT_VAL_INIT_EXTENT:
9552                                 init_extent_tree = 1;
9553                                 ctree_flags |= (OPEN_CTREE_WRITES |
9554                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9555                                 repair = 1;
9556                                 break;
9557                         case GETOPT_VAL_CHECK_CSUM:
9558                                 check_data_csum = 1;
9559                                 break;
9560                         case GETOPT_VAL_MODE:
9561                                 check_mode = parse_check_mode(optarg);
9562                                 if (check_mode == CHECK_MODE_UNKNOWN) {
9563                                         error("unknown mode: %s", optarg);
9564                                         exit(1);
9565                                 }
9566                                 break;
9567                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
9568                                 if (strcmp(optarg, "v1") == 0) {
9569                                         clear_space_cache = 1;
9570                                 } else if (strcmp(optarg, "v2") == 0) {
9571                                         clear_space_cache = 2;
9572                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
9573                                 } else {
9574                                         error(
9575                 "invalid argument to --clear-space-cache, must be v1 or v2");
9576                                         exit(1);
9577                                 }
9578                                 ctree_flags |= OPEN_CTREE_WRITES;
9579                                 break;
9580                         case GETOPT_VAL_FORCE:
9581                                 force = 1;
9582                                 break;
9583                 }
9584         }
9585
9586         if (check_argc_exact(argc - optind, 1))
9587                 usage(cmd_check_usage);
9588
9589         if (ctx.progress_enabled) {
9590                 ctx.tp = TASK_NOTHING;
9591                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9592         }
9593
9594         /* This check is the only reason for --readonly to exist */
9595         if (readonly && repair) {
9596                 error("repair options are not compatible with --readonly");
9597                 exit(1);
9598         }
9599
9600         /*
9601          * experimental and dangerous
9602          */
9603         if (repair && check_mode == CHECK_MODE_LOWMEM)
9604                 warning("low-memory mode repair support is only partial");
9605
9606         radix_tree_init();
9607         cache_tree_init(&root_cache);
9608
9609         ret = check_mounted(argv[optind]);
9610         if (!force) {
9611                 if (ret < 0) {
9612                         error("could not check mount status: %s",
9613                                         strerror(-ret));
9614                         err |= !!ret;
9615                         goto err_out;
9616                 } else if (ret) {
9617                         error(
9618 "%s is currently mounted, use --force if you really intend to check the filesystem",
9619                                 argv[optind]);
9620                         ret = -EBUSY;
9621                         err |= !!ret;
9622                         goto err_out;
9623                 }
9624         } else {
9625                 if (repair) {
9626                         error("repair and --force is not yet supported");
9627                         ret = 1;
9628                         err |= !!ret;
9629                         goto err_out;
9630                 }
9631                 if (ret < 0) {
9632                         warning(
9633 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
9634                                 argv[optind]);
9635                 } else if (ret) {
9636                         warning(
9637                         "filesystem mounted, continuing because of --force");
9638                 }
9639                 /* A block device is mounted in exclusive mode by kernel */
9640                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
9641         }
9642
9643         /* only allow partial opening under repair mode */
9644         if (repair)
9645                 ctree_flags |= OPEN_CTREE_PARTIAL;
9646
9647         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9648                                   chunk_root_bytenr, ctree_flags);
9649         if (!info) {
9650                 error("cannot open file system");
9651                 ret = -EIO;
9652                 err |= !!ret;
9653                 goto err_out;
9654         }
9655
9656         global_info = info;
9657         root = info->fs_root;
9658         uuid_unparse(info->super_copy->fsid, uuidbuf);
9659
9660         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9661
9662         /*
9663          * Check the bare minimum before starting anything else that could rely
9664          * on it, namely the tree roots, any local consistency checks
9665          */
9666         if (!extent_buffer_uptodate(info->tree_root->node) ||
9667             !extent_buffer_uptodate(info->dev_root->node) ||
9668             !extent_buffer_uptodate(info->chunk_root->node)) {
9669                 error("critical roots corrupted, unable to check the filesystem");
9670                 err |= !!ret;
9671                 ret = -EIO;
9672                 goto close_out;
9673         }
9674
9675         if (clear_space_cache) {
9676                 ret = do_clear_free_space_cache(info, clear_space_cache);
9677                 err |= !!ret;
9678                 goto close_out;
9679         }
9680
9681         /*
9682          * repair mode will force us to commit transaction which
9683          * will make us fail to load log tree when mounting.
9684          */
9685         if (repair && btrfs_super_log_root(info->super_copy)) {
9686                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
9687                 if (!ret) {
9688                         ret = 1;
9689                         err |= !!ret;
9690                         goto close_out;
9691                 }
9692                 ret = zero_log_tree(root);
9693                 err |= !!ret;
9694                 if (ret) {
9695                         error("failed to zero log tree: %d", ret);
9696                         goto close_out;
9697                 }
9698         }
9699
9700         if (qgroup_report) {
9701                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9702                        uuidbuf);
9703                 ret = qgroup_verify_all(info);
9704                 err |= !!ret;
9705                 if (ret == 0)
9706                         report_qgroups(1);
9707                 goto close_out;
9708         }
9709         if (subvolid) {
9710                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9711                        subvolid, argv[optind], uuidbuf);
9712                 ret = print_extent_state(info, subvolid);
9713                 err |= !!ret;
9714                 goto close_out;
9715         }
9716
9717         if (init_extent_tree || init_csum_tree) {
9718                 struct btrfs_trans_handle *trans;
9719
9720                 trans = btrfs_start_transaction(info->extent_root, 0);
9721                 if (IS_ERR(trans)) {
9722                         error("error starting transaction");
9723                         ret = PTR_ERR(trans);
9724                         err |= !!ret;
9725                         goto close_out;
9726                 }
9727
9728                 if (init_extent_tree) {
9729                         printf("Creating a new extent tree\n");
9730                         ret = reinit_extent_tree(trans, info);
9731                         err |= !!ret;
9732                         if (ret)
9733                                 goto close_out;
9734                 }
9735
9736                 if (init_csum_tree) {
9737                         printf("Reinitialize checksum tree\n");
9738                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9739                         if (ret) {
9740                                 error("checksum tree initialization failed: %d",
9741                                                 ret);
9742                                 ret = -EIO;
9743                                 err |= !!ret;
9744                                 goto close_out;
9745                         }
9746
9747                         ret = fill_csum_tree(trans, info->csum_root,
9748                                              init_extent_tree);
9749                         err |= !!ret;
9750                         if (ret) {
9751                                 error("checksum tree refilling failed: %d", ret);
9752                                 return -EIO;
9753                         }
9754                 }
9755                 /*
9756                  * Ok now we commit and run the normal fsck, which will add
9757                  * extent entries for all of the items it finds.
9758                  */
9759                 ret = btrfs_commit_transaction(trans, info->extent_root);
9760                 err |= !!ret;
9761                 if (ret)
9762                         goto close_out;
9763         }
9764         if (!extent_buffer_uptodate(info->extent_root->node)) {
9765                 error("critical: extent_root, unable to check the filesystem");
9766                 ret = -EIO;
9767                 err |= !!ret;
9768                 goto close_out;
9769         }
9770         if (!extent_buffer_uptodate(info->csum_root->node)) {
9771                 error("critical: csum_root, unable to check the filesystem");
9772                 ret = -EIO;
9773                 err |= !!ret;
9774                 goto close_out;
9775         }
9776
9777         if (!init_extent_tree) {
9778                 ret = repair_root_items(info);
9779                 if (ret < 0) {
9780                         err = !!ret;
9781                         error("failed to repair root items: %s", strerror(-ret));
9782                         goto close_out;
9783                 }
9784                 if (repair) {
9785                         fprintf(stderr, "Fixed %d roots.\n", ret);
9786                         ret = 0;
9787                 } else if (ret > 0) {
9788                         fprintf(stderr,
9789                                 "Found %d roots with an outdated root item.\n",
9790                                 ret);
9791                         fprintf(stderr,
9792         "Please run a filesystem check with the option --repair to fix them.\n");
9793                         ret = 1;
9794                         err |= ret;
9795                         goto close_out;
9796                 }
9797         }
9798
9799         ret = do_check_chunks_and_extents(info);
9800         err |= !!ret;
9801         if (ret)
9802                 error(
9803                 "errors found in extent allocation tree or chunk allocation");
9804
9805         /* Only re-check super size after we checked and repaired the fs */
9806         err |= !is_super_size_valid(info);
9807
9808         if (!ctx.progress_enabled) {
9809                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
9810                         fprintf(stderr, "checking free space tree\n");
9811                 else
9812                         fprintf(stderr, "checking free space cache\n");
9813         }
9814         ret = check_space_cache(root);
9815         err |= !!ret;
9816         if (ret) {
9817                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
9818                         error("errors found in free space tree");
9819                 else
9820                         error("errors found in free space cache");
9821                 goto out;
9822         }
9823
9824         /*
9825          * We used to have to have these hole extents in between our real
9826          * extents so if we don't have this flag set we need to make sure there
9827          * are no gaps in the file extents for inodes, otherwise we can just
9828          * ignore it when this happens.
9829          */
9830         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
9831         ret = do_check_fs_roots(info, &root_cache);
9832         err |= !!ret;
9833         if (ret) {
9834                 error("errors found in fs roots");
9835                 goto out;
9836         }
9837
9838         fprintf(stderr, "checking csums\n");
9839         ret = check_csums(root);
9840         err |= !!ret;
9841         if (ret) {
9842                 error("errors found in csum tree");
9843                 goto out;
9844         }
9845
9846         fprintf(stderr, "checking root refs\n");
9847         /* For low memory mode, check_fs_roots_v2 handles root refs */
9848         if (check_mode != CHECK_MODE_LOWMEM) {
9849                 ret = check_root_refs(root, &root_cache);
9850                 err |= !!ret;
9851                 if (ret) {
9852                         error("errors found in root refs");
9853                         goto out;
9854                 }
9855         }
9856
9857         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9858                 struct extent_buffer *eb;
9859
9860                 eb = list_first_entry(&root->fs_info->recow_ebs,
9861                                       struct extent_buffer, recow);
9862                 list_del_init(&eb->recow);
9863                 ret = recow_extent_buffer(root, eb);
9864                 err |= !!ret;
9865                 if (ret) {
9866                         error("fails to fix transid errors");
9867                         break;
9868                 }
9869         }
9870
9871         while (!list_empty(&delete_items)) {
9872                 struct bad_item *bad;
9873
9874                 bad = list_first_entry(&delete_items, struct bad_item, list);
9875                 list_del_init(&bad->list);
9876                 if (repair) {
9877                         ret = delete_bad_item(root, bad);
9878                         err |= !!ret;
9879                 }
9880                 free(bad);
9881         }
9882
9883         if (info->quota_enabled) {
9884                 fprintf(stderr, "checking quota groups\n");
9885                 ret = qgroup_verify_all(info);
9886                 err |= !!ret;
9887                 if (ret) {
9888                         error("failed to check quota groups");
9889                         goto out;
9890                 }
9891                 report_qgroups(0);
9892                 ret = repair_qgroups(info, &qgroups_repaired);
9893                 err |= !!ret;
9894                 if (err) {
9895                         error("failed to repair quota groups");
9896                         goto out;
9897                 }
9898                 ret = 0;
9899         }
9900
9901         if (!list_empty(&root->fs_info->recow_ebs)) {
9902                 error("transid errors in file system");
9903                 ret = 1;
9904                 err |= !!ret;
9905         }
9906 out:
9907         printf("found %llu bytes used, ",
9908                (unsigned long long)bytes_used);
9909         if (err)
9910                 printf("error(s) found\n");
9911         else
9912                 printf("no error found\n");
9913         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9914         printf("total tree bytes: %llu\n",
9915                (unsigned long long)total_btree_bytes);
9916         printf("total fs tree bytes: %llu\n",
9917                (unsigned long long)total_fs_tree_bytes);
9918         printf("total extent tree bytes: %llu\n",
9919                (unsigned long long)total_extent_tree_bytes);
9920         printf("btree space waste bytes: %llu\n",
9921                (unsigned long long)btree_space_waste);
9922         printf("file data blocks allocated: %llu\n referenced %llu\n",
9923                 (unsigned long long)data_bytes_allocated,
9924                 (unsigned long long)data_bytes_referenced);
9925
9926         free_qgroup_counts();
9927         free_root_recs_tree(&root_cache);
9928 close_out:
9929         close_ctree(root);
9930 err_out:
9931         if (ctx.progress_enabled)
9932                 task_deinit(ctx.info);
9933
9934         return err;
9935 }