Btrfs-progs: check, fix false error reports for shared prealloc extents
[platform/upstream/btrfs-progs.git] / check / main.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46 #include "check/mode-common.h"
47 #include "check/mode-original.h"
48 #include "check/mode-lowmem.h"
49
/*
 * Phases of the check that the progress indicator can report.
 *
 * print_status_check() uses the value as an index into its table of phase
 * names and returns without printing anything when it sees TASK_NOTHING, so
 * TASK_NOTHING has no entry in that table.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* have to be the last element */
};
56
/* Context handed to the progress callbacks (see print_status_check()). */
struct task_ctx {
        /* NOTE(review): presumably non-zero when progress output was requested; not read in the visible code */
        int progress_enabled;
        /* Phase whose name print_status_check() prints */
        enum task_position tp;

        /* Handle passed to task_period_start() and task_period_wait() */
        struct task_info *info;
};
63
/*
 * File-scope state of the checker.
 *
 * NOTE(review): none of these are static, so they are presumably referenced
 * from other files of the check code -- confirm before changing linkage.
 */

/*
 * Byte counters.
 * NOTE(review): presumably statistics accumulated while checking; they are
 * not updated in the visible part of this file.
 */
u64 bytes_used = 0;
u64 total_csum_bytes = 0;
u64 total_btree_bytes = 0;
u64 total_fs_tree_bytes = 0;
u64 total_extent_tree_bytes = 0;
u64 btree_space_waste = 0;
u64 data_bytes_allocated = 0;
u64 data_bytes_referenced = 0;
/* List heads; NOTE(review): element types are not visible here */
LIST_HEAD(duplicate_extents);
LIST_HEAD(delete_items);
/* When non-zero, maybe_free_inode_rec() skips the file extent discount check */
int no_holes = 0;
int init_extent_tree = 0;
int check_data_csum = 0;
struct btrfs_fs_info *global_info;
/* Zero-initialized progress context */
struct task_ctx ctx = { 0 };
struct cache_tree *roots_info_cache = NULL;
80
/*
 * Check implementations selectable by name (see parse_check_mode()).
 * CHECK_MODE_UNKNOWN is what parse_check_mode() returns for an unrecognized
 * name; CHECK_MODE_DEFAULT is an alias for the original mode.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect for this run; starts out as the default mode */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
89
90 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
91 {
92         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
93         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
94         struct data_backref *back1 = to_data_backref(ext1);
95         struct data_backref *back2 = to_data_backref(ext2);
96
97         WARN_ON(!ext1->is_data);
98         WARN_ON(!ext2->is_data);
99
100         /* parent and root are a union, so this covers both */
101         if (back1->parent > back2->parent)
102                 return 1;
103         if (back1->parent < back2->parent)
104                 return -1;
105
106         /* This is a full backref and the parents match. */
107         if (back1->node.full_backref)
108                 return 0;
109
110         if (back1->owner > back2->owner)
111                 return 1;
112         if (back1->owner < back2->owner)
113                 return -1;
114
115         if (back1->offset > back2->offset)
116                 return 1;
117         if (back1->offset < back2->offset)
118                 return -1;
119
120         if (back1->found_ref && back2->found_ref) {
121                 if (back1->disk_bytenr > back2->disk_bytenr)
122                         return 1;
123                 if (back1->disk_bytenr < back2->disk_bytenr)
124                         return -1;
125
126                 if (back1->bytes > back2->bytes)
127                         return 1;
128                 if (back1->bytes < back2->bytes)
129                         return -1;
130         }
131
132         return 0;
133 }
134
135 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
136 {
137         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
138         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
139         struct tree_backref *back1 = to_tree_backref(ext1);
140         struct tree_backref *back2 = to_tree_backref(ext2);
141
142         WARN_ON(ext1->is_data);
143         WARN_ON(ext2->is_data);
144
145         /* parent and root are a union, so this covers both */
146         if (back1->parent > back2->parent)
147                 return 1;
148         if (back1->parent < back2->parent)
149                 return -1;
150
151         return 0;
152 }
153
154 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
155 {
156         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
157         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
158
159         if (ext1->is_data > ext2->is_data)
160                 return 1;
161
162         if (ext1->is_data < ext2->is_data)
163                 return -1;
164
165         if (ext1->full_backref > ext2->full_backref)
166                 return 1;
167         if (ext1->full_backref < ext2->full_backref)
168                 return -1;
169
170         if (ext1->is_data)
171                 return compare_data_backref(node1, node2);
172         else
173                 return compare_tree_backref(node1, node2);
174 }
175
176
177 static void *print_status_check(void *p)
178 {
179         struct task_ctx *priv = p;
180         const char work_indicator[] = { '.', 'o', 'O', 'o' };
181         uint32_t count = 0;
182         static char *task_position_string[] = {
183                 "checking extents",
184                 "checking free space cache",
185                 "checking fs roots",
186         };
187
188         task_period_start(priv->info, 1000 /* 1s */);
189
190         if (priv->tp == TASK_NOTHING)
191                 return NULL;
192
193         while (1) {
194                 printf("%s [%c]\r", task_position_string[priv->tp],
195                                 work_indicator[count % 4]);
196                 count++;
197                 fflush(stdout);
198                 task_period_wait(priv->info);
199         }
200         return NULL;
201 }
202
/*
 * Progress callback counterpart of print_status_check(): emit a newline on
 * stdout and flush it.  @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
        putchar('\n');
        fflush(stdout);

        return 0;
}
210
211 static enum btrfs_check_mode parse_check_mode(const char *str)
212 {
213         if (strcmp(str, "lowmem") == 0)
214                 return CHECK_MODE_LOWMEM;
215         if (strcmp(str, "orig") == 0)
216                 return CHECK_MODE_ORIGINAL;
217         if (strcmp(str, "original") == 0)
218                 return CHECK_MODE_ORIGINAL;
219
220         return CHECK_MODE_UNKNOWN;
221 }
222
223 /* Compatible function to allow reuse of old codes */
224 static u64 first_extent_gap(struct rb_root *holes)
225 {
226         struct file_extent_hole *hole;
227
228         if (RB_EMPTY_ROOT(holes))
229                 return (u64)-1;
230
231         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
232         return hole->start;
233 }
234
235 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
236 {
237         struct file_extent_hole *hole1;
238         struct file_extent_hole *hole2;
239
240         hole1 = rb_entry(node1, struct file_extent_hole, node);
241         hole2 = rb_entry(node2, struct file_extent_hole, node);
242
243         if (hole1->start > hole2->start)
244                 return -1;
245         if (hole1->start < hole2->start)
246                 return 1;
247         /* Now hole1->start == hole2->start */
248         if (hole1->len >= hole2->len)
249                 /*
250                  * Hole 1 will be merge center
251                  * Same hole will be merged later
252                  */
253                 return -1;
254         /* Hole 2 will be merge center */
255         return 1;
256 }
257
258 /*
259  * Add a hole to the record
260  *
261  * This will do hole merge for copy_file_extent_holes(),
262  * which will ensure there won't be continuous holes.
263  */
264 static int add_file_extent_hole(struct rb_root *holes,
265                                 u64 start, u64 len)
266 {
267         struct file_extent_hole *hole;
268         struct file_extent_hole *prev = NULL;
269         struct file_extent_hole *next = NULL;
270
271         hole = malloc(sizeof(*hole));
272         if (!hole)
273                 return -ENOMEM;
274         hole->start = start;
275         hole->len = len;
276         /* Since compare will not return 0, no -EEXIST will happen */
277         rb_insert(holes, &hole->node, compare_hole);
278
279         /* simple merge with previous hole */
280         if (rb_prev(&hole->node))
281                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
282                                 node);
283         if (prev && prev->start + prev->len >= hole->start) {
284                 hole->len = hole->start + hole->len - prev->start;
285                 hole->start = prev->start;
286                 rb_erase(&prev->node, holes);
287                 free(prev);
288                 prev = NULL;
289         }
290
291         /* iterate merge with next holes */
292         while (1) {
293                 if (!rb_next(&hole->node))
294                         break;
295                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
296                                         node);
297                 if (hole->start + hole->len >= next->start) {
298                         if (hole->start + hole->len <= next->start + next->len)
299                                 hole->len = next->start + next->len -
300                                             hole->start;
301                         rb_erase(&next->node, holes);
302                         free(next);
303                         next = NULL;
304                 } else
305                         break;
306         }
307         return 0;
308 }
309
310 static int compare_hole_range(struct rb_node *node, void *data)
311 {
312         struct file_extent_hole *hole;
313         u64 start;
314
315         hole = (struct file_extent_hole *)data;
316         start = hole->start;
317
318         hole = rb_entry(node, struct file_extent_hole, node);
319         if (start < hole->start)
320                 return -1;
321         if (start >= hole->start && start < hole->start + hole->len)
322                 return 0;
323         return 1;
324 }
325
326 /*
327  * Delete a hole in the record
328  *
329  * This will do the hole split and is much restrict than add.
330  */
331 static int del_file_extent_hole(struct rb_root *holes,
332                                 u64 start, u64 len)
333 {
334         struct file_extent_hole *hole;
335         struct file_extent_hole tmp;
336         u64 prev_start = 0;
337         u64 prev_len = 0;
338         u64 next_start = 0;
339         u64 next_len = 0;
340         struct rb_node *node;
341         int have_prev = 0;
342         int have_next = 0;
343         int ret = 0;
344
345         tmp.start = start;
346         tmp.len = len;
347         node = rb_search(holes, &tmp, compare_hole_range, NULL);
348         if (!node)
349                 return -EEXIST;
350         hole = rb_entry(node, struct file_extent_hole, node);
351         if (start + len > hole->start + hole->len)
352                 return -EEXIST;
353
354         /*
355          * Now there will be no overlap, delete the hole and re-add the
356          * split(s) if they exists.
357          */
358         if (start > hole->start) {
359                 prev_start = hole->start;
360                 prev_len = start - hole->start;
361                 have_prev = 1;
362         }
363         if (hole->start + hole->len > start + len) {
364                 next_start = start + len;
365                 next_len = hole->start + hole->len - start - len;
366                 have_next = 1;
367         }
368         rb_erase(node, holes);
369         free(hole);
370         if (have_prev) {
371                 ret = add_file_extent_hole(holes, prev_start, prev_len);
372                 if (ret < 0)
373                         return ret;
374         }
375         if (have_next) {
376                 ret = add_file_extent_hole(holes, next_start, next_len);
377                 if (ret < 0)
378                         return ret;
379         }
380         return 0;
381 }
382
383 static int copy_file_extent_holes(struct rb_root *dst,
384                                   struct rb_root *src)
385 {
386         struct file_extent_hole *hole;
387         struct rb_node *node;
388         int ret = 0;
389
390         node = rb_first(src);
391         while (node) {
392                 hole = rb_entry(node, struct file_extent_hole, node);
393                 ret = add_file_extent_hole(dst, hole->start, hole->len);
394                 if (ret)
395                         break;
396                 node = rb_next(node);
397         }
398         return ret;
399 }
400
401 static void free_file_extent_holes(struct rb_root *holes)
402 {
403         struct rb_node *node;
404         struct file_extent_hole *hole;
405
406         node = rb_first(holes);
407         while (node) {
408                 hole = rb_entry(node, struct file_extent_hole, node);
409                 rb_erase(node, holes);
410                 free(hole);
411                 node = rb_first(holes);
412         }
413 }
414
415 static void record_root_in_trans(struct btrfs_trans_handle *trans,
416                                  struct btrfs_root *root)
417 {
418         if (root->last_trans != trans->transid) {
419                 root->track_dirty = 1;
420                 root->last_trans = trans->transid;
421                 root->commit_root = root->node;
422                 extent_buffer_get(root->node);
423         }
424 }
425
426 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
427 {
428         struct device_record *rec1;
429         struct device_record *rec2;
430
431         rec1 = rb_entry(node1, struct device_record, node);
432         rec2 = rb_entry(node2, struct device_record, node);
433         if (rec1->devid > rec2->devid)
434                 return -1;
435         else if (rec1->devid < rec2->devid)
436                 return 1;
437         else
438                 return 0;
439 }
440
441 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
442 {
443         struct inode_record *rec;
444         struct inode_backref *backref;
445         struct inode_backref *orig;
446         struct inode_backref *tmp;
447         struct orphan_data_extent *src_orphan;
448         struct orphan_data_extent *dst_orphan;
449         struct rb_node *rb;
450         size_t size;
451         int ret;
452
453         rec = malloc(sizeof(*rec));
454         if (!rec)
455                 return ERR_PTR(-ENOMEM);
456         memcpy(rec, orig_rec, sizeof(*rec));
457         rec->refs = 1;
458         INIT_LIST_HEAD(&rec->backrefs);
459         INIT_LIST_HEAD(&rec->orphan_extents);
460         rec->holes = RB_ROOT;
461
462         list_for_each_entry(orig, &orig_rec->backrefs, list) {
463                 size = sizeof(*orig) + orig->namelen + 1;
464                 backref = malloc(size);
465                 if (!backref) {
466                         ret = -ENOMEM;
467                         goto cleanup;
468                 }
469                 memcpy(backref, orig, size);
470                 list_add_tail(&backref->list, &rec->backrefs);
471         }
472         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
473                 dst_orphan = malloc(sizeof(*dst_orphan));
474                 if (!dst_orphan) {
475                         ret = -ENOMEM;
476                         goto cleanup;
477                 }
478                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
479                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
480         }
481         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
482         if (ret < 0)
483                 goto cleanup_rb;
484
485         return rec;
486
487 cleanup_rb:
488         rb = rb_first(&rec->holes);
489         while (rb) {
490                 struct file_extent_hole *hole;
491
492                 hole = rb_entry(rb, struct file_extent_hole, node);
493                 rb = rb_next(rb);
494                 free(hole);
495         }
496
497 cleanup:
498         if (!list_empty(&rec->backrefs))
499                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
500                         list_del(&orig->list);
501                         free(orig);
502                 }
503
504         if (!list_empty(&rec->orphan_extents))
505                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
506                         list_del(&orig->list);
507                         free(orig);
508                 }
509
510         free(rec);
511
512         return ERR_PTR(ret);
513 }
514
515 static void print_orphan_data_extents(struct list_head *orphan_extents,
516                                       u64 objectid)
517 {
518         struct orphan_data_extent *orphan;
519
520         if (list_empty(orphan_extents))
521                 return;
522         printf("The following data extent is lost in tree %llu:\n",
523                objectid);
524         list_for_each_entry(orphan, orphan_extents, list) {
525                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
526                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
527                        orphan->disk_len);
528         }
529 }
530
531 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
532 {
533         u64 root_objectid = root->root_key.objectid;
534         int errors = rec->errors;
535
536         if (!errors)
537                 return;
538         /* reloc root errors, we print its corresponding fs root objectid*/
539         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
540                 root_objectid = root->root_key.offset;
541                 fprintf(stderr, "reloc");
542         }
543         fprintf(stderr, "root %llu inode %llu errors %x",
544                 (unsigned long long) root_objectid,
545                 (unsigned long long) rec->ino, rec->errors);
546
547         if (errors & I_ERR_NO_INODE_ITEM)
548                 fprintf(stderr, ", no inode item");
549         if (errors & I_ERR_NO_ORPHAN_ITEM)
550                 fprintf(stderr, ", no orphan item");
551         if (errors & I_ERR_DUP_INODE_ITEM)
552                 fprintf(stderr, ", dup inode item");
553         if (errors & I_ERR_DUP_DIR_INDEX)
554                 fprintf(stderr, ", dup dir index");
555         if (errors & I_ERR_ODD_DIR_ITEM)
556                 fprintf(stderr, ", odd dir item");
557         if (errors & I_ERR_ODD_FILE_EXTENT)
558                 fprintf(stderr, ", odd file extent");
559         if (errors & I_ERR_BAD_FILE_EXTENT)
560                 fprintf(stderr, ", bad file extent");
561         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
562                 fprintf(stderr, ", file extent overlap");
563         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
564                 fprintf(stderr, ", file extent discount");
565         if (errors & I_ERR_DIR_ISIZE_WRONG)
566                 fprintf(stderr, ", dir isize wrong");
567         if (errors & I_ERR_FILE_NBYTES_WRONG)
568                 fprintf(stderr, ", nbytes wrong");
569         if (errors & I_ERR_ODD_CSUM_ITEM)
570                 fprintf(stderr, ", odd csum item");
571         if (errors & I_ERR_SOME_CSUM_MISSING)
572                 fprintf(stderr, ", some csum missing");
573         if (errors & I_ERR_LINK_COUNT_WRONG)
574                 fprintf(stderr, ", link count wrong");
575         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
576                 fprintf(stderr, ", orphan file extent");
577         fprintf(stderr, "\n");
578         /* Print the orphan extents if needed */
579         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
580                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
581
582         /* Print the holes if needed */
583         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
584                 struct file_extent_hole *hole;
585                 struct rb_node *node;
586                 int found = 0;
587
588                 node = rb_first(&rec->holes);
589                 fprintf(stderr, "Found file extent holes:\n");
590                 while (node) {
591                         found = 1;
592                         hole = rb_entry(node, struct file_extent_hole, node);
593                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
594                                 hole->start, hole->len);
595                         node = rb_next(node);
596                 }
597                 if (!found)
598                         fprintf(stderr, "\tstart: 0, len: %llu\n",
599                                 round_up(rec->isize,
600                                          root->fs_info->sectorsize));
601         }
602 }
603
604 static void print_ref_error(int errors)
605 {
606         if (errors & REF_ERR_NO_DIR_ITEM)
607                 fprintf(stderr, ", no dir item");
608         if (errors & REF_ERR_NO_DIR_INDEX)
609                 fprintf(stderr, ", no dir index");
610         if (errors & REF_ERR_NO_INODE_REF)
611                 fprintf(stderr, ", no inode ref");
612         if (errors & REF_ERR_DUP_DIR_ITEM)
613                 fprintf(stderr, ", dup dir item");
614         if (errors & REF_ERR_DUP_DIR_INDEX)
615                 fprintf(stderr, ", dup dir index");
616         if (errors & REF_ERR_DUP_INODE_REF)
617                 fprintf(stderr, ", dup inode ref");
618         if (errors & REF_ERR_INDEX_UNMATCH)
619                 fprintf(stderr, ", index mismatch");
620         if (errors & REF_ERR_FILETYPE_UNMATCH)
621                 fprintf(stderr, ", filetype mismatch");
622         if (errors & REF_ERR_NAME_TOO_LONG)
623                 fprintf(stderr, ", name too long");
624         if (errors & REF_ERR_NO_ROOT_REF)
625                 fprintf(stderr, ", no root ref");
626         if (errors & REF_ERR_NO_ROOT_BACKREF)
627                 fprintf(stderr, ", no root backref");
628         if (errors & REF_ERR_DUP_ROOT_REF)
629                 fprintf(stderr, ", dup root ref");
630         if (errors & REF_ERR_DUP_ROOT_BACKREF)
631                 fprintf(stderr, ", dup root backref");
632         fprintf(stderr, "\n");
633 }
634
635 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
636                                           u64 ino, int mod)
637 {
638         struct ptr_node *node;
639         struct cache_extent *cache;
640         struct inode_record *rec = NULL;
641         int ret;
642
643         cache = lookup_cache_extent(inode_cache, ino, 1);
644         if (cache) {
645                 node = container_of(cache, struct ptr_node, cache);
646                 rec = node->data;
647                 if (mod && rec->refs > 1) {
648                         node->data = clone_inode_rec(rec);
649                         if (IS_ERR(node->data))
650                                 return node->data;
651                         rec->refs--;
652                         rec = node->data;
653                 }
654         } else if (mod) {
655                 rec = calloc(1, sizeof(*rec));
656                 if (!rec)
657                         return ERR_PTR(-ENOMEM);
658                 rec->ino = ino;
659                 rec->extent_start = (u64)-1;
660                 rec->refs = 1;
661                 INIT_LIST_HEAD(&rec->backrefs);
662                 INIT_LIST_HEAD(&rec->orphan_extents);
663                 rec->holes = RB_ROOT;
664
665                 node = malloc(sizeof(*node));
666                 if (!node) {
667                         free(rec);
668                         return ERR_PTR(-ENOMEM);
669                 }
670                 node->cache.start = ino;
671                 node->cache.size = 1;
672                 node->data = rec;
673
674                 if (ino == BTRFS_FREE_INO_OBJECTID)
675                         rec->found_link = 1;
676
677                 ret = insert_cache_extent(inode_cache, &node->cache);
678                 if (ret)
679                         return ERR_PTR(-EEXIST);
680         }
681         return rec;
682 }
683
684 static void free_orphan_data_extents(struct list_head *orphan_extents)
685 {
686         struct orphan_data_extent *orphan;
687
688         while (!list_empty(orphan_extents)) {
689                 orphan = list_entry(orphan_extents->next,
690                                     struct orphan_data_extent, list);
691                 list_del(&orphan->list);
692                 free(orphan);
693         }
694 }
695
696 static void free_inode_rec(struct inode_record *rec)
697 {
698         struct inode_backref *backref;
699
700         if (--rec->refs > 0)
701                 return;
702
703         while (!list_empty(&rec->backrefs)) {
704                 backref = to_inode_backref(rec->backrefs.next);
705                 list_del(&backref->list);
706                 free(backref);
707         }
708         free_orphan_data_extents(&rec->orphan_extents);
709         free_file_extent_holes(&rec->holes);
710         free(rec);
711 }
712
713 static int can_free_inode_rec(struct inode_record *rec)
714 {
715         if (!rec->errors && rec->checked && rec->found_inode_item &&
716             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
717                 return 1;
718         return 0;
719 }
720
721 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
722                                  struct inode_record *rec)
723 {
724         struct cache_extent *cache;
725         struct inode_backref *tmp, *backref;
726         struct ptr_node *node;
727         u8 filetype;
728
729         if (!rec->found_inode_item)
730                 return;
731
732         filetype = imode_to_type(rec->imode);
733         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
734                 if (backref->found_dir_item && backref->found_dir_index) {
735                         if (backref->filetype != filetype)
736                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
737                         if (!backref->errors && backref->found_inode_ref &&
738                             rec->nlink == rec->found_link) {
739                                 list_del(&backref->list);
740                                 free(backref);
741                         }
742                 }
743         }
744
745         if (!rec->checked || rec->merging)
746                 return;
747
748         if (S_ISDIR(rec->imode)) {
749                 if (rec->found_size != rec->isize)
750                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
751                 if (rec->found_file_extent)
752                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
753         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
754                 if (rec->found_dir_item)
755                         rec->errors |= I_ERR_ODD_DIR_ITEM;
756                 if (rec->found_size != rec->nbytes)
757                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
758                 if (rec->nlink > 0 && !no_holes &&
759                     (rec->extent_end < rec->isize ||
760                      first_extent_gap(&rec->holes) < rec->isize))
761                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
762         }
763
764         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
765                 if (rec->found_csum_item && rec->nodatasum)
766                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
767                 if (rec->some_csum_missing && !rec->nodatasum)
768                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
769         }
770
771         BUG_ON(rec->refs != 1);
772         if (can_free_inode_rec(rec)) {
773                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
774                 node = container_of(cache, struct ptr_node, cache);
775                 BUG_ON(node->data != rec);
776                 remove_cache_extent(inode_cache, &node->cache);
777                 free(node);
778                 free_inode_rec(rec);
779         }
780 }
781
782 static int check_orphan_item(struct btrfs_root *root, u64 ino)
783 {
784         struct btrfs_path path;
785         struct btrfs_key key;
786         int ret;
787
788         key.objectid = BTRFS_ORPHAN_OBJECTID;
789         key.type = BTRFS_ORPHAN_ITEM_KEY;
790         key.offset = ino;
791
792         btrfs_init_path(&path);
793         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
794         btrfs_release_path(&path);
795         if (ret > 0)
796                 ret = -ENOENT;
797         return ret;
798 }
799
800 static int process_inode_item(struct extent_buffer *eb,
801                               int slot, struct btrfs_key *key,
802                               struct shared_node *active_node)
803 {
804         struct inode_record *rec;
805         struct btrfs_inode_item *item;
806
807         rec = active_node->current;
808         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
809         if (rec->found_inode_item) {
810                 rec->errors |= I_ERR_DUP_INODE_ITEM;
811                 return 1;
812         }
813         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
814         rec->nlink = btrfs_inode_nlink(eb, item);
815         rec->isize = btrfs_inode_size(eb, item);
816         rec->nbytes = btrfs_inode_nbytes(eb, item);
817         rec->imode = btrfs_inode_mode(eb, item);
818         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
819                 rec->nodatasum = 1;
820         rec->found_inode_item = 1;
821         if (rec->nlink == 0)
822                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
823         maybe_free_inode_rec(&active_node->inode_cache, rec);
824         return 0;
825 }
826
827 static struct inode_backref *get_inode_backref(struct inode_record *rec,
828                                                 const char *name,
829                                                 int namelen, u64 dir)
830 {
831         struct inode_backref *backref;
832
833         list_for_each_entry(backref, &rec->backrefs, list) {
834                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
835                         break;
836                 if (backref->dir != dir || backref->namelen != namelen)
837                         continue;
838                 if (memcmp(name, backref->name, namelen))
839                         continue;
840                 return backref;
841         }
842
843         backref = malloc(sizeof(*backref) + namelen + 1);
844         if (!backref)
845                 return NULL;
846         memset(backref, 0, sizeof(*backref));
847         backref->dir = dir;
848         backref->namelen = namelen;
849         memcpy(backref->name, name, namelen);
850         backref->name[namelen] = '\0';
851         list_add_tail(&backref->list, &rec->backrefs);
852         return backref;
853 }
854
855 static int add_inode_backref(struct cache_tree *inode_cache,
856                              u64 ino, u64 dir, u64 index,
857                              const char *name, int namelen,
858                              u8 filetype, u8 itemtype, int errors)
859 {
860         struct inode_record *rec;
861         struct inode_backref *backref;
862
863         rec = get_inode_rec(inode_cache, ino, 1);
864         BUG_ON(IS_ERR(rec));
865         backref = get_inode_backref(rec, name, namelen, dir);
866         BUG_ON(!backref);
867         if (errors)
868                 backref->errors |= errors;
869         if (itemtype == BTRFS_DIR_INDEX_KEY) {
870                 if (backref->found_dir_index)
871                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
872                 if (backref->found_inode_ref && backref->index != index)
873                         backref->errors |= REF_ERR_INDEX_UNMATCH;
874                 if (backref->found_dir_item && backref->filetype != filetype)
875                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
876
877                 backref->index = index;
878                 backref->filetype = filetype;
879                 backref->found_dir_index = 1;
880         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
881                 rec->found_link++;
882                 if (backref->found_dir_item)
883                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
884                 if (backref->found_dir_index && backref->filetype != filetype)
885                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
886
887                 backref->filetype = filetype;
888                 backref->found_dir_item = 1;
889         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
890                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
891                 if (backref->found_inode_ref)
892                         backref->errors |= REF_ERR_DUP_INODE_REF;
893                 if (backref->found_dir_index && backref->index != index)
894                         backref->errors |= REF_ERR_INDEX_UNMATCH;
895                 else
896                         backref->index = index;
897
898                 backref->ref_type = itemtype;
899                 backref->found_inode_ref = 1;
900         } else {
901                 BUG_ON(1);
902         }
903
904         maybe_free_inode_rec(inode_cache, rec);
905         return 0;
906 }
907
908 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
909                             struct cache_tree *dst_cache)
910 {
911         struct inode_backref *backref;
912         u32 dir_count = 0;
913         int ret = 0;
914
915         dst->merging = 1;
916         list_for_each_entry(backref, &src->backrefs, list) {
917                 if (backref->found_dir_index) {
918                         add_inode_backref(dst_cache, dst->ino, backref->dir,
919                                         backref->index, backref->name,
920                                         backref->namelen, backref->filetype,
921                                         BTRFS_DIR_INDEX_KEY, backref->errors);
922                 }
923                 if (backref->found_dir_item) {
924                         dir_count++;
925                         add_inode_backref(dst_cache, dst->ino,
926                                         backref->dir, 0, backref->name,
927                                         backref->namelen, backref->filetype,
928                                         BTRFS_DIR_ITEM_KEY, backref->errors);
929                 }
930                 if (backref->found_inode_ref) {
931                         add_inode_backref(dst_cache, dst->ino,
932                                         backref->dir, backref->index,
933                                         backref->name, backref->namelen, 0,
934                                         backref->ref_type, backref->errors);
935                 }
936         }
937
938         if (src->found_dir_item)
939                 dst->found_dir_item = 1;
940         if (src->found_file_extent)
941                 dst->found_file_extent = 1;
942         if (src->found_csum_item)
943                 dst->found_csum_item = 1;
944         if (src->some_csum_missing)
945                 dst->some_csum_missing = 1;
946         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
947                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
948                 if (ret < 0)
949                         return ret;
950         }
951
952         BUG_ON(src->found_link < dir_count);
953         dst->found_link += src->found_link - dir_count;
954         dst->found_size += src->found_size;
955         if (src->extent_start != (u64)-1) {
956                 if (dst->extent_start == (u64)-1) {
957                         dst->extent_start = src->extent_start;
958                         dst->extent_end = src->extent_end;
959                 } else {
960                         if (dst->extent_end > src->extent_start)
961                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
962                         else if (dst->extent_end < src->extent_start) {
963                                 ret = add_file_extent_hole(&dst->holes,
964                                         dst->extent_end,
965                                         src->extent_start - dst->extent_end);
966                         }
967                         if (dst->extent_end < src->extent_end)
968                                 dst->extent_end = src->extent_end;
969                 }
970         }
971
972         dst->errors |= src->errors;
973         if (src->found_inode_item) {
974                 if (!dst->found_inode_item) {
975                         dst->nlink = src->nlink;
976                         dst->isize = src->isize;
977                         dst->nbytes = src->nbytes;
978                         dst->imode = src->imode;
979                         dst->nodatasum = src->nodatasum;
980                         dst->found_inode_item = 1;
981                 } else {
982                         dst->errors |= I_ERR_DUP_INODE_ITEM;
983                 }
984         }
985         dst->merging = 0;
986
987         return 0;
988 }
989
990 static int splice_shared_node(struct shared_node *src_node,
991                               struct shared_node *dst_node)
992 {
993         struct cache_extent *cache;
994         struct ptr_node *node, *ins;
995         struct cache_tree *src, *dst;
996         struct inode_record *rec, *conflict;
997         u64 current_ino = 0;
998         int splice = 0;
999         int ret;
1000
1001         if (--src_node->refs == 0)
1002                 splice = 1;
1003         if (src_node->current)
1004                 current_ino = src_node->current->ino;
1005
1006         src = &src_node->root_cache;
1007         dst = &dst_node->root_cache;
1008 again:
1009         cache = search_cache_extent(src, 0);
1010         while (cache) {
1011                 node = container_of(cache, struct ptr_node, cache);
1012                 rec = node->data;
1013                 cache = next_cache_extent(cache);
1014
1015                 if (splice) {
1016                         remove_cache_extent(src, &node->cache);
1017                         ins = node;
1018                 } else {
1019                         ins = malloc(sizeof(*ins));
1020                         BUG_ON(!ins);
1021                         ins->cache.start = node->cache.start;
1022                         ins->cache.size = node->cache.size;
1023                         ins->data = rec;
1024                         rec->refs++;
1025                 }
1026                 ret = insert_cache_extent(dst, &ins->cache);
1027                 if (ret == -EEXIST) {
1028                         conflict = get_inode_rec(dst, rec->ino, 1);
1029                         BUG_ON(IS_ERR(conflict));
1030                         merge_inode_recs(rec, conflict, dst);
1031                         if (rec->checked) {
1032                                 conflict->checked = 1;
1033                                 if (dst_node->current == conflict)
1034                                         dst_node->current = NULL;
1035                         }
1036                         maybe_free_inode_rec(dst, conflict);
1037                         free_inode_rec(rec);
1038                         free(ins);
1039                 } else {
1040                         BUG_ON(ret);
1041                 }
1042         }
1043
1044         if (src == &src_node->root_cache) {
1045                 src = &src_node->inode_cache;
1046                 dst = &dst_node->inode_cache;
1047                 goto again;
1048         }
1049
1050         if (current_ino > 0 && (!dst_node->current ||
1051             current_ino > dst_node->current->ino)) {
1052                 if (dst_node->current) {
1053                         dst_node->current->checked = 1;
1054                         maybe_free_inode_rec(dst, dst_node->current);
1055                 }
1056                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1057                 BUG_ON(IS_ERR(dst_node->current));
1058         }
1059         return 0;
1060 }
1061
1062 static void free_inode_ptr(struct cache_extent *cache)
1063 {
1064         struct ptr_node *node;
1065         struct inode_record *rec;
1066
1067         node = container_of(cache, struct ptr_node, cache);
1068         rec = node->data;
1069         free_inode_rec(rec);
1070         free(node);
1071 }
1072
1073 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1074
1075 static struct shared_node *find_shared_node(struct cache_tree *shared,
1076                                             u64 bytenr)
1077 {
1078         struct cache_extent *cache;
1079         struct shared_node *node;
1080
1081         cache = lookup_cache_extent(shared, bytenr, 1);
1082         if (cache) {
1083                 node = container_of(cache, struct shared_node, cache);
1084                 return node;
1085         }
1086         return NULL;
1087 }
1088
1089 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1090 {
1091         int ret;
1092         struct shared_node *node;
1093
1094         node = calloc(1, sizeof(*node));
1095         if (!node)
1096                 return -ENOMEM;
1097         node->cache.start = bytenr;
1098         node->cache.size = 1;
1099         cache_tree_init(&node->root_cache);
1100         cache_tree_init(&node->inode_cache);
1101         node->refs = refs;
1102
1103         ret = insert_cache_extent(shared, &node->cache);
1104
1105         return ret;
1106 }
1107
1108 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1109                              struct walk_control *wc, int level)
1110 {
1111         struct shared_node *node;
1112         struct shared_node *dest;
1113         int ret;
1114
1115         if (level == wc->active_node)
1116                 return 0;
1117
1118         BUG_ON(wc->active_node <= level);
1119         node = find_shared_node(&wc->shared, bytenr);
1120         if (!node) {
1121                 ret = add_shared_node(&wc->shared, bytenr, refs);
1122                 BUG_ON(ret);
1123                 node = find_shared_node(&wc->shared, bytenr);
1124                 wc->nodes[level] = node;
1125                 wc->active_node = level;
1126                 return 0;
1127         }
1128
1129         if (wc->root_level == wc->active_node &&
1130             btrfs_root_refs(&root->root_item) == 0) {
1131                 if (--node->refs == 0) {
1132                         free_inode_recs_tree(&node->root_cache);
1133                         free_inode_recs_tree(&node->inode_cache);
1134                         remove_cache_extent(&wc->shared, &node->cache);
1135                         free(node);
1136                 }
1137                 return 1;
1138         }
1139
1140         dest = wc->nodes[wc->active_node];
1141         splice_shared_node(node, dest);
1142         if (node->refs == 0) {
1143                 remove_cache_extent(&wc->shared, &node->cache);
1144                 free(node);
1145         }
1146         return 1;
1147 }
1148
1149 static int leave_shared_node(struct btrfs_root *root,
1150                              struct walk_control *wc, int level)
1151 {
1152         struct shared_node *node;
1153         struct shared_node *dest;
1154         int i;
1155
1156         if (level == wc->root_level)
1157                 return 0;
1158
1159         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1160                 if (wc->nodes[i])
1161                         break;
1162         }
1163         BUG_ON(i >= BTRFS_MAX_LEVEL);
1164
1165         node = wc->nodes[wc->active_node];
1166         wc->nodes[wc->active_node] = NULL;
1167         wc->active_node = i;
1168
1169         dest = wc->nodes[wc->active_node];
1170         if (wc->active_node < wc->root_level ||
1171             btrfs_root_refs(&root->root_item) > 0) {
1172                 BUG_ON(node->refs <= 1);
1173                 splice_shared_node(node, dest);
1174         } else {
1175                 BUG_ON(node->refs < 2);
1176                 node->refs--;
1177         }
1178         return 0;
1179 }
1180
1181 /*
1182  * Returns:
1183  * < 0 - on error
1184  * 1   - if the root with id child_root_id is a child of root parent_root_id
1185  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1186  *       has other root(s) as parent(s)
1187  * 2   - if the root child_root_id doesn't have any parent roots
1188  */
1189 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1190                          u64 child_root_id)
1191 {
1192         struct btrfs_path path;
1193         struct btrfs_key key;
1194         struct extent_buffer *leaf;
1195         int has_parent = 0;
1196         int ret;
1197
1198         btrfs_init_path(&path);
1199
1200         key.objectid = parent_root_id;
1201         key.type = BTRFS_ROOT_REF_KEY;
1202         key.offset = child_root_id;
1203         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1204                                 0, 0);
1205         if (ret < 0)
1206                 return ret;
1207         btrfs_release_path(&path);
1208         if (!ret)
1209                 return 1;
1210
1211         key.objectid = child_root_id;
1212         key.type = BTRFS_ROOT_BACKREF_KEY;
1213         key.offset = 0;
1214         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1215                                 0, 0);
1216         if (ret < 0)
1217                 goto out;
1218
1219         while (1) {
1220                 leaf = path.nodes[0];
1221                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1222                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1223                         if (ret)
1224                                 break;
1225                         leaf = path.nodes[0];
1226                 }
1227
1228                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1229                 if (key.objectid != child_root_id ||
1230                     key.type != BTRFS_ROOT_BACKREF_KEY)
1231                         break;
1232
1233                 has_parent = 1;
1234
1235                 if (key.offset == parent_root_id) {
1236                         btrfs_release_path(&path);
1237                         return 1;
1238                 }
1239
1240                 path.slots[0]++;
1241         }
1242 out:
1243         btrfs_release_path(&path);
1244         if (ret < 0)
1245                 return ret;
1246         return has_parent ? 0 : 2;
1247 }
1248
1249 static int process_dir_item(struct extent_buffer *eb,
1250                             int slot, struct btrfs_key *key,
1251                             struct shared_node *active_node)
1252 {
1253         u32 total;
1254         u32 cur = 0;
1255         u32 len;
1256         u32 name_len;
1257         u32 data_len;
1258         int error;
1259         int nritems = 0;
1260         u8 filetype;
1261         struct btrfs_dir_item *di;
1262         struct inode_record *rec;
1263         struct cache_tree *root_cache;
1264         struct cache_tree *inode_cache;
1265         struct btrfs_key location;
1266         char namebuf[BTRFS_NAME_LEN];
1267
1268         root_cache = &active_node->root_cache;
1269         inode_cache = &active_node->inode_cache;
1270         rec = active_node->current;
1271         rec->found_dir_item = 1;
1272
1273         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1274         total = btrfs_item_size_nr(eb, slot);
1275         while (cur < total) {
1276                 nritems++;
1277                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1278                 name_len = btrfs_dir_name_len(eb, di);
1279                 data_len = btrfs_dir_data_len(eb, di);
1280                 filetype = btrfs_dir_type(eb, di);
1281
1282                 rec->found_size += name_len;
1283                 if (cur + sizeof(*di) + name_len > total ||
1284                     name_len > BTRFS_NAME_LEN) {
1285                         error = REF_ERR_NAME_TOO_LONG;
1286
1287                         if (cur + sizeof(*di) > total)
1288                                 break;
1289                         len = min_t(u32, total - cur - sizeof(*di),
1290                                     BTRFS_NAME_LEN);
1291                 } else {
1292                         len = name_len;
1293                         error = 0;
1294                 }
1295
1296                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1297
1298                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1299                     key->offset != btrfs_name_hash(namebuf, len)) {
1300                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1301                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1302                         key->objectid, key->offset, namebuf, len, filetype,
1303                         key->offset, btrfs_name_hash(namebuf, len));
1304                 }
1305
1306                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1307                         add_inode_backref(inode_cache, location.objectid,
1308                                           key->objectid, key->offset, namebuf,
1309                                           len, filetype, key->type, error);
1310                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1311                         add_inode_backref(root_cache, location.objectid,
1312                                           key->objectid, key->offset,
1313                                           namebuf, len, filetype,
1314                                           key->type, error);
1315                 } else {
1316                         fprintf(stderr,
1317                                 "unknown location type %d in DIR_ITEM[%llu %llu]\n",
1318                                 location.type, key->objectid, key->offset);
1319                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1320                                           key->objectid, key->offset, namebuf,
1321                                           len, filetype, key->type, error);
1322                 }
1323
1324                 len = sizeof(*di) + name_len + data_len;
1325                 di = (struct btrfs_dir_item *)((char *)di + len);
1326                 cur += len;
1327         }
1328         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1329                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1330
1331         return 0;
1332 }
1333
1334 static int process_inode_ref(struct extent_buffer *eb,
1335                              int slot, struct btrfs_key *key,
1336                              struct shared_node *active_node)
1337 {
1338         u32 total;
1339         u32 cur = 0;
1340         u32 len;
1341         u32 name_len;
1342         u64 index;
1343         int error;
1344         struct cache_tree *inode_cache;
1345         struct btrfs_inode_ref *ref;
1346         char namebuf[BTRFS_NAME_LEN];
1347
1348         inode_cache = &active_node->inode_cache;
1349
1350         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1351         total = btrfs_item_size_nr(eb, slot);
1352         while (cur < total) {
1353                 name_len = btrfs_inode_ref_name_len(eb, ref);
1354                 index = btrfs_inode_ref_index(eb, ref);
1355
1356                 /* inode_ref + namelen should not cross item boundary */
1357                 if (cur + sizeof(*ref) + name_len > total ||
1358                     name_len > BTRFS_NAME_LEN) {
1359                         if (total < cur + sizeof(*ref))
1360                                 break;
1361
1362                         /* Still try to read out the remaining part */
1363                         len = min_t(u32, total - cur - sizeof(*ref),
1364                                     BTRFS_NAME_LEN);
1365                         error = REF_ERR_NAME_TOO_LONG;
1366                 } else {
1367                         len = name_len;
1368                         error = 0;
1369                 }
1370
1371                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1372                 add_inode_backref(inode_cache, key->objectid, key->offset,
1373                                   index, namebuf, len, 0, key->type, error);
1374
1375                 len = sizeof(*ref) + name_len;
1376                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1377                 cur += len;
1378         }
1379         return 0;
1380 }
1381
1382 static int process_inode_extref(struct extent_buffer *eb,
1383                                 int slot, struct btrfs_key *key,
1384                                 struct shared_node *active_node)
1385 {
1386         u32 total;
1387         u32 cur = 0;
1388         u32 len;
1389         u32 name_len;
1390         u64 index;
1391         u64 parent;
1392         int error;
1393         struct cache_tree *inode_cache;
1394         struct btrfs_inode_extref *extref;
1395         char namebuf[BTRFS_NAME_LEN];
1396
1397         inode_cache = &active_node->inode_cache;
1398
1399         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1400         total = btrfs_item_size_nr(eb, slot);
1401         while (cur < total) {
1402                 name_len = btrfs_inode_extref_name_len(eb, extref);
1403                 index = btrfs_inode_extref_index(eb, extref);
1404                 parent = btrfs_inode_extref_parent(eb, extref);
1405                 if (name_len <= BTRFS_NAME_LEN) {
1406                         len = name_len;
1407                         error = 0;
1408                 } else {
1409                         len = BTRFS_NAME_LEN;
1410                         error = REF_ERR_NAME_TOO_LONG;
1411                 }
1412                 read_extent_buffer(eb, namebuf,
1413                                    (unsigned long)(extref + 1), len);
1414                 add_inode_backref(inode_cache, key->objectid, parent,
1415                                   index, namebuf, len, 0, key->type, error);
1416
1417                 len = sizeof(*extref) + name_len;
1418                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1419                 cur += len;
1420         }
1421         return 0;
1422
1423 }
1424
1425 static int process_file_extent(struct btrfs_root *root,
1426                                 struct extent_buffer *eb,
1427                                 int slot, struct btrfs_key *key,
1428                                 struct shared_node *active_node)
1429 {
1430         struct inode_record *rec;
1431         struct btrfs_file_extent_item *fi;
1432         u64 num_bytes = 0;
1433         u64 disk_bytenr = 0;
1434         u64 extent_offset = 0;
1435         u64 mask = root->fs_info->sectorsize - 1;
1436         int extent_type;
1437         int ret;
1438
1439         rec = active_node->current;
1440         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1441         rec->found_file_extent = 1;
1442
1443         if (rec->extent_start == (u64)-1) {
1444                 rec->extent_start = key->offset;
1445                 rec->extent_end = key->offset;
1446         }
1447
1448         if (rec->extent_end > key->offset)
1449                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1450         else if (rec->extent_end < key->offset) {
1451                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1452                                            key->offset - rec->extent_end);
1453                 if (ret < 0)
1454                         return ret;
1455         }
1456
1457         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1458         extent_type = btrfs_file_extent_type(eb, fi);
1459
1460         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1461                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1462                 if (num_bytes == 0)
1463                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1464                 rec->found_size += num_bytes;
1465                 num_bytes = (num_bytes + mask) & ~mask;
1466         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1467                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1468                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1469                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1470                 extent_offset = btrfs_file_extent_offset(eb, fi);
1471                 if (num_bytes == 0 || (num_bytes & mask))
1472                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1473                 if (num_bytes + extent_offset >
1474                     btrfs_file_extent_ram_bytes(eb, fi))
1475                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1476                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1477                     (btrfs_file_extent_compression(eb, fi) ||
1478                      btrfs_file_extent_encryption(eb, fi) ||
1479                      btrfs_file_extent_other_encoding(eb, fi)))
1480                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1481                 if (disk_bytenr > 0)
1482                         rec->found_size += num_bytes;
1483         } else {
1484                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1485         }
1486         rec->extent_end = key->offset + num_bytes;
1487
1488         /*
1489          * The data reloc tree will copy full extents into its inode and then
1490          * copy the corresponding csums.  Because the extent it copied could be
1491          * a preallocated extent that hasn't been written to yet there may be no
1492          * csums to copy, ergo we won't have csums for our file extent.  This is
1493          * ok so just don't bother checking csums if the inode belongs to the
1494          * data reloc tree.
1495          */
1496         if (disk_bytenr > 0 &&
1497             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1498                 u64 found;
1499                 if (btrfs_file_extent_compression(eb, fi))
1500                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1501                 else
1502                         disk_bytenr += extent_offset;
1503
1504                 ret = count_csum_range(root->fs_info, disk_bytenr, num_bytes,
1505                                        &found);
1506                 if (ret < 0)
1507                         return ret;
1508                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1509                         if (found > 0)
1510                                 rec->found_csum_item = 1;
1511                         if (found < num_bytes)
1512                                 rec->some_csum_missing = 1;
1513                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1514                         if (found > 0) {
1515                                 ret = check_prealloc_extent_written(root->fs_info,
1516                                                                     disk_bytenr,
1517                                                                     num_bytes);
1518                                 if (ret < 0)
1519                                         return ret;
1520                                 if (ret == 0)
1521                                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1522                         }
1523                 }
1524         }
1525         return 0;
1526 }
1527
1528 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1529                             struct walk_control *wc)
1530 {
1531         struct btrfs_key key;
1532         u32 nritems;
1533         int i;
1534         int ret = 0;
1535         struct cache_tree *inode_cache;
1536         struct shared_node *active_node;
1537
1538         if (wc->root_level == wc->active_node &&
1539             btrfs_root_refs(&root->root_item) == 0)
1540                 return 0;
1541
1542         active_node = wc->nodes[wc->active_node];
1543         inode_cache = &active_node->inode_cache;
1544         nritems = btrfs_header_nritems(eb);
1545         for (i = 0; i < nritems; i++) {
1546                 btrfs_item_key_to_cpu(eb, &key, i);
1547
1548                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1549                         continue;
1550                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1551                         continue;
1552
1553                 if (active_node->current == NULL ||
1554                     active_node->current->ino < key.objectid) {
1555                         if (active_node->current) {
1556                                 active_node->current->checked = 1;
1557                                 maybe_free_inode_rec(inode_cache,
1558                                                      active_node->current);
1559                         }
1560                         active_node->current = get_inode_rec(inode_cache,
1561                                                              key.objectid, 1);
1562                         BUG_ON(IS_ERR(active_node->current));
1563                 }
1564                 switch (key.type) {
1565                 case BTRFS_DIR_ITEM_KEY:
1566                 case BTRFS_DIR_INDEX_KEY:
1567                         ret = process_dir_item(eb, i, &key, active_node);
1568                         break;
1569                 case BTRFS_INODE_REF_KEY:
1570                         ret = process_inode_ref(eb, i, &key, active_node);
1571                         break;
1572                 case BTRFS_INODE_EXTREF_KEY:
1573                         ret = process_inode_extref(eb, i, &key, active_node);
1574                         break;
1575                 case BTRFS_INODE_ITEM_KEY:
1576                         ret = process_inode_item(eb, i, &key, active_node);
1577                         break;
1578                 case BTRFS_EXTENT_DATA_KEY:
1579                         ret = process_file_extent(root, eb, i, &key,
1580                                                   active_node);
1581                         break;
1582                 default:
1583                         break;
1584                 };
1585         }
1586         return ret;
1587 }
1588
1589 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1590                           struct walk_control *wc, int *level,
1591                           struct node_refs *nrefs)
1592 {
1593         enum btrfs_tree_block_status status;
1594         u64 bytenr;
1595         u64 ptr_gen;
1596         struct btrfs_fs_info *fs_info = root->fs_info;
1597         struct extent_buffer *next;
1598         struct extent_buffer *cur;
1599         int ret, err = 0;
1600         u64 refs;
1601
1602         WARN_ON(*level < 0);
1603         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1604
1605         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1606                 refs = nrefs->refs[*level];
1607                 ret = 0;
1608         } else {
1609                 ret = btrfs_lookup_extent_info(NULL, root,
1610                                        path->nodes[*level]->start,
1611                                        *level, 1, &refs, NULL);
1612                 if (ret < 0) {
1613                         err = ret;
1614                         goto out;
1615                 }
1616                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1617                 nrefs->refs[*level] = refs;
1618         }
1619
1620         if (refs > 1) {
1621                 ret = enter_shared_node(root, path->nodes[*level]->start,
1622                                         refs, wc, *level);
1623                 if (ret > 0) {
1624                         err = ret;
1625                         goto out;
1626                 }
1627         }
1628
1629         while (*level >= 0) {
1630                 WARN_ON(*level < 0);
1631                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1632                 cur = path->nodes[*level];
1633
1634                 if (btrfs_header_level(cur) != *level)
1635                         WARN_ON(1);
1636
1637                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1638                         break;
1639                 if (*level == 0) {
1640                         ret = process_one_leaf(root, cur, wc);
1641                         if (ret < 0)
1642                                 err = ret;
1643                         break;
1644                 }
1645                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1646                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1647
1648                 if (bytenr == nrefs->bytenr[*level - 1]) {
1649                         refs = nrefs->refs[*level - 1];
1650                 } else {
1651                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1652                                         *level - 1, 1, &refs, NULL);
1653                         if (ret < 0) {
1654                                 refs = 0;
1655                         } else {
1656                                 nrefs->bytenr[*level - 1] = bytenr;
1657                                 nrefs->refs[*level - 1] = refs;
1658                         }
1659                 }
1660
1661                 if (refs > 1) {
1662                         ret = enter_shared_node(root, bytenr, refs,
1663                                                 wc, *level - 1);
1664                         if (ret > 0) {
1665                                 path->slots[*level]++;
1666                                 continue;
1667                         }
1668                 }
1669
1670                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
1671                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1672                         free_extent_buffer(next);
1673                         reada_walk_down(root, cur, path->slots[*level]);
1674                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
1675                         if (!extent_buffer_uptodate(next)) {
1676                                 struct btrfs_key node_key;
1677
1678                                 btrfs_node_key_to_cpu(path->nodes[*level],
1679                                                       &node_key,
1680                                                       path->slots[*level]);
1681                                 btrfs_add_corrupt_extent_record(root->fs_info,
1682                                                 &node_key,
1683                                                 path->nodes[*level]->start,
1684                                                 root->fs_info->nodesize,
1685                                                 *level);
1686                                 err = -EIO;
1687                                 goto out;
1688                         }
1689                 }
1690
1691                 ret = check_child_node(cur, path->slots[*level], next);
1692                 if (ret) {
1693                         free_extent_buffer(next);
1694                         err = ret;
1695                         goto out;
1696                 }
1697
1698                 if (btrfs_is_leaf(next))
1699                         status = btrfs_check_leaf(root, NULL, next);
1700                 else
1701                         status = btrfs_check_node(root, NULL, next);
1702                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1703                         free_extent_buffer(next);
1704                         err = -EIO;
1705                         goto out;
1706                 }
1707
1708                 *level = *level - 1;
1709                 free_extent_buffer(path->nodes[*level]);
1710                 path->nodes[*level] = next;
1711                 path->slots[*level] = 0;
1712         }
1713 out:
1714         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1715         return err;
1716 }
1717
1718 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1719                         struct walk_control *wc, int *level)
1720 {
1721         int i;
1722         struct extent_buffer *leaf;
1723
1724         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1725                 leaf = path->nodes[i];
1726                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1727                         path->slots[i]++;
1728                         *level = i;
1729                         return 0;
1730                 }
1731                 free_extent_buffer(path->nodes[*level]);
1732                 path->nodes[*level] = NULL;
1733                 BUG_ON(*level > wc->active_node);
1734                 if (*level == wc->active_node)
1735                         leave_shared_node(root, wc, *level);
1736                 *level = i + 1;
1737         }
1738         return 1;
1739 }
1740
1741 static int check_root_dir(struct inode_record *rec)
1742 {
1743         struct inode_backref *backref;
1744         int ret = -1;
1745
1746         if (!rec->found_inode_item || rec->errors)
1747                 goto out;
1748         if (rec->nlink != 1 || rec->found_link != 0)
1749                 goto out;
1750         if (list_empty(&rec->backrefs))
1751                 goto out;
1752         backref = to_inode_backref(rec->backrefs.next);
1753         if (!backref->found_inode_ref)
1754                 goto out;
1755         if (backref->index != 0 || backref->namelen != 2 ||
1756             memcmp(backref->name, "..", 2))
1757                 goto out;
1758         if (backref->found_dir_index || backref->found_dir_item)
1759                 goto out;
1760         ret = 0;
1761 out:
1762         return ret;
1763 }
1764
1765 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1766                               struct btrfs_root *root, struct btrfs_path *path,
1767                               struct inode_record *rec)
1768 {
1769         struct btrfs_inode_item *ei;
1770         struct btrfs_key key;
1771         int ret;
1772
1773         key.objectid = rec->ino;
1774         key.type = BTRFS_INODE_ITEM_KEY;
1775         key.offset = (u64)-1;
1776
1777         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1778         if (ret < 0)
1779                 goto out;
1780         if (ret) {
1781                 if (!path->slots[0]) {
1782                         ret = -ENOENT;
1783                         goto out;
1784                 }
1785                 path->slots[0]--;
1786                 ret = 0;
1787         }
1788         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1789         if (key.objectid != rec->ino) {
1790                 ret = -ENOENT;
1791                 goto out;
1792         }
1793
1794         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1795                             struct btrfs_inode_item);
1796         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1797         btrfs_mark_buffer_dirty(path->nodes[0]);
1798         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1799         printf("reset isize for dir %llu root %llu\n", rec->ino,
1800                root->root_key.objectid);
1801 out:
1802         btrfs_release_path(path);
1803         return ret;
1804 }
1805
1806 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1807                                     struct btrfs_root *root,
1808                                     struct btrfs_path *path,
1809                                     struct inode_record *rec)
1810 {
1811         int ret;
1812
1813         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
1814         btrfs_release_path(path);
1815         if (!ret)
1816                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1817         return ret;
1818 }
1819
1820 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
1821                                struct btrfs_root *root,
1822                                struct btrfs_path *path,
1823                                struct inode_record *rec)
1824 {
1825         struct btrfs_inode_item *ei;
1826         struct btrfs_key key;
1827         int ret = 0;
1828
1829         key.objectid = rec->ino;
1830         key.type = BTRFS_INODE_ITEM_KEY;
1831         key.offset = 0;
1832
1833         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1834         if (ret) {
1835                 if (ret > 0)
1836                         ret = -ENOENT;
1837                 goto out;
1838         }
1839
1840         /* Since ret == 0, no need to check anything */
1841         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1842                             struct btrfs_inode_item);
1843         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
1844         btrfs_mark_buffer_dirty(path->nodes[0]);
1845         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
1846         printf("reset nbytes for ino %llu root %llu\n",
1847                rec->ino, root->root_key.objectid);
1848 out:
1849         btrfs_release_path(path);
1850         return ret;
1851 }
1852
1853 static int add_missing_dir_index(struct btrfs_root *root,
1854                                  struct cache_tree *inode_cache,
1855                                  struct inode_record *rec,
1856                                  struct inode_backref *backref)
1857 {
1858         struct btrfs_path path;
1859         struct btrfs_trans_handle *trans;
1860         struct btrfs_dir_item *dir_item;
1861         struct extent_buffer *leaf;
1862         struct btrfs_key key;
1863         struct btrfs_disk_key disk_key;
1864         struct inode_record *dir_rec;
1865         unsigned long name_ptr;
1866         u32 data_size = sizeof(*dir_item) + backref->namelen;
1867         int ret;
1868
1869         trans = btrfs_start_transaction(root, 1);
1870         if (IS_ERR(trans))
1871                 return PTR_ERR(trans);
1872
1873         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
1874                 (unsigned long long)rec->ino);
1875
1876         btrfs_init_path(&path);
1877         key.objectid = backref->dir;
1878         key.type = BTRFS_DIR_INDEX_KEY;
1879         key.offset = backref->index;
1880         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
1881         BUG_ON(ret);
1882
1883         leaf = path.nodes[0];
1884         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
1885
1886         disk_key.objectid = cpu_to_le64(rec->ino);
1887         disk_key.type = BTRFS_INODE_ITEM_KEY;
1888         disk_key.offset = 0;
1889
1890         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
1891         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
1892         btrfs_set_dir_data_len(leaf, dir_item, 0);
1893         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
1894         name_ptr = (unsigned long)(dir_item + 1);
1895         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
1896         btrfs_mark_buffer_dirty(leaf);
1897         btrfs_release_path(&path);
1898         btrfs_commit_transaction(trans, root);
1899
1900         backref->found_dir_index = 1;
1901         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
1902         BUG_ON(IS_ERR(dir_rec));
1903         if (!dir_rec)
1904                 return 0;
1905         dir_rec->found_size += backref->namelen;
1906         if (dir_rec->found_size == dir_rec->isize &&
1907             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
1908                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1909         if (dir_rec->found_size != dir_rec->isize)
1910                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1911
1912         return 0;
1913 }
1914
1915 static int delete_dir_index(struct btrfs_root *root,
1916                             struct inode_backref *backref)
1917 {
1918         struct btrfs_trans_handle *trans;
1919         struct btrfs_dir_item *di;
1920         struct btrfs_path path;
1921         int ret = 0;
1922
1923         trans = btrfs_start_transaction(root, 1);
1924         if (IS_ERR(trans))
1925                 return PTR_ERR(trans);
1926
1927         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
1928                 (unsigned long long)backref->dir,
1929                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
1930                 (unsigned long long)root->objectid);
1931
1932         btrfs_init_path(&path);
1933         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
1934                                     backref->name, backref->namelen,
1935                                     backref->index, -1);
1936         if (IS_ERR(di)) {
1937                 ret = PTR_ERR(di);
1938                 btrfs_release_path(&path);
1939                 btrfs_commit_transaction(trans, root);
1940                 if (ret == -ENOENT)
1941                         return 0;
1942                 return ret;
1943         }
1944
1945         if (!di)
1946                 ret = btrfs_del_item(trans, root, &path);
1947         else
1948                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
1949         BUG_ON(ret);
1950         btrfs_release_path(&path);
1951         btrfs_commit_transaction(trans, root);
1952         return ret;
1953 }
1954
1955 static int create_inode_item(struct btrfs_root *root,
1956                              struct inode_record *rec, int root_dir)
1957 {
1958         struct btrfs_trans_handle *trans;
1959         u64 nlink = 0;
1960         u32 mode = 0;
1961         u64 size = 0;
1962         int ret;
1963
1964         trans = btrfs_start_transaction(root, 1);
1965         if (IS_ERR(trans)) {
1966                 ret = PTR_ERR(trans);
1967                 return ret;
1968         }
1969
1970         nlink = root_dir ? 1 : rec->found_link;
1971         if (rec->found_dir_item) {
1972                 if (rec->found_file_extent)
1973                         fprintf(stderr, "root %llu inode %llu has both a dir "
1974                                 "item and extents, unsure if it is a dir or a "
1975                                 "regular file so setting it as a directory\n",
1976                                 (unsigned long long)root->objectid,
1977                                 (unsigned long long)rec->ino);
1978                 mode = S_IFDIR | 0755;
1979                 size = rec->found_size;
1980         } else if (!rec->found_dir_item) {
1981                 size = rec->extent_end;
1982                 mode =  S_IFREG | 0755;
1983         }
1984
1985         ret = insert_inode_item(trans, root, rec->ino, size, rec->nbytes,
1986                                   nlink, mode);
1987         btrfs_commit_transaction(trans, root);
1988         return 0;
1989 }
1990
1991 static int repair_inode_backrefs(struct btrfs_root *root,
1992                                  struct inode_record *rec,
1993                                  struct cache_tree *inode_cache,
1994                                  int delete)
1995 {
1996         struct inode_backref *tmp, *backref;
1997         u64 root_dirid = btrfs_root_dirid(&root->root_item);
1998         int ret = 0;
1999         int repaired = 0;
2000
2001         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2002                 if (!delete && rec->ino == root_dirid) {
2003                         if (!rec->found_inode_item) {
2004                                 ret = create_inode_item(root, rec, 1);
2005                                 if (ret)
2006                                         break;
2007                                 repaired++;
2008                         }
2009                 }
2010
2011                 /* Index 0 for root dir's are special, don't mess with it */
2012                 if (rec->ino == root_dirid && backref->index == 0)
2013                         continue;
2014
2015                 if (delete &&
2016                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2017                      (backref->found_dir_index && backref->found_inode_ref &&
2018                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2019                         ret = delete_dir_index(root, backref);
2020                         if (ret)
2021                                 break;
2022                         repaired++;
2023                         list_del(&backref->list);
2024                         free(backref);
2025                         continue;
2026                 }
2027
2028                 if (!delete && !backref->found_dir_index &&
2029                     backref->found_dir_item && backref->found_inode_ref) {
2030                         ret = add_missing_dir_index(root, inode_cache, rec,
2031                                                     backref);
2032                         if (ret)
2033                                 break;
2034                         repaired++;
2035                         if (backref->found_dir_item &&
2036                             backref->found_dir_index) {
2037                                 if (!backref->errors &&
2038                                     backref->found_inode_ref) {
2039                                         list_del(&backref->list);
2040                                         free(backref);
2041                                         continue;
2042                                 }
2043                         }
2044                 }
2045
2046                 if (!delete && (!backref->found_dir_index &&
2047                                 !backref->found_dir_item &&
2048                                 backref->found_inode_ref)) {
2049                         struct btrfs_trans_handle *trans;
2050                         struct btrfs_key location;
2051
2052                         ret = check_dir_conflict(root, backref->name,
2053                                                  backref->namelen,
2054                                                  backref->dir,
2055                                                  backref->index);
2056                         if (ret) {
2057                                 /*
2058                                  * let nlink fixing routine to handle it,
2059                                  * which can do it better.
2060                                  */
2061                                 ret = 0;
2062                                 break;
2063                         }
2064                         location.objectid = rec->ino;
2065                         location.type = BTRFS_INODE_ITEM_KEY;
2066                         location.offset = 0;
2067
2068                         trans = btrfs_start_transaction(root, 1);
2069                         if (IS_ERR(trans)) {
2070                                 ret = PTR_ERR(trans);
2071                                 break;
2072                         }
2073                         fprintf(stderr, "adding missing dir index/item pair "
2074                                 "for inode %llu\n",
2075                                 (unsigned long long)rec->ino);
2076                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2077                                                     backref->namelen,
2078                                                     backref->dir, &location,
2079                                                     imode_to_type(rec->imode),
2080                                                     backref->index);
2081                         BUG_ON(ret);
2082                         btrfs_commit_transaction(trans, root);
2083                         repaired++;
2084                 }
2085
2086                 if (!delete && (backref->found_inode_ref &&
2087                                 backref->found_dir_index &&
2088                                 backref->found_dir_item &&
2089                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2090                                 !rec->found_inode_item)) {
2091                         ret = create_inode_item(root, rec, 0);
2092                         if (ret)
2093                                 break;
2094                         repaired++;
2095                 }
2096
2097         }
2098         return ret ? ret : repaired;
2099 }
2100
2101 /*
2102  * To determine the file type for nlink/inode_item repair
2103  *
2104  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2105  * Return -ENOENT if file type is not found.
2106  */
2107 static int find_file_type(struct inode_record *rec, u8 *type)
2108 {
2109         struct inode_backref *backref;
2110
2111         /* For inode item recovered case */
2112         if (rec->found_inode_item) {
2113                 *type = imode_to_type(rec->imode);
2114                 return 0;
2115         }
2116
2117         list_for_each_entry(backref, &rec->backrefs, list) {
2118                 if (backref->found_dir_index || backref->found_dir_item) {
2119                         *type = backref->filetype;
2120                         return 0;
2121                 }
2122         }
2123         return -ENOENT;
2124 }
2125
2126 /*
2127  * To determine the file name for nlink repair
2128  *
2129  * Return 0 if file name is found, set name and namelen.
2130  * Return -ENOENT if file name is not found.
2131  */
2132 static int find_file_name(struct inode_record *rec,
2133                           char *name, int *namelen)
2134 {
2135         struct inode_backref *backref;
2136
2137         list_for_each_entry(backref, &rec->backrefs, list) {
2138                 if (backref->found_dir_index || backref->found_dir_item ||
2139                     backref->found_inode_ref) {
2140                         memcpy(name, backref->name, backref->namelen);
2141                         *namelen = backref->namelen;
2142                         return 0;
2143                 }
2144         }
2145         return -ENOENT;
2146 }
2147
2148 /* Reset the nlink of the inode to the correct one */
2149 static int reset_nlink(struct btrfs_trans_handle *trans,
2150                        struct btrfs_root *root,
2151                        struct btrfs_path *path,
2152                        struct inode_record *rec)
2153 {
2154         struct inode_backref *backref;
2155         struct inode_backref *tmp;
2156         struct btrfs_key key;
2157         struct btrfs_inode_item *inode_item;
2158         int ret = 0;
2159
2160         /* We don't believe this either, reset it and iterate backref */
2161         rec->found_link = 0;
2162
2163         /* Remove all backref including the valid ones */
2164         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2165                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2166                                    backref->index, backref->name,
2167                                    backref->namelen, 0);
2168                 if (ret < 0)
2169                         goto out;
2170
2171                 /* remove invalid backref, so it won't be added back */
2172                 if (!(backref->found_dir_index &&
2173                       backref->found_dir_item &&
2174                       backref->found_inode_ref)) {
2175                         list_del(&backref->list);
2176                         free(backref);
2177                 } else {
2178                         rec->found_link++;
2179                 }
2180         }
2181
2182         /* Set nlink to 0 */
2183         key.objectid = rec->ino;
2184         key.type = BTRFS_INODE_ITEM_KEY;
2185         key.offset = 0;
2186         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2187         if (ret < 0)
2188                 goto out;
2189         if (ret > 0) {
2190                 ret = -ENOENT;
2191                 goto out;
2192         }
2193         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2194                                     struct btrfs_inode_item);
2195         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2196         btrfs_mark_buffer_dirty(path->nodes[0]);
2197         btrfs_release_path(path);
2198
2199         /*
2200          * Add back valid inode_ref/dir_item/dir_index,
2201          * add_link() will handle the nlink inc, so new nlink must be correct
2202          */
2203         list_for_each_entry(backref, &rec->backrefs, list) {
2204                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2205                                      backref->name, backref->namelen,
2206                                      backref->filetype, &backref->index, 1, 0);
2207                 if (ret < 0)
2208                         goto out;
2209         }
2210 out:
2211         btrfs_release_path(path);
2212         return ret;
2213 }
2214
2215 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2216                                struct btrfs_root *root,
2217                                struct btrfs_path *path,
2218                                struct inode_record *rec)
2219 {
2220         char namebuf[BTRFS_NAME_LEN] = {0};
2221         u8 type = 0;
2222         int namelen = 0;
2223         int name_recovered = 0;
2224         int type_recovered = 0;
2225         int ret = 0;
2226
2227         /*
2228          * Get file name and type first before these invalid inode ref
2229          * are deleted by remove_all_invalid_backref()
2230          */
2231         name_recovered = !find_file_name(rec, namebuf, &namelen);
2232         type_recovered = !find_file_type(rec, &type);
2233
2234         if (!name_recovered) {
2235                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2236                        rec->ino, rec->ino);
2237                 namelen = count_digits(rec->ino);
2238                 sprintf(namebuf, "%llu", rec->ino);
2239                 name_recovered = 1;
2240         }
2241         if (!type_recovered) {
2242                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2243                        rec->ino);
2244                 type = BTRFS_FT_REG_FILE;
2245                 type_recovered = 1;
2246         }
2247
2248         ret = reset_nlink(trans, root, path, rec);
2249         if (ret < 0) {
2250                 fprintf(stderr,
2251                         "Failed to reset nlink for inode %llu: %s\n",
2252                         rec->ino, strerror(-ret));
2253                 goto out;
2254         }
2255
2256         if (rec->found_link == 0) {
2257                 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
2258                                               namebuf, namelen, type,
2259                                               (u64 *)&rec->found_link);
2260                 if (ret)
2261                         goto out;
2262         }
2263         printf("Fixed the nlink of inode %llu\n", rec->ino);
2264 out:
2265         /*
2266          * Clear the flag anyway, or we will loop forever for the same inode
2267          * as it will not be removed from the bad inode list and the dead loop
2268          * happens.
2269          */
2270         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2271         btrfs_release_path(path);
2272         return ret;
2273 }
2274
2275 /*
2276  * Check if there is any normal(reg or prealloc) file extent for given
2277  * ino.
2278  * This is used to determine the file type when neither its dir_index/item or
2279  * inode_item exists.
2280  *
2281  * This will *NOT* report error, if any error happens, just consider it does
2282  * not have any normal file extent.
2283  */
2284 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2285 {
2286         struct btrfs_path path;
2287         struct btrfs_key key;
2288         struct btrfs_key found_key;
2289         struct btrfs_file_extent_item *fi;
2290         u8 type;
2291         int ret = 0;
2292
2293         btrfs_init_path(&path);
2294         key.objectid = ino;
2295         key.type = BTRFS_EXTENT_DATA_KEY;
2296         key.offset = 0;
2297
2298         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2299         if (ret < 0) {
2300                 ret = 0;
2301                 goto out;
2302         }
2303         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2304                 ret = btrfs_next_leaf(root, &path);
2305                 if (ret) {
2306                         ret = 0;
2307                         goto out;
2308                 }
2309         }
2310         while (1) {
2311                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2312                                       path.slots[0]);
2313                 if (found_key.objectid != ino ||
2314                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2315                         break;
2316                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2317                                     struct btrfs_file_extent_item);
2318                 type = btrfs_file_extent_type(path.nodes[0], fi);
2319                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2320                         ret = 1;
2321                         goto out;
2322                 }
2323         }
2324 out:
2325         btrfs_release_path(&path);
2326         return ret;
2327 }
2328
2329 static u32 btrfs_type_to_imode(u8 type)
2330 {
2331         static u32 imode_by_btrfs_type[] = {
2332                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2333                 [BTRFS_FT_DIR]          = S_IFDIR,
2334                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2335                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2336                 [BTRFS_FT_FIFO]         = S_IFIFO,
2337                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2338                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2339         };
2340
2341         return imode_by_btrfs_type[(type)];
2342 }
2343
2344 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2345                                 struct btrfs_root *root,
2346                                 struct btrfs_path *path,
2347                                 struct inode_record *rec)
2348 {
2349         u8 filetype;
2350         u32 mode = 0700;
2351         int type_recovered = 0;
2352         int ret = 0;
2353
2354         printf("Trying to rebuild inode:%llu\n", rec->ino);
2355
2356         type_recovered = !find_file_type(rec, &filetype);
2357
2358         /*
2359          * Try to determine inode type if type not found.
2360          *
2361          * For found regular file extent, it must be FILE.
2362          * For found dir_item/index, it must be DIR.
2363          *
2364          * For undetermined one, use FILE as fallback.
2365          *
2366          * TODO:
2367          * 1. If found backref(inode_index/item is already handled) to it,
2368          *    it must be DIR.
2369          *    Need new inode-inode ref structure to allow search for that.
2370          */
2371         if (!type_recovered) {
2372                 if (rec->found_file_extent &&
2373                     find_normal_file_extent(root, rec->ino)) {
2374                         type_recovered = 1;
2375                         filetype = BTRFS_FT_REG_FILE;
2376                 } else if (rec->found_dir_item) {
2377                         type_recovered = 1;
2378                         filetype = BTRFS_FT_DIR;
2379                 } else if (!list_empty(&rec->orphan_extents)) {
2380                         type_recovered = 1;
2381                         filetype = BTRFS_FT_REG_FILE;
2382                 } else{
2383                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2384                                rec->ino);
2385                         type_recovered = 1;
2386                         filetype = BTRFS_FT_REG_FILE;
2387                 }
2388         }
2389
2390         ret = btrfs_new_inode(trans, root, rec->ino,
2391                               mode | btrfs_type_to_imode(filetype));
2392         if (ret < 0)
2393                 goto out;
2394
2395         /*
2396          * Here inode rebuild is done, we only rebuild the inode item,
2397          * don't repair the nlink(like move to lost+found).
2398          * That is the job of nlink repair.
2399          *
2400          * We just fill the record and return
2401          */
2402         rec->found_dir_item = 1;
2403         rec->imode = mode | btrfs_type_to_imode(filetype);
2404         rec->nlink = 0;
2405         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2406         /* Ensure the inode_nlinks repair function will be called */
2407         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2408 out:
2409         return ret;
2410 }
2411
2412 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2413                                       struct btrfs_root *root,
2414                                       struct btrfs_path *path,
2415                                       struct inode_record *rec)
2416 {
2417         struct orphan_data_extent *orphan;
2418         struct orphan_data_extent *tmp;
2419         int ret = 0;
2420
2421         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2422                 /*
2423                  * Check for conflicting file extents
2424                  *
2425                  * Here we don't know whether the extents is compressed or not,
2426                  * so we can only assume it not compressed nor data offset,
2427                  * and use its disk_len as extent length.
2428                  */
2429                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2430                                        orphan->offset, orphan->disk_len, 0);
2431                 btrfs_release_path(path);
2432                 if (ret < 0)
2433                         goto out;
2434                 if (!ret) {
2435                         fprintf(stderr,
2436                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2437                                 orphan->disk_bytenr, orphan->disk_len);
2438                         ret = btrfs_free_extent(trans,
2439                                         root->fs_info->extent_root,
2440                                         orphan->disk_bytenr, orphan->disk_len,
2441                                         0, root->objectid, orphan->objectid,
2442                                         orphan->offset);
2443                         if (ret < 0)
2444                                 goto out;
2445                 }
2446                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2447                                 orphan->offset, orphan->disk_bytenr,
2448                                 orphan->disk_len, orphan->disk_len);
2449                 if (ret < 0)
2450                         goto out;
2451
2452                 /* Update file size info */
2453                 rec->found_size += orphan->disk_len;
2454                 if (rec->found_size == rec->nbytes)
2455                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2456
2457                 /* Update the file extent hole info too */
2458                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2459                                            orphan->disk_len);
2460                 if (ret < 0)
2461                         goto out;
2462                 if (RB_EMPTY_ROOT(&rec->holes))
2463                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2464
2465                 list_del(&orphan->list);
2466                 free(orphan);
2467         }
2468         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2469 out:
2470         return ret;
2471 }
2472
2473 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2474                                         struct btrfs_root *root,
2475                                         struct btrfs_path *path,
2476                                         struct inode_record *rec)
2477 {
2478         struct rb_node *node;
2479         struct file_extent_hole *hole;
2480         int found = 0;
2481         int ret = 0;
2482
2483         node = rb_first(&rec->holes);
2484
2485         while (node) {
2486                 found = 1;
2487                 hole = rb_entry(node, struct file_extent_hole, node);
2488                 ret = btrfs_punch_hole(trans, root, rec->ino,
2489                                        hole->start, hole->len);
2490                 if (ret < 0)
2491                         goto out;
2492                 ret = del_file_extent_hole(&rec->holes, hole->start,
2493                                            hole->len);
2494                 if (ret < 0)
2495                         goto out;
2496                 if (RB_EMPTY_ROOT(&rec->holes))
2497                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2498                 node = rb_first(&rec->holes);
2499         }
2500         /* special case for a file losing all its file extent */
2501         if (!found) {
2502                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2503                                        round_up(rec->isize,
2504                                                 root->fs_info->sectorsize));
2505                 if (ret < 0)
2506                         goto out;
2507         }
2508         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2509                rec->ino, root->objectid);
2510 out:
2511         return ret;
2512 }
2513
2514 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2515 {
2516         struct btrfs_trans_handle *trans;
2517         struct btrfs_path path;
2518         int ret = 0;
2519
2520         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2521                              I_ERR_NO_ORPHAN_ITEM |
2522                              I_ERR_LINK_COUNT_WRONG |
2523                              I_ERR_NO_INODE_ITEM |
2524                              I_ERR_FILE_EXTENT_ORPHAN |
2525                              I_ERR_FILE_EXTENT_DISCOUNT|
2526                              I_ERR_FILE_NBYTES_WRONG)))
2527                 return rec->errors;
2528
2529         /*
2530          * For nlink repair, it may create a dir and add link, so
2531          * 2 for parent(256)'s dir_index and dir_item
2532          * 2 for lost+found dir's inode_item and inode_ref
2533          * 1 for the new inode_ref of the file
2534          * 2 for lost+found dir's dir_index and dir_item for the file
2535          */
2536         trans = btrfs_start_transaction(root, 7);
2537         if (IS_ERR(trans))
2538                 return PTR_ERR(trans);
2539
2540         btrfs_init_path(&path);
2541         if (rec->errors & I_ERR_NO_INODE_ITEM)
2542                 ret = repair_inode_no_item(trans, root, &path, rec);
2543         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2544                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2545         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2546                 ret = repair_inode_discount_extent(trans, root, &path, rec);
2547         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2548                 ret = repair_inode_isize(trans, root, &path, rec);
2549         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2550                 ret = repair_inode_orphan_item(trans, root, &path, rec);
2551         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2552                 ret = repair_inode_nlinks(trans, root, &path, rec);
2553         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2554                 ret = repair_inode_nbytes(trans, root, &path, rec);
2555         btrfs_commit_transaction(trans, root);
2556         btrfs_release_path(&path);
2557         return ret;
2558 }
2559
2560 static int check_inode_recs(struct btrfs_root *root,
2561                             struct cache_tree *inode_cache)
2562 {
2563         struct cache_extent *cache;
2564         struct ptr_node *node;
2565         struct inode_record *rec;
2566         struct inode_backref *backref;
2567         int stage = 0;
2568         int ret = 0;
2569         int err = 0;
2570         u64 error = 0;
2571         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2572
2573         if (btrfs_root_refs(&root->root_item) == 0) {
2574                 if (!cache_tree_empty(inode_cache))
2575                         fprintf(stderr, "warning line %d\n", __LINE__);
2576                 return 0;
2577         }
2578
2579         /*
2580          * We need to repair backrefs first because we could change some of the
2581          * errors in the inode recs.
2582          *
2583          * We also need to go through and delete invalid backrefs first and then
2584          * add the correct ones second.  We do this because we may get EEXIST
2585          * when adding back the correct index because we hadn't yet deleted the
2586          * invalid index.
2587          *
2588          * For example, if we were missing a dir index then the directories
2589          * isize would be wrong, so if we fixed the isize to what we thought it
2590          * would be and then fixed the backref we'd still have a invalid fs, so
2591          * we need to add back the dir index and then check to see if the isize
2592          * is still wrong.
2593          */
2594         while (stage < 3) {
2595                 stage++;
2596                 if (stage == 3 && !err)
2597                         break;
2598
2599                 cache = search_cache_extent(inode_cache, 0);
2600                 while (repair && cache) {
2601                         node = container_of(cache, struct ptr_node, cache);
2602                         rec = node->data;
2603                         cache = next_cache_extent(cache);
2604
2605                         /* Need to free everything up and rescan */
2606                         if (stage == 3) {
2607                                 remove_cache_extent(inode_cache, &node->cache);
2608                                 free(node);
2609                                 free_inode_rec(rec);
2610                                 continue;
2611                         }
2612
2613                         if (list_empty(&rec->backrefs))
2614                                 continue;
2615
2616                         ret = repair_inode_backrefs(root, rec, inode_cache,
2617                                                     stage == 1);
2618                         if (ret < 0) {
2619                                 err = ret;
2620                                 stage = 2;
2621                                 break;
2622                         } if (ret > 0) {
2623                                 err = -EAGAIN;
2624                         }
2625                 }
2626         }
2627         if (err)
2628                 return err;
2629
2630         rec = get_inode_rec(inode_cache, root_dirid, 0);
2631         BUG_ON(IS_ERR(rec));
2632         if (rec) {
2633                 ret = check_root_dir(rec);
2634                 if (ret) {
2635                         fprintf(stderr, "root %llu root dir %llu error\n",
2636                                 (unsigned long long)root->root_key.objectid,
2637                                 (unsigned long long)root_dirid);
2638                         print_inode_error(root, rec);
2639                         error++;
2640                 }
2641         } else {
2642                 if (repair) {
2643                         struct btrfs_trans_handle *trans;
2644
2645                         trans = btrfs_start_transaction(root, 1);
2646                         if (IS_ERR(trans)) {
2647                                 err = PTR_ERR(trans);
2648                                 return err;
2649                         }
2650
2651                         fprintf(stderr,
2652                                 "root %llu missing its root dir, recreating\n",
2653                                 (unsigned long long)root->objectid);
2654
2655                         ret = btrfs_make_root_dir(trans, root, root_dirid);
2656                         BUG_ON(ret);
2657
2658                         btrfs_commit_transaction(trans, root);
2659                         return -EAGAIN;
2660                 }
2661
2662                 fprintf(stderr, "root %llu root dir %llu not found\n",
2663                         (unsigned long long)root->root_key.objectid,
2664                         (unsigned long long)root_dirid);
2665         }
2666
2667         while (1) {
2668                 cache = search_cache_extent(inode_cache, 0);
2669                 if (!cache)
2670                         break;
2671                 node = container_of(cache, struct ptr_node, cache);
2672                 rec = node->data;
2673                 remove_cache_extent(inode_cache, &node->cache);
2674                 free(node);
2675                 if (rec->ino == root_dirid ||
2676                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
2677                         free_inode_rec(rec);
2678                         continue;
2679                 }
2680
2681                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2682                         ret = check_orphan_item(root, rec->ino);
2683                         if (ret == 0)
2684                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2685                         if (can_free_inode_rec(rec)) {
2686                                 free_inode_rec(rec);
2687                                 continue;
2688                         }
2689                 }
2690
2691                 if (!rec->found_inode_item)
2692                         rec->errors |= I_ERR_NO_INODE_ITEM;
2693                 if (rec->found_link != rec->nlink)
2694                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2695                 if (repair) {
2696                         ret = try_repair_inode(root, rec);
2697                         if (ret == 0 && can_free_inode_rec(rec)) {
2698                                 free_inode_rec(rec);
2699                                 continue;
2700                         }
2701                         ret = 0;
2702                 }
2703
2704                 if (!(repair && ret == 0))
2705                         error++;
2706                 print_inode_error(root, rec);
2707                 list_for_each_entry(backref, &rec->backrefs, list) {
2708                         if (!backref->found_dir_item)
2709                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
2710                         if (!backref->found_dir_index)
2711                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
2712                         if (!backref->found_inode_ref)
2713                                 backref->errors |= REF_ERR_NO_INODE_REF;
2714                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
2715                                 " namelen %u name %s filetype %d errors %x",
2716                                 (unsigned long long)backref->dir,
2717                                 (unsigned long long)backref->index,
2718                                 backref->namelen, backref->name,
2719                                 backref->filetype, backref->errors);
2720                         print_ref_error(backref->errors);
2721                 }
2722                 free_inode_rec(rec);
2723         }
2724         return (error > 0) ? -1 : 0;
2725 }
2726
2727 static struct root_record *get_root_rec(struct cache_tree *root_cache,
2728                                         u64 objectid)
2729 {
2730         struct cache_extent *cache;
2731         struct root_record *rec = NULL;
2732         int ret;
2733
2734         cache = lookup_cache_extent(root_cache, objectid, 1);
2735         if (cache) {
2736                 rec = container_of(cache, struct root_record, cache);
2737         } else {
2738                 rec = calloc(1, sizeof(*rec));
2739                 if (!rec)
2740                         return ERR_PTR(-ENOMEM);
2741                 rec->objectid = objectid;
2742                 INIT_LIST_HEAD(&rec->backrefs);
2743                 rec->cache.start = objectid;
2744                 rec->cache.size = 1;
2745
2746                 ret = insert_cache_extent(root_cache, &rec->cache);
2747                 if (ret)
2748                         return ERR_PTR(-EEXIST);
2749         }
2750         return rec;
2751 }
2752
2753 static struct root_backref *get_root_backref(struct root_record *rec,
2754                                              u64 ref_root, u64 dir, u64 index,
2755                                              const char *name, int namelen)
2756 {
2757         struct root_backref *backref;
2758
2759         list_for_each_entry(backref, &rec->backrefs, list) {
2760                 if (backref->ref_root != ref_root || backref->dir != dir ||
2761                     backref->namelen != namelen)
2762                         continue;
2763                 if (memcmp(name, backref->name, namelen))
2764                         continue;
2765                 return backref;
2766         }
2767
2768         backref = calloc(1, sizeof(*backref) + namelen + 1);
2769         if (!backref)
2770                 return NULL;
2771         backref->ref_root = ref_root;
2772         backref->dir = dir;
2773         backref->index = index;
2774         backref->namelen = namelen;
2775         memcpy(backref->name, name, namelen);
2776         backref->name[namelen] = '\0';
2777         list_add_tail(&backref->list, &rec->backrefs);
2778         return backref;
2779 }
2780
2781 static void free_root_record(struct cache_extent *cache)
2782 {
2783         struct root_record *rec;
2784         struct root_backref *backref;
2785
2786         rec = container_of(cache, struct root_record, cache);
2787         while (!list_empty(&rec->backrefs)) {
2788                 backref = to_root_backref(rec->backrefs.next);
2789                 list_del(&backref->list);
2790                 free(backref);
2791         }
2792
2793         free(rec);
2794 }
2795
2796 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
2797
2798 static int add_root_backref(struct cache_tree *root_cache,
2799                             u64 root_id, u64 ref_root, u64 dir, u64 index,
2800                             const char *name, int namelen,
2801                             int item_type, int errors)
2802 {
2803         struct root_record *rec;
2804         struct root_backref *backref;
2805
2806         rec = get_root_rec(root_cache, root_id);
2807         BUG_ON(IS_ERR(rec));
2808         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
2809         BUG_ON(!backref);
2810
2811         backref->errors |= errors;
2812
2813         if (item_type != BTRFS_DIR_ITEM_KEY) {
2814                 if (backref->found_dir_index || backref->found_back_ref ||
2815                     backref->found_forward_ref) {
2816                         if (backref->index != index)
2817                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
2818                 } else {
2819                         backref->index = index;
2820                 }
2821         }
2822
2823         if (item_type == BTRFS_DIR_ITEM_KEY) {
2824                 if (backref->found_forward_ref)
2825                         rec->found_ref++;
2826                 backref->found_dir_item = 1;
2827         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
2828                 backref->found_dir_index = 1;
2829         } else if (item_type == BTRFS_ROOT_REF_KEY) {
2830                 if (backref->found_forward_ref)
2831                         backref->errors |= REF_ERR_DUP_ROOT_REF;
2832                 else if (backref->found_dir_item)
2833                         rec->found_ref++;
2834                 backref->found_forward_ref = 1;
2835         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
2836                 if (backref->found_back_ref)
2837                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
2838                 backref->found_back_ref = 1;
2839         } else {
2840                 BUG_ON(1);
2841         }
2842
2843         if (backref->found_forward_ref && backref->found_dir_item)
2844                 backref->reachable = 1;
2845         return 0;
2846 }
2847
2848 static int merge_root_recs(struct btrfs_root *root,
2849                            struct cache_tree *src_cache,
2850                            struct cache_tree *dst_cache)
2851 {
2852         struct cache_extent *cache;
2853         struct ptr_node *node;
2854         struct inode_record *rec;
2855         struct inode_backref *backref;
2856         int ret = 0;
2857
2858         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
2859                 free_inode_recs_tree(src_cache);
2860                 return 0;
2861         }
2862
2863         while (1) {
2864                 cache = search_cache_extent(src_cache, 0);
2865                 if (!cache)
2866                         break;
2867                 node = container_of(cache, struct ptr_node, cache);
2868                 rec = node->data;
2869                 remove_cache_extent(src_cache, &node->cache);
2870                 free(node);
2871
2872                 ret = is_child_root(root, root->objectid, rec->ino);
2873                 if (ret < 0)
2874                         break;
2875                 else if (ret == 0)
2876                         goto skip;
2877
2878                 list_for_each_entry(backref, &rec->backrefs, list) {
2879                         BUG_ON(backref->found_inode_ref);
2880                         if (backref->found_dir_item)
2881                                 add_root_backref(dst_cache, rec->ino,
2882                                         root->root_key.objectid, backref->dir,
2883                                         backref->index, backref->name,
2884                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
2885                                         backref->errors);
2886                         if (backref->found_dir_index)
2887                                 add_root_backref(dst_cache, rec->ino,
2888                                         root->root_key.objectid, backref->dir,
2889                                         backref->index, backref->name,
2890                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
2891                                         backref->errors);
2892                 }
2893 skip:
2894                 free_inode_rec(rec);
2895         }
2896         if (ret < 0)
2897                 return ret;
2898         return 0;
2899 }
2900
2901 static int check_root_refs(struct btrfs_root *root,
2902                            struct cache_tree *root_cache)
2903 {
2904         struct root_record *rec;
2905         struct root_record *ref_root;
2906         struct root_backref *backref;
2907         struct cache_extent *cache;
2908         int loop = 1;
2909         int ret;
2910         int error;
2911         int errors = 0;
2912
2913         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
2914         BUG_ON(IS_ERR(rec));
2915         rec->found_ref = 1;
2916
2917         /* fixme: this can not detect circular references */
2918         while (loop) {
2919                 loop = 0;
2920                 cache = search_cache_extent(root_cache, 0);
2921                 while (1) {
2922                         if (!cache)
2923                                 break;
2924                         rec = container_of(cache, struct root_record, cache);
2925                         cache = next_cache_extent(cache);
2926
2927                         if (rec->found_ref == 0)
2928                                 continue;
2929
2930                         list_for_each_entry(backref, &rec->backrefs, list) {
2931                                 if (!backref->reachable)
2932                                         continue;
2933
2934                                 ref_root = get_root_rec(root_cache,
2935                                                         backref->ref_root);
2936                                 BUG_ON(IS_ERR(ref_root));
2937                                 if (ref_root->found_ref > 0)
2938                                         continue;
2939
2940                                 backref->reachable = 0;
2941                                 rec->found_ref--;
2942                                 if (rec->found_ref == 0)
2943                                         loop = 1;
2944                         }
2945                 }
2946         }
2947
2948         cache = search_cache_extent(root_cache, 0);
2949         while (1) {
2950                 if (!cache)
2951                         break;
2952                 rec = container_of(cache, struct root_record, cache);
2953                 cache = next_cache_extent(cache);
2954
2955                 if (rec->found_ref == 0 &&
2956                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
2957                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
2958                         ret = check_orphan_item(root->fs_info->tree_root,
2959                                                 rec->objectid);
2960                         if (ret == 0)
2961                                 continue;
2962
2963                         /*
2964                          * If we don't have a root item then we likely just have
2965                          * a dir item in a snapshot for this root but no actual
2966                          * ref key or anything so it's meaningless.
2967                          */
2968                         if (!rec->found_root_item)
2969                                 continue;
2970                         errors++;
2971                         fprintf(stderr, "fs tree %llu not referenced\n",
2972                                 (unsigned long long)rec->objectid);
2973                 }
2974
2975                 error = 0;
2976                 if (rec->found_ref > 0 && !rec->found_root_item)
2977                         error = 1;
2978                 list_for_each_entry(backref, &rec->backrefs, list) {
2979                         if (!backref->found_dir_item)
2980                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
2981                         if (!backref->found_dir_index)
2982                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
2983                         if (!backref->found_back_ref)
2984                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
2985                         if (!backref->found_forward_ref)
2986                                 backref->errors |= REF_ERR_NO_ROOT_REF;
2987                         if (backref->reachable && backref->errors)
2988                                 error = 1;
2989                 }
2990                 if (!error)
2991                         continue;
2992
2993                 errors++;
2994                 fprintf(stderr, "fs tree %llu refs %u %s\n",
2995                         (unsigned long long)rec->objectid, rec->found_ref,
2996                          rec->found_root_item ? "" : "not found");
2997
2998                 list_for_each_entry(backref, &rec->backrefs, list) {
2999                         if (!backref->reachable)
3000                                 continue;
3001                         if (!backref->errors && rec->found_root_item)
3002                                 continue;
3003                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3004                                 " index %llu namelen %u name %s errors %x\n",
3005                                 (unsigned long long)backref->ref_root,
3006                                 (unsigned long long)backref->dir,
3007                                 (unsigned long long)backref->index,
3008                                 backref->namelen, backref->name,
3009                                 backref->errors);
3010                         print_ref_error(backref->errors);
3011                 }
3012         }
3013         return errors > 0 ? 1 : 0;
3014 }
3015
3016 static int process_root_ref(struct extent_buffer *eb, int slot,
3017                             struct btrfs_key *key,
3018                             struct cache_tree *root_cache)
3019 {
3020         u64 dirid;
3021         u64 index;
3022         u32 len;
3023         u32 name_len;
3024         struct btrfs_root_ref *ref;
3025         char namebuf[BTRFS_NAME_LEN];
3026         int error;
3027
3028         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3029
3030         dirid = btrfs_root_ref_dirid(eb, ref);
3031         index = btrfs_root_ref_sequence(eb, ref);
3032         name_len = btrfs_root_ref_name_len(eb, ref);
3033
3034         if (name_len <= BTRFS_NAME_LEN) {
3035                 len = name_len;
3036                 error = 0;
3037         } else {
3038                 len = BTRFS_NAME_LEN;
3039                 error = REF_ERR_NAME_TOO_LONG;
3040         }
3041         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3042
3043         if (key->type == BTRFS_ROOT_REF_KEY) {
3044                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3045                                  index, namebuf, len, key->type, error);
3046         } else {
3047                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3048                                  index, namebuf, len, key->type, error);
3049         }
3050         return 0;
3051 }
3052
3053 static void free_corrupt_block(struct cache_extent *cache)
3054 {
3055         struct btrfs_corrupt_block *corrupt;
3056
3057         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3058         free(corrupt);
3059 }
3060
3061 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3062
3063 /*
3064  * Repair the btree of the given root.
3065  *
3066  * The fix is to remove the node key in corrupt_blocks cache_tree.
3067  * and rebalance the tree.
3068  * After the fix, the btree should be writeable.
3069  */
3070 static int repair_btree(struct btrfs_root *root,
3071                         struct cache_tree *corrupt_blocks)
3072 {
3073         struct btrfs_trans_handle *trans;
3074         struct btrfs_path path;
3075         struct btrfs_corrupt_block *corrupt;
3076         struct cache_extent *cache;
3077         struct btrfs_key key;
3078         u64 offset;
3079         int level;
3080         int ret = 0;
3081
3082         if (cache_tree_empty(corrupt_blocks))
3083                 return 0;
3084
3085         trans = btrfs_start_transaction(root, 1);
3086         if (IS_ERR(trans)) {
3087                 ret = PTR_ERR(trans);
3088                 fprintf(stderr, "Error starting transaction: %s\n",
3089                         strerror(-ret));
3090                 return ret;
3091         }
3092         btrfs_init_path(&path);
3093         cache = first_cache_extent(corrupt_blocks);
3094         while (cache) {
3095                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3096                                        cache);
3097                 level = corrupt->level;
3098                 path.lowest_level = level;
3099                 key.objectid = corrupt->key.objectid;
3100                 key.type = corrupt->key.type;
3101                 key.offset = corrupt->key.offset;
3102
3103                 /*
3104                  * Here we don't want to do any tree balance, since it may
3105                  * cause a balance with corrupted brother leaf/node,
3106                  * so ins_len set to 0 here.
3107                  * Balance will be done after all corrupt node/leaf is deleted.
3108                  */
3109                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3110                 if (ret < 0)
3111                         goto out;
3112                 offset = btrfs_node_blockptr(path.nodes[level],
3113                                              path.slots[level]);
3114
3115                 /* Remove the ptr */
3116                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3117                 if (ret < 0)
3118                         goto out;
3119                 /*
3120                  * Remove the corresponding extent
3121                  * return value is not concerned.
3122                  */
3123                 btrfs_release_path(&path);
3124                 ret = btrfs_free_extent(trans, root, offset,
3125                                 root->fs_info->nodesize, 0,
3126                                 root->root_key.objectid, level - 1, 0);
3127                 cache = next_cache_extent(cache);
3128         }
3129
3130         /* Balance the btree using btrfs_search_slot() */
3131         cache = first_cache_extent(corrupt_blocks);
3132         while (cache) {
3133                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3134                                        cache);
3135                 memcpy(&key, &corrupt->key, sizeof(key));
3136                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3137                 if (ret < 0)
3138                         goto out;
3139                 /* return will always >0 since it won't find the item */
3140                 ret = 0;
3141                 btrfs_release_path(&path);
3142                 cache = next_cache_extent(cache);
3143         }
3144 out:
3145         btrfs_commit_transaction(trans, root);
3146         btrfs_release_path(&path);
3147         return ret;
3148 }
3149
3150 static int check_fs_root(struct btrfs_root *root,
3151                          struct cache_tree *root_cache,
3152                          struct walk_control *wc)
3153 {
3154         int ret = 0;
3155         int err = 0;
3156         int wret;
3157         int level;
3158         struct btrfs_path path;
3159         struct shared_node root_node;
3160         struct root_record *rec;
3161         struct btrfs_root_item *root_item = &root->root_item;
3162         struct cache_tree corrupt_blocks;
3163         struct orphan_data_extent *orphan;
3164         struct orphan_data_extent *tmp;
3165         enum btrfs_tree_block_status status;
3166         struct node_refs nrefs;
3167
3168         /*
3169          * Reuse the corrupt_block cache tree to record corrupted tree block
3170          *
3171          * Unlike the usage in extent tree check, here we do it in a per
3172          * fs/subvol tree base.
3173          */
3174         cache_tree_init(&corrupt_blocks);
3175         root->fs_info->corrupt_blocks = &corrupt_blocks;
3176
3177         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3178                 rec = get_root_rec(root_cache, root->root_key.objectid);
3179                 BUG_ON(IS_ERR(rec));
3180                 if (btrfs_root_refs(root_item) > 0)
3181                         rec->found_root_item = 1;
3182         }
3183
3184         btrfs_init_path(&path);
3185         memset(&root_node, 0, sizeof(root_node));
3186         cache_tree_init(&root_node.root_cache);
3187         cache_tree_init(&root_node.inode_cache);
3188         memset(&nrefs, 0, sizeof(nrefs));
3189
3190         /* Move the orphan extent record to corresponding inode_record */
3191         list_for_each_entry_safe(orphan, tmp,
3192                                  &root->orphan_data_extents, list) {
3193                 struct inode_record *inode;
3194
3195                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3196                                       1);
3197                 BUG_ON(IS_ERR(inode));
3198                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3199                 list_move(&orphan->list, &inode->orphan_extents);
3200         }
3201
3202         level = btrfs_header_level(root->node);
3203         memset(wc->nodes, 0, sizeof(wc->nodes));
3204         wc->nodes[level] = &root_node;
3205         wc->active_node = level;
3206         wc->root_level = level;
3207
3208         /* We may not have checked the root block, lets do that now */
3209         if (btrfs_is_leaf(root->node))
3210                 status = btrfs_check_leaf(root, NULL, root->node);
3211         else
3212                 status = btrfs_check_node(root, NULL, root->node);
3213         if (status != BTRFS_TREE_BLOCK_CLEAN)
3214                 return -EIO;
3215
3216         if (btrfs_root_refs(root_item) > 0 ||
3217             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3218                 path.nodes[level] = root->node;
3219                 extent_buffer_get(root->node);
3220                 path.slots[level] = 0;
3221         } else {
3222                 struct btrfs_key key;
3223                 struct btrfs_disk_key found_key;
3224
3225                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3226                 level = root_item->drop_level;
3227                 path.lowest_level = level;
3228                 if (level > btrfs_header_level(root->node) ||
3229                     level >= BTRFS_MAX_LEVEL) {
3230                         error("ignoring invalid drop level: %u", level);
3231                         goto skip_walking;
3232                 }
3233                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3234                 if (wret < 0)
3235                         goto skip_walking;
3236                 btrfs_node_key(path.nodes[level], &found_key,
3237                                 path.slots[level]);
3238                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3239                                         sizeof(found_key)));
3240         }
3241
3242         while (1) {
3243                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3244                 if (wret < 0)
3245                         ret = wret;
3246                 if (wret != 0)
3247                         break;
3248
3249                 wret = walk_up_tree(root, &path, wc, &level);
3250                 if (wret < 0)
3251                         ret = wret;
3252                 if (wret != 0)
3253                         break;
3254         }
3255 skip_walking:
3256         btrfs_release_path(&path);
3257
3258         if (!cache_tree_empty(&corrupt_blocks)) {
3259                 struct cache_extent *cache;
3260                 struct btrfs_corrupt_block *corrupt;
3261
3262                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3263                        root->root_key.objectid);
3264                 cache = first_cache_extent(&corrupt_blocks);
3265                 while (cache) {
3266                         corrupt = container_of(cache,
3267                                                struct btrfs_corrupt_block,
3268                                                cache);
3269                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3270                                cache->start, corrupt->level,
3271                                corrupt->key.objectid, corrupt->key.type,
3272                                corrupt->key.offset);
3273                         cache = next_cache_extent(cache);
3274                 }
3275                 if (repair) {
3276                         printf("Try to repair the btree for root %llu\n",
3277                                root->root_key.objectid);
3278                         ret = repair_btree(root, &corrupt_blocks);
3279                         if (ret < 0)
3280                                 fprintf(stderr, "Failed to repair btree: %s\n",
3281                                         strerror(-ret));
3282                         if (!ret)
3283                                 printf("Btree for root %llu is fixed\n",
3284                                        root->root_key.objectid);
3285                 }
3286         }
3287
3288         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3289         if (err < 0)
3290                 ret = err;
3291
3292         if (root_node.current) {
3293                 root_node.current->checked = 1;
3294                 maybe_free_inode_rec(&root_node.inode_cache,
3295                                 root_node.current);
3296         }
3297
3298         err = check_inode_recs(root, &root_node.inode_cache);
3299         if (!ret)
3300                 ret = err;
3301
3302         free_corrupt_blocks_tree(&corrupt_blocks);
3303         root->fs_info->corrupt_blocks = NULL;
3304         free_orphan_data_extents(&root->orphan_data_extents);
3305         return ret;
3306 }
3307
3308 static int check_fs_roots(struct btrfs_fs_info *fs_info,
3309                           struct cache_tree *root_cache)
3310 {
3311         struct btrfs_path path;
3312         struct btrfs_key key;
3313         struct walk_control wc;
3314         struct extent_buffer *leaf, *tree_node;
3315         struct btrfs_root *tmp_root;
3316         struct btrfs_root *tree_root = fs_info->tree_root;
3317         int ret;
3318         int err = 0;
3319
3320         if (ctx.progress_enabled) {
3321                 ctx.tp = TASK_FS_ROOTS;
3322                 task_start(ctx.info);
3323         }
3324
3325         /*
3326          * Just in case we made any changes to the extent tree that weren't
3327          * reflected into the free space cache yet.
3328          */
3329         if (repair)
3330                 reset_cached_block_groups(fs_info);
3331         memset(&wc, 0, sizeof(wc));
3332         cache_tree_init(&wc.shared);
3333         btrfs_init_path(&path);
3334
3335 again:
3336         key.offset = 0;
3337         key.objectid = 0;
3338         key.type = BTRFS_ROOT_ITEM_KEY;
3339         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3340         if (ret < 0) {
3341                 err = 1;
3342                 goto out;
3343         }
3344         tree_node = tree_root->node;
3345         while (1) {
3346                 if (tree_node != tree_root->node) {
3347                         free_root_recs_tree(root_cache);
3348                         btrfs_release_path(&path);
3349                         goto again;
3350                 }
3351                 leaf = path.nodes[0];
3352                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3353                         ret = btrfs_next_leaf(tree_root, &path);
3354                         if (ret) {
3355                                 if (ret < 0)
3356                                         err = 1;
3357                                 break;
3358                         }
3359                         leaf = path.nodes[0];
3360                 }
3361                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3362                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3363                     fs_root_objectid(key.objectid)) {
3364                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3365                                 tmp_root = btrfs_read_fs_root_no_cache(
3366                                                 fs_info, &key);
3367                         } else {
3368                                 key.offset = (u64)-1;
3369                                 tmp_root = btrfs_read_fs_root(
3370                                                 fs_info, &key);
3371                         }
3372                         if (IS_ERR(tmp_root)) {
3373                                 err = 1;
3374                                 goto next;
3375                         }
3376                         ret = check_fs_root(tmp_root, root_cache, &wc);
3377                         if (ret == -EAGAIN) {
3378                                 free_root_recs_tree(root_cache);
3379                                 btrfs_release_path(&path);
3380                                 goto again;
3381                         }
3382                         if (ret)
3383                                 err = 1;
3384                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3385                                 btrfs_free_fs_root(tmp_root);
3386                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3387                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3388                         process_root_ref(leaf, path.slots[0], &key,
3389                                          root_cache);
3390                 }
3391 next:
3392                 path.slots[0]++;
3393         }
3394 out:
3395         btrfs_release_path(&path);
3396         if (err)
3397                 free_extent_cache_tree(&wc.shared);
3398         if (!cache_tree_empty(&wc.shared))
3399                 fprintf(stderr, "warning line %d\n", __LINE__);
3400
3401         task_stop(ctx.info);
3402
3403         return err;
3404 }
3405
3406 static struct tree_backref *find_tree_backref(struct extent_record *rec,
3407                                                 u64 parent, u64 root)
3408 {
3409         struct rb_node *node;
3410         struct tree_backref *back = NULL;
3411         struct tree_backref match = {
3412                 .node = {
3413                         .is_data = 0,
3414                 },
3415         };
3416
3417         if (parent) {
3418                 match.parent = parent;
3419                 match.node.full_backref = 1;
3420         } else {
3421                 match.root = root;
3422         }
3423
3424         node = rb_search(&rec->backref_tree, &match.node.node,
3425                          (rb_compare_keys)compare_extent_backref, NULL);
3426         if (node)
3427                 back = to_tree_backref(rb_node_to_extent_backref(node));
3428
3429         return back;
3430 }
3431
3432 static struct data_backref *find_data_backref(struct extent_record *rec,
3433                                                 u64 parent, u64 root,
3434                                                 u64 owner, u64 offset,
3435                                                 int found_ref,
3436                                                 u64 disk_bytenr, u64 bytes)
3437 {
3438         struct rb_node *node;
3439         struct data_backref *back = NULL;
3440         struct data_backref match = {
3441                 .node = {
3442                         .is_data = 1,
3443                 },
3444                 .owner = owner,
3445                 .offset = offset,
3446                 .bytes = bytes,
3447                 .found_ref = found_ref,
3448                 .disk_bytenr = disk_bytenr,
3449         };
3450
3451         if (parent) {
3452                 match.parent = parent;
3453                 match.node.full_backref = 1;
3454         } else {
3455                 match.root = root;
3456         }
3457
3458         node = rb_search(&rec->backref_tree, &match.node.node,
3459                          (rb_compare_keys)compare_extent_backref, NULL);
3460         if (node)
3461                 back = to_data_backref(rb_node_to_extent_backref(node));
3462
3463         return back;
3464 }
3465
3466 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
3467                           struct cache_tree *root_cache)
3468 {
3469         int ret;
3470
3471         if (!ctx.progress_enabled)
3472                 fprintf(stderr, "checking fs roots\n");
3473         if (check_mode == CHECK_MODE_LOWMEM)
3474                 ret = check_fs_roots_lowmem(fs_info);
3475         else
3476                 ret = check_fs_roots(fs_info, root_cache);
3477
3478         return ret;
3479 }
3480
3481 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3482 {
3483         struct extent_backref *back, *tmp;
3484         struct tree_backref *tback;
3485         struct data_backref *dback;
3486         u64 found = 0;
3487         int err = 0;
3488
3489         rbtree_postorder_for_each_entry_safe(back, tmp,
3490                                              &rec->backref_tree, node) {
3491                 if (!back->found_extent_tree) {
3492                         err = 1;
3493                         if (!print_errs)
3494                                 goto out;
3495                         if (back->is_data) {
3496                                 dback = to_data_backref(back);
3497                                 fprintf(stderr,
3498 "data backref %llu %s %llu owner %llu offset %llu num_refs %lu not found in extent tree\n",
3499                                         (unsigned long long)rec->start,
3500                                         back->full_backref ?
3501                                         "parent" : "root",
3502                                         back->full_backref ?
3503                                         (unsigned long long)dback->parent :
3504                                         (unsigned long long)dback->root,
3505                                         (unsigned long long)dback->owner,
3506                                         (unsigned long long)dback->offset,
3507                                         (unsigned long)dback->num_refs);
3508                         } else {
3509                                 tback = to_tree_backref(back);
3510                                 fprintf(stderr,
3511 "tree backref %llu parent %llu root %llu not found in extent tree\n",
3512                                         (unsigned long long)rec->start,
3513                                         (unsigned long long)tback->parent,
3514                                         (unsigned long long)tback->root);
3515                         }
3516                 }
3517                 if (!back->is_data && !back->found_ref) {
3518                         err = 1;
3519                         if (!print_errs)
3520                                 goto out;
3521                         tback = to_tree_backref(back);
3522                         fprintf(stderr,
3523                                 "backref %llu %s %llu not referenced back %p\n",
3524                                 (unsigned long long)rec->start,
3525                                 back->full_backref ? "parent" : "root",
3526                                 back->full_backref ?
3527                                 (unsigned long long)tback->parent :
3528                                 (unsigned long long)tback->root, back);
3529                 }
3530                 if (back->is_data) {
3531                         dback = to_data_backref(back);
3532                         if (dback->found_ref != dback->num_refs) {
3533                                 err = 1;
3534                                 if (!print_errs)
3535                                         goto out;
3536                                 fprintf(stderr,
3537 "incorrect local backref count on %llu %s %llu owner %llu offset %llu found %u wanted %u back %p\n",
3538                                         (unsigned long long)rec->start,
3539                                         back->full_backref ?
3540                                         "parent" : "root",
3541                                         back->full_backref ?
3542                                         (unsigned long long)dback->parent :
3543                                         (unsigned long long)dback->root,
3544                                         (unsigned long long)dback->owner,
3545                                         (unsigned long long)dback->offset,
3546                                         dback->found_ref, dback->num_refs,
3547                                         back);
3548                         }
3549                         if (dback->disk_bytenr != rec->start) {
3550                                 err = 1;
3551                                 if (!print_errs)
3552                                         goto out;
3553                                 fprintf(stderr,
3554 "backref disk bytenr does not match extent record, bytenr=%llu, ref bytenr=%llu\n",
3555                                         (unsigned long long)rec->start,
3556                                         (unsigned long long)dback->disk_bytenr);
3557                         }
3558
3559                         if (dback->bytes != rec->nr) {
3560                                 err = 1;
3561                                 if (!print_errs)
3562                                         goto out;
3563                                 fprintf(stderr,
3564 "backref bytes do not match extent backref, bytenr=%llu, ref bytes=%llu, backref bytes=%llu\n",
3565                                         (unsigned long long)rec->start,
3566                                         (unsigned long long)rec->nr,
3567                                         (unsigned long long)dback->bytes);
3568                         }
3569                 }
3570                 if (!back->is_data) {
3571                         found += 1;
3572                 } else {
3573                         dback = to_data_backref(back);
3574                         found += dback->found_ref;
3575                 }
3576         }
3577         if (found != rec->refs) {
3578                 err = 1;
3579                 if (!print_errs)
3580                         goto out;
3581                 fprintf(stderr,
3582         "incorrect global backref count on %llu found %llu wanted %llu\n",
3583                         (unsigned long long)rec->start,
3584                         (unsigned long long)found,
3585                         (unsigned long long)rec->refs);
3586         }
3587 out:
3588         return err;
3589 }
3590
/* rb_free_nodes() callback: free the extent_backref that embeds @node. */
static void __free_one_backref(struct rb_node *node)
{
        free(rb_node_to_extent_backref(node));
}
3597
3598 static void free_all_extent_backrefs(struct extent_record *rec)
3599 {
3600         rb_free_nodes(&rec->backref_tree, __free_one_backref);
3601 }
3602
3603 static void free_extent_record_cache(struct cache_tree *extent_cache)
3604 {
3605         struct cache_extent *cache;
3606         struct extent_record *rec;
3607
3608         while (1) {
3609                 cache = first_cache_extent(extent_cache);
3610                 if (!cache)
3611                         break;
3612                 rec = container_of(cache, struct extent_record, cache);
3613                 remove_cache_extent(extent_cache, cache);
3614                 free_all_extent_backrefs(rec);
3615                 free(rec);
3616         }
3617 }
3618
3619 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3620                                  struct extent_record *rec)
3621 {
3622         if (rec->content_checked && rec->owner_ref_checked &&
3623             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3624             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3625             !rec->bad_full_backref && !rec->crossing_stripes &&
3626             !rec->wrong_chunk_type) {
3627                 remove_cache_extent(extent_cache, &rec->cache);
3628                 free_all_extent_backrefs(rec);
3629                 list_del_init(&rec->list);
3630                 free(rec);
3631         }
3632         return 0;
3633 }
3634
3635 static int check_owner_ref(struct btrfs_root *root,
3636                             struct extent_record *rec,
3637                             struct extent_buffer *buf)
3638 {
3639         struct extent_backref *node, *tmp;
3640         struct tree_backref *back;
3641         struct btrfs_root *ref_root;
3642         struct btrfs_key key;
3643         struct btrfs_path path;
3644         struct extent_buffer *parent;
3645         int level;
3646         int found = 0;
3647         int ret;
3648
3649         rbtree_postorder_for_each_entry_safe(node, tmp,
3650                                              &rec->backref_tree, node) {
3651                 if (node->is_data)
3652                         continue;
3653                 if (!node->found_ref)
3654                         continue;
3655                 if (node->full_backref)
3656                         continue;
3657                 back = to_tree_backref(node);
3658                 if (btrfs_header_owner(buf) == back->root)
3659                         return 0;
3660         }
3661         BUG_ON(rec->is_root);
3662
3663         /* try to find the block by search corresponding fs tree */
3664         key.objectid = btrfs_header_owner(buf);
3665         key.type = BTRFS_ROOT_ITEM_KEY;
3666         key.offset = (u64)-1;
3667
3668         ref_root = btrfs_read_fs_root(root->fs_info, &key);
3669         if (IS_ERR(ref_root))
3670                 return 1;
3671
3672         level = btrfs_header_level(buf);
3673         if (level == 0)
3674                 btrfs_item_key_to_cpu(buf, &key, 0);
3675         else
3676                 btrfs_node_key_to_cpu(buf, &key, 0);
3677
3678         btrfs_init_path(&path);
3679         path.lowest_level = level + 1;
3680         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3681         if (ret < 0)
3682                 return 0;
3683
3684         parent = path.nodes[level + 1];
3685         if (parent && buf->start == btrfs_node_blockptr(parent,
3686                                                         path.slots[level + 1]))
3687                 found = 1;
3688
3689         btrfs_release_path(&path);
3690         return found ? 0 : 1;
3691 }
3692
3693 static int is_extent_tree_record(struct extent_record *rec)
3694 {
3695         struct extent_backref *node, *tmp;
3696         struct tree_backref *back;
3697         int is_extent = 0;
3698
3699         rbtree_postorder_for_each_entry_safe(node, tmp,
3700                                              &rec->backref_tree, node) {
3701                 if (node->is_data)
3702                         return 0;
3703                 back = to_tree_backref(node);
3704                 if (node->full_backref)
3705                         return 0;
3706                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3707                         is_extent = 1;
3708         }
3709         return is_extent;
3710 }
3711
3712
3713 static int record_bad_block_io(struct btrfs_fs_info *info,
3714                                struct cache_tree *extent_cache,
3715                                u64 start, u64 len)
3716 {
3717         struct extent_record *rec;
3718         struct cache_extent *cache;
3719         struct btrfs_key key;
3720
3721         cache = lookup_cache_extent(extent_cache, start, len);
3722         if (!cache)
3723                 return 0;
3724
3725         rec = container_of(cache, struct extent_record, cache);
3726         if (!is_extent_tree_record(rec))
3727                 return 0;
3728
3729         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3730         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
3731 }
3732
/*
 * Swap the entries at @slot and @slot + 1 of tree block @buf.
 *
 * For a node (non-zero header level) the two key pointers are exchanged.
 * For a leaf the item data, item offsets, item sizes and keys of the two
 * slots are exchanged.
 *
 * Returns 0 on success or -ENOMEM if a temporary item buffer cannot be
 * allocated.  The caller is expected to pass a @path that refers to @buf;
 * path->slots[0] is modified in the leaf case.
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
                       struct extent_buffer *buf, int slot)
{
        if (btrfs_header_level(buf)) {
                struct btrfs_key_ptr ptr1, ptr2;

                /* Read both key pointers, then write them back crosswise */
                read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
                                   sizeof(struct btrfs_key_ptr));
                read_extent_buffer(buf, &ptr2,
                                   btrfs_node_key_ptr_offset(slot + 1),
                                   sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr1,
                                    btrfs_node_key_ptr_offset(slot + 1),
                                    sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr2,
                                    btrfs_node_key_ptr_offset(slot),
                                    sizeof(struct btrfs_key_ptr));
                if (slot == 0) {
                        struct btrfs_disk_key key;

                        /*
                         * The first key of the node changed; presumably this
                         * propagates it to the levels above - confirm against
                         * btrfs_fixup_low_keys().
                         */
                        btrfs_node_key(buf, &key, 0);
                        btrfs_fixup_low_keys(root, path, &key,
                                             btrfs_header_level(buf) + 1);
                }
        } else {
                struct btrfs_item *item1, *item2;
                struct btrfs_key k1, k2;
                char *item1_data, *item2_data;
                u32 item1_offset, item2_offset, item1_size, item2_size;

                item1 = btrfs_item_nr(slot);
                item2 = btrfs_item_nr(slot + 1);
                btrfs_item_key_to_cpu(buf, &k1, slot);
                btrfs_item_key_to_cpu(buf, &k2, slot + 1);
                item1_offset = btrfs_item_offset(buf, item1);
                item2_offset = btrfs_item_offset(buf, item2);
                item1_size = btrfs_item_size(buf, item1);
                item2_size = btrfs_item_size(buf, item2);

                /* Temporary copies of both item payloads */
                item1_data = malloc(item1_size);
                if (!item1_data)
                        return -ENOMEM;
                item2_data = malloc(item2_size);
                if (!item2_data) {
                        free(item1_data);
                        return -ENOMEM;
                }

                read_extent_buffer(buf, item1_data, item1_offset, item1_size);
                read_extent_buffer(buf, item2_data, item2_offset, item2_size);

                /*
                 * NOTE(review): each buffer is written back using the OTHER
                 * item's size.  item1_data holds item1_size bytes but
                 * item2_size bytes are copied from it (and vice versa), so
                 * differently sized items look like an out-of-bounds read of
                 * the temporary buffer - confirm before relying on this path.
                 */
                write_extent_buffer(buf, item1_data, item2_offset, item2_size);
                write_extent_buffer(buf, item2_data, item1_offset, item1_size);
                free(item1_data);
                free(item2_data);

                /*
                 * Exchange offsets and sizes in the item headers.
                 * NOTE(review): together with the data swap above and the
                 * key swap below, both the payload and the header that points
                 * to it are exchanged - verify the resulting key/data pairing
                 * is the intended one.
                 */
                btrfs_set_item_offset(buf, item1, item2_offset);
                btrfs_set_item_offset(buf, item2, item1_offset);
                btrfs_set_item_size(buf, item1, item2_size);
                btrfs_set_item_size(buf, item2, item1_size);

                /* The key setter works on path->slots[0], so aim it per slot */
                path->slots[0] = slot;
                btrfs_set_item_key_unsafe(root, path, &k2);
                path->slots[0] = slot + 1;
                btrfs_set_item_key_unsafe(root, path, &k1);
        }
        return 0;
}
3801
3802 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
3803 {
3804         struct extent_buffer *buf;
3805         struct btrfs_key k1, k2;
3806         int i;
3807         int level = path->lowest_level;
3808         int ret = -EIO;
3809
3810         buf = path->nodes[level];
3811         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
3812                 if (level) {
3813                         btrfs_node_key_to_cpu(buf, &k1, i);
3814                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
3815                 } else {
3816                         btrfs_item_key_to_cpu(buf, &k1, i);
3817                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
3818                 }
3819                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
3820                         continue;
3821                 ret = swap_values(root, path, buf, i);
3822                 if (ret)
3823                         break;
3824                 btrfs_mark_buffer_dirty(buf);
3825                 i = 0;
3826         }
3827         return ret;
3828 }
3829
3830 static int delete_bogus_item(struct btrfs_root *root,
3831                              struct btrfs_path *path,
3832                              struct extent_buffer *buf, int slot)
3833 {
3834         struct btrfs_key key;
3835         int nritems = btrfs_header_nritems(buf);
3836
3837         btrfs_item_key_to_cpu(buf, &key, slot);
3838
3839         /* These are all the keys we can deal with missing. */
3840         if (key.type != BTRFS_DIR_INDEX_KEY &&
3841             key.type != BTRFS_EXTENT_ITEM_KEY &&
3842             key.type != BTRFS_METADATA_ITEM_KEY &&
3843             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
3844             key.type != BTRFS_EXTENT_DATA_REF_KEY)
3845                 return -1;
3846
3847         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
3848                (unsigned long long)key.objectid, key.type,
3849                (unsigned long long)key.offset, slot, buf->start);
3850         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
3851                               btrfs_item_nr_offset(slot + 1),
3852                               sizeof(struct btrfs_item) *
3853                               (nritems - slot - 1));
3854         btrfs_set_header_nritems(buf, nritems - 1);
3855         if (slot == 0) {
3856                 struct btrfs_disk_key disk_key;
3857
3858                 btrfs_item_key(buf, &disk_key, 0);
3859                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
3860         }
3861         btrfs_mark_buffer_dirty(buf);
3862         return 0;
3863 }
3864
3865 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
3866 {
3867         struct extent_buffer *buf;
3868         int i;
3869         int ret = 0;
3870
3871         /* We should only get this for leaves */
3872         BUG_ON(path->lowest_level);
3873         buf = path->nodes[0];
3874 again:
3875         for (i = 0; i < btrfs_header_nritems(buf); i++) {
3876                 unsigned int shift = 0, offset;
3877
3878                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
3879                     BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
3880                         if (btrfs_item_end_nr(buf, i) >
3881                             BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
3882                                 ret = delete_bogus_item(root, path, buf, i);
3883                                 if (!ret)
3884                                         goto again;
3885                                 fprintf(stderr,
3886                                 "item is off the end of the leaf, can't fix\n");
3887                                 ret = -EIO;
3888                                 break;
3889                         }
3890                         shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
3891                                 btrfs_item_end_nr(buf, i);
3892                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
3893                            btrfs_item_offset_nr(buf, i - 1)) {
3894                         if (btrfs_item_end_nr(buf, i) >
3895                             btrfs_item_offset_nr(buf, i - 1)) {
3896                                 ret = delete_bogus_item(root, path, buf, i);
3897                                 if (!ret)
3898                                         goto again;
3899                                 fprintf(stderr, "items overlap, can't fix\n");
3900                                 ret = -EIO;
3901                                 break;
3902                         }
3903                         shift = btrfs_item_offset_nr(buf, i - 1) -
3904                                 btrfs_item_end_nr(buf, i);
3905                 }
3906                 if (!shift)
3907                         continue;
3908
3909                 printf("Shifting item nr %d by %u bytes in block %llu\n",
3910                        i, shift, (unsigned long long)buf->start);
3911                 offset = btrfs_item_offset_nr(buf, i);
3912                 memmove_extent_buffer(buf,
3913                                       btrfs_leaf_data(buf) + offset + shift,
3914                                       btrfs_leaf_data(buf) + offset,
3915                                       btrfs_item_size_nr(buf, i));
3916                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
3917                                       offset + shift);
3918                 btrfs_mark_buffer_dirty(buf);
3919         }
3920
3921         /*
3922          * We may have moved things, in which case we want to exit so we don't
3923          * write those changes out.  Once we have proper abort functionality in
3924          * progs this can be changed to something nicer.
3925          */
3926         BUG_ON(ret);
3927         return ret;
3928 }
3929
3930 /*
3931  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
3932  * then just return -EIO.
3933  */
3934 static int try_to_fix_bad_block(struct btrfs_root *root,
3935                                 struct extent_buffer *buf,
3936                                 enum btrfs_tree_block_status status)
3937 {
3938         struct btrfs_trans_handle *trans;
3939         struct ulist *roots;
3940         struct ulist_node *node;
3941         struct btrfs_root *search_root;
3942         struct btrfs_path path;
3943         struct ulist_iterator iter;
3944         struct btrfs_key root_key, key;
3945         int ret;
3946
3947         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
3948             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
3949                 return -EIO;
3950
3951         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
3952         if (ret)
3953                 return -EIO;
3954
3955         btrfs_init_path(&path);
3956         ULIST_ITER_INIT(&iter);
3957         while ((node = ulist_next(roots, &iter))) {
3958                 root_key.objectid = node->val;
3959                 root_key.type = BTRFS_ROOT_ITEM_KEY;
3960                 root_key.offset = (u64)-1;
3961
3962                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
3963                 if (IS_ERR(root)) {
3964                         ret = -EIO;
3965                         break;
3966                 }
3967
3968
3969                 trans = btrfs_start_transaction(search_root, 0);
3970                 if (IS_ERR(trans)) {
3971                         ret = PTR_ERR(trans);
3972                         break;
3973                 }
3974
3975                 path.lowest_level = btrfs_header_level(buf);
3976                 path.skip_check_block = 1;
3977                 if (path.lowest_level)
3978                         btrfs_node_key_to_cpu(buf, &key, 0);
3979                 else
3980                         btrfs_item_key_to_cpu(buf, &key, 0);
3981                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
3982                 if (ret) {
3983                         ret = -EIO;
3984                         btrfs_commit_transaction(trans, search_root);
3985                         break;
3986                 }
3987                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
3988                         ret = fix_key_order(search_root, &path);
3989                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
3990                         ret = fix_item_offset(search_root, &path);
3991                 if (ret) {
3992                         btrfs_commit_transaction(trans, search_root);
3993                         break;
3994                 }
3995                 btrfs_release_path(&path);
3996                 btrfs_commit_transaction(trans, search_root);
3997         }
3998         ulist_free(roots);
3999         btrfs_release_path(&path);
4000         return ret;
4001 }
4002
4003 static int check_block(struct btrfs_root *root,
4004                        struct cache_tree *extent_cache,
4005                        struct extent_buffer *buf, u64 flags)
4006 {
4007         struct extent_record *rec;
4008         struct cache_extent *cache;
4009         struct btrfs_key key;
4010         enum btrfs_tree_block_status status;
4011         int ret = 0;
4012         int level;
4013
4014         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4015         if (!cache)
4016                 return 1;
4017         rec = container_of(cache, struct extent_record, cache);
4018         rec->generation = btrfs_header_generation(buf);
4019
4020         level = btrfs_header_level(buf);
4021         if (btrfs_header_nritems(buf) > 0) {
4022
4023                 if (level == 0)
4024                         btrfs_item_key_to_cpu(buf, &key, 0);
4025                 else
4026                         btrfs_node_key_to_cpu(buf, &key, 0);
4027
4028                 rec->info_objectid = key.objectid;
4029         }
4030         rec->info_level = level;
4031
4032         if (btrfs_is_leaf(buf))
4033                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4034         else
4035                 status = btrfs_check_node(root, &rec->parent_key, buf);
4036
4037         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4038                 if (repair)
4039                         status = try_to_fix_bad_block(root, buf, status);
4040                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4041                         ret = -EIO;
4042                         fprintf(stderr, "bad block %llu\n",
4043                                 (unsigned long long)buf->start);
4044                 } else {
4045                         /*
4046                          * Signal to callers we need to start the scan over
4047                          * again since we'll have cowed blocks.
4048                          */
4049                         ret = -EAGAIN;
4050                 }
4051         } else {
4052                 rec->content_checked = 1;
4053                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4054                         rec->owner_ref_checked = 1;
4055                 else {
4056                         ret = check_owner_ref(root, rec, buf);
4057                         if (!ret)
4058                                 rec->owner_ref_checked = 1;
4059                 }
4060         }
4061         if (!ret)
4062                 maybe_free_extent_rec(extent_cache, rec);
4063         return ret;
4064 }
4065
#if 0
/*
 * Compiled out.  List-based lookup of a tree backref on rec->backrefs:
 * a non-zero @parent matches full backrefs by parent bytenr, otherwise
 * non-full backrefs are matched by @root.  Returns NULL if nothing matches.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
						u64 parent, u64 root)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs;
	     pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct tree_backref *back;

		if (node->is_data)
			continue;

		back = to_tree_backref(node);
		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
		} else if (!node->full_backref && back->root == root) {
			return back;
		}
	}
	return NULL;
}
#endif
4095
4096 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4097                                                 u64 parent, u64 root)
4098 {
4099         struct tree_backref *ref = malloc(sizeof(*ref));
4100
4101         if (!ref)
4102                 return NULL;
4103         memset(&ref->node, 0, sizeof(ref->node));
4104         if (parent > 0) {
4105                 ref->parent = parent;
4106                 ref->node.full_backref = 1;
4107         } else {
4108                 ref->root = root;
4109                 ref->node.full_backref = 0;
4110         }
4111
4112         return ref;
4113 }
4114
#if 0
/*
 * Compiled out.  List-based lookup of a data backref on rec->backrefs.
 *
 * A non-zero @parent matches full backrefs by parent bytenr.  Otherwise
 * non-full backrefs are matched on (root, owner, offset); when @found_ref is
 * set and the candidate was already found, it is skipped unless its bytes and
 * disk_bytenr also match.  Returns NULL if nothing matches.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
						u64 parent, u64 root,
						u64 owner, u64 offset,
						int found_ref,
						u64 disk_bytenr, u64 bytes)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs;
	     pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct data_backref *back;

		if (!node->is_data)
			continue;

		back = to_data_backref(node);
		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
			continue;
		}

		if (node->full_backref)
			continue;
		if (back->root != root || back->owner != owner ||
		    back->offset != offset)
			continue;
		if (found_ref && node->found_ref &&
		    (back->bytes != bytes ||
		     back->disk_bytenr != disk_bytenr))
			continue;
		return back;
	}
	return NULL;
}
#endif
4153
4154 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4155                                                 u64 parent, u64 root,
4156                                                 u64 owner, u64 offset,
4157                                                 u64 max_size)
4158 {
4159         struct data_backref *ref = malloc(sizeof(*ref));
4160
4161         if (!ref)
4162                 return NULL;
4163         memset(&ref->node, 0, sizeof(ref->node));
4164         ref->node.is_data = 1;
4165
4166         if (parent > 0) {
4167                 ref->parent = parent;
4168                 ref->owner = 0;
4169                 ref->offset = 0;
4170                 ref->node.full_backref = 1;
4171         } else {
4172                 ref->root = root;
4173                 ref->owner = owner;
4174                 ref->offset = offset;
4175                 ref->node.full_backref = 0;
4176         }
4177         ref->bytes = max_size;
4178         ref->found_ref = 0;
4179         ref->num_refs = 0;
4180         if (max_size > rec->max_size)
4181                 rec->max_size = max_size;
4182         return ref;
4183 }
4184
4185 /* Check if the type of extent matches with its chunk */
4186 static void check_extent_type(struct extent_record *rec)
4187 {
4188         struct btrfs_block_group_cache *bg_cache;
4189
4190         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4191         if (!bg_cache)
4192                 return;
4193
4194         /* data extent, check chunk directly*/
4195         if (!rec->metadata) {
4196                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4197                         rec->wrong_chunk_type = 1;
4198                 return;
4199         }
4200
4201         /* metadata extent, check the obvious case first */
4202         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4203                                  BTRFS_BLOCK_GROUP_METADATA))) {
4204                 rec->wrong_chunk_type = 1;
4205                 return;
4206         }
4207
4208         /*
4209          * Check SYSTEM extent, as it's also marked as metadata, we can only
4210          * make sure it's a SYSTEM extent by its backref
4211          */
4212         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4213                 struct extent_backref *node;
4214                 struct tree_backref *tback;
4215                 u64 bg_type;
4216
4217                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4218                 if (node->is_data) {
4219                         /* tree block shouldn't have data backref */
4220                         rec->wrong_chunk_type = 1;
4221                         return;
4222                 }
4223                 tback = container_of(node, struct tree_backref, node);
4224
4225                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4226                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4227                 else
4228                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4229                 if (!(bg_cache->flags & bg_type))
4230                         rec->wrong_chunk_type = 1;
4231         }
4232 }
4233
4234 /*
4235  * Allocate a new extent record, fill default values from @tmpl and insert int
4236  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4237  * the cache, otherwise it fails.
4238  */
4239 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4240                 struct extent_record *tmpl)
4241 {
4242         struct extent_record *rec;
4243         int ret = 0;
4244
4245         BUG_ON(tmpl->max_size == 0);
4246         rec = malloc(sizeof(*rec));
4247         if (!rec)
4248                 return -ENOMEM;
4249         rec->start = tmpl->start;
4250         rec->max_size = tmpl->max_size;
4251         rec->nr = max(tmpl->nr, tmpl->max_size);
4252         rec->found_rec = tmpl->found_rec;
4253         rec->content_checked = tmpl->content_checked;
4254         rec->owner_ref_checked = tmpl->owner_ref_checked;
4255         rec->num_duplicates = 0;
4256         rec->metadata = tmpl->metadata;
4257         rec->flag_block_full_backref = FLAG_UNSET;
4258         rec->bad_full_backref = 0;
4259         rec->crossing_stripes = 0;
4260         rec->wrong_chunk_type = 0;
4261         rec->is_root = tmpl->is_root;
4262         rec->refs = tmpl->refs;
4263         rec->extent_item_refs = tmpl->extent_item_refs;
4264         rec->parent_generation = tmpl->parent_generation;
4265         INIT_LIST_HEAD(&rec->backrefs);
4266         INIT_LIST_HEAD(&rec->dups);
4267         INIT_LIST_HEAD(&rec->list);
4268         rec->backref_tree = RB_ROOT;
4269         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4270         rec->cache.start = tmpl->start;
4271         rec->cache.size = tmpl->nr;
4272         ret = insert_cache_extent(extent_cache, &rec->cache);
4273         if (ret) {
4274                 free(rec);
4275                 return ret;
4276         }
4277         bytes_used += rec->nr;
4278
4279         if (tmpl->metadata)
4280                 rec->crossing_stripes = check_crossing_stripes(global_info,
4281                                 rec->start, global_info->nodesize);
4282         check_extent_type(rec);
4283         return ret;
4284 }
4285
4286 /*
4287  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4288  * some are hints:
4289  * - refs              - if found, increase refs
4290  * - is_root           - if found, set
4291  * - content_checked   - if found, set
4292  * - owner_ref_checked - if found, set
4293  *
4294  * If not found, create a new one, initialize and insert.
4295  */
4296 static int add_extent_rec(struct cache_tree *extent_cache,
4297                 struct extent_record *tmpl)
4298 {
4299         struct extent_record *rec;
4300         struct cache_extent *cache;
4301         int ret = 0;
4302         int dup = 0;
4303
4304         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4305         if (cache) {
4306                 rec = container_of(cache, struct extent_record, cache);
4307                 if (tmpl->refs)
4308                         rec->refs++;
4309                 if (rec->nr == 1)
4310                         rec->nr = max(tmpl->nr, tmpl->max_size);
4311
4312                 /*
4313                  * We need to make sure to reset nr to whatever the extent
4314                  * record says was the real size, this way we can compare it to
4315                  * the backrefs.
4316                  */
4317                 if (tmpl->found_rec) {
4318                         if (tmpl->start != rec->start || rec->found_rec) {
4319                                 struct extent_record *tmp;
4320
4321                                 dup = 1;
4322                                 if (list_empty(&rec->list))
4323                                         list_add_tail(&rec->list,
4324                                                       &duplicate_extents);
4325
4326                                 /*
4327                                  * We have to do this song and dance in case we
4328                                  * find an extent record that falls inside of
4329                                  * our current extent record but does not have
4330                                  * the same objectid.
4331                                  */
4332                                 tmp = malloc(sizeof(*tmp));
4333                                 if (!tmp)
4334                                         return -ENOMEM;
4335                                 tmp->start = tmpl->start;
4336                                 tmp->max_size = tmpl->max_size;
4337                                 tmp->nr = tmpl->nr;
4338                                 tmp->found_rec = 1;
4339                                 tmp->metadata = tmpl->metadata;
4340                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4341                                 INIT_LIST_HEAD(&tmp->list);
4342                                 list_add_tail(&tmp->list, &rec->dups);
4343                                 rec->num_duplicates++;
4344                         } else {
4345                                 rec->nr = tmpl->nr;
4346                                 rec->found_rec = 1;
4347                         }
4348                 }
4349
4350                 if (tmpl->extent_item_refs && !dup) {
4351                         if (rec->extent_item_refs) {
4352                                 fprintf(stderr,
4353                         "block %llu rec extent_item_refs %llu, passed %llu\n",
4354                                         (unsigned long long)tmpl->start,
4355                                         (unsigned long long)
4356                                                         rec->extent_item_refs,
4357                                         (unsigned long long)
4358                                                         tmpl->extent_item_refs);
4359                         }
4360                         rec->extent_item_refs = tmpl->extent_item_refs;
4361                 }
4362                 if (tmpl->is_root)
4363                         rec->is_root = 1;
4364                 if (tmpl->content_checked)
4365                         rec->content_checked = 1;
4366                 if (tmpl->owner_ref_checked)
4367                         rec->owner_ref_checked = 1;
4368                 memcpy(&rec->parent_key, &tmpl->parent_key,
4369                                 sizeof(tmpl->parent_key));
4370                 if (tmpl->parent_generation)
4371                         rec->parent_generation = tmpl->parent_generation;
4372                 if (rec->max_size < tmpl->max_size)
4373                         rec->max_size = tmpl->max_size;
4374
4375                 /*
4376                  * A metadata extent can't cross stripe_len boundary, otherwise
4377                  * kernel scrub won't be able to handle it.
4378                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4379                  * it.
4380                  */
4381                 if (tmpl->metadata)
4382                         rec->crossing_stripes = check_crossing_stripes(
4383                                         global_info, rec->start,
4384                                         global_info->nodesize);
4385                 check_extent_type(rec);
4386                 maybe_free_extent_rec(extent_cache, rec);
4387                 return ret;
4388         }
4389
4390         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4391
4392         return ret;
4393 }
4394
4395 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4396                             u64 parent, u64 root, int found_ref)
4397 {
4398         struct extent_record *rec;
4399         struct tree_backref *back;
4400         struct cache_extent *cache;
4401         int ret;
4402         bool insert = false;
4403
4404         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4405         if (!cache) {
4406                 struct extent_record tmpl;
4407
4408                 memset(&tmpl, 0, sizeof(tmpl));
4409                 tmpl.start = bytenr;
4410                 tmpl.nr = 1;
4411                 tmpl.metadata = 1;
4412                 tmpl.max_size = 1;
4413
4414                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4415                 if (ret)
4416                         return ret;
4417
4418                 /* really a bug in cache_extent implement now */
4419                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4420                 if (!cache)
4421                         return -ENOENT;
4422         }
4423
4424         rec = container_of(cache, struct extent_record, cache);
4425         if (rec->start != bytenr) {
4426                 /*
4427                  * Several cause, from unaligned bytenr to over lapping extents
4428                  */
4429                 return -EEXIST;
4430         }
4431
4432         back = find_tree_backref(rec, parent, root);
4433         if (!back) {
4434                 back = alloc_tree_backref(rec, parent, root);
4435                 if (!back)
4436                         return -ENOMEM;
4437                 insert = true;
4438         }
4439
4440         if (found_ref) {
4441                 if (back->node.found_ref) {
4442                         fprintf(stderr,
4443         "Extent back ref already exists for %llu parent %llu root %llu\n",
4444                                 (unsigned long long)bytenr,
4445                                 (unsigned long long)parent,
4446                                 (unsigned long long)root);
4447                 }
4448                 back->node.found_ref = 1;
4449         } else {
4450                 if (back->node.found_extent_tree) {
4451                         fprintf(stderr,
4452         "extent back ref already exists for %llu parent %llu root %llu\n",
4453                                 (unsigned long long)bytenr,
4454                                 (unsigned long long)parent,
4455                                 (unsigned long long)root);
4456                 }
4457                 back->node.found_extent_tree = 1;
4458         }
4459         if (insert)
4460                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
4461                         compare_extent_backref));
4462         check_extent_type(rec);
4463         maybe_free_extent_rec(extent_cache, rec);
4464         return 0;
4465 }
4466
4467 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4468                             u64 parent, u64 root, u64 owner, u64 offset,
4469                             u32 num_refs, int found_ref, u64 max_size)
4470 {
4471         struct extent_record *rec;
4472         struct data_backref *back;
4473         struct cache_extent *cache;
4474         int ret;
4475         bool insert = false;
4476
4477         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4478         if (!cache) {
4479                 struct extent_record tmpl;
4480
4481                 memset(&tmpl, 0, sizeof(tmpl));
4482                 tmpl.start = bytenr;
4483                 tmpl.nr = 1;
4484                 tmpl.max_size = max_size;
4485
4486                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4487                 if (ret)
4488                         return ret;
4489
4490                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4491                 if (!cache)
4492                         abort();
4493         }
4494
4495         rec = container_of(cache, struct extent_record, cache);
4496         if (rec->max_size < max_size)
4497                 rec->max_size = max_size;
4498
4499         /*
4500          * If found_ref is set then max_size is the real size and must match the
4501          * existing refs.  So if we have already found a ref then we need to
4502          * make sure that this ref matches the existing one, otherwise we need
4503          * to add a new backref so we can notice that the backrefs don't match
4504          * and we need to figure out who is telling the truth.  This is to
4505          * account for that awful fsync bug I introduced where we'd end up with
4506          * a btrfs_file_extent_item that would have its length include multiple
4507          * prealloc extents or point inside of a prealloc extent.
4508          */
4509         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4510                                  bytenr, max_size);
4511         if (!back) {
4512                 back = alloc_data_backref(rec, parent, root, owner, offset,
4513                                           max_size);
4514                 BUG_ON(!back);
4515                 insert = true;
4516         }
4517
4518         if (found_ref) {
4519                 BUG_ON(num_refs != 1);
4520                 if (back->node.found_ref)
4521                         BUG_ON(back->bytes != max_size);
4522                 back->node.found_ref = 1;
4523                 back->found_ref += 1;
4524                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
4525                         back->bytes = max_size;
4526                         back->disk_bytenr = bytenr;
4527
4528                         /* Need to reinsert if not already in the tree */
4529                         if (!insert) {
4530                                 rb_erase(&back->node.node, &rec->backref_tree);
4531                                 insert = true;
4532                         }
4533                 }
4534                 rec->refs += 1;
4535                 rec->content_checked = 1;
4536                 rec->owner_ref_checked = 1;
4537         } else {
4538                 if (back->node.found_extent_tree) {
4539                         fprintf(stderr,
4540 "Extent back ref already exists for %llu parent %llu root %llu owner %llu offset %llu num_refs %lu\n",
4541                                 (unsigned long long)bytenr,
4542                                 (unsigned long long)parent,
4543                                 (unsigned long long)root,
4544                                 (unsigned long long)owner,
4545                                 (unsigned long long)offset,
4546                                 (unsigned long)num_refs);
4547                 }
4548                 back->num_refs = num_refs;
4549                 back->node.found_extent_tree = 1;
4550         }
4551         if (insert)
4552                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
4553                         compare_extent_backref));
4554
4555         maybe_free_extent_rec(extent_cache, rec);
4556         return 0;
4557 }
4558
4559 static int add_pending(struct cache_tree *pending,
4560                        struct cache_tree *seen, u64 bytenr, u32 size)
4561 {
4562         int ret;
4563
4564         ret = add_cache_extent(seen, bytenr, size);
4565         if (ret)
4566                 return ret;
4567         add_cache_extent(pending, bytenr, size);
4568         return 0;
4569 }
4570
4571 static int pick_next_pending(struct cache_tree *pending,
4572                         struct cache_tree *reada,
4573                         struct cache_tree *nodes,
4574                         u64 last, struct block_info *bits, int bits_nr,
4575                         int *reada_bits)
4576 {
4577         unsigned long node_start = last;
4578         struct cache_extent *cache;
4579         int ret;
4580
4581         cache = search_cache_extent(reada, 0);
4582         if (cache) {
4583                 bits[0].start = cache->start;
4584                 bits[0].size = cache->size;
4585                 *reada_bits = 1;
4586                 return 1;
4587         }
4588         *reada_bits = 0;
4589         if (node_start > 32768)
4590                 node_start -= 32768;
4591
4592         cache = search_cache_extent(nodes, node_start);
4593         if (!cache)
4594                 cache = search_cache_extent(nodes, 0);
4595
4596         if (!cache) {
4597                 cache = search_cache_extent(pending, 0);
4598                 if (!cache)
4599                         return 0;
4600                 ret = 0;
4601                 do {
4602                         bits[ret].start = cache->start;
4603                         bits[ret].size = cache->size;
4604                         cache = next_cache_extent(cache);
4605                         ret++;
4606                 } while (cache && ret < bits_nr);
4607                 return ret;
4608         }
4609
4610         ret = 0;
4611         do {
4612                 bits[ret].start = cache->start;
4613                 bits[ret].size = cache->size;
4614                 cache = next_cache_extent(cache);
4615                 ret++;
4616         } while (cache && ret < bits_nr);
4617
4618         if (bits_nr - ret > 8) {
4619                 u64 lookup = bits[0].start + bits[0].size;
4620                 struct cache_extent *next;
4621
4622                 next = search_cache_extent(pending, lookup);
4623                 while (next) {
4624                         if (next->start - lookup > 32768)
4625                                 break;
4626                         bits[ret].start = next->start;
4627                         bits[ret].size = next->size;
4628                         lookup = next->start + next->size;
4629                         ret++;
4630                         if (ret == bits_nr)
4631                                 break;
4632                         next = next_cache_extent(next);
4633                         if (!next)
4634                                 break;
4635                 }
4636         }
4637         return ret;
4638 }
4639
4640 static void free_chunk_record(struct cache_extent *cache)
4641 {
4642         struct chunk_record *rec;
4643
4644         rec = container_of(cache, struct chunk_record, cache);
4645         list_del_init(&rec->list);
4646         list_del_init(&rec->dextents);
4647         free(rec);
4648 }
4649
4650 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4651 {
4652         cache_tree_free_extents(chunk_cache, free_chunk_record);
4653 }
4654
4655 static void free_device_record(struct rb_node *node)
4656 {
4657         struct device_record *rec;
4658
4659         rec = container_of(node, struct device_record, node);
4660         free(rec);
4661 }
4662
4663 FREE_RB_BASED_TREE(device_cache, free_device_record);
4664
4665 int insert_block_group_record(struct block_group_tree *tree,
4666                               struct block_group_record *bg_rec)
4667 {
4668         int ret;
4669
4670         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4671         if (ret)
4672                 return ret;
4673
4674         list_add_tail(&bg_rec->list, &tree->block_groups);
4675         return 0;
4676 }
4677
4678 static void free_block_group_record(struct cache_extent *cache)
4679 {
4680         struct block_group_record *rec;
4681
4682         rec = container_of(cache, struct block_group_record, cache);
4683         list_del_init(&rec->list);
4684         free(rec);
4685 }
4686
4687 void free_block_group_tree(struct block_group_tree *tree)
4688 {
4689         cache_tree_free_extents(&tree->tree, free_block_group_record);
4690 }
4691
4692 int insert_device_extent_record(struct device_extent_tree *tree,
4693                                 struct device_extent_record *de_rec)
4694 {
4695         int ret;
4696
4697         /*
4698          * Device extent is a bit different from the other extents, because
4699          * the extents which belong to the different devices may have the
4700          * same start and size, so we need use the special extent cache
4701          * search/insert functions.
4702          */
4703         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4704         if (ret)
4705                 return ret;
4706
4707         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4708         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4709         return 0;
4710 }
4711
4712 static void free_device_extent_record(struct cache_extent *cache)
4713 {
4714         struct device_extent_record *rec;
4715
4716         rec = container_of(cache, struct device_extent_record, cache);
4717         if (!list_empty(&rec->chunk_list))
4718                 list_del_init(&rec->chunk_list);
4719         if (!list_empty(&rec->device_list))
4720                 list_del_init(&rec->device_list);
4721         free(rec);
4722 }
4723
4724 void free_device_extent_tree(struct device_extent_tree *tree)
4725 {
4726         cache_tree_free_extents(&tree->tree, free_device_extent_record);
4727 }
4728
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref record.
 *
 * References whose objectid is below BTRFS_FIRST_FREE_OBJECTID are recorded
 * as tree backrefs, everything else as data backrefs carrying the v0 ref
 * count.  Returns the result of the add_*_backref() call that was made.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid, key.offset,
					0, 0, 0,
					btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
}
#endif
4749
4750 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4751                                             struct btrfs_key *key,
4752                                             int slot)
4753 {
4754         struct btrfs_chunk *ptr;
4755         struct chunk_record *rec;
4756         int num_stripes, i;
4757
4758         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4759         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4760
4761         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4762         if (!rec) {
4763                 fprintf(stderr, "memory allocation failed\n");
4764                 exit(-1);
4765         }
4766
4767         INIT_LIST_HEAD(&rec->list);
4768         INIT_LIST_HEAD(&rec->dextents);
4769         rec->bg_rec = NULL;
4770
4771         rec->cache.start = key->offset;
4772         rec->cache.size = btrfs_chunk_length(leaf, ptr);
4773
4774         rec->generation = btrfs_header_generation(leaf);
4775
4776         rec->objectid = key->objectid;
4777         rec->type = key->type;
4778         rec->offset = key->offset;
4779
4780         rec->length = rec->cache.size;
4781         rec->owner = btrfs_chunk_owner(leaf, ptr);
4782         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4783         rec->type_flags = btrfs_chunk_type(leaf, ptr);
4784         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4785         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4786         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4787         rec->num_stripes = num_stripes;
4788         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4789
4790         for (i = 0; i < rec->num_stripes; ++i) {
4791                 rec->stripes[i].devid =
4792                         btrfs_stripe_devid_nr(leaf, ptr, i);
4793                 rec->stripes[i].offset =
4794                         btrfs_stripe_offset_nr(leaf, ptr, i);
4795                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4796                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
4797                                 BTRFS_UUID_SIZE);
4798         }
4799
4800         return rec;
4801 }
4802
4803 static int process_chunk_item(struct cache_tree *chunk_cache,
4804                               struct btrfs_key *key, struct extent_buffer *eb,
4805                               int slot)
4806 {
4807         struct chunk_record *rec;
4808         struct btrfs_chunk *chunk;
4809         int ret = 0;
4810
4811         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
4812         /*
4813          * Do extra check for this chunk item,
4814          *
4815          * It's still possible one can craft a leaf with CHUNK_ITEM, with
4816          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
4817          * and owner<->key_type check.
4818          */
4819         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
4820                                       key->offset);
4821         if (ret < 0) {
4822                 error("chunk(%llu, %llu) is not valid, ignore it",
4823                       key->offset, btrfs_chunk_length(eb, chunk));
4824                 return 0;
4825         }
4826         rec = btrfs_new_chunk_record(eb, key, slot);
4827         ret = insert_cache_extent(chunk_cache, &rec->cache);
4828         if (ret) {
4829                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
4830                         rec->offset, rec->length);
4831                 free(rec);
4832         }
4833
4834         return ret;
4835 }
4836
4837 static int process_device_item(struct rb_root *dev_cache,
4838                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
4839 {
4840         struct btrfs_dev_item *ptr;
4841         struct device_record *rec;
4842         int ret = 0;
4843
4844         ptr = btrfs_item_ptr(eb,
4845                 slot, struct btrfs_dev_item);
4846
4847         rec = malloc(sizeof(*rec));
4848         if (!rec) {
4849                 fprintf(stderr, "memory allocation failed\n");
4850                 return -ENOMEM;
4851         }
4852
4853         rec->devid = key->offset;
4854         rec->generation = btrfs_header_generation(eb);
4855
4856         rec->objectid = key->objectid;
4857         rec->type = key->type;
4858         rec->offset = key->offset;
4859
4860         rec->devid = btrfs_device_id(eb, ptr);
4861         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
4862         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
4863
4864         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
4865         if (ret) {
4866                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
4867                 free(rec);
4868         }
4869
4870         return ret;
4871 }
4872
4873 struct block_group_record *
4874 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
4875                              int slot)
4876 {
4877         struct btrfs_block_group_item *ptr;
4878         struct block_group_record *rec;
4879
4880         rec = calloc(1, sizeof(*rec));
4881         if (!rec) {
4882                 fprintf(stderr, "memory allocation failed\n");
4883                 exit(-1);
4884         }
4885
4886         rec->cache.start = key->objectid;
4887         rec->cache.size = key->offset;
4888
4889         rec->generation = btrfs_header_generation(leaf);
4890
4891         rec->objectid = key->objectid;
4892         rec->type = key->type;
4893         rec->offset = key->offset;
4894
4895         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
4896         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
4897
4898         INIT_LIST_HEAD(&rec->list);
4899
4900         return rec;
4901 }
4902
4903 static int process_block_group_item(struct block_group_tree *block_group_cache,
4904                                     struct btrfs_key *key,
4905                                     struct extent_buffer *eb, int slot)
4906 {
4907         struct block_group_record *rec;
4908         int ret = 0;
4909
4910         rec = btrfs_new_block_group_record(eb, key, slot);
4911         ret = insert_block_group_record(block_group_cache, rec);
4912         if (ret) {
4913                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
4914                         rec->objectid, rec->offset);
4915                 free(rec);
4916         }
4917
4918         return ret;
4919 }
4920
4921 struct device_extent_record *
4922 btrfs_new_device_extent_record(struct extent_buffer *leaf,
4923                                struct btrfs_key *key, int slot)
4924 {
4925         struct device_extent_record *rec;
4926         struct btrfs_dev_extent *ptr;
4927
4928         rec = calloc(1, sizeof(*rec));
4929         if (!rec) {
4930                 fprintf(stderr, "memory allocation failed\n");
4931                 exit(-1);
4932         }
4933
4934         rec->cache.objectid = key->objectid;
4935         rec->cache.start = key->offset;
4936
4937         rec->generation = btrfs_header_generation(leaf);
4938
4939         rec->objectid = key->objectid;
4940         rec->type = key->type;
4941         rec->offset = key->offset;
4942
4943         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
4944         rec->chunk_objecteid =
4945                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
4946         rec->chunk_offset =
4947                 btrfs_dev_extent_chunk_offset(leaf, ptr);
4948         rec->length = btrfs_dev_extent_length(leaf, ptr);
4949         rec->cache.size = rec->length;
4950
4951         INIT_LIST_HEAD(&rec->chunk_list);
4952         INIT_LIST_HEAD(&rec->device_list);
4953
4954         return rec;
4955 }
4956
4957 static int
4958 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
4959                            struct btrfs_key *key, struct extent_buffer *eb,
4960                            int slot)
4961 {
4962         struct device_extent_record *rec;
4963         int ret;
4964
4965         rec = btrfs_new_device_extent_record(eb, key, slot);
4966         ret = insert_device_extent_record(dev_extent_cache, rec);
4967         if (ret) {
4968                 fprintf(stderr,
4969                         "Device extent[%llu, %llu, %llu] existed.\n",
4970                         rec->objectid, rec->offset, rec->length);
4971                 free(rec);
4972         }
4973
4974         return ret;
4975 }
4976
4977 static int process_extent_item(struct btrfs_root *root,
4978                                struct cache_tree *extent_cache,
4979                                struct extent_buffer *eb, int slot)
4980 {
4981         struct btrfs_extent_item *ei;
4982         struct btrfs_extent_inline_ref *iref;
4983         struct btrfs_extent_data_ref *dref;
4984         struct btrfs_shared_data_ref *sref;
4985         struct btrfs_key key;
4986         struct extent_record tmpl;
4987         unsigned long end;
4988         unsigned long ptr;
4989         int ret;
4990         int type;
4991         u32 item_size = btrfs_item_size_nr(eb, slot);
4992         u64 refs = 0;
4993         u64 offset;
4994         u64 num_bytes;
4995         int metadata = 0;
4996
4997         btrfs_item_key_to_cpu(eb, &key, slot);
4998
4999         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5000                 metadata = 1;
5001                 num_bytes = root->fs_info->nodesize;
5002         } else {
5003                 num_bytes = key.offset;
5004         }
5005
5006         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
5007                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5008                       key.objectid, root->fs_info->sectorsize);
5009                 return -EIO;
5010         }
5011         if (item_size < sizeof(*ei)) {
5012 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5013                 struct btrfs_extent_item_v0 *ei0;
5014
5015                 if (item_size != sizeof(*ei0)) {
5016                         error(
5017         "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
5018                                 key.objectid, key.type, key.offset,
5019                                 btrfs_header_bytenr(eb), slot);
5020                         BUG();
5021                 }
5022                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5023                 refs = btrfs_extent_refs_v0(eb, ei0);
5024 #else
5025                 BUG();
5026 #endif
5027                 memset(&tmpl, 0, sizeof(tmpl));
5028                 tmpl.start = key.objectid;
5029                 tmpl.nr = num_bytes;
5030                 tmpl.extent_item_refs = refs;
5031                 tmpl.metadata = metadata;
5032                 tmpl.found_rec = 1;
5033                 tmpl.max_size = num_bytes;
5034
5035                 return add_extent_rec(extent_cache, &tmpl);
5036         }
5037
5038         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5039         refs = btrfs_extent_refs(eb, ei);
5040         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5041                 metadata = 1;
5042         else
5043                 metadata = 0;
5044         if (metadata && num_bytes != root->fs_info->nodesize) {
5045                 error("ignore invalid metadata extent, length %llu does not equal to %u",
5046                       num_bytes, root->fs_info->nodesize);
5047                 return -EIO;
5048         }
5049         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
5050                 error("ignore invalid data extent, length %llu is not aligned to %u",
5051                       num_bytes, root->fs_info->sectorsize);
5052                 return -EIO;
5053         }
5054
5055         memset(&tmpl, 0, sizeof(tmpl));
5056         tmpl.start = key.objectid;
5057         tmpl.nr = num_bytes;
5058         tmpl.extent_item_refs = refs;
5059         tmpl.metadata = metadata;
5060         tmpl.found_rec = 1;
5061         tmpl.max_size = num_bytes;
5062         add_extent_rec(extent_cache, &tmpl);
5063
5064         ptr = (unsigned long)(ei + 1);
5065         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5066             key.type == BTRFS_EXTENT_ITEM_KEY)
5067                 ptr += sizeof(struct btrfs_tree_block_info);
5068
5069         end = (unsigned long)ei + item_size;
5070         while (ptr < end) {
5071                 iref = (struct btrfs_extent_inline_ref *)ptr;
5072                 type = btrfs_extent_inline_ref_type(eb, iref);
5073                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5074                 switch (type) {
5075                 case BTRFS_TREE_BLOCK_REF_KEY:
5076                         ret = add_tree_backref(extent_cache, key.objectid,
5077                                         0, offset, 0);
5078                         if (ret < 0)
5079                                 error(
5080                         "add_tree_backref failed (extent items tree block): %s",
5081                                       strerror(-ret));
5082                         break;
5083                 case BTRFS_SHARED_BLOCK_REF_KEY:
5084                         ret = add_tree_backref(extent_cache, key.objectid,
5085                                         offset, 0, 0);
5086                         if (ret < 0)
5087                                 error(
5088                         "add_tree_backref failed (extent items shared block): %s",
5089                                       strerror(-ret));
5090                         break;
5091                 case BTRFS_EXTENT_DATA_REF_KEY:
5092                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5093                         add_data_backref(extent_cache, key.objectid, 0,
5094                                         btrfs_extent_data_ref_root(eb, dref),
5095                                         btrfs_extent_data_ref_objectid(eb,
5096                                                                        dref),
5097                                         btrfs_extent_data_ref_offset(eb, dref),
5098                                         btrfs_extent_data_ref_count(eb, dref),
5099                                         0, num_bytes);
5100                         break;
5101                 case BTRFS_SHARED_DATA_REF_KEY:
5102                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5103                         add_data_backref(extent_cache, key.objectid, offset,
5104                                         0, 0, 0,
5105                                         btrfs_shared_data_ref_count(eb, sref),
5106                                         0, num_bytes);
5107                         break;
5108                 default:
5109                         fprintf(stderr,
5110                                 "corrupt extent record: key [%llu,%u,%llu]\n",
5111                                 key.objectid, key.type, num_bytes);
5112                         goto out;
5113                 }
5114                 ptr += btrfs_extent_inline_ref_size(type);
5115         }
5116         WARN_ON(ptr > end);
5117 out:
5118         return 0;
5119 }
5120
5121 static int check_cache_range(struct btrfs_root *root,
5122                              struct btrfs_block_group_cache *cache,
5123                              u64 offset, u64 bytes)
5124 {
5125         struct btrfs_free_space *entry;
5126         u64 *logical;
5127         u64 bytenr;
5128         int stripe_len;
5129         int i, nr, ret;
5130
5131         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5132                 bytenr = btrfs_sb_offset(i);
5133                 ret = btrfs_rmap_block(root->fs_info,
5134                                        cache->key.objectid, bytenr, 0,
5135                                        &logical, &nr, &stripe_len);
5136                 if (ret)
5137                         return ret;
5138
5139                 while (nr--) {
5140                         if (logical[nr] + stripe_len <= offset)
5141                                 continue;
5142                         if (offset + bytes <= logical[nr])
5143                                 continue;
5144                         if (logical[nr] == offset) {
5145                                 if (stripe_len >= bytes) {
5146                                         free(logical);
5147                                         return 0;
5148                                 }
5149                                 bytes -= stripe_len;
5150                                 offset += stripe_len;
5151                         } else if (logical[nr] < offset) {
5152                                 if (logical[nr] + stripe_len >=
5153                                     offset + bytes) {
5154                                         free(logical);
5155                                         return 0;
5156                                 }
5157                                 bytes = (offset + bytes) -
5158                                         (logical[nr] + stripe_len);
5159                                 offset = logical[nr] + stripe_len;
5160                         } else {
5161                                 /*
5162                                  * Could be tricky, the super may land in the
5163                                  * middle of the area we're checking.  First
5164                                  * check the easiest case, it's at the end.
5165                                  */
5166                                 if (logical[nr] + stripe_len >=
5167                                     bytes + offset) {
5168                                         bytes = logical[nr] - offset;
5169                                         continue;
5170                                 }
5171
5172                                 /* Check the left side */
5173                                 ret = check_cache_range(root, cache,
5174                                                         offset,
5175                                                         logical[nr] - offset);
5176                                 if (ret) {
5177                                         free(logical);
5178                                         return ret;
5179                                 }
5180
5181                                 /* Now we continue with the right side */
5182                                 bytes = (offset + bytes) -
5183                                         (logical[nr] + stripe_len);
5184                                 offset = logical[nr] + stripe_len;
5185                         }
5186                 }
5187
5188                 free(logical);
5189         }
5190
5191         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5192         if (!entry) {
5193                 fprintf(stderr, "there is no free space entry for %llu-%llu\n",
5194                         offset, offset+bytes);
5195                 return -EINVAL;
5196         }
5197
5198         if (entry->offset != offset) {
5199                 fprintf(stderr, "wanted offset %llu, found %llu\n", offset,
5200                         entry->offset);
5201                 return -EINVAL;
5202         }
5203
5204         if (entry->bytes != bytes) {
5205                 fprintf(stderr, "wanted bytes %llu, found %llu for off %llu\n",
5206                         bytes, entry->bytes, offset);
5207                 return -EINVAL;
5208         }
5209
5210         unlink_free_space(cache->free_space_ctl, entry);
5211         free(entry);
5212         return 0;
5213 }
5214
5215 static int verify_space_cache(struct btrfs_root *root,
5216                               struct btrfs_block_group_cache *cache)
5217 {
5218         struct btrfs_path path;
5219         struct extent_buffer *leaf;
5220         struct btrfs_key key;
5221         u64 last;
5222         int ret = 0;
5223
5224         root = root->fs_info->extent_root;
5225
5226         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5227
5228         btrfs_init_path(&path);
5229         key.objectid = last;
5230         key.offset = 0;
5231         key.type = BTRFS_EXTENT_ITEM_KEY;
5232         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5233         if (ret < 0)
5234                 goto out;
5235         ret = 0;
5236         while (1) {
5237                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
5238                         ret = btrfs_next_leaf(root, &path);
5239                         if (ret < 0)
5240                                 goto out;
5241                         if (ret > 0) {
5242                                 ret = 0;
5243                                 break;
5244                         }
5245                 }
5246                 leaf = path.nodes[0];
5247                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
5248                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5249                         break;
5250                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5251                     key.type != BTRFS_METADATA_ITEM_KEY) {
5252                         path.slots[0]++;
5253                         continue;
5254                 }
5255
5256                 if (last == key.objectid) {
5257                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5258                                 last = key.objectid + key.offset;
5259                         else
5260                                 last = key.objectid + root->fs_info->nodesize;
5261                         path.slots[0]++;
5262                         continue;
5263                 }
5264
5265                 ret = check_cache_range(root, cache, last,
5266                                         key.objectid - last);
5267                 if (ret)
5268                         break;
5269                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5270                         last = key.objectid + key.offset;
5271                 else
5272                         last = key.objectid + root->fs_info->nodesize;
5273                 path.slots[0]++;
5274         }
5275
5276         if (last < cache->key.objectid + cache->key.offset)
5277                 ret = check_cache_range(root, cache, last,
5278                                         cache->key.objectid +
5279                                         cache->key.offset - last);
5280
5281 out:
5282         btrfs_release_path(&path);
5283
5284         if (!ret &&
5285             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5286                 fprintf(stderr, "There are still entries left in the space "
5287                         "cache\n");
5288                 ret = -EINVAL;
5289         }
5290
5291         return ret;
5292 }
5293
5294 static int check_space_cache(struct btrfs_root *root)
5295 {
5296         struct btrfs_block_group_cache *cache;
5297         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5298         int ret;
5299         int error = 0;
5300
5301         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5302             btrfs_super_generation(root->fs_info->super_copy) !=
5303             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5304                 printf("cache and super generation don't match, space cache "
5305                        "will be invalidated\n");
5306                 return 0;
5307         }
5308
5309         if (ctx.progress_enabled) {
5310                 ctx.tp = TASK_FREE_SPACE;
5311                 task_start(ctx.info);
5312         }
5313
5314         while (1) {
5315                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5316                 if (!cache)
5317                         break;
5318
5319                 start = cache->key.objectid + cache->key.offset;
5320                 if (!cache->free_space_ctl) {
5321                         if (btrfs_init_free_space_ctl(cache,
5322                                                 root->fs_info->sectorsize)) {
5323                                 ret = -ENOMEM;
5324                                 break;
5325                         }
5326                 } else {
5327                         btrfs_remove_free_space_cache(cache);
5328                 }
5329
5330                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
5331                         ret = exclude_super_stripes(root, cache);
5332                         if (ret) {
5333                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5334                                         strerror(-ret));
5335                                 error++;
5336                                 continue;
5337                         }
5338                         ret = load_free_space_tree(root->fs_info, cache);
5339                         free_excluded_extents(root, cache);
5340                         if (ret < 0) {
5341                                 fprintf(stderr, "could not load free space tree: %s\n",
5342                                         strerror(-ret));
5343                                 error++;
5344                                 continue;
5345                         }
5346                         error += ret;
5347                 } else {
5348                         ret = load_free_space_cache(root->fs_info, cache);
5349                         if (ret < 0)
5350                                 error++;
5351                         if (ret <= 0)
5352                                 continue;
5353                 }
5354
5355                 ret = verify_space_cache(root, cache);
5356                 if (ret) {
5357                         fprintf(stderr, "cache appears valid but isn't %llu\n",
5358                                 cache->key.objectid);
5359                         error++;
5360                 }
5361         }
5362
5363         task_stop(ctx.info);
5364
5365         return error ? -EINVAL : 0;
5366 }
5367
5368 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5369                         u64 num_bytes, unsigned long leaf_offset,
5370                         struct extent_buffer *eb)
5371 {
5372         struct btrfs_fs_info *fs_info = root->fs_info;
5373         u64 offset = 0;
5374         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
5375         char *data;
5376         unsigned long csum_offset;
5377         u32 csum;
5378         u32 csum_expected;
5379         u64 read_len;
5380         u64 data_checked = 0;
5381         u64 tmp;
5382         int ret = 0;
5383         int mirror;
5384         int num_copies;
5385
5386         if (num_bytes % fs_info->sectorsize)
5387                 return -EINVAL;
5388
5389         data = malloc(num_bytes);
5390         if (!data)
5391                 return -ENOMEM;
5392
5393         while (offset < num_bytes) {
5394                 mirror = 0;
5395 again:
5396                 read_len = num_bytes - offset;
5397                 /* read as much space once a time */
5398                 ret = read_extent_data(fs_info, data + offset,
5399                                 bytenr + offset, &read_len, mirror);
5400                 if (ret)
5401                         goto out;
5402                 data_checked = 0;
5403                 /* verify every 4k data's checksum */
5404                 while (data_checked < read_len) {
5405                         csum = ~(u32)0;
5406                         tmp = offset + data_checked;
5407
5408                         csum = btrfs_csum_data((char *)data + tmp,
5409                                                csum, fs_info->sectorsize);
5410                         btrfs_csum_final(csum, (u8 *)&csum);
5411
5412                         csum_offset = leaf_offset +
5413                                  tmp / fs_info->sectorsize * csum_size;
5414                         read_extent_buffer(eb, (char *)&csum_expected,
5415                                            csum_offset, csum_size);
5416                         /* try another mirror */
5417                         if (csum != csum_expected) {
5418                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5419                                                 mirror, bytenr + tmp,
5420                                                 csum, csum_expected);
5421                                 num_copies = btrfs_num_copies(root->fs_info,
5422                                                 bytenr, num_bytes);
5423                                 if (mirror < num_copies - 1) {
5424                                         mirror += 1;
5425                                         goto again;
5426                                 }
5427                         }
5428                         data_checked += fs_info->sectorsize;
5429                 }
5430                 offset += read_len;
5431         }
5432 out:
5433         free(data);
5434         return ret;
5435 }
5436
5437 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5438                                u64 num_bytes)
5439 {
5440         struct btrfs_path path;
5441         struct extent_buffer *leaf;
5442         struct btrfs_key key;
5443         int ret;
5444
5445         btrfs_init_path(&path);
5446         key.objectid = bytenr;
5447         key.type = BTRFS_EXTENT_ITEM_KEY;
5448         key.offset = (u64)-1;
5449
5450 again:
5451         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
5452                                 0, 0);
5453         if (ret < 0) {
5454                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5455                 btrfs_release_path(&path);
5456                 return ret;
5457         } else if (ret) {
5458                 if (path.slots[0] > 0) {
5459                         path.slots[0]--;
5460                 } else {
5461                         ret = btrfs_prev_leaf(root, &path);
5462                         if (ret < 0) {
5463                                 goto out;
5464                         } else if (ret > 0) {
5465                                 ret = 0;
5466                                 goto out;
5467                         }
5468                 }
5469         }
5470
5471         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
5472
5473         /*
5474          * Block group items come before extent items if they have the same
5475          * bytenr, so walk back one more just in case.  Dear future traveller,
5476          * first congrats on mastering time travel.  Now if it's not too much
5477          * trouble could you go back to 2006 and tell Chris to make the
5478          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5479          * EXTENT_ITEM_KEY please?
5480          */
5481         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5482                 if (path.slots[0] > 0) {
5483                         path.slots[0]--;
5484                 } else {
5485                         ret = btrfs_prev_leaf(root, &path);
5486                         if (ret < 0) {
5487                                 goto out;
5488                         } else if (ret > 0) {
5489                                 ret = 0;
5490                                 goto out;
5491                         }
5492                 }
5493                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
5494         }
5495
5496         while (num_bytes) {
5497                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
5498                         ret = btrfs_next_leaf(root, &path);
5499                         if (ret < 0) {
5500                                 fprintf(stderr, "Error going to next leaf "
5501                                         "%d\n", ret);
5502                                 btrfs_release_path(&path);
5503                                 return ret;
5504                         } else if (ret) {
5505                                 break;
5506                         }
5507                 }
5508                 leaf = path.nodes[0];
5509                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
5510                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5511                         path.slots[0]++;
5512                         continue;
5513                 }
5514                 if (key.objectid + key.offset < bytenr) {
5515                         path.slots[0]++;
5516                         continue;
5517                 }
5518                 if (key.objectid > bytenr + num_bytes)
5519                         break;
5520
5521                 if (key.objectid == bytenr) {
5522                         if (key.offset >= num_bytes) {
5523                                 num_bytes = 0;
5524                                 break;
5525                         }
5526                         num_bytes -= key.offset;
5527                         bytenr += key.offset;
5528                 } else if (key.objectid < bytenr) {
5529                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5530                                 num_bytes = 0;
5531                                 break;
5532                         }
5533                         num_bytes = (bytenr + num_bytes) -
5534                                 (key.objectid + key.offset);
5535                         bytenr = key.objectid + key.offset;
5536                 } else {
5537                         if (key.objectid + key.offset < bytenr + num_bytes) {
5538                                 u64 new_start = key.objectid + key.offset;
5539                                 u64 new_bytes = bytenr + num_bytes - new_start;
5540
5541                                 /*
5542                                  * Weird case, the extent is in the middle of
5543                                  * our range, we'll have to search one side
5544                                  * and then the other.  Not sure if this happens
5545                                  * in real life, but no harm in coding it up
5546                                  * anyway just in case.
5547                                  */
5548                                 btrfs_release_path(&path);
5549                                 ret = check_extent_exists(root, new_start,
5550                                                           new_bytes);
5551                                 if (ret) {
5552                                         fprintf(stderr, "Right section didn't "
5553                                                 "have a record\n");
5554                                         break;
5555                                 }
5556                                 num_bytes = key.objectid - bytenr;
5557                                 goto again;
5558                         }
5559                         num_bytes = key.objectid - bytenr;
5560                 }
5561                 path.slots[0]++;
5562         }
5563         ret = 0;
5564
5565 out:
5566         if (num_bytes && !ret) {
5567                 fprintf(stderr,
5568                         "there are no extents for csum range %llu-%llu\n",
5569                         bytenr, bytenr+num_bytes);
5570                 ret = 1;
5571         }
5572
5573         btrfs_release_path(&path);
5574         return ret;
5575 }
5576
5577 static int check_csums(struct btrfs_root *root)
5578 {
5579         struct btrfs_path path;
5580         struct extent_buffer *leaf;
5581         struct btrfs_key key;
5582         u64 offset = 0, num_bytes = 0;
5583         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5584         int errors = 0;
5585         int ret;
5586         u64 data_len;
5587         unsigned long leaf_offset;
5588
5589         root = root->fs_info->csum_root;
5590         if (!extent_buffer_uptodate(root->node)) {
5591                 fprintf(stderr, "No valid csum tree found\n");
5592                 return -ENOENT;
5593         }
5594
5595         btrfs_init_path(&path);
5596         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5597         key.type = BTRFS_EXTENT_CSUM_KEY;
5598         key.offset = 0;
5599         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5600         if (ret < 0) {
5601                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5602                 btrfs_release_path(&path);
5603                 return ret;
5604         }
5605
5606         if (ret > 0 && path.slots[0])
5607                 path.slots[0]--;
5608         ret = 0;
5609
5610         while (1) {
5611                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
5612                         ret = btrfs_next_leaf(root, &path);
5613                         if (ret < 0) {
5614                                 fprintf(stderr, "Error going to next leaf "
5615                                         "%d\n", ret);
5616                                 break;
5617                         }
5618                         if (ret)
5619                                 break;
5620                 }
5621                 leaf = path.nodes[0];
5622
5623                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
5624                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5625                         path.slots[0]++;
5626                         continue;
5627                 }
5628
5629                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
5630                               csum_size) * root->fs_info->sectorsize;
5631                 if (!check_data_csum)
5632                         goto skip_csum_check;
5633                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
5634                 ret = check_extent_csums(root, key.offset, data_len,
5635                                          leaf_offset, leaf);
5636                 if (ret)
5637                         break;
5638 skip_csum_check:
5639                 if (!num_bytes) {
5640                         offset = key.offset;
5641                 } else if (key.offset != offset + num_bytes) {
5642                         ret = check_extent_exists(root, offset, num_bytes);
5643                         if (ret) {
5644                                 fprintf(stderr,
5645                 "csum exists for %llu-%llu but there is no extent record\n",
5646                                         offset, offset+num_bytes);
5647                                 errors++;
5648                         }
5649                         offset = key.offset;
5650                         num_bytes = 0;
5651                 }
5652                 num_bytes += data_len;
5653                 path.slots[0]++;
5654         }
5655
5656         btrfs_release_path(&path);
5657         return errors;
5658 }
5659
5660 static int is_dropped_key(struct btrfs_key *key,
5661                           struct btrfs_key *drop_key)
5662 {
5663         if (key->objectid < drop_key->objectid)
5664                 return 1;
5665         else if (key->objectid == drop_key->objectid) {
5666                 if (key->type < drop_key->type)
5667                         return 1;
5668                 else if (key->type == drop_key->type) {
5669                         if (key->offset < drop_key->offset)
5670                                 return 1;
5671                 }
5672         }
5673         return 0;
5674 }
5675
5676 /*
5677  * Here are the rules for FULL_BACKREF.
5678  *
5679  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5680  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5681  *      FULL_BACKREF set.
5682  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
5683  *    if it happened after the relocation occurred since we'll have dropped the
5684  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5685  *    have no real way to know for sure.
5686  *
5687  * We process the blocks one root at a time, and we start from the lowest root
5688  * objectid and go to the highest.  So we can just lookup the owner backref for
5689  * the record and if we don't find it then we know it doesn't exist and we have
5690  * a FULL BACKREF.
5691  *
5692  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5693  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5694  * be set or not and then we can check later once we've gathered all the refs.
5695  */
5696 static int calc_extent_flag(struct cache_tree *extent_cache,
5697                            struct extent_buffer *buf,
5698                            struct root_item_record *ri,
5699                            u64 *flags)
5700 {
5701         struct extent_record *rec;
5702         struct cache_extent *cache;
5703         struct tree_backref *tback;
5704         u64 owner = 0;
5705
5706         cache = lookup_cache_extent(extent_cache, buf->start, 1);
5707         /* we have added this extent before */
5708         if (!cache)
5709                 return -ENOENT;
5710
5711         rec = container_of(cache, struct extent_record, cache);
5712
5713         /*
5714          * Except file/reloc tree, we can not have
5715          * FULL BACKREF MODE
5716          */
5717         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5718                 goto normal;
5719         /*
5720          * root node
5721          */
5722         if (buf->start == ri->bytenr)
5723                 goto normal;
5724
5725         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5726                 goto full_backref;
5727
5728         owner = btrfs_header_owner(buf);
5729         if (owner == ri->objectid)
5730                 goto normal;
5731
5732         tback = find_tree_backref(rec, 0, owner);
5733         if (!tback)
5734                 goto full_backref;
5735 normal:
5736         *flags = 0;
5737         if (rec->flag_block_full_backref != FLAG_UNSET &&
5738             rec->flag_block_full_backref != 0)
5739                 rec->bad_full_backref = 1;
5740         return 0;
5741 full_backref:
5742         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5743         if (rec->flag_block_full_backref != FLAG_UNSET &&
5744             rec->flag_block_full_backref != 1)
5745                 rec->bad_full_backref = 1;
5746         return 0;
5747 }
5748
5749 static void report_mismatch_key_root(u8 key_type, u64 rootid)
5750 {
5751         fprintf(stderr, "Invalid key type(");
5752         print_key_type(stderr, 0, key_type);
5753         fprintf(stderr, ") found in root(");
5754         print_objectid(stderr, rootid, 0);
5755         fprintf(stderr, ")\n");
5756 }
5757
5758 /*
5759  * Check if the key is valid with its extent buffer.
5760  *
5761  * This is a early check in case invalid key exists in a extent buffer
5762  * This is not comprehensive yet, but should prevent wrong key/item passed
5763  * further
5764  */
5765 static int check_type_with_root(u64 rootid, u8 key_type)
5766 {
5767         switch (key_type) {
5768         /* Only valid in chunk tree */
5769         case BTRFS_DEV_ITEM_KEY:
5770         case BTRFS_CHUNK_ITEM_KEY:
5771                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
5772                         goto err;
5773                 break;
5774         /* valid in csum and log tree */
5775         case BTRFS_CSUM_TREE_OBJECTID:
5776                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
5777                       is_fstree(rootid)))
5778                         goto err;
5779                 break;
5780         case BTRFS_EXTENT_ITEM_KEY:
5781         case BTRFS_METADATA_ITEM_KEY:
5782         case BTRFS_BLOCK_GROUP_ITEM_KEY:
5783                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
5784                         goto err;
5785                 break;
5786         case BTRFS_ROOT_ITEM_KEY:
5787                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
5788                         goto err;
5789                 break;
5790         case BTRFS_DEV_EXTENT_KEY:
5791                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
5792                         goto err;
5793                 break;
5794         }
5795         return 0;
5796 err:
5797         report_mismatch_key_root(key_type, rootid);
5798         return -EINVAL;
5799 }
5800
5801 static int run_next_block(struct btrfs_root *root,
5802                           struct block_info *bits,
5803                           int bits_nr,
5804                           u64 *last,
5805                           struct cache_tree *pending,
5806                           struct cache_tree *seen,
5807                           struct cache_tree *reada,
5808                           struct cache_tree *nodes,
5809                           struct cache_tree *extent_cache,
5810                           struct cache_tree *chunk_cache,
5811                           struct rb_root *dev_cache,
5812                           struct block_group_tree *block_group_cache,
5813                           struct device_extent_tree *dev_extent_cache,
5814                           struct root_item_record *ri)
5815 {
5816         struct btrfs_fs_info *fs_info = root->fs_info;
5817         struct extent_buffer *buf;
5818         struct extent_record *rec = NULL;
5819         u64 bytenr;
5820         u32 size;
5821         u64 parent;
5822         u64 owner;
5823         u64 flags;
5824         u64 ptr;
5825         u64 gen = 0;
5826         int ret = 0;
5827         int i;
5828         int nritems;
5829         struct btrfs_key key;
5830         struct cache_extent *cache;
5831         int reada_bits;
5832
5833         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5834                                     bits_nr, &reada_bits);
5835         if (nritems == 0)
5836                 return 1;
5837
5838         if (!reada_bits) {
5839                 for (i = 0; i < nritems; i++) {
5840                         ret = add_cache_extent(reada, bits[i].start,
5841                                                bits[i].size);
5842                         if (ret == -EEXIST)
5843                                 continue;
5844
5845                         /* fixme, get the parent transid */
5846                         readahead_tree_block(fs_info, bits[i].start, 0);
5847                 }
5848         }
5849         *last = bits[0].start;
5850         bytenr = bits[0].start;
5851         size = bits[0].size;
5852
5853         cache = lookup_cache_extent(pending, bytenr, size);
5854         if (cache) {
5855                 remove_cache_extent(pending, cache);
5856                 free(cache);
5857         }
5858         cache = lookup_cache_extent(reada, bytenr, size);
5859         if (cache) {
5860                 remove_cache_extent(reada, cache);
5861                 free(cache);
5862         }
5863         cache = lookup_cache_extent(nodes, bytenr, size);
5864         if (cache) {
5865                 remove_cache_extent(nodes, cache);
5866                 free(cache);
5867         }
5868         cache = lookup_cache_extent(extent_cache, bytenr, size);
5869         if (cache) {
5870                 rec = container_of(cache, struct extent_record, cache);
5871                 gen = rec->parent_generation;
5872         }
5873
5874         /* fixme, get the real parent transid */
5875         buf = read_tree_block(root->fs_info, bytenr, gen);
5876         if (!extent_buffer_uptodate(buf)) {
5877                 record_bad_block_io(root->fs_info,
5878                                     extent_cache, bytenr, size);
5879                 goto out;
5880         }
5881
5882         nritems = btrfs_header_nritems(buf);
5883
5884         flags = 0;
5885         if (!init_extent_tree) {
5886                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5887                                        btrfs_header_level(buf), 1, NULL,
5888                                        &flags);
5889                 if (ret < 0) {
5890                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
5891                         if (ret < 0) {
5892                                 fprintf(stderr, "Couldn't calc extent flags\n");
5893                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5894                         }
5895                 }
5896         } else {
5897                 flags = 0;
5898                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
5899                 if (ret < 0) {
5900                         fprintf(stderr, "Couldn't calc extent flags\n");
5901                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5902                 }
5903         }
5904
5905         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5906                 if (ri != NULL &&
5907                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5908                     ri->objectid == btrfs_header_owner(buf)) {
5909                         /*
5910                          * Ok we got to this block from it's original owner and
5911                          * we have FULL_BACKREF set.  Relocation can leave
5912                          * converted blocks over so this is altogether possible,
5913                          * however it's not possible if the generation > the
5914                          * last snapshot, so check for this case.
5915                          */
5916                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5917                             btrfs_header_generation(buf) > ri->last_snapshot) {
5918                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5919                                 rec->bad_full_backref = 1;
5920                         }
5921                 }
5922         } else {
5923                 if (ri != NULL &&
5924                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
5925                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
5926                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5927                         rec->bad_full_backref = 1;
5928                 }
5929         }
5930
5931         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5932                 rec->flag_block_full_backref = 1;
5933                 parent = bytenr;
5934                 owner = 0;
5935         } else {
5936                 rec->flag_block_full_backref = 0;
5937                 parent = 0;
5938                 owner = btrfs_header_owner(buf);
5939         }
5940
5941         ret = check_block(root, extent_cache, buf, flags);
5942         if (ret)
5943                 goto out;
5944
5945         if (btrfs_is_leaf(buf)) {
5946                 btree_space_waste += btrfs_leaf_free_space(root, buf);
5947                 for (i = 0; i < nritems; i++) {
5948                         struct btrfs_file_extent_item *fi;
5949
5950                         btrfs_item_key_to_cpu(buf, &key, i);
5951                         /*
5952                          * Check key type against the leaf owner.
5953                          * Could filter quite a lot of early error if
5954                          * owner is correct
5955                          */
5956                         if (check_type_with_root(btrfs_header_owner(buf),
5957                                                  key.type)) {
5958                                 fprintf(stderr, "ignoring invalid key\n");
5959                                 continue;
5960                         }
5961                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
5962                                 process_extent_item(root, extent_cache, buf,
5963                                                     i);
5964                                 continue;
5965                         }
5966                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5967                                 process_extent_item(root, extent_cache, buf,
5968                                                     i);
5969                                 continue;
5970                         }
5971                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
5972                                 total_csum_bytes +=
5973                                         btrfs_item_size_nr(buf, i);
5974                                 continue;
5975                         }
5976                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
5977                                 process_chunk_item(chunk_cache, &key, buf, i);
5978                                 continue;
5979                         }
5980                         if (key.type == BTRFS_DEV_ITEM_KEY) {
5981                                 process_device_item(dev_cache, &key, buf, i);
5982                                 continue;
5983                         }
5984                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
5985                                 process_block_group_item(block_group_cache,
5986                                         &key, buf, i);
5987                                 continue;
5988                         }
5989                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
5990                                 process_device_extent_item(dev_extent_cache,
5991                                         &key, buf, i);
5992                                 continue;
5993
5994                         }
5995                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
5996 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5997                                 process_extent_ref_v0(extent_cache, buf, i);
5998 #else
5999                                 BUG();
6000 #endif
6001                                 continue;
6002                         }
6003
6004                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6005                                 ret = add_tree_backref(extent_cache,
6006                                                 key.objectid, 0, key.offset, 0);
6007                                 if (ret < 0)
6008                                         error(
6009                                 "add_tree_backref failed (leaf tree block): %s",
6010                                               strerror(-ret));
6011                                 continue;
6012                         }
6013                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6014                                 ret = add_tree_backref(extent_cache,
6015                                                 key.objectid, key.offset, 0, 0);
6016                                 if (ret < 0)
6017                                         error(
6018                                 "add_tree_backref failed (leaf shared block): %s",
6019                                               strerror(-ret));
6020                                 continue;
6021                         }
6022                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6023                                 struct btrfs_extent_data_ref *ref;
6024
6025                                 ref = btrfs_item_ptr(buf, i,
6026                                                 struct btrfs_extent_data_ref);
6027                                 add_data_backref(extent_cache,
6028                                         key.objectid, 0,
6029                                         btrfs_extent_data_ref_root(buf, ref),
6030                                         btrfs_extent_data_ref_objectid(buf,
6031                                                                        ref),
6032                                         btrfs_extent_data_ref_offset(buf, ref),
6033                                         btrfs_extent_data_ref_count(buf, ref),
6034                                         0, root->fs_info->sectorsize);
6035                                 continue;
6036                         }
6037                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6038                                 struct btrfs_shared_data_ref *ref;
6039
6040                                 ref = btrfs_item_ptr(buf, i,
6041                                                 struct btrfs_shared_data_ref);
6042                                 add_data_backref(extent_cache,
6043                                         key.objectid, key.offset, 0, 0, 0,
6044                                         btrfs_shared_data_ref_count(buf, ref),
6045                                         0, root->fs_info->sectorsize);
6046                                 continue;
6047                         }
6048                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6049                                 struct bad_item *bad;
6050
6051                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6052                                         continue;
6053                                 if (!owner)
6054                                         continue;
6055                                 bad = malloc(sizeof(struct bad_item));
6056                                 if (!bad)
6057                                         continue;
6058                                 INIT_LIST_HEAD(&bad->list);
6059                                 memcpy(&bad->key, &key,
6060                                        sizeof(struct btrfs_key));
6061                                 bad->root_id = owner;
6062                                 list_add_tail(&bad->list, &delete_items);
6063                                 continue;
6064                         }
6065                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6066                                 continue;
6067                         fi = btrfs_item_ptr(buf, i,
6068                                             struct btrfs_file_extent_item);
6069                         if (btrfs_file_extent_type(buf, fi) ==
6070                             BTRFS_FILE_EXTENT_INLINE)
6071                                 continue;
6072                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6073                                 continue;
6074
6075                         data_bytes_allocated +=
6076                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6077                         if (data_bytes_allocated < root->fs_info->sectorsize)
6078                                 abort();
6079
6080                         data_bytes_referenced +=
6081                                 btrfs_file_extent_num_bytes(buf, fi);
6082                         add_data_backref(extent_cache,
6083                                 btrfs_file_extent_disk_bytenr(buf, fi),
6084                                 parent, owner, key.objectid, key.offset -
6085                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6086                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6087                 }
6088         } else {
6089                 int level;
6090                 struct btrfs_key first_key;
6091
6092                 first_key.objectid = 0;
6093
6094                 if (nritems > 0)
6095                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6096                 level = btrfs_header_level(buf);
6097                 for (i = 0; i < nritems; i++) {
6098                         struct extent_record tmpl;
6099
6100                         ptr = btrfs_node_blockptr(buf, i);
6101                         size = root->fs_info->nodesize;
6102                         btrfs_node_key_to_cpu(buf, &key, i);
6103                         if (ri != NULL) {
6104                                 if ((level == ri->drop_level)
6105                                     && is_dropped_key(&key, &ri->drop_key)) {
6106                                         continue;
6107                                 }
6108                         }
6109
6110                         memset(&tmpl, 0, sizeof(tmpl));
6111                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6112                         tmpl.parent_generation =
6113                                 btrfs_node_ptr_generation(buf, i);
6114                         tmpl.start = ptr;
6115                         tmpl.nr = size;
6116                         tmpl.refs = 1;
6117                         tmpl.metadata = 1;
6118                         tmpl.max_size = size;
6119                         ret = add_extent_rec(extent_cache, &tmpl);
6120                         if (ret < 0)
6121                                 goto out;
6122
6123                         ret = add_tree_backref(extent_cache, ptr, parent,
6124                                         owner, 1);
6125                         if (ret < 0) {
6126                                 error(
6127                                 "add_tree_backref failed (non-leaf block): %s",
6128                                       strerror(-ret));
6129                                 continue;
6130                         }
6131
6132                         if (level > 1)
6133                                 add_pending(nodes, seen, ptr, size);
6134                         else
6135                                 add_pending(pending, seen, ptr, size);
6136                 }
6137                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
6138                                       nritems) * sizeof(struct btrfs_key_ptr);
6139         }
6140         total_btree_bytes += buf->len;
6141         if (fs_root_objectid(btrfs_header_owner(buf)))
6142                 total_fs_tree_bytes += buf->len;
6143         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6144                 total_extent_tree_bytes += buf->len;
6145 out:
6146         free_extent_buffer(buf);
6147         return ret;
6148 }
6149
6150 static int add_root_to_pending(struct extent_buffer *buf,
6151                                struct cache_tree *extent_cache,
6152                                struct cache_tree *pending,
6153                                struct cache_tree *seen,
6154                                struct cache_tree *nodes,
6155                                u64 objectid)
6156 {
6157         struct extent_record tmpl;
6158         int ret;
6159
6160         if (btrfs_header_level(buf) > 0)
6161                 add_pending(nodes, seen, buf->start, buf->len);
6162         else
6163                 add_pending(pending, seen, buf->start, buf->len);
6164
6165         memset(&tmpl, 0, sizeof(tmpl));
6166         tmpl.start = buf->start;
6167         tmpl.nr = buf->len;
6168         tmpl.is_root = 1;
6169         tmpl.refs = 1;
6170         tmpl.metadata = 1;
6171         tmpl.max_size = buf->len;
6172         add_extent_rec(extent_cache, &tmpl);
6173
6174         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6175             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6176                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6177                                 0, 1);
6178         else
6179                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6180                                 1);
6181         return ret;
6182 }
6183
6184 /* as we fix the tree, we might be deleting blocks that
6185  * we're tracking for repair.  This hook makes sure we
6186  * remove any backrefs for blocks as we are fixing them.
6187  */
6188 static int free_extent_hook(struct btrfs_trans_handle *trans,
6189                             struct btrfs_root *root,
6190                             u64 bytenr, u64 num_bytes, u64 parent,
6191                             u64 root_objectid, u64 owner, u64 offset,
6192                             int refs_to_drop)
6193 {
6194         struct extent_record *rec;
6195         struct cache_extent *cache;
6196         int is_data;
6197         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6198
6199         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6200         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6201         if (!cache)
6202                 return 0;
6203
6204         rec = container_of(cache, struct extent_record, cache);
6205         if (is_data) {
6206                 struct data_backref *back;
6207
6208                 back = find_data_backref(rec, parent, root_objectid, owner,
6209                                          offset, 1, bytenr, num_bytes);
6210                 if (!back)
6211                         goto out;
6212                 if (back->node.found_ref) {
6213                         back->found_ref -= refs_to_drop;
6214                         if (rec->refs)
6215                                 rec->refs -= refs_to_drop;
6216                 }
6217                 if (back->node.found_extent_tree) {
6218                         back->num_refs -= refs_to_drop;
6219                         if (rec->extent_item_refs)
6220                                 rec->extent_item_refs -= refs_to_drop;
6221                 }
6222                 if (back->found_ref == 0)
6223                         back->node.found_ref = 0;
6224                 if (back->num_refs == 0)
6225                         back->node.found_extent_tree = 0;
6226
6227                 if (!back->node.found_extent_tree && back->node.found_ref) {
6228                         rb_erase(&back->node.node, &rec->backref_tree);
6229                         free(back);
6230                 }
6231         } else {
6232                 struct tree_backref *back;
6233
6234                 back = find_tree_backref(rec, parent, root_objectid);
6235                 if (!back)
6236                         goto out;
6237                 if (back->node.found_ref) {
6238                         if (rec->refs)
6239                                 rec->refs--;
6240                         back->node.found_ref = 0;
6241                 }
6242                 if (back->node.found_extent_tree) {
6243                         if (rec->extent_item_refs)
6244                                 rec->extent_item_refs--;
6245                         back->node.found_extent_tree = 0;
6246                 }
6247                 if (!back->node.found_extent_tree && back->node.found_ref) {
6248                         rb_erase(&back->node.node, &rec->backref_tree);
6249                         free(back);
6250                 }
6251         }
6252         maybe_free_extent_rec(extent_cache, rec);
6253 out:
6254         return 0;
6255 }
6256
6257 static int delete_extent_records(struct btrfs_trans_handle *trans,
6258                                  struct btrfs_root *root,
6259                                  struct btrfs_path *path,
6260                                  u64 bytenr)
6261 {
6262         struct btrfs_key key;
6263         struct btrfs_key found_key;
6264         struct extent_buffer *leaf;
6265         int ret;
6266         int slot;
6267
6268
6269         key.objectid = bytenr;
6270         key.type = (u8)-1;
6271         key.offset = (u64)-1;
6272
6273         while (1) {
6274                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6275                                         &key, path, 0, 1);
6276                 if (ret < 0)
6277                         break;
6278
6279                 if (ret > 0) {
6280                         ret = 0;
6281                         if (path->slots[0] == 0)
6282                                 break;
6283                         path->slots[0]--;
6284                 }
6285                 ret = 0;
6286
6287                 leaf = path->nodes[0];
6288                 slot = path->slots[0];
6289
6290                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6291                 if (found_key.objectid != bytenr)
6292                         break;
6293
6294                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6295                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6296                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6297                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6298                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6299                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6300                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6301                         btrfs_release_path(path);
6302                         if (found_key.type == 0) {
6303                                 if (found_key.offset == 0)
6304                                         break;
6305                                 key.offset = found_key.offset - 1;
6306                                 key.type = found_key.type;
6307                         }
6308                         key.type = found_key.type - 1;
6309                         key.offset = (u64)-1;
6310                         continue;
6311                 }
6312
6313                 fprintf(stderr,
6314                         "repair deleting extent record: key [%llu,%u,%llu]\n",
6315                         found_key.objectid, found_key.type, found_key.offset);
6316
6317                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6318                 if (ret)
6319                         break;
6320                 btrfs_release_path(path);
6321
6322                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6323                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6324                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6325                                 found_key.offset : root->fs_info->nodesize;
6326
6327                         ret = btrfs_update_block_group(root, bytenr,
6328                                                        bytes, 0, 0);
6329                         if (ret)
6330                                 break;
6331                 }
6332         }
6333
6334         btrfs_release_path(path);
6335         return ret;
6336 }
6337
6338 /*
6339  * for a single backref, this will allocate a new extent
6340  * and add the backref to it.
6341  */
6342 static int record_extent(struct btrfs_trans_handle *trans,
6343                          struct btrfs_fs_info *info,
6344                          struct btrfs_path *path,
6345                          struct extent_record *rec,
6346                          struct extent_backref *back,
6347                          int allocated, u64 flags)
6348 {
6349         int ret = 0;
6350         struct btrfs_root *extent_root = info->extent_root;
6351         struct extent_buffer *leaf;
6352         struct btrfs_key ins_key;
6353         struct btrfs_extent_item *ei;
6354         struct data_backref *dback;
6355         struct btrfs_tree_block_info *bi;
6356
6357         if (!back->is_data)
6358                 rec->max_size = max_t(u64, rec->max_size,
6359                                     info->nodesize);
6360
6361         if (!allocated) {
6362                 u32 item_size = sizeof(*ei);
6363
6364                 if (!back->is_data)
6365                         item_size += sizeof(*bi);
6366
6367                 ins_key.objectid = rec->start;
6368                 ins_key.offset = rec->max_size;
6369                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6370
6371                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6372                                         &ins_key, item_size);
6373                 if (ret)
6374                         goto fail;
6375
6376                 leaf = path->nodes[0];
6377                 ei = btrfs_item_ptr(leaf, path->slots[0],
6378                                     struct btrfs_extent_item);
6379
6380                 btrfs_set_extent_refs(leaf, ei, 0);
6381                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6382
6383                 if (back->is_data) {
6384                         btrfs_set_extent_flags(leaf, ei,
6385                                                BTRFS_EXTENT_FLAG_DATA);
6386                 } else {
6387                         struct btrfs_disk_key copy_key;
6388
6389                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6390                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6391                                              sizeof(*bi));
6392
6393                         btrfs_set_disk_key_objectid(&copy_key,
6394                                                     rec->info_objectid);
6395                         btrfs_set_disk_key_type(&copy_key, 0);
6396                         btrfs_set_disk_key_offset(&copy_key, 0);
6397
6398                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6399                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6400
6401                         btrfs_set_extent_flags(leaf, ei,
6402                                         flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
6403                 }
6404
6405                 btrfs_mark_buffer_dirty(leaf);
6406                 ret = btrfs_update_block_group(extent_root, rec->start,
6407                                                rec->max_size, 1, 0);
6408                 if (ret)
6409                         goto fail;
6410                 btrfs_release_path(path);
6411         }
6412
6413         if (back->is_data) {
6414                 u64 parent;
6415                 int i;
6416
6417                 dback = to_data_backref(back);
6418                 if (back->full_backref)
6419                         parent = dback->parent;
6420                 else
6421                         parent = 0;
6422
6423                 for (i = 0; i < dback->found_ref; i++) {
6424                         /* if parent != 0, we're doing a full backref
6425                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6426                          * just makes the backref allocator create a data
6427                          * backref
6428                          */
6429                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6430                                                    rec->start, rec->max_size,
6431                                                    parent,
6432                                                    dback->root,
6433                                                    parent ?
6434                                                    BTRFS_FIRST_FREE_OBJECTID :
6435                                                    dback->owner,
6436                                                    dback->offset);
6437                         if (ret)
6438                                 break;
6439                 }
6440                 fprintf(stderr,
6441 "adding new data backref on %llu %s %llu owner %llu offset %llu found %d\n",
6442                         (unsigned long long)rec->start,
6443                         back->full_backref ? "parent" : "root",
6444                         back->full_backref ? (unsigned long long)parent :
6445                                              (unsigned long long)dback->root,
6446                         (unsigned long long)dback->owner,
6447                         (unsigned long long)dback->offset, dback->found_ref);
6448         } else {
6449                 u64 parent;
6450                 struct tree_backref *tback;
6451
6452                 tback = to_tree_backref(back);
6453                 if (back->full_backref)
6454                         parent = tback->parent;
6455                 else
6456                         parent = 0;
6457
6458                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6459                                            rec->start, rec->max_size,
6460                                            parent, tback->root, 0, 0);
6461                 fprintf(stderr,
6462 "adding new tree backref on start %llu len %llu parent %llu root %llu\n",
6463                         rec->start, rec->max_size, parent, tback->root);
6464         }
6465 fail:
6466         btrfs_release_path(path);
6467         return ret;
6468 }
6469
6470 static struct extent_entry *find_entry(struct list_head *entries,
6471                                        u64 bytenr, u64 bytes)
6472 {
6473         struct extent_entry *entry = NULL;
6474
6475         list_for_each_entry(entry, entries, list) {
6476                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6477                         return entry;
6478         }
6479
6480         return NULL;
6481 }
6482
6483 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6484 {
6485         struct extent_entry *entry, *best = NULL, *prev = NULL;
6486
6487         list_for_each_entry(entry, entries, list) {
6488                 /*
6489                  * If there are as many broken entries as entries then we know
6490                  * not to trust this particular entry.
6491                  */
6492                 if (entry->broken == entry->count)
6493                         continue;
6494
6495                 /*
6496                  * Special case, when there are only two entries and 'best' is
6497                  * the first one
6498                  */
6499                 if (!prev) {
6500                         best = entry;
6501                         prev = entry;
6502                         continue;
6503                 }
6504
6505                 /*
6506                  * If our current entry == best then we can't be sure our best
6507                  * is really the best, so we need to keep searching.
6508                  */
6509                 if (best && best->count == entry->count) {
6510                         prev = entry;
6511                         best = NULL;
6512                         continue;
6513                 }
6514
6515                 /* Prev == entry, not good enough, have to keep searching */
6516                 if (!prev->broken && prev->count == entry->count)
6517                         continue;
6518
6519                 if (!best)
6520                         best = (prev->count > entry->count) ? prev : entry;
6521                 else if (best->count < entry->count)
6522                         best = entry;
6523                 prev = entry;
6524         }
6525
6526         return best;
6527 }
6528
6529 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6530                       struct data_backref *dback, struct extent_entry *entry)
6531 {
6532         struct btrfs_trans_handle *trans;
6533         struct btrfs_root *root;
6534         struct btrfs_file_extent_item *fi;
6535         struct extent_buffer *leaf;
6536         struct btrfs_key key;
6537         u64 bytenr, bytes;
6538         int ret, err;
6539
6540         key.objectid = dback->root;
6541         key.type = BTRFS_ROOT_ITEM_KEY;
6542         key.offset = (u64)-1;
6543         root = btrfs_read_fs_root(info, &key);
6544         if (IS_ERR(root)) {
6545                 fprintf(stderr, "Couldn't find root for our ref\n");
6546                 return -EINVAL;
6547         }
6548
6549         /*
6550          * The backref points to the original offset of the extent if it was
6551          * split, so we need to search down to the offset we have and then walk
6552          * forward until we find the backref we're looking for.
6553          */
6554         key.objectid = dback->owner;
6555         key.type = BTRFS_EXTENT_DATA_KEY;
6556         key.offset = dback->offset;
6557         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6558         if (ret < 0) {
6559                 fprintf(stderr, "Error looking up ref %d\n", ret);
6560                 return ret;
6561         }
6562
6563         while (1) {
6564                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6565                         ret = btrfs_next_leaf(root, path);
6566                         if (ret) {
6567                                 fprintf(stderr, "Couldn't find our ref, next\n");
6568                                 return -EINVAL;
6569                         }
6570                 }
6571                 leaf = path->nodes[0];
6572                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6573                 if (key.objectid != dback->owner ||
6574                     key.type != BTRFS_EXTENT_DATA_KEY) {
6575                         fprintf(stderr, "Couldn't find our ref, search\n");
6576                         return -EINVAL;
6577                 }
6578                 fi = btrfs_item_ptr(leaf, path->slots[0],
6579                                     struct btrfs_file_extent_item);
6580                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6581                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6582
6583                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6584                         break;
6585                 path->slots[0]++;
6586         }
6587
6588         btrfs_release_path(path);
6589
6590         trans = btrfs_start_transaction(root, 1);
6591         if (IS_ERR(trans))
6592                 return PTR_ERR(trans);
6593
6594         /*
6595          * Ok we have the key of the file extent we want to fix, now we can cow
6596          * down to the thing and fix it.
6597          */
6598         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6599         if (ret < 0) {
6600                 fprintf(stderr, "error cowing down to ref [%llu,%u,%llu]: %d\n",
6601                         key.objectid, key.type, key.offset, ret);
6602                 goto out;
6603         }
6604         if (ret > 0) {
6605                 fprintf(stderr,
6606                 "well that's odd, we just found this key [%llu,%u,%llu]\n",
6607                         key.objectid, key.type, key.offset);
6608                 ret = -EINVAL;
6609                 goto out;
6610         }
6611         leaf = path->nodes[0];
6612         fi = btrfs_item_ptr(leaf, path->slots[0],
6613                             struct btrfs_file_extent_item);
6614
6615         if (btrfs_file_extent_compression(leaf, fi) &&
6616             dback->disk_bytenr != entry->bytenr) {
6617                 fprintf(stderr,
6618 "ref doesn't match the record start and is compressed, please take a btrfs-image of this file system and send it to a btrfs developer so they can complete this functionality for bytenr %llu\n",
6619                         dback->disk_bytenr);
6620                 ret = -EINVAL;
6621                 goto out;
6622         }
6623
6624         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6625                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6626         } else if (dback->disk_bytenr > entry->bytenr) {
6627                 u64 off_diff, offset;
6628
6629                 off_diff = dback->disk_bytenr - entry->bytenr;
6630                 offset = btrfs_file_extent_offset(leaf, fi);
6631                 if (dback->disk_bytenr + offset +
6632                     btrfs_file_extent_num_bytes(leaf, fi) >
6633                     entry->bytenr + entry->bytes) {
6634                         fprintf(stderr,
6635 "ref is past the entry end, please take a btrfs-image of this file system and send it to a btrfs developer, ref %llu\n",
6636                                 dback->disk_bytenr);
6637                         ret = -EINVAL;
6638                         goto out;
6639                 }
6640                 offset += off_diff;
6641                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6642                 btrfs_set_file_extent_offset(leaf, fi, offset);
6643         } else if (dback->disk_bytenr < entry->bytenr) {
6644                 u64 offset;
6645
6646                 offset = btrfs_file_extent_offset(leaf, fi);
6647                 if (dback->disk_bytenr + offset < entry->bytenr) {
6648                         fprintf(stderr,
6649 "ref is before the entry start, please take a btrfs-image of this file system and send it to a btrfs developer, ref %llu\n",
6650                                 dback->disk_bytenr);
6651                         ret = -EINVAL;
6652                         goto out;
6653                 }
6654
6655                 offset += dback->disk_bytenr;
6656                 offset -= entry->bytenr;
6657                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6658                 btrfs_set_file_extent_offset(leaf, fi, offset);
6659         }
6660
6661         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6662
6663         /*
6664          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6665          * only do this if we aren't using compression, otherwise it's a
6666          * trickier case.
6667          */
6668         if (!btrfs_file_extent_compression(leaf, fi))
6669                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6670         else
6671                 printf("ram bytes may be wrong?\n");
6672         btrfs_mark_buffer_dirty(leaf);
6673 out:
6674         err = btrfs_commit_transaction(trans, root);
6675         btrfs_release_path(path);
6676         return ret ? ret : err;
6677 }
6678
6679 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6680                            struct extent_record *rec)
6681 {
6682         struct extent_backref *back, *tmp;
6683         struct data_backref *dback;
6684         struct extent_entry *entry, *best = NULL;
6685         LIST_HEAD(entries);
6686         int nr_entries = 0;
6687         int broken_entries = 0;
6688         int ret = 0;
6689         short mismatch = 0;
6690
6691         /*
6692          * Metadata is easy and the backrefs should always agree on bytenr and
6693          * size, if not we've got bigger issues.
6694          */
6695         if (rec->metadata)
6696                 return 0;
6697
6698         rbtree_postorder_for_each_entry_safe(back, tmp,
6699                                              &rec->backref_tree, node) {
6700                 if (back->full_backref || !back->is_data)
6701                         continue;
6702
6703                 dback = to_data_backref(back);
6704
6705                 /*
6706                  * We only pay attention to backrefs that we found a real
6707                  * backref for.
6708                  */
6709                 if (dback->found_ref == 0)
6710                         continue;
6711
6712                 /*
6713                  * For now we only catch when the bytes don't match, not the
6714                  * bytenr.  We can easily do this at the same time, but I want
6715                  * to have a fs image to test on before we just add repair
6716                  * functionality willy-nilly so we know we won't screw up the
6717                  * repair.
6718                  */
6719
6720                 entry = find_entry(&entries, dback->disk_bytenr,
6721                                    dback->bytes);
6722                 if (!entry) {
6723                         entry = malloc(sizeof(struct extent_entry));
6724                         if (!entry) {
6725                                 ret = -ENOMEM;
6726                                 goto out;
6727                         }
6728                         memset(entry, 0, sizeof(*entry));
6729                         entry->bytenr = dback->disk_bytenr;
6730                         entry->bytes = dback->bytes;
6731                         list_add_tail(&entry->list, &entries);
6732                         nr_entries++;
6733                 }
6734
6735                 /*
6736                  * If we only have one entry we may think the entries agree when
6737                  * in reality they don't so we have to do some extra checking.
6738                  */
6739                 if (dback->disk_bytenr != rec->start ||
6740                     dback->bytes != rec->nr || back->broken)
6741                         mismatch = 1;
6742
6743                 if (back->broken) {
6744                         entry->broken++;
6745                         broken_entries++;
6746                 }
6747
6748                 entry->count++;
6749         }
6750
6751         /* Yay all the backrefs agree, carry on good sir */
6752         if (nr_entries <= 1 && !mismatch)
6753                 goto out;
6754
6755         fprintf(stderr,
6756                 "attempting to repair backref discrepency for bytenr %llu\n",
6757                 rec->start);
6758
6759         /*
6760          * First we want to see if the backrefs can agree amongst themselves who
6761          * is right, so figure out which one of the entries has the highest
6762          * count.
6763          */
6764         best = find_most_right_entry(&entries);
6765
6766         /*
6767          * Ok so we may have an even split between what the backrefs think, so
6768          * this is where we use the extent ref to see what it thinks.
6769          */
6770         if (!best) {
6771                 entry = find_entry(&entries, rec->start, rec->nr);
6772                 if (!entry && (!broken_entries || !rec->found_rec)) {
6773                         fprintf(stderr,
6774 "backrefs don't agree with each other and extent record doesn't agree with anybody, so we can't fix bytenr %llu bytes %llu\n",
6775                                 rec->start, rec->nr);
6776                         ret = -EINVAL;
6777                         goto out;
6778                 } else if (!entry) {
6779                         /*
6780                          * Ok our backrefs were broken, we'll assume this is the
6781                          * correct value and add an entry for this range.
6782                          */
6783                         entry = malloc(sizeof(struct extent_entry));
6784                         if (!entry) {
6785                                 ret = -ENOMEM;
6786                                 goto out;
6787                         }
6788                         memset(entry, 0, sizeof(*entry));
6789                         entry->bytenr = rec->start;
6790                         entry->bytes = rec->nr;
6791                         list_add_tail(&entry->list, &entries);
6792                         nr_entries++;
6793                 }
6794                 entry->count++;
6795                 best = find_most_right_entry(&entries);
6796                 if (!best) {
6797                         fprintf(stderr,
6798 "backrefs and extent record evenly split on who is right, this is going to require user input to fix bytenr %llu bytes %llu\n",
6799                                 rec->start, rec->nr);
6800                         ret = -EINVAL;
6801                         goto out;
6802                 }
6803         }
6804
6805         /*
6806          * I don't think this can happen currently as we'll abort() if we catch
6807          * this case higher up, but in case somebody removes that we still can't
6808          * deal with it properly here yet, so just bail out if that's the case.
6809          */
6810         if (best->bytenr != rec->start) {
6811                 fprintf(stderr,
6812 "extent start and backref starts don't match, please use btrfs-image on this file system and send it to a btrfs developer so they can make fsck fix this particular case.  bytenr is %llu, bytes is %llu\n",
6813                         rec->start, rec->nr);
6814                 ret = -EINVAL;
6815                 goto out;
6816         }
6817
6818         /*
6819          * Ok great we all agreed on an extent record, let's go find the real
6820          * references and fix up the ones that don't match.
6821          */
6822         rbtree_postorder_for_each_entry_safe(back, tmp,
6823                                              &rec->backref_tree, node) {
6824                 if (back->full_backref || !back->is_data)
6825                         continue;
6826
6827                 dback = to_data_backref(back);
6828
6829                 /*
6830                  * Still ignoring backrefs that don't have a real ref attached
6831                  * to them.
6832                  */
6833                 if (dback->found_ref == 0)
6834                         continue;
6835
6836                 if (dback->bytes == best->bytes &&
6837                     dback->disk_bytenr == best->bytenr)
6838                         continue;
6839
6840                 ret = repair_ref(info, path, dback, best);
6841                 if (ret)
6842                         goto out;
6843         }
6844
6845         /*
6846          * Ok we messed with the actual refs, which means we need to drop our
6847          * entire cache and go back and rescan.  I know this is a huge pain and
6848          * adds a lot of extra work, but it's the only way to be safe.  Once all
6849          * the backrefs agree we may not need to do anything to the extent
6850          * record itself.
6851          */
6852         ret = -EAGAIN;
6853 out:
6854         while (!list_empty(&entries)) {
6855                 entry = list_entry(entries.next, struct extent_entry, list);
6856                 list_del_init(&entry->list);
6857                 free(entry);
6858         }
6859         return ret;
6860 }
6861
6862 static int process_duplicates(struct cache_tree *extent_cache,
6863                               struct extent_record *rec)
6864 {
6865         struct extent_record *good, *tmp;
6866         struct cache_extent *cache;
6867         int ret;
6868
6869         /*
6870          * If we found a extent record for this extent then return, or if we
6871          * have more than one duplicate we are likely going to need to delete
6872          * something.
6873          */
6874         if (rec->found_rec || rec->num_duplicates > 1)
6875                 return 0;
6876
6877         /* Shouldn't happen but just in case */
6878         BUG_ON(!rec->num_duplicates);
6879
6880         /*
6881          * So this happens if we end up with a backref that doesn't match the
6882          * actual extent entry.  So either the backref is bad or the extent
6883          * entry is bad.  Either way we want to have the extent_record actually
6884          * reflect what we found in the extent_tree, so we need to take the
6885          * duplicate out and use that as the extent_record since the only way we
6886          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6887          */
6888         remove_cache_extent(extent_cache, &rec->cache);
6889
6890         good = to_extent_record(rec->dups.next);
6891         list_del_init(&good->list);
6892         INIT_LIST_HEAD(&good->backrefs);
6893         INIT_LIST_HEAD(&good->dups);
6894         good->cache.start = good->start;
6895         good->cache.size = good->nr;
6896         good->content_checked = 0;
6897         good->owner_ref_checked = 0;
6898         good->num_duplicates = 0;
6899         good->refs = rec->refs;
6900         list_splice_init(&rec->backrefs, &good->backrefs);
6901         while (1) {
6902                 cache = lookup_cache_extent(extent_cache, good->start,
6903                                             good->nr);
6904                 if (!cache)
6905                         break;
6906                 tmp = container_of(cache, struct extent_record, cache);
6907
6908                 /*
6909                  * If we find another overlapping extent and it's found_rec is
6910                  * set then it's a duplicate and we need to try and delete
6911                  * something.
6912                  */
6913                 if (tmp->found_rec || tmp->num_duplicates > 0) {
6914                         if (list_empty(&good->list))
6915                                 list_add_tail(&good->list,
6916                                               &duplicate_extents);
6917                         good->num_duplicates += tmp->num_duplicates + 1;
6918                         list_splice_init(&tmp->dups, &good->dups);
6919                         list_del_init(&tmp->list);
6920                         list_add_tail(&tmp->list, &good->dups);
6921                         remove_cache_extent(extent_cache, &tmp->cache);
6922                         continue;
6923                 }
6924
6925                 /*
6926                  * Ok we have another non extent item backed extent rec, so lets
6927                  * just add it to this extent and carry on like we did above.
6928                  */
6929                 good->refs += tmp->refs;
6930                 list_splice_init(&tmp->backrefs, &good->backrefs);
6931                 remove_cache_extent(extent_cache, &tmp->cache);
6932                 free(tmp);
6933         }
6934         ret = insert_cache_extent(extent_cache, &good->cache);
6935         BUG_ON(ret);
6936         free(rec);
6937         return good->num_duplicates ? 0 : 1;
6938 }
6939
6940 static int delete_duplicate_records(struct btrfs_root *root,
6941                                     struct extent_record *rec)
6942 {
6943         struct btrfs_trans_handle *trans;
6944         LIST_HEAD(delete_list);
6945         struct btrfs_path path;
6946         struct extent_record *tmp, *good, *n;
6947         int nr_del = 0;
6948         int ret = 0, err;
6949         struct btrfs_key key;
6950
6951         btrfs_init_path(&path);
6952
6953         good = rec;
6954         /* Find the record that covers all of the duplicates. */
6955         list_for_each_entry(tmp, &rec->dups, list) {
6956                 if (good->start < tmp->start)
6957                         continue;
6958                 if (good->nr > tmp->nr)
6959                         continue;
6960
6961                 if (tmp->start + tmp->nr < good->start + good->nr) {
6962                         fprintf(stderr,
6963 "Ok we have overlapping extents that aren't completely covered by each other, this is going to require more careful thought. The extents are [%llu-%llu] and [%llu-%llu]\n",
6964                                 tmp->start, tmp->nr, good->start, good->nr);
6965                         abort();
6966                 }
6967                 good = tmp;
6968         }
6969
6970         if (good != rec)
6971                 list_add_tail(&rec->list, &delete_list);
6972
6973         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
6974                 if (tmp == good)
6975                         continue;
6976                 list_move_tail(&tmp->list, &delete_list);
6977         }
6978
6979         root = root->fs_info->extent_root;
6980         trans = btrfs_start_transaction(root, 1);
6981         if (IS_ERR(trans)) {
6982                 ret = PTR_ERR(trans);
6983                 goto out;
6984         }
6985
6986         list_for_each_entry(tmp, &delete_list, list) {
6987                 if (tmp->found_rec == 0)
6988                         continue;
6989                 key.objectid = tmp->start;
6990                 key.type = BTRFS_EXTENT_ITEM_KEY;
6991                 key.offset = tmp->nr;
6992
6993                 /* Shouldn't happen but just in case */
6994                 if (tmp->metadata) {
6995                         fprintf(stderr,
6996 "well this shouldn't happen, extent record overlaps but is metadata? [%llu, %llu]\n",
6997                                 tmp->start, tmp->nr);
6998                         abort();
6999                 }
7000
7001                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
7002                 if (ret) {
7003                         if (ret > 0)
7004                                 ret = -EINVAL;
7005                         break;
7006                 }
7007                 ret = btrfs_del_item(trans, root, &path);
7008                 if (ret)
7009                         break;
7010                 btrfs_release_path(&path);
7011                 nr_del++;
7012         }
7013         err = btrfs_commit_transaction(trans, root);
7014         if (err && !ret)
7015                 ret = err;
7016 out:
7017         while (!list_empty(&delete_list)) {
7018                 tmp = to_extent_record(delete_list.next);
7019                 list_del_init(&tmp->list);
7020                 if (tmp == rec)
7021                         continue;
7022                 free(tmp);
7023         }
7024
7025         while (!list_empty(&rec->dups)) {
7026                 tmp = to_extent_record(rec->dups.next);
7027                 list_del_init(&tmp->list);
7028                 free(tmp);
7029         }
7030
7031         btrfs_release_path(&path);
7032
7033         if (!ret && !nr_del)
7034                 rec->num_duplicates = 0;
7035
7036         return ret ? ret : nr_del;
7037 }
7038
7039 static int find_possible_backrefs(struct btrfs_fs_info *info,
7040                                   struct btrfs_path *path,
7041                                   struct cache_tree *extent_cache,
7042                                   struct extent_record *rec)
7043 {
7044         struct btrfs_root *root;
7045         struct extent_backref *back, *tmp;
7046         struct data_backref *dback;
7047         struct cache_extent *cache;
7048         struct btrfs_file_extent_item *fi;
7049         struct btrfs_key key;
7050         u64 bytenr, bytes;
7051         int ret;
7052
7053         rbtree_postorder_for_each_entry_safe(back, tmp,
7054                                              &rec->backref_tree, node) {
7055                 /* Don't care about full backrefs (poor unloved backrefs) */
7056                 if (back->full_backref || !back->is_data)
7057                         continue;
7058
7059                 dback = to_data_backref(back);
7060
7061                 /* We found this one, we don't need to do a lookup */
7062                 if (dback->found_ref)
7063                         continue;
7064
7065                 key.objectid = dback->root;
7066                 key.type = BTRFS_ROOT_ITEM_KEY;
7067                 key.offset = (u64)-1;
7068
7069                 root = btrfs_read_fs_root(info, &key);
7070
7071                 /* No root, definitely a bad ref, skip */
7072                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7073                         continue;
7074                 /* Other err, exit */
7075                 if (IS_ERR(root))
7076                         return PTR_ERR(root);
7077
7078                 key.objectid = dback->owner;
7079                 key.type = BTRFS_EXTENT_DATA_KEY;
7080                 key.offset = dback->offset;
7081                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7082                 if (ret) {
7083                         btrfs_release_path(path);
7084                         if (ret < 0)
7085                                 return ret;
7086                         /* Didn't find it, we can carry on */
7087                         ret = 0;
7088                         continue;
7089                 }
7090
7091                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7092                                     struct btrfs_file_extent_item);
7093                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7094                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7095                 btrfs_release_path(path);
7096                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7097                 if (cache) {
7098                         struct extent_record *tmp;
7099
7100                         tmp = container_of(cache, struct extent_record, cache);
7101
7102                         /*
7103                          * If we found an extent record for the bytenr for this
7104                          * particular backref then we can't add it to our
7105                          * current extent record.  We only want to add backrefs
7106                          * that don't have a corresponding extent item in the
7107                          * extent tree since they likely belong to this record
7108                          * and we need to fix it if it doesn't match bytenrs.
7109                          */
7110                         if  (tmp->found_rec)
7111                                 continue;
7112                 }
7113
7114                 dback->found_ref += 1;
7115                 dback->disk_bytenr = bytenr;
7116                 dback->bytes = bytes;
7117
7118                 /*
7119                  * Set this so the verify backref code knows not to trust the
7120                  * values in this backref.
7121                  */
7122                 back->broken = 1;
7123         }
7124
7125         return 0;
7126 }
7127
7128 /*
7129  * Record orphan data ref into corresponding root.
7130  *
7131  * Return 0 if the extent item contains data ref and recorded.
7132  * Return 1 if the extent item contains no useful data ref
7133  *   On that case, it may contains only shared_dataref or metadata backref
7134  *   or the file extent exists(this should be handled by the extent bytenr
7135  *   recovery routine)
7136  * Return <0 if something goes wrong.
7137  */
7138 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7139                                       struct extent_record *rec)
7140 {
7141         struct btrfs_key key;
7142         struct btrfs_root *dest_root;
7143         struct extent_backref *back, *tmp;
7144         struct data_backref *dback;
7145         struct orphan_data_extent *orphan;
7146         struct btrfs_path path;
7147         int recorded_data_ref = 0;
7148         int ret = 0;
7149
7150         if (rec->metadata)
7151                 return 1;
7152         btrfs_init_path(&path);
7153         rbtree_postorder_for_each_entry_safe(back, tmp,
7154                                              &rec->backref_tree, node) {
7155                 if (back->full_backref || !back->is_data ||
7156                     !back->found_extent_tree)
7157                         continue;
7158                 dback = to_data_backref(back);
7159                 if (dback->found_ref)
7160                         continue;
7161                 key.objectid = dback->root;
7162                 key.type = BTRFS_ROOT_ITEM_KEY;
7163                 key.offset = (u64)-1;
7164
7165                 dest_root = btrfs_read_fs_root(fs_info, &key);
7166
7167                 /* For non-exist root we just skip it */
7168                 if (IS_ERR(dest_root) || !dest_root)
7169                         continue;
7170
7171                 key.objectid = dback->owner;
7172                 key.type = BTRFS_EXTENT_DATA_KEY;
7173                 key.offset = dback->offset;
7174
7175                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
7176                 btrfs_release_path(&path);
7177                 /*
7178                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7179                  * we need to record it for inode/file extent rebuild.
7180                  * For ret > 0, we record it only for file extent rebuild.
7181                  * For ret == 0, the file extent exists but only bytenr
7182                  * mismatch, let the original bytenr fix routine to handle,
7183                  * don't record it.
7184                  */
7185                 if (ret == 0)
7186                         continue;
7187                 ret = 0;
7188                 orphan = malloc(sizeof(*orphan));
7189                 if (!orphan) {
7190                         ret = -ENOMEM;
7191                         goto out;
7192                 }
7193                 INIT_LIST_HEAD(&orphan->list);
7194                 orphan->root = dback->root;
7195                 orphan->objectid = dback->owner;
7196                 orphan->offset = dback->offset;
7197                 orphan->disk_bytenr = rec->cache.start;
7198                 orphan->disk_len = rec->cache.size;
7199                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7200                 recorded_data_ref = 1;
7201         }
7202 out:
7203         btrfs_release_path(&path);
7204         if (!ret)
7205                 return !recorded_data_ref;
7206         else
7207                 return ret;
7208 }
7209
7210 /*
7211  * when an incorrect extent item is found, this will delete
7212  * all of the existing entries for it and recreate them
7213  * based on what the tree scan found.
7214  */
7215 static int fixup_extent_refs(struct btrfs_fs_info *info,
7216                              struct cache_tree *extent_cache,
7217                              struct extent_record *rec)
7218 {
7219         struct btrfs_trans_handle *trans = NULL;
7220         int ret;
7221         struct btrfs_path path;
7222         struct cache_extent *cache;
7223         struct extent_backref *back, *tmp;
7224         int allocated = 0;
7225         u64 flags = 0;
7226
7227         if (rec->flag_block_full_backref)
7228                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7229
7230         btrfs_init_path(&path);
7231         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7232                 /*
7233                  * Sometimes the backrefs themselves are so broken they don't
7234                  * get attached to any meaningful rec, so first go back and
7235                  * check any of our backrefs that we couldn't find and throw
7236                  * them into the list if we find the backref so that
7237                  * verify_backrefs can figure out what to do.
7238                  */
7239                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
7240                 if (ret < 0)
7241                         goto out;
7242         }
7243
7244         /* step one, make sure all of the backrefs agree */
7245         ret = verify_backrefs(info, &path, rec);
7246         if (ret < 0)
7247                 goto out;
7248
7249         trans = btrfs_start_transaction(info->extent_root, 1);
7250         if (IS_ERR(trans)) {
7251                 ret = PTR_ERR(trans);
7252                 goto out;
7253         }
7254
7255         /* step two, delete all the existing records */
7256         ret = delete_extent_records(trans, info->extent_root, &path,
7257                                     rec->start);
7258
7259         if (ret < 0)
7260                 goto out;
7261
7262         /* was this block corrupt?  If so, don't add references to it */
7263         cache = lookup_cache_extent(info->corrupt_blocks,
7264                                     rec->start, rec->max_size);
7265         if (cache) {
7266                 ret = 0;
7267                 goto out;
7268         }
7269
7270         /* step three, recreate all the refs we did find */
7271         rbtree_postorder_for_each_entry_safe(back, tmp,
7272                                              &rec->backref_tree, node) {
7273                 /*
7274                  * if we didn't find any references, don't create a
7275                  * new extent record
7276                  */
7277                 if (!back->found_ref)
7278                         continue;
7279
7280                 rec->bad_full_backref = 0;
7281                 ret = record_extent(trans, info, &path, rec, back, allocated,
7282                                     flags);
7283                 allocated = 1;
7284
7285                 if (ret)
7286                         goto out;
7287         }
7288 out:
7289         if (trans) {
7290                 int err = btrfs_commit_transaction(trans, info->extent_root);
7291
7292                 if (!ret)
7293                         ret = err;
7294         }
7295
7296         if (!ret)
7297                 fprintf(stderr, "Repaired extent references for %llu\n",
7298                                 (unsigned long long)rec->start);
7299
7300         btrfs_release_path(&path);
7301         return ret;
7302 }
7303
7304 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7305                               struct extent_record *rec)
7306 {
7307         struct btrfs_trans_handle *trans;
7308         struct btrfs_root *root = fs_info->extent_root;
7309         struct btrfs_path path;
7310         struct btrfs_extent_item *ei;
7311         struct btrfs_key key;
7312         u64 flags;
7313         int ret = 0;
7314
7315         key.objectid = rec->start;
7316         if (rec->metadata) {
7317                 key.type = BTRFS_METADATA_ITEM_KEY;
7318                 key.offset = rec->info_level;
7319         } else {
7320                 key.type = BTRFS_EXTENT_ITEM_KEY;
7321                 key.offset = rec->max_size;
7322         }
7323
7324         trans = btrfs_start_transaction(root, 0);
7325         if (IS_ERR(trans))
7326                 return PTR_ERR(trans);
7327
7328         btrfs_init_path(&path);
7329         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
7330         if (ret < 0) {
7331                 btrfs_release_path(&path);
7332                 btrfs_commit_transaction(trans, root);
7333                 return ret;
7334         } else if (ret) {
7335                 fprintf(stderr, "Didn't find extent for %llu\n",
7336                         (unsigned long long)rec->start);
7337                 btrfs_release_path(&path);
7338                 btrfs_commit_transaction(trans, root);
7339                 return -ENOENT;
7340         }
7341
7342         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
7343                             struct btrfs_extent_item);
7344         flags = btrfs_extent_flags(path.nodes[0], ei);
7345         if (rec->flag_block_full_backref) {
7346                 fprintf(stderr, "setting full backref on %llu\n",
7347                         (unsigned long long)key.objectid);
7348                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7349         } else {
7350                 fprintf(stderr, "clearing full backref on %llu\n",
7351                         (unsigned long long)key.objectid);
7352                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7353         }
7354         btrfs_set_extent_flags(path.nodes[0], ei, flags);
7355         btrfs_mark_buffer_dirty(path.nodes[0]);
7356         btrfs_release_path(&path);
7357         ret = btrfs_commit_transaction(trans, root);
7358         if (!ret)
7359                 fprintf(stderr, "Repaired extent flags for %llu\n",
7360                                 (unsigned long long)rec->start);
7361
7362         return ret;
7363 }
7364
7365 /* right now we only prune from the extent allocation tree */
7366 static int prune_one_block(struct btrfs_trans_handle *trans,
7367                            struct btrfs_fs_info *info,
7368                            struct btrfs_corrupt_block *corrupt)
7369 {
7370         int ret;
7371         struct btrfs_path path;
7372         struct extent_buffer *eb;
7373         u64 found;
7374         int slot;
7375         int nritems;
7376         int level = corrupt->level + 1;
7377
7378         btrfs_init_path(&path);
7379 again:
7380         /* we want to stop at the parent to our busted block */
7381         path.lowest_level = level;
7382
7383         ret = btrfs_search_slot(trans, info->extent_root,
7384                                 &corrupt->key, &path, -1, 1);
7385
7386         if (ret < 0)
7387                 goto out;
7388
7389         eb = path.nodes[level];
7390         if (!eb) {
7391                 ret = -ENOENT;
7392                 goto out;
7393         }
7394
7395         /*
7396          * hopefully the search gave us the block we want to prune,
7397          * lets try that first
7398          */
7399         slot = path.slots[level];
7400         found =  btrfs_node_blockptr(eb, slot);
7401         if (found == corrupt->cache.start)
7402                 goto del_ptr;
7403
7404         nritems = btrfs_header_nritems(eb);
7405
7406         /* the search failed, lets scan this node and hope we find it */
7407         for (slot = 0; slot < nritems; slot++) {
7408                 found =  btrfs_node_blockptr(eb, slot);
7409                 if (found == corrupt->cache.start)
7410                         goto del_ptr;
7411         }
7412         /*
7413          * We couldn't find the bad block.
7414          * TODO: search all the nodes for pointers to this block
7415          */
7416         if (eb == info->extent_root->node) {
7417                 ret = -ENOENT;
7418                 goto out;
7419         } else {
7420                 level++;
7421                 btrfs_release_path(&path);
7422                 goto again;
7423         }
7424
7425 del_ptr:
7426         printk("deleting pointer to block %llu\n", corrupt->cache.start);
7427         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
7428
7429 out:
7430         btrfs_release_path(&path);
7431         return ret;
7432 }
7433
7434 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7435 {
7436         struct btrfs_trans_handle *trans = NULL;
7437         struct cache_extent *cache;
7438         struct btrfs_corrupt_block *corrupt;
7439
7440         while (1) {
7441                 cache = search_cache_extent(info->corrupt_blocks, 0);
7442                 if (!cache)
7443                         break;
7444                 if (!trans) {
7445                         trans = btrfs_start_transaction(info->extent_root, 1);
7446                         if (IS_ERR(trans))
7447                                 return PTR_ERR(trans);
7448                 }
7449                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7450                 prune_one_block(trans, info, corrupt);
7451                 remove_cache_extent(info->corrupt_blocks, cache);
7452         }
7453         if (trans)
7454                 return btrfs_commit_transaction(trans, info->extent_root);
7455         return 0;
7456 }
7457
7458 static int check_extent_refs(struct btrfs_root *root,
7459                              struct cache_tree *extent_cache)
7460 {
7461         struct extent_record *rec;
7462         struct cache_extent *cache;
7463         int ret = 0;
7464         int had_dups = 0;
7465         int err = 0;
7466
7467         if (repair) {
7468                 /*
7469                  * if we're doing a repair, we have to make sure
7470                  * we don't allocate from the problem extents.
7471                  * In the worst case, this will be all the
7472                  * extents in the FS
7473                  */
7474                 cache = search_cache_extent(extent_cache, 0);
7475                 while (cache) {
7476                         rec = container_of(cache, struct extent_record, cache);
7477                         set_extent_dirty(root->fs_info->excluded_extents,
7478                                          rec->start,
7479                                          rec->start + rec->max_size - 1);
7480                         cache = next_cache_extent(cache);
7481                 }
7482
7483                 /* pin down all the corrupted blocks too */
7484                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7485                 while (cache) {
7486                         set_extent_dirty(root->fs_info->excluded_extents,
7487                                          cache->start,
7488                                          cache->start + cache->size - 1);
7489                         cache = next_cache_extent(cache);
7490                 }
7491                 prune_corrupt_blocks(root->fs_info);
7492                 reset_cached_block_groups(root->fs_info);
7493         }
7494
7495         reset_cached_block_groups(root->fs_info);
7496
7497         /*
7498          * We need to delete any duplicate entries we find first otherwise we
7499          * could mess up the extent tree when we have backrefs that actually
7500          * belong to a different extent item and not the weird duplicate one.
7501          */
7502         while (repair && !list_empty(&duplicate_extents)) {
7503                 rec = to_extent_record(duplicate_extents.next);
7504                 list_del_init(&rec->list);
7505
7506                 /* Sometimes we can find a backref before we find an actual
7507                  * extent, so we need to process it a little bit to see if there
7508                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7509                  * if this is a backref screwup.  If we need to delete stuff
7510                  * process_duplicates() will return 0, otherwise it will return
7511                  * 1 and we
7512                  */
7513                 if (process_duplicates(extent_cache, rec))
7514                         continue;
7515                 ret = delete_duplicate_records(root, rec);
7516                 if (ret < 0)
7517                         return ret;
7518                 /*
7519                  * delete_duplicate_records will return the number of entries
7520                  * deleted, so if it's greater than 0 then we know we actually
7521                  * did something and we need to remove.
7522                  */
7523                 if (ret)
7524                         had_dups = 1;
7525         }
7526
7527         if (had_dups)
7528                 return -EAGAIN;
7529
7530         while (1) {
7531                 int cur_err = 0;
7532                 int fix = 0;
7533
7534                 cache = search_cache_extent(extent_cache, 0);
7535                 if (!cache)
7536                         break;
7537                 rec = container_of(cache, struct extent_record, cache);
7538                 if (rec->num_duplicates) {
7539                         fprintf(stderr,
7540                                 "extent item %llu has multiple extent items\n",
7541                                 (unsigned long long)rec->start);
7542                         cur_err = 1;
7543                 }
7544
7545                 if (rec->refs != rec->extent_item_refs) {
7546                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7547                                 (unsigned long long)rec->start,
7548                                 (unsigned long long)rec->nr);
7549                         fprintf(stderr, "extent item %llu, found %llu\n",
7550                                 (unsigned long long)rec->extent_item_refs,
7551                                 (unsigned long long)rec->refs);
7552                         ret = record_orphan_data_extents(root->fs_info, rec);
7553                         if (ret < 0)
7554                                 goto repair_abort;
7555                         fix = ret;
7556                         cur_err = 1;
7557                 }
7558                 if (all_backpointers_checked(rec, 1)) {
7559                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7560                                 (unsigned long long)rec->start,
7561                                 (unsigned long long)rec->nr);
7562                         fix = 1;
7563                         cur_err = 1;
7564                 }
7565                 if (!rec->owner_ref_checked) {
7566                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7567                                 (unsigned long long)rec->start,
7568                                 (unsigned long long)rec->nr);
7569                         fix = 1;
7570                         cur_err = 1;
7571                 }
7572
7573                 if (repair && fix) {
7574                         ret = fixup_extent_refs(root->fs_info, extent_cache,
7575                                                 rec);
7576                         if (ret)
7577                                 goto repair_abort;
7578                 }
7579
7580
7581                 if (rec->bad_full_backref) {
7582                         fprintf(stderr, "bad full backref, on [%llu]\n",
7583                                 (unsigned long long)rec->start);
7584                         if (repair) {
7585                                 ret = fixup_extent_flags(root->fs_info, rec);
7586                                 if (ret)
7587                                         goto repair_abort;
7588                                 fix = 1;
7589                         }
7590                         cur_err = 1;
7591                 }
7592                 /*
7593                  * Although it's not a extent ref's problem, we reuse this
7594                  * routine for error reporting.
7595                  * No repair function yet.
7596                  */
7597                 if (rec->crossing_stripes) {
7598                         fprintf(stderr,
7599                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7600                                 rec->start, rec->start + rec->max_size);
7601                         cur_err = 1;
7602                 }
7603
7604                 if (rec->wrong_chunk_type) {
7605                         fprintf(stderr,
7606                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7607                                 rec->start, rec->start + rec->max_size);
7608                         cur_err = 1;
7609                 }
7610
7611                 err = cur_err;
7612                 remove_cache_extent(extent_cache, cache);
7613                 free_all_extent_backrefs(rec);
7614                 if (!init_extent_tree && repair && (!cur_err || fix))
7615                         clear_extent_dirty(root->fs_info->excluded_extents,
7616                                            rec->start,
7617                                            rec->start + rec->max_size - 1);
7618                 free(rec);
7619         }
7620 repair_abort:
7621         if (repair) {
7622                 if (ret && ret != -EAGAIN) {
7623                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7624                         exit(1);
7625                 } else if (!ret) {
7626                         struct btrfs_trans_handle *trans;
7627
7628                         root = root->fs_info->extent_root;
7629                         trans = btrfs_start_transaction(root, 1);
7630                         if (IS_ERR(trans)) {
7631                                 ret = PTR_ERR(trans);
7632                                 goto repair_abort;
7633                         }
7634
7635                         ret = btrfs_fix_block_accounting(trans, root);
7636                         if (ret)
7637                                 goto repair_abort;
7638                         ret = btrfs_commit_transaction(trans, root);
7639                         if (ret)
7640                                 goto repair_abort;
7641                 }
7642                 return ret;
7643         }
7644
7645         if (err)
7646                 err = -EIO;
7647         return err;
7648 }
7649
7650 /*
7651  * Check the chunk with its block group/dev list ref:
7652  * Return 0 if all refs seems valid.
7653  * Return 1 if part of refs seems valid, need later check for rebuild ref
7654  * like missing block group and needs to search extent tree to rebuild them.
7655  * Return -1 if essential refs are missing and unable to rebuild.
7656  */
7657 static int check_chunk_refs(struct chunk_record *chunk_rec,
7658                             struct block_group_tree *block_group_cache,
7659                             struct device_extent_tree *dev_extent_cache,
7660                             int silent)
7661 {
7662         struct cache_extent *block_group_item;
7663         struct block_group_record *block_group_rec;
7664         struct cache_extent *dev_extent_item;
7665         struct device_extent_record *dev_extent_rec;
7666         u64 devid;
7667         u64 offset;
7668         u64 length;
7669         int metadump_v2 = 0;
7670         int i;
7671         int ret = 0;
7672
7673         block_group_item = lookup_cache_extent(&block_group_cache->tree,
7674                                                chunk_rec->offset,
7675                                                chunk_rec->length);
7676         if (block_group_item) {
7677                 block_group_rec = container_of(block_group_item,
7678                                                struct block_group_record,
7679                                                cache);
7680                 if (chunk_rec->length != block_group_rec->offset ||
7681                     chunk_rec->offset != block_group_rec->objectid ||
7682                     (!metadump_v2 &&
7683                      chunk_rec->type_flags != block_group_rec->flags)) {
7684                         if (!silent)
7685                                 fprintf(stderr,
7686                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7687                                         chunk_rec->objectid,
7688                                         chunk_rec->type,
7689                                         chunk_rec->offset,
7690                                         chunk_rec->length,
7691                                         chunk_rec->offset,
7692                                         chunk_rec->type_flags,
7693                                         block_group_rec->objectid,
7694                                         block_group_rec->type,
7695                                         block_group_rec->offset,
7696                                         block_group_rec->offset,
7697                                         block_group_rec->objectid,
7698                                         block_group_rec->flags);
7699                         ret = -1;
7700                 } else {
7701                         list_del_init(&block_group_rec->list);
7702                         chunk_rec->bg_rec = block_group_rec;
7703                 }
7704         } else {
7705                 if (!silent)
7706                         fprintf(stderr,
7707                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7708                                 chunk_rec->objectid,
7709                                 chunk_rec->type,
7710                                 chunk_rec->offset,
7711                                 chunk_rec->length,
7712                                 chunk_rec->offset,
7713                                 chunk_rec->type_flags);
7714                 ret = 1;
7715         }
7716
7717         if (metadump_v2)
7718                 return ret;
7719
7720         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7721                                     chunk_rec->num_stripes);
7722         for (i = 0; i < chunk_rec->num_stripes; ++i) {
7723                 devid = chunk_rec->stripes[i].devid;
7724                 offset = chunk_rec->stripes[i].offset;
7725                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7726                                                        devid, offset, length);
7727                 if (dev_extent_item) {
7728                         dev_extent_rec = container_of(dev_extent_item,
7729                                                 struct device_extent_record,
7730                                                 cache);
7731                         if (dev_extent_rec->objectid != devid ||
7732                             dev_extent_rec->offset != offset ||
7733                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
7734                             dev_extent_rec->length != length) {
7735                                 if (!silent)
7736                                         fprintf(stderr,
7737                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7738                                                 chunk_rec->objectid,
7739                                                 chunk_rec->type,
7740                                                 chunk_rec->offset,
7741                                                 chunk_rec->stripes[i].devid,
7742                                                 chunk_rec->stripes[i].offset,
7743                                                 dev_extent_rec->objectid,
7744                                                 dev_extent_rec->offset,
7745                                                 dev_extent_rec->length);
7746                                 ret = -1;
7747                         } else {
7748                                 list_move(&dev_extent_rec->chunk_list,
7749                                           &chunk_rec->dextents);
7750                         }
7751                 } else {
7752                         if (!silent)
7753                                 fprintf(stderr,
7754                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7755                                         chunk_rec->objectid,
7756                                         chunk_rec->type,
7757                                         chunk_rec->offset,
7758                                         chunk_rec->stripes[i].devid,
7759                                         chunk_rec->stripes[i].offset);
7760                         ret = -1;
7761                 }
7762         }
7763         return ret;
7764 }
7765
7766 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7767 int check_chunks(struct cache_tree *chunk_cache,
7768                  struct block_group_tree *block_group_cache,
7769                  struct device_extent_tree *dev_extent_cache,
7770                  struct list_head *good, struct list_head *bad,
7771                  struct list_head *rebuild, int silent)
7772 {
7773         struct cache_extent *chunk_item;
7774         struct chunk_record *chunk_rec;
7775         struct block_group_record *bg_rec;
7776         struct device_extent_record *dext_rec;
7777         int err;
7778         int ret = 0;
7779
7780         chunk_item = first_cache_extent(chunk_cache);
7781         while (chunk_item) {
7782                 chunk_rec = container_of(chunk_item, struct chunk_record,
7783                                          cache);
7784                 err = check_chunk_refs(chunk_rec, block_group_cache,
7785                                        dev_extent_cache, silent);
7786                 if (err < 0)
7787                         ret = err;
7788                 if (err == 0 && good)
7789                         list_add_tail(&chunk_rec->list, good);
7790                 if (err > 0 && rebuild)
7791                         list_add_tail(&chunk_rec->list, rebuild);
7792                 if (err < 0 && bad)
7793                         list_add_tail(&chunk_rec->list, bad);
7794                 chunk_item = next_cache_extent(chunk_item);
7795         }
7796
7797         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7798                 if (!silent)
7799                         fprintf(stderr,
7800                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7801                                 bg_rec->objectid,
7802                                 bg_rec->offset,
7803                                 bg_rec->flags);
7804                 if (!ret)
7805                         ret = 1;
7806         }
7807
7808         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7809                             chunk_list) {
7810                 if (!silent)
7811                         fprintf(stderr,
7812                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7813                                 dext_rec->objectid,
7814                                 dext_rec->offset,
7815                                 dext_rec->length);
7816                 if (!ret)
7817                         ret = 1;
7818         }
7819         return ret;
7820 }
7821
7822
7823 static int check_device_used(struct device_record *dev_rec,
7824                              struct device_extent_tree *dext_cache)
7825 {
7826         struct cache_extent *cache;
7827         struct device_extent_record *dev_extent_rec;
7828         u64 total_byte = 0;
7829
7830         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7831         while (cache) {
7832                 dev_extent_rec = container_of(cache,
7833                                               struct device_extent_record,
7834                                               cache);
7835                 if (dev_extent_rec->objectid != dev_rec->devid)
7836                         break;
7837
7838                 list_del_init(&dev_extent_rec->device_list);
7839                 total_byte += dev_extent_rec->length;
7840                 cache = next_cache_extent(cache);
7841         }
7842
7843         if (total_byte != dev_rec->byte_used) {
7844                 fprintf(stderr,
7845                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7846                         total_byte, dev_rec->byte_used, dev_rec->objectid,
7847                         dev_rec->type, dev_rec->offset);
7848                 return -1;
7849         } else {
7850                 return 0;
7851         }
7852 }
7853
7854 /*
7855  * Unlike device size alignment check above, some super total_bytes check
7856  * failure can lead to mount failure for newer kernel.
7857  *
7858  * So this function will return the error for a fatal super total_bytes problem.
7859  */
7860 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
7861 {
7862         struct btrfs_device *dev;
7863         struct list_head *dev_list = &fs_info->fs_devices->devices;
7864         u64 total_bytes = 0;
7865         u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
7866
7867         list_for_each_entry(dev, dev_list, dev_list)
7868                 total_bytes += dev->total_bytes;
7869
7870         /* Important check, which can cause unmountable fs */
7871         if (super_bytes < total_bytes) {
7872                 error("super total bytes %llu smaller than real device(s) size %llu",
7873                         super_bytes, total_bytes);
7874                 error("mounting this fs may fail for newer kernels");
7875                 error("this can be fixed by 'btrfs rescue fix-device-size'");
7876                 return false;
7877         }
7878
7879         /*
7880          * Optional check, just to make everything aligned and match with each
7881          * other.
7882          *
7883          * For a btrfs-image restored fs, we don't need to check it anyway.
7884          */
7885         if (btrfs_super_flags(fs_info->super_copy) &
7886             (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
7887                 return true;
7888         if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
7889             !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
7890             super_bytes != total_bytes) {
7891                 warning("minor unaligned/mismatch device size detected");
7892                 warning(
7893                 "recommended to use 'btrfs rescue fix-device-size' to fix it");
7894         }
7895         return true;
7896 }
7897
7898 /* check btrfs_dev_item -> btrfs_dev_extent */
7899 static int check_devices(struct rb_root *dev_cache,
7900                          struct device_extent_tree *dev_extent_cache)
7901 {
7902         struct rb_node *dev_node;
7903         struct device_record *dev_rec;
7904         struct device_extent_record *dext_rec;
7905         int err;
7906         int ret = 0;
7907
7908         dev_node = rb_first(dev_cache);
7909         while (dev_node) {
7910                 dev_rec = container_of(dev_node, struct device_record, node);
7911                 err = check_device_used(dev_rec, dev_extent_cache);
7912                 if (err)
7913                         ret = err;
7914
7915                 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
7916                                          global_info->sectorsize);
7917                 dev_node = rb_next(dev_node);
7918         }
7919         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7920                             device_list) {
7921                 fprintf(stderr,
7922                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7923                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
7924                 if (!ret)
7925                         ret = 1;
7926         }
7927         return ret;
7928 }
7929
7930 static int add_root_item_to_list(struct list_head *head,
7931                                   u64 objectid, u64 bytenr, u64 last_snapshot,
7932                                   u8 level, u8 drop_level,
7933                                   struct btrfs_key *drop_key)
7934 {
7935         struct root_item_record *ri_rec;
7936
7937         ri_rec = malloc(sizeof(*ri_rec));
7938         if (!ri_rec)
7939                 return -ENOMEM;
7940         ri_rec->bytenr = bytenr;
7941         ri_rec->objectid = objectid;
7942         ri_rec->level = level;
7943         ri_rec->drop_level = drop_level;
7944         ri_rec->last_snapshot = last_snapshot;
7945         if (drop_key)
7946                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
7947         list_add_tail(&ri_rec->list, head);
7948
7949         return 0;
7950 }
7951
7952 static void free_root_item_list(struct list_head *list)
7953 {
7954         struct root_item_record *ri_rec;
7955
7956         while (!list_empty(list)) {
7957                 ri_rec = list_first_entry(list, struct root_item_record,
7958                                           list);
7959                 list_del_init(&ri_rec->list);
7960                 free(ri_rec);
7961         }
7962 }
7963
7964 static int deal_root_from_list(struct list_head *list,
7965                                struct btrfs_root *root,
7966                                struct block_info *bits,
7967                                int bits_nr,
7968                                struct cache_tree *pending,
7969                                struct cache_tree *seen,
7970                                struct cache_tree *reada,
7971                                struct cache_tree *nodes,
7972                                struct cache_tree *extent_cache,
7973                                struct cache_tree *chunk_cache,
7974                                struct rb_root *dev_cache,
7975                                struct block_group_tree *block_group_cache,
7976                                struct device_extent_tree *dev_extent_cache)
7977 {
7978         int ret = 0;
7979         u64 last;
7980
7981         while (!list_empty(list)) {
7982                 struct root_item_record *rec;
7983                 struct extent_buffer *buf;
7984
7985                 rec = list_entry(list->next,
7986                                  struct root_item_record, list);
7987                 last = 0;
7988                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
7989                 if (!extent_buffer_uptodate(buf)) {
7990                         free_extent_buffer(buf);
7991                         ret = -EIO;
7992                         break;
7993                 }
7994                 ret = add_root_to_pending(buf, extent_cache, pending,
7995                                     seen, nodes, rec->objectid);
7996                 if (ret < 0)
7997                         break;
7998                 /*
7999                  * To rebuild extent tree, we need deal with snapshot
8000                  * one by one, otherwise we deal with node firstly which
8001                  * can maximize readahead.
8002                  */
8003                 while (1) {
8004                         ret = run_next_block(root, bits, bits_nr, &last,
8005                                              pending, seen, reada, nodes,
8006                                              extent_cache, chunk_cache,
8007                                              dev_cache, block_group_cache,
8008                                              dev_extent_cache, rec);
8009                         if (ret != 0)
8010                                 break;
8011                 }
8012                 free_extent_buffer(buf);
8013                 list_del(&rec->list);
8014                 free(rec);
8015                 if (ret < 0)
8016                         break;
8017         }
8018         while (ret >= 0) {
8019                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8020                                      reada, nodes, extent_cache, chunk_cache,
8021                                      dev_cache, block_group_cache,
8022                                      dev_extent_cache, NULL);
8023                 if (ret != 0) {
8024                         if (ret > 0)
8025                                 ret = 0;
8026                         break;
8027                 }
8028         }
8029         return ret;
8030 }
8031
8032 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
8033 {
8034         struct rb_root dev_cache;
8035         struct cache_tree chunk_cache;
8036         struct block_group_tree block_group_cache;
8037         struct device_extent_tree dev_extent_cache;
8038         struct cache_tree extent_cache;
8039         struct cache_tree seen;
8040         struct cache_tree pending;
8041         struct cache_tree reada;
8042         struct cache_tree nodes;
8043         struct extent_io_tree excluded_extents;
8044         struct cache_tree corrupt_blocks;
8045         struct btrfs_path path;
8046         struct btrfs_key key;
8047         struct btrfs_key found_key;
8048         int ret, err = 0;
8049         struct block_info *bits;
8050         int bits_nr;
8051         struct extent_buffer *leaf;
8052         int slot;
8053         struct btrfs_root_item ri;
8054         struct list_head dropping_trees;
8055         struct list_head normal_trees;
8056         struct btrfs_root *root1;
8057         struct btrfs_root *root;
8058         u64 objectid;
8059         u8 level;
8060
8061         root = fs_info->fs_root;
8062         dev_cache = RB_ROOT;
8063         cache_tree_init(&chunk_cache);
8064         block_group_tree_init(&block_group_cache);
8065         device_extent_tree_init(&dev_extent_cache);
8066
8067         cache_tree_init(&extent_cache);
8068         cache_tree_init(&seen);
8069         cache_tree_init(&pending);
8070         cache_tree_init(&nodes);
8071         cache_tree_init(&reada);
8072         cache_tree_init(&corrupt_blocks);
8073         extent_io_tree_init(&excluded_extents);
8074         INIT_LIST_HEAD(&dropping_trees);
8075         INIT_LIST_HEAD(&normal_trees);
8076
8077         if (repair) {
8078                 fs_info->excluded_extents = &excluded_extents;
8079                 fs_info->fsck_extent_cache = &extent_cache;
8080                 fs_info->free_extent_hook = free_extent_hook;
8081                 fs_info->corrupt_blocks = &corrupt_blocks;
8082         }
8083
8084         bits_nr = 1024;
8085         bits = malloc(bits_nr * sizeof(struct block_info));
8086         if (!bits) {
8087                 perror("malloc");
8088                 exit(1);
8089         }
8090
8091         if (ctx.progress_enabled) {
8092                 ctx.tp = TASK_EXTENTS;
8093                 task_start(ctx.info);
8094         }
8095
8096 again:
8097         root1 = fs_info->tree_root;
8098         level = btrfs_header_level(root1->node);
8099         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8100                                     root1->node->start, 0, level, 0, NULL);
8101         if (ret < 0)
8102                 goto out;
8103         root1 = fs_info->chunk_root;
8104         level = btrfs_header_level(root1->node);
8105         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8106                                     root1->node->start, 0, level, 0, NULL);
8107         if (ret < 0)
8108                 goto out;
8109         btrfs_init_path(&path);
8110         key.offset = 0;
8111         key.objectid = 0;
8112         key.type = BTRFS_ROOT_ITEM_KEY;
8113         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
8114         if (ret < 0)
8115                 goto out;
8116         while (1) {
8117                 leaf = path.nodes[0];
8118                 slot = path.slots[0];
8119                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8120                         ret = btrfs_next_leaf(root, &path);
8121                         if (ret != 0)
8122                                 break;
8123                         leaf = path.nodes[0];
8124                         slot = path.slots[0];
8125                 }
8126                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8127                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8128                         unsigned long offset;
8129                         u64 last_snapshot;
8130
8131                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8132                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8133                         last_snapshot = btrfs_root_last_snapshot(&ri);
8134                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8135                                 level = btrfs_root_level(&ri);
8136                                 ret = add_root_item_to_list(&normal_trees,
8137                                                 found_key.objectid,
8138                                                 btrfs_root_bytenr(&ri),
8139                                                 last_snapshot, level,
8140                                                 0, NULL);
8141                                 if (ret < 0)
8142                                         goto out;
8143                         } else {
8144                                 level = btrfs_root_level(&ri);
8145                                 objectid = found_key.objectid;
8146                                 btrfs_disk_key_to_cpu(&found_key,
8147                                                       &ri.drop_progress);
8148                                 ret = add_root_item_to_list(&dropping_trees,
8149                                                 objectid,
8150                                                 btrfs_root_bytenr(&ri),
8151                                                 last_snapshot, level,
8152                                                 ri.drop_level, &found_key);
8153                                 if (ret < 0)
8154                                         goto out;
8155                         }
8156                 }
8157                 path.slots[0]++;
8158         }
8159         btrfs_release_path(&path);
8160
8161         /*
8162          * check_block can return -EAGAIN if it fixes something, please keep
8163          * this in mind when dealing with return values from these functions, if
8164          * we get -EAGAIN we want to fall through and restart the loop.
8165          */
8166         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8167                                   &seen, &reada, &nodes, &extent_cache,
8168                                   &chunk_cache, &dev_cache, &block_group_cache,
8169                                   &dev_extent_cache);
8170         if (ret < 0) {
8171                 if (ret == -EAGAIN)
8172                         goto loop;
8173                 goto out;
8174         }
8175         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8176                                   &pending, &seen, &reada, &nodes,
8177                                   &extent_cache, &chunk_cache, &dev_cache,
8178                                   &block_group_cache, &dev_extent_cache);
8179         if (ret < 0) {
8180                 if (ret == -EAGAIN)
8181                         goto loop;
8182                 goto out;
8183         }
8184
8185         ret = check_chunks(&chunk_cache, &block_group_cache,
8186                            &dev_extent_cache, NULL, NULL, NULL, 0);
8187         if (ret) {
8188                 if (ret == -EAGAIN)
8189                         goto loop;
8190                 err = ret;
8191         }
8192
8193         ret = check_extent_refs(root, &extent_cache);
8194         if (ret < 0) {
8195                 if (ret == -EAGAIN)
8196                         goto loop;
8197                 goto out;
8198         }
8199
8200         ret = check_devices(&dev_cache, &dev_extent_cache);
8201         if (ret && err)
8202                 ret = err;
8203
8204 out:
8205         task_stop(ctx.info);
8206         if (repair) {
8207                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
8208                 extent_io_tree_cleanup(&excluded_extents);
8209                 fs_info->fsck_extent_cache = NULL;
8210                 fs_info->free_extent_hook = NULL;
8211                 fs_info->corrupt_blocks = NULL;
8212                 fs_info->excluded_extents = NULL;
8213         }
8214         free(bits);
8215         free_chunk_cache_tree(&chunk_cache);
8216         free_device_cache_tree(&dev_cache);
8217         free_block_group_tree(&block_group_cache);
8218         free_device_extent_tree(&dev_extent_cache);
8219         free_extent_cache_tree(&seen);
8220         free_extent_cache_tree(&pending);
8221         free_extent_cache_tree(&reada);
8222         free_extent_cache_tree(&nodes);
8223         free_root_item_list(&normal_trees);
8224         free_root_item_list(&dropping_trees);
8225         return ret;
8226 loop:
8227         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
8228         free_extent_cache_tree(&seen);
8229         free_extent_cache_tree(&pending);
8230         free_extent_cache_tree(&reada);
8231         free_extent_cache_tree(&nodes);
8232         free_chunk_cache_tree(&chunk_cache);
8233         free_block_group_tree(&block_group_cache);
8234         free_device_cache_tree(&dev_cache);
8235         free_device_extent_tree(&dev_extent_cache);
8236         free_extent_record_cache(&extent_cache);
8237         free_root_item_list(&normal_trees);
8238         free_root_item_list(&dropping_trees);
8239         extent_io_tree_cleanup(&excluded_extents);
8240         goto again;
8241 }
8242
8243 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
8244 {
8245         int ret;
8246
8247         if (!ctx.progress_enabled)
8248                 fprintf(stderr, "checking extents\n");
8249         if (check_mode == CHECK_MODE_LOWMEM)
8250                 ret = check_chunks_and_extents_lowmem(fs_info);
8251         else
8252                 ret = check_chunks_and_extents(fs_info);
8253
8254         /* Also repair device size related problems */
8255         if (repair && !ret) {
8256                 ret = btrfs_fix_device_and_super_size(fs_info);
8257                 if (ret > 0)
8258                         ret = 0;
8259         }
8260         return ret;
8261 }
8262
8263 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8264                            struct btrfs_root *root, int overwrite)
8265 {
8266         struct extent_buffer *c;
8267         struct extent_buffer *old = root->node;
8268         int level;
8269         int ret;
8270         struct btrfs_disk_key disk_key = {0,0,0};
8271
8272         level = 0;
8273
8274         if (overwrite) {
8275                 c = old;
8276                 extent_buffer_get(c);
8277                 goto init;
8278         }
8279         c = btrfs_alloc_free_block(trans, root,
8280                                    root->fs_info->nodesize,
8281                                    root->root_key.objectid,
8282                                    &disk_key, level, 0, 0);
8283         if (IS_ERR(c)) {
8284                 c = old;
8285                 extent_buffer_get(c);
8286                 overwrite = 1;
8287         }
8288 init:
8289         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8290         btrfs_set_header_level(c, level);
8291         btrfs_set_header_bytenr(c, c->start);
8292         btrfs_set_header_generation(c, trans->transid);
8293         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8294         btrfs_set_header_owner(c, root->root_key.objectid);
8295
8296         write_extent_buffer(c, root->fs_info->fsid,
8297                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8298
8299         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8300                             btrfs_header_chunk_tree_uuid(c),
8301                             BTRFS_UUID_SIZE);
8302
8303         btrfs_mark_buffer_dirty(c);
8304         /*
8305          * this case can happen in the following case:
8306          *
8307          * 1.overwrite previous root.
8308          *
8309          * 2.reinit reloc data root, this is because we skip pin
8310          * down reloc data tree before which means we can allocate
8311          * same block bytenr here.
8312          */
8313         if (old->start == c->start) {
8314                 btrfs_set_root_generation(&root->root_item,
8315                                           trans->transid);
8316                 root->root_item.level = btrfs_header_level(root->node);
8317                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8318                                         &root->root_key, &root->root_item);
8319                 if (ret) {
8320                         free_extent_buffer(c);
8321                         return ret;
8322                 }
8323         }
8324         free_extent_buffer(old);
8325         root->node = c;
8326         add_root_to_dirty_list(root);
8327         return 0;
8328 }
8329
8330 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8331                                 struct extent_buffer *eb, int tree_root)
8332 {
8333         struct extent_buffer *tmp;
8334         struct btrfs_root_item *ri;
8335         struct btrfs_key key;
8336         u64 bytenr;
8337         int level = btrfs_header_level(eb);
8338         int nritems;
8339         int ret;
8340         int i;
8341
8342         /*
8343          * If we have pinned this block before, don't pin it again.
8344          * This can not only avoid forever loop with broken filesystem
8345          * but also give us some speedups.
8346          */
8347         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8348                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8349                 return 0;
8350
8351         btrfs_pin_extent(fs_info, eb->start, eb->len);
8352
8353         nritems = btrfs_header_nritems(eb);
8354         for (i = 0; i < nritems; i++) {
8355                 if (level == 0) {
8356                         btrfs_item_key_to_cpu(eb, &key, i);
8357                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8358                                 continue;
8359                         /* Skip the extent root and reloc roots */
8360                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8361                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8362                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8363                                 continue;
8364                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8365                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8366
8367                         /*
8368                          * If at any point we start needing the real root we
8369                          * will have to build a stump root for the root we are
8370                          * in, but for now this doesn't actually use the root so
8371                          * just pass in extent_root.
8372                          */
8373                         tmp = read_tree_block(fs_info, bytenr, 0);
8374                         if (!extent_buffer_uptodate(tmp)) {
8375                                 fprintf(stderr, "Error reading root block\n");
8376                                 return -EIO;
8377                         }
8378                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8379                         free_extent_buffer(tmp);
8380                         if (ret)
8381                                 return ret;
8382                 } else {
8383                         bytenr = btrfs_node_blockptr(eb, i);
8384
8385                         /* If we aren't the tree root don't read the block */
8386                         if (level == 1 && !tree_root) {
8387                                 btrfs_pin_extent(fs_info, bytenr,
8388                                                 fs_info->nodesize);
8389                                 continue;
8390                         }
8391
8392                         tmp = read_tree_block(fs_info, bytenr, 0);
8393                         if (!extent_buffer_uptodate(tmp)) {
8394                                 fprintf(stderr, "Error reading tree block\n");
8395                                 return -EIO;
8396                         }
8397                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8398                         free_extent_buffer(tmp);
8399                         if (ret)
8400                                 return ret;
8401                 }
8402         }
8403
8404         return 0;
8405 }
8406
8407 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8408 {
8409         int ret;
8410
8411         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8412         if (ret)
8413                 return ret;
8414
8415         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8416 }
8417
8418 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8419 {
8420         struct btrfs_block_group_cache *cache;
8421         struct btrfs_path path;
8422         struct extent_buffer *leaf;
8423         struct btrfs_chunk *chunk;
8424         struct btrfs_key key;
8425         int ret;
8426         u64 start;
8427
8428         btrfs_init_path(&path);
8429         key.objectid = 0;
8430         key.type = BTRFS_CHUNK_ITEM_KEY;
8431         key.offset = 0;
8432         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
8433         if (ret < 0) {
8434                 btrfs_release_path(&path);
8435                 return ret;
8436         }
8437
8438         /*
8439          * We do this in case the block groups were screwed up and had alloc
8440          * bits that aren't actually set on the chunks.  This happens with
8441          * restored images every time and could happen in real life I guess.
8442          */
8443         fs_info->avail_data_alloc_bits = 0;
8444         fs_info->avail_metadata_alloc_bits = 0;
8445         fs_info->avail_system_alloc_bits = 0;
8446
8447         /* First we need to create the in-memory block groups */
8448         while (1) {
8449                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8450                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
8451                         if (ret < 0) {
8452                                 btrfs_release_path(&path);
8453                                 return ret;
8454                         }
8455                         if (ret) {
8456                                 ret = 0;
8457                                 break;
8458                         }
8459                 }
8460                 leaf = path.nodes[0];
8461                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8462                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8463                         path.slots[0]++;
8464                         continue;
8465                 }
8466
8467                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
8468                 btrfs_add_block_group(fs_info, 0,
8469                                       btrfs_chunk_type(leaf, chunk), key.offset,
8470                                       btrfs_chunk_length(leaf, chunk));
8471                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8472                                  key.offset + btrfs_chunk_length(leaf, chunk));
8473                 path.slots[0]++;
8474         }
8475         start = 0;
8476         while (1) {
8477                 cache = btrfs_lookup_first_block_group(fs_info, start);
8478                 if (!cache)
8479                         break;
8480                 cache->cached = 1;
8481                 start = cache->key.objectid + cache->key.offset;
8482         }
8483
8484         btrfs_release_path(&path);
8485         return 0;
8486 }
8487
8488 static int reset_balance(struct btrfs_trans_handle *trans,
8489                          struct btrfs_fs_info *fs_info)
8490 {
8491         struct btrfs_root *root = fs_info->tree_root;
8492         struct btrfs_path path;
8493         struct extent_buffer *leaf;
8494         struct btrfs_key key;
8495         int del_slot, del_nr = 0;
8496         int ret;
8497         int found = 0;
8498
8499         btrfs_init_path(&path);
8500         key.objectid = BTRFS_BALANCE_OBJECTID;
8501         key.type = BTRFS_BALANCE_ITEM_KEY;
8502         key.offset = 0;
8503         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8504         if (ret) {
8505                 if (ret > 0)
8506                         ret = 0;
8507                 if (!ret)
8508                         goto reinit_data_reloc;
8509                 else
8510                         goto out;
8511         }
8512
8513         ret = btrfs_del_item(trans, root, &path);
8514         if (ret)
8515                 goto out;
8516         btrfs_release_path(&path);
8517
8518         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8519         key.type = BTRFS_ROOT_ITEM_KEY;
8520         key.offset = 0;
8521         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8522         if (ret < 0)
8523                 goto out;
8524         while (1) {
8525                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8526                         if (!found)
8527                                 break;
8528
8529                         if (del_nr) {
8530                                 ret = btrfs_del_items(trans, root, &path,
8531                                                       del_slot, del_nr);
8532                                 del_nr = 0;
8533                                 if (ret)
8534                                         goto out;
8535                         }
8536                         key.offset++;
8537                         btrfs_release_path(&path);
8538
8539                         found = 0;
8540                         ret = btrfs_search_slot(trans, root, &key, &path,
8541                                                 -1, 1);
8542                         if (ret < 0)
8543                                 goto out;
8544                         continue;
8545                 }
8546                 found = 1;
8547                 leaf = path.nodes[0];
8548                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8549                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8550                         break;
8551                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8552                         path.slots[0]++;
8553                         continue;
8554                 }
8555                 if (!del_nr) {
8556                         del_slot = path.slots[0];
8557                         del_nr = 1;
8558                 } else {
8559                         del_nr++;
8560                 }
8561                 path.slots[0]++;
8562         }
8563
8564         if (del_nr) {
8565                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
8566                 if (ret)
8567                         goto out;
8568         }
8569         btrfs_release_path(&path);
8570
8571 reinit_data_reloc:
8572         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8573         key.type = BTRFS_ROOT_ITEM_KEY;
8574         key.offset = (u64)-1;
8575         root = btrfs_read_fs_root(fs_info, &key);
8576         if (IS_ERR(root)) {
8577                 fprintf(stderr, "Error reading data reloc tree\n");
8578                 ret = PTR_ERR(root);
8579                 goto out;
8580         }
8581         record_root_in_trans(trans, root);
8582         ret = btrfs_fsck_reinit_root(trans, root, 0);
8583         if (ret)
8584                 goto out;
8585         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8586 out:
8587         btrfs_release_path(&path);
8588         return ret;
8589 }
8590
8591 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8592                               struct btrfs_fs_info *fs_info)
8593 {
8594         u64 start = 0;
8595         int ret;
8596
8597         /*
8598          * The only reason we don't do this is because right now we're just
8599          * walking the trees we find and pinning down their bytes, we don't look
8600          * at any of the leaves.  In order to do mixed groups we'd have to check
8601          * the leaves of any fs roots and pin down the bytes for any file
8602          * extents we find.  Not hard but why do it if we don't have to?
8603          */
8604         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
8605                 fprintf(stderr, "We don't support re-initing the extent tree "
8606                         "for mixed block groups yet, please notify a btrfs "
8607                         "developer you want to do this so they can add this "
8608                         "functionality.\n");
8609                 return -EINVAL;
8610         }
8611
8612         /*
8613          * first we need to walk all of the trees except the extent tree and pin
8614          * down the bytes that are in use so we don't overwrite any existing
8615          * metadata.
8616          */
8617         ret = pin_metadata_blocks(fs_info);
8618         if (ret) {
8619                 fprintf(stderr, "error pinning down used bytes\n");
8620                 return ret;
8621         }
8622
8623         /*
8624          * Need to drop all the block groups since we're going to recreate all
8625          * of them again.
8626          */
8627         btrfs_free_block_groups(fs_info);
8628         ret = reset_block_groups(fs_info);
8629         if (ret) {
8630                 fprintf(stderr, "error resetting the block groups\n");
8631                 return ret;
8632         }
8633
8634         /* Ok we can allocate now, reinit the extent root */
8635         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8636         if (ret) {
8637                 fprintf(stderr, "extent root initialization failed\n");
8638                 /*
8639                  * When the transaction code is updated we should end the
8640                  * transaction, but for now progs only knows about commit so
8641                  * just return an error.
8642                  */
8643                 return ret;
8644         }
8645
8646         /*
8647          * Now we have all the in-memory block groups setup so we can make
8648          * allocations properly, and the metadata we care about is safe since we
8649          * pinned all of it above.
8650          */
8651         while (1) {
8652                 struct btrfs_block_group_cache *cache;
8653
8654                 cache = btrfs_lookup_first_block_group(fs_info, start);
8655                 if (!cache)
8656                         break;
8657                 start = cache->key.objectid + cache->key.offset;
8658                 ret = btrfs_insert_item(trans, fs_info->extent_root,
8659                                         &cache->key, &cache->item,
8660                                         sizeof(cache->item));
8661                 if (ret) {
8662                         fprintf(stderr, "Error adding block group\n");
8663                         return ret;
8664                 }
8665                 btrfs_extent_post_op(trans, fs_info->extent_root);
8666         }
8667
8668         ret = reset_balance(trans, fs_info);
8669         if (ret)
8670                 fprintf(stderr, "error resetting the pending balance\n");
8671
8672         return ret;
8673 }
8674
8675 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8676 {
8677         struct btrfs_path path;
8678         struct btrfs_trans_handle *trans;
8679         struct btrfs_key key;
8680         int ret;
8681
8682         printf("Recowing metadata block %llu\n", eb->start);
8683         key.objectid = btrfs_header_owner(eb);
8684         key.type = BTRFS_ROOT_ITEM_KEY;
8685         key.offset = (u64)-1;
8686
8687         root = btrfs_read_fs_root(root->fs_info, &key);
8688         if (IS_ERR(root)) {
8689                 fprintf(stderr, "Couldn't find owner root %llu\n",
8690                         key.objectid);
8691                 return PTR_ERR(root);
8692         }
8693
8694         trans = btrfs_start_transaction(root, 1);
8695         if (IS_ERR(trans))
8696                 return PTR_ERR(trans);
8697
8698         btrfs_init_path(&path);
8699         path.lowest_level = btrfs_header_level(eb);
8700         if (path.lowest_level)
8701                 btrfs_node_key_to_cpu(eb, &key, 0);
8702         else
8703                 btrfs_item_key_to_cpu(eb, &key, 0);
8704
8705         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
8706         btrfs_commit_transaction(trans, root);
8707         btrfs_release_path(&path);
8708         return ret;
8709 }
8710
8711 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8712 {
8713         struct btrfs_path path;
8714         struct btrfs_trans_handle *trans;
8715         struct btrfs_key key;
8716         int ret;
8717
8718         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8719                bad->key.type, bad->key.offset);
8720         key.objectid = bad->root_id;
8721         key.type = BTRFS_ROOT_ITEM_KEY;
8722         key.offset = (u64)-1;
8723
8724         root = btrfs_read_fs_root(root->fs_info, &key);
8725         if (IS_ERR(root)) {
8726                 fprintf(stderr, "Couldn't find owner root %llu\n",
8727                         key.objectid);
8728                 return PTR_ERR(root);
8729         }
8730
8731         trans = btrfs_start_transaction(root, 1);
8732         if (IS_ERR(trans))
8733                 return PTR_ERR(trans);
8734
8735         btrfs_init_path(&path);
8736         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
8737         if (ret) {
8738                 if (ret > 0)
8739                         ret = 0;
8740                 goto out;
8741         }
8742         ret = btrfs_del_item(trans, root, &path);
8743 out:
8744         btrfs_commit_transaction(trans, root);
8745         btrfs_release_path(&path);
8746         return ret;
8747 }
8748
8749 static int zero_log_tree(struct btrfs_root *root)
8750 {
8751         struct btrfs_trans_handle *trans;
8752         int ret;
8753
8754         trans = btrfs_start_transaction(root, 1);
8755         if (IS_ERR(trans)) {
8756                 ret = PTR_ERR(trans);
8757                 return ret;
8758         }
8759         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8760         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8761         ret = btrfs_commit_transaction(trans, root);
8762         return ret;
8763 }
8764
8765 static int populate_csum(struct btrfs_trans_handle *trans,
8766                          struct btrfs_root *csum_root, char *buf, u64 start,
8767                          u64 len)
8768 {
8769         struct btrfs_fs_info *fs_info = csum_root->fs_info;
8770         u64 offset = 0;
8771         u64 sectorsize;
8772         int ret = 0;
8773
8774         while (offset < len) {
8775                 sectorsize = fs_info->sectorsize;
8776                 ret = read_extent_data(fs_info, buf, start + offset,
8777                                        &sectorsize, 0);
8778                 if (ret)
8779                         break;
8780                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8781                                             start + offset, buf, sectorsize);
8782                 if (ret)
8783                         break;
8784                 offset += sectorsize;
8785         }
8786         return ret;
8787 }
8788
8789 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8790                                       struct btrfs_root *csum_root,
8791                                       struct btrfs_root *cur_root)
8792 {
8793         struct btrfs_path path;
8794         struct btrfs_key key;
8795         struct extent_buffer *node;
8796         struct btrfs_file_extent_item *fi;
8797         char *buf = NULL;
8798         u64 start = 0;
8799         u64 len = 0;
8800         int slot = 0;
8801         int ret = 0;
8802
8803         buf = malloc(cur_root->fs_info->sectorsize);
8804         if (!buf)
8805                 return -ENOMEM;
8806
8807         btrfs_init_path(&path);
8808         key.objectid = 0;
8809         key.offset = 0;
8810         key.type = 0;
8811         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
8812         if (ret < 0)
8813                 goto out;
8814         /* Iterate all regular file extents and fill its csum */
8815         while (1) {
8816                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8817
8818                 if (key.type != BTRFS_EXTENT_DATA_KEY)
8819                         goto next;
8820                 node = path.nodes[0];
8821                 slot = path.slots[0];
8822                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8823                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8824                         goto next;
8825                 start = btrfs_file_extent_disk_bytenr(node, fi);
8826                 len = btrfs_file_extent_disk_num_bytes(node, fi);
8827
8828                 ret = populate_csum(trans, csum_root, buf, start, len);
8829                 if (ret == -EEXIST)
8830                         ret = 0;
8831                 if (ret < 0)
8832                         goto out;
8833 next:
8834                 /*
8835                  * TODO: if next leaf is corrupted, jump to nearest next valid
8836                  * leaf.
8837                  */
8838                 ret = btrfs_next_item(cur_root, &path);
8839                 if (ret < 0)
8840                         goto out;
8841                 if (ret > 0) {
8842                         ret = 0;
8843                         goto out;
8844                 }
8845         }
8846
8847 out:
8848         btrfs_release_path(&path);
8849         free(buf);
8850         return ret;
8851 }
8852
8853 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8854                                   struct btrfs_root *csum_root)
8855 {
8856         struct btrfs_fs_info *fs_info = csum_root->fs_info;
8857         struct btrfs_path path;
8858         struct btrfs_root *tree_root = fs_info->tree_root;
8859         struct btrfs_root *cur_root;
8860         struct extent_buffer *node;
8861         struct btrfs_key key;
8862         int slot = 0;
8863         int ret = 0;
8864
8865         btrfs_init_path(&path);
8866         key.objectid = BTRFS_FS_TREE_OBJECTID;
8867         key.offset = 0;
8868         key.type = BTRFS_ROOT_ITEM_KEY;
8869         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
8870         if (ret < 0)
8871                 goto out;
8872         if (ret > 0) {
8873                 ret = -ENOENT;
8874                 goto out;
8875         }
8876
8877         while (1) {
8878                 node = path.nodes[0];
8879                 slot = path.slots[0];
8880                 btrfs_item_key_to_cpu(node, &key, slot);
8881                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8882                         goto out;
8883                 if (key.type != BTRFS_ROOT_ITEM_KEY)
8884                         goto next;
8885                 if (!is_fstree(key.objectid))
8886                         goto next;
8887                 key.offset = (u64)-1;
8888
8889                 cur_root = btrfs_read_fs_root(fs_info, &key);
8890                 if (IS_ERR(cur_root) || !cur_root) {
8891                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8892                                 key.objectid);
8893                         goto out;
8894                 }
8895                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8896                                 cur_root);
8897                 if (ret < 0)
8898                         goto out;
8899 next:
8900                 ret = btrfs_next_item(tree_root, &path);
8901                 if (ret > 0) {
8902                         ret = 0;
8903                         goto out;
8904                 }
8905                 if (ret < 0)
8906                         goto out;
8907         }
8908
8909 out:
8910         btrfs_release_path(&path);
8911         return ret;
8912 }
8913
8914 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
8915                                       struct btrfs_root *csum_root)
8916 {
8917         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
8918         struct btrfs_path path;
8919         struct btrfs_extent_item *ei;
8920         struct extent_buffer *leaf;
8921         char *buf;
8922         struct btrfs_key key;
8923         int ret;
8924
8925         btrfs_init_path(&path);
8926         key.objectid = 0;
8927         key.type = BTRFS_EXTENT_ITEM_KEY;
8928         key.offset = 0;
8929         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8930         if (ret < 0) {
8931                 btrfs_release_path(&path);
8932                 return ret;
8933         }
8934
8935         buf = malloc(csum_root->fs_info->sectorsize);
8936         if (!buf) {
8937                 btrfs_release_path(&path);
8938                 return -ENOMEM;
8939         }
8940
8941         while (1) {
8942                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8943                         ret = btrfs_next_leaf(extent_root, &path);
8944                         if (ret < 0)
8945                                 break;
8946                         if (ret) {
8947                                 ret = 0;
8948                                 break;
8949                         }
8950                 }
8951                 leaf = path.nodes[0];
8952
8953                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8954                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8955                         path.slots[0]++;
8956                         continue;
8957                 }
8958
8959                 ei = btrfs_item_ptr(leaf, path.slots[0],
8960                                     struct btrfs_extent_item);
8961                 if (!(btrfs_extent_flags(leaf, ei) &
8962                       BTRFS_EXTENT_FLAG_DATA)) {
8963                         path.slots[0]++;
8964                         continue;
8965                 }
8966
8967                 ret = populate_csum(trans, csum_root, buf, key.objectid,
8968                                     key.offset);
8969                 if (ret)
8970                         break;
8971                 path.slots[0]++;
8972         }
8973
8974         btrfs_release_path(&path);
8975         free(buf);
8976         return ret;
8977 }
8978
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * After an extent tree init all extent info is gone, so the extent tree
 * cannot be used as the source.  With @search_fs_tree set the fs/subvolume
 * trees are walked instead; otherwise the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ?
		fill_csum_tree_from_fs(trans, csum_root) :
		fill_csum_tree_from_extent(trans, csum_root);
}
8995
8996 static void free_roots_info_cache(void)
8997 {
8998         if (!roots_info_cache)
8999                 return;
9000
9001         while (!cache_tree_empty(roots_info_cache)) {
9002                 struct cache_extent *entry;
9003                 struct root_item_info *rii;
9004
9005                 entry = first_cache_extent(roots_info_cache);
9006                 if (!entry)
9007                         break;
9008                 remove_cache_extent(roots_info_cache, entry);
9009                 rii = container_of(entry, struct root_item_info, cache_extent);
9010                 free(rii);
9011         }
9012
9013         free(roots_info_cache);
9014         roots_info_cache = NULL;
9015 }
9016
9017 static int build_roots_info_cache(struct btrfs_fs_info *info)
9018 {
9019         int ret = 0;
9020         struct btrfs_key key;
9021         struct extent_buffer *leaf;
9022         struct btrfs_path path;
9023
9024         if (!roots_info_cache) {
9025                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9026                 if (!roots_info_cache)
9027                         return -ENOMEM;
9028                 cache_tree_init(roots_info_cache);
9029         }
9030
9031         btrfs_init_path(&path);
9032         key.objectid = 0;
9033         key.type = BTRFS_EXTENT_ITEM_KEY;
9034         key.offset = 0;
9035         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
9036         if (ret < 0)
9037                 goto out;
9038         leaf = path.nodes[0];
9039
9040         while (1) {
9041                 struct btrfs_key found_key;
9042                 struct btrfs_extent_item *ei;
9043                 struct btrfs_extent_inline_ref *iref;
9044                 unsigned long item_end;
9045                 int slot = path.slots[0];
9046                 int type;
9047                 u64 flags;
9048                 u64 root_id;
9049                 u8 level;
9050                 struct cache_extent *entry;
9051                 struct root_item_info *rii;
9052
9053                 if (slot >= btrfs_header_nritems(leaf)) {
9054                         ret = btrfs_next_leaf(info->extent_root, &path);
9055                         if (ret < 0) {
9056                                 break;
9057                         } else if (ret) {
9058                                 ret = 0;
9059                                 break;
9060                         }
9061                         leaf = path.nodes[0];
9062                         slot = path.slots[0];
9063                 }
9064
9065                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9066
9067                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9068                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9069                         goto next;
9070
9071                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9072                 flags = btrfs_extent_flags(leaf, ei);
9073                 item_end = (unsigned long)ei + btrfs_item_size_nr(leaf, slot);
9074
9075                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9076                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9077                         goto next;
9078
9079                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9080                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9081                         level = found_key.offset;
9082                 } else {
9083                         struct btrfs_tree_block_info *binfo;
9084
9085                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
9086                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
9087                         level = btrfs_tree_block_level(leaf, binfo);
9088                 }
9089
9090                 /*
9091                  * It's a valid extent/metadata item that has no inline ref,
9092                  * but SHARED_BLOCK_REF or other shared references.
9093                  * So we need to do extra check to avoid reading beyond leaf
9094                  * boudnary.
9095                  */
9096                 if ((unsigned long)iref >= item_end)
9097                         goto next;
9098
9099                 /*
9100                  * For a root extent, it must be of the following type and the
9101                  * first (and only one) iref in the item.
9102                  */
9103                 type = btrfs_extent_inline_ref_type(leaf, iref);
9104                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9105                         goto next;
9106
9107                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9108                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9109                 if (!entry) {
9110                         rii = malloc(sizeof(struct root_item_info));
9111                         if (!rii) {
9112                                 ret = -ENOMEM;
9113                                 goto out;
9114                         }
9115                         rii->cache_extent.start = root_id;
9116                         rii->cache_extent.size = 1;
9117                         rii->level = (u8)-1;
9118                         entry = &rii->cache_extent;
9119                         ret = insert_cache_extent(roots_info_cache, entry);
9120                         ASSERT(ret == 0);
9121                 } else {
9122                         rii = container_of(entry, struct root_item_info,
9123                                            cache_extent);
9124                 }
9125
9126                 ASSERT(rii->cache_extent.start == root_id);
9127                 ASSERT(rii->cache_extent.size == 1);
9128
9129                 if (level > rii->level || rii->level == (u8)-1) {
9130                         rii->level = level;
9131                         rii->bytenr = found_key.objectid;
9132                         rii->gen = btrfs_extent_generation(leaf, ei);
9133                         rii->node_count = 1;
9134                 } else if (level == rii->level) {
9135                         rii->node_count++;
9136                 }
9137 next:
9138                 path.slots[0]++;
9139         }
9140
9141 out:
9142         btrfs_release_path(&path);
9143
9144         return ret;
9145 }
9146
9147 static int maybe_repair_root_item(struct btrfs_path *path,
9148                                   const struct btrfs_key *root_key,
9149                                   const int read_only_mode)
9150 {
9151         const u64 root_id = root_key->objectid;
9152         struct cache_extent *entry;
9153         struct root_item_info *rii;
9154         struct btrfs_root_item ri;
9155         unsigned long offset;
9156
9157         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9158         if (!entry) {
9159                 fprintf(stderr,
9160                         "Error: could not find extent items for root %llu\n",
9161                         root_key->objectid);
9162                 return -ENOENT;
9163         }
9164
9165         rii = container_of(entry, struct root_item_info, cache_extent);
9166         ASSERT(rii->cache_extent.start == root_id);
9167         ASSERT(rii->cache_extent.size == 1);
9168
9169         if (rii->node_count != 1) {
9170                 fprintf(stderr,
9171                         "Error: could not find btree root extent for root %llu\n",
9172                         root_id);
9173                 return -ENOENT;
9174         }
9175
9176         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9177         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9178
9179         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9180             btrfs_root_level(&ri) != rii->level ||
9181             btrfs_root_generation(&ri) != rii->gen) {
9182
9183                 /*
9184                  * If we're in repair mode but our caller told us to not update
9185                  * the root item, i.e. just check if it needs to be updated, don't
9186                  * print this message, since the caller will call us again shortly
9187                  * for the same root item without read only mode (the caller will
9188                  * open a transaction first).
9189                  */
9190                 if (!(read_only_mode && repair))
9191                         fprintf(stderr,
9192                                 "%sroot item for root %llu,"
9193                                 " current bytenr %llu, current gen %llu, current level %u,"
9194                                 " new bytenr %llu, new gen %llu, new level %u\n",
9195                                 (read_only_mode ? "" : "fixing "),
9196                                 root_id,
9197                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9198                                 btrfs_root_level(&ri),
9199                                 rii->bytenr, rii->gen, rii->level);
9200
9201                 if (btrfs_root_generation(&ri) > rii->gen) {
9202                         fprintf(stderr,
9203                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9204                                 root_id, btrfs_root_generation(&ri), rii->gen);
9205                         return -EINVAL;
9206                 }
9207
9208                 if (!read_only_mode) {
9209                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9210                         btrfs_set_root_level(&ri, rii->level);
9211                         btrfs_set_root_generation(&ri, rii->gen);
9212                         write_extent_buffer(path->nodes[0], &ri,
9213                                             offset, sizeof(ri));
9214                 }
9215
9216                 return 1;
9217         }
9218
9219         return 0;
9220 }
9221
9222 /*
9223  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9224  * caused read-only snapshots to be corrupted if they were created at a moment
9225  * when the source subvolume/snapshot had orphan items. The issue was that the
9226  * on-disk root items became incorrect, referring to the pre orphan cleanup root
9227  * node instead of the post orphan cleanup root node.
9228  * So this function, and its callees, just detects and fixes those cases. Even
9229  * though the regression was for read-only snapshots, this function applies to
9230  * any snapshot/subvolume root.
9231  * This must be run before any other repair code - not doing it so, makes other
9232  * repair code delete or modify backrefs in the extent tree for example, which
9233  * will result in an inconsistent fs after repairing the root items.
9234  */
9235 static int repair_root_items(struct btrfs_fs_info *info)
9236 {
9237         struct btrfs_path path;
9238         struct btrfs_key key;
9239         struct extent_buffer *leaf;
9240         struct btrfs_trans_handle *trans = NULL;
9241         int ret = 0;
9242         int bad_roots = 0;
9243         int need_trans = 0;
9244
9245         btrfs_init_path(&path);
9246
9247         ret = build_roots_info_cache(info);
9248         if (ret)
9249                 goto out;
9250
9251         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9252         key.type = BTRFS_ROOT_ITEM_KEY;
9253         key.offset = 0;
9254
9255 again:
9256         /*
9257          * Avoid opening and committing transactions if a leaf doesn't have
9258          * any root items that need to be fixed, so that we avoid rotating
9259          * backup roots unnecessarily.
9260          */
9261         if (need_trans) {
9262                 trans = btrfs_start_transaction(info->tree_root, 1);
9263                 if (IS_ERR(trans)) {
9264                         ret = PTR_ERR(trans);
9265                         goto out;
9266                 }
9267         }
9268
9269         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
9270                                 0, trans ? 1 : 0);
9271         if (ret < 0)
9272                 goto out;
9273         leaf = path.nodes[0];
9274
9275         while (1) {
9276                 struct btrfs_key found_key;
9277
9278                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
9279                         int no_more_keys = find_next_key(&path, &key);
9280
9281                         btrfs_release_path(&path);
9282                         if (trans) {
9283                                 ret = btrfs_commit_transaction(trans,
9284                                                                info->tree_root);
9285                                 trans = NULL;
9286                                 if (ret < 0)
9287                                         goto out;
9288                         }
9289                         need_trans = 0;
9290                         if (no_more_keys)
9291                                 break;
9292                         goto again;
9293                 }
9294
9295                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9296
9297                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9298                         goto next;
9299                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9300                         goto next;
9301
9302                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
9303                 if (ret < 0)
9304                         goto out;
9305                 if (ret) {
9306                         if (!trans && repair) {
9307                                 need_trans = 1;
9308                                 key = found_key;
9309                                 btrfs_release_path(&path);
9310                                 goto again;
9311                         }
9312                         bad_roots++;
9313                 }
9314 next:
9315                 path.slots[0]++;
9316         }
9317         ret = 0;
9318 out:
9319         free_roots_info_cache();
9320         btrfs_release_path(&path);
9321         if (trans)
9322                 btrfs_commit_transaction(trans, info->tree_root);
9323         if (ret < 0)
9324                 return ret;
9325
9326         return bad_roots;
9327 }
9328
9329 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
9330 {
9331         struct btrfs_trans_handle *trans;
9332         struct btrfs_block_group_cache *bg_cache;
9333         u64 current = 0;
9334         int ret = 0;
9335
9336         /* Clear all free space cache inodes and its extent data */
9337         while (1) {
9338                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
9339                 if (!bg_cache)
9340                         break;
9341                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
9342                 if (ret < 0)
9343                         return ret;
9344                 current = bg_cache->key.objectid + bg_cache->key.offset;
9345         }
9346
9347         /* Don't forget to set cache_generation to -1 */
9348         trans = btrfs_start_transaction(fs_info->tree_root, 0);
9349         if (IS_ERR(trans)) {
9350                 error("failed to update super block cache generation");
9351                 return PTR_ERR(trans);
9352         }
9353         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
9354         btrfs_commit_transaction(trans, fs_info->tree_root);
9355
9356         return ret;
9357 }
9358
9359 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
9360                 int clear_version)
9361 {
9362         int ret = 0;
9363
9364         if (clear_version == 1) {
9365                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
9366                         error(
9367                 "free space cache v2 detected, use --clear-space-cache v2");
9368                         ret = 1;
9369                         goto close_out;
9370                 }
9371                 printf("Clearing free space cache\n");
9372                 ret = clear_free_space_cache(fs_info);
9373                 if (ret) {
9374                         error("failed to clear free space cache");
9375                         ret = 1;
9376                 } else {
9377                         printf("Free space cache cleared\n");
9378                 }
9379         } else if (clear_version == 2) {
9380                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
9381                         printf("no free space cache v2 to clear\n");
9382                         ret = 0;
9383                         goto close_out;
9384                 }
9385                 printf("Clear free space cache v2\n");
9386                 ret = btrfs_clear_free_space_tree(fs_info);
9387                 if (ret) {
9388                         error("failed to clear free space cache v2: %d", ret);
9389                         ret = 1;
9390                 } else {
9391                         printf("free space cache v2 cleared\n");
9392                 }
9393         }
9394 close_out:
9395         return ret;
9396 }
9397
/*
 * Help text for "btrfs check", as a NULL-terminated list of lines: synopsis,
 * one-line summary, long description, an empty separator line, and then one
 * or more lines per option.
 */
const char * const cmd_check_usage[] = {
        /* synopsis */
        "btrfs check [options] <device>",
        /* one-line summary */
        "Check structural integrity of a filesystem (unmounted).",
        /* long description */
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        /* separator */
        "",
        /* options */
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--force                     skip mount checks, repair is not possible",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        /* terminator */
        NULL
};
9429
9430 int cmd_check(int argc, char **argv)
9431 {
9432         struct cache_tree root_cache;
9433         struct btrfs_root *root;
9434         struct btrfs_fs_info *info;
9435         u64 bytenr = 0;
9436         u64 subvolid = 0;
9437         u64 tree_root_bytenr = 0;
9438         u64 chunk_root_bytenr = 0;
9439         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9440         int ret = 0;
9441         int err = 0;
9442         u64 num;
9443         int init_csum_tree = 0;
9444         int readonly = 0;
9445         int clear_space_cache = 0;
9446         int qgroup_report = 0;
9447         int qgroups_repaired = 0;
9448         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
9449         int force = 0;
9450
9451         while(1) {
9452                 int c;
9453                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
9454                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
9455                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
9456                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
9457                         GETOPT_VAL_FORCE };
9458                 static const struct option long_options[] = {
9459                         { "super", required_argument, NULL, 's' },
9460                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
9461                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
9462                         { "init-csum-tree", no_argument, NULL,
9463                                 GETOPT_VAL_INIT_CSUM },
9464                         { "init-extent-tree", no_argument, NULL,
9465                                 GETOPT_VAL_INIT_EXTENT },
9466                         { "check-data-csum", no_argument, NULL,
9467                                 GETOPT_VAL_CHECK_CSUM },
9468                         { "backup", no_argument, NULL, 'b' },
9469                         { "subvol-extents", required_argument, NULL, 'E' },
9470                         { "qgroup-report", no_argument, NULL, 'Q' },
9471                         { "tree-root", required_argument, NULL, 'r' },
9472                         { "chunk-root", required_argument, NULL,
9473                                 GETOPT_VAL_CHUNK_TREE },
9474                         { "progress", no_argument, NULL, 'p' },
9475                         { "mode", required_argument, NULL,
9476                                 GETOPT_VAL_MODE },
9477                         { "clear-space-cache", required_argument, NULL,
9478                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
9479                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
9480                         { NULL, 0, NULL, 0}
9481                 };
9482
9483                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
9484                 if (c < 0)
9485                         break;
9486                 switch(c) {
9487                         case 'a': /* ignored */ break;
9488                         case 'b':
9489                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9490                                 break;
9491                         case 's':
9492                                 num = arg_strtou64(optarg);
9493                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9494                                         error(
9495                                         "super mirror should be less than %d",
9496                                                 BTRFS_SUPER_MIRROR_MAX);
9497                                         exit(1);
9498                                 }
9499                                 bytenr = btrfs_sb_offset(((int)num));
9500                                 printf("using SB copy %llu, bytenr %llu\n", num,
9501                                        (unsigned long long)bytenr);
9502                                 break;
9503                         case 'Q':
9504                                 qgroup_report = 1;
9505                                 break;
9506                         case 'E':
9507                                 subvolid = arg_strtou64(optarg);
9508                                 break;
9509                         case 'r':
9510                                 tree_root_bytenr = arg_strtou64(optarg);
9511                                 break;
9512                         case GETOPT_VAL_CHUNK_TREE:
9513                                 chunk_root_bytenr = arg_strtou64(optarg);
9514                                 break;
9515                         case 'p':
9516                                 ctx.progress_enabled = true;
9517                                 break;
9518                         case '?':
9519                         case 'h':
9520                                 usage(cmd_check_usage);
9521                         case GETOPT_VAL_REPAIR:
9522                                 printf("enabling repair mode\n");
9523                                 repair = 1;
9524                                 ctree_flags |= OPEN_CTREE_WRITES;
9525                                 break;
9526                         case GETOPT_VAL_READONLY:
9527                                 readonly = 1;
9528                                 break;
9529                         case GETOPT_VAL_INIT_CSUM:
9530                                 printf("Creating a new CRC tree\n");
9531                                 init_csum_tree = 1;
9532                                 repair = 1;
9533                                 ctree_flags |= OPEN_CTREE_WRITES;
9534                                 break;
9535                         case GETOPT_VAL_INIT_EXTENT:
9536                                 init_extent_tree = 1;
9537                                 ctree_flags |= (OPEN_CTREE_WRITES |
9538                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9539                                 repair = 1;
9540                                 break;
9541                         case GETOPT_VAL_CHECK_CSUM:
9542                                 check_data_csum = 1;
9543                                 break;
9544                         case GETOPT_VAL_MODE:
9545                                 check_mode = parse_check_mode(optarg);
9546                                 if (check_mode == CHECK_MODE_UNKNOWN) {
9547                                         error("unknown mode: %s", optarg);
9548                                         exit(1);
9549                                 }
9550                                 break;
9551                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
9552                                 if (strcmp(optarg, "v1") == 0) {
9553                                         clear_space_cache = 1;
9554                                 } else if (strcmp(optarg, "v2") == 0) {
9555                                         clear_space_cache = 2;
9556                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
9557                                 } else {
9558                                         error(
9559                 "invalid argument to --clear-space-cache, must be v1 or v2");
9560                                         exit(1);
9561                                 }
9562                                 ctree_flags |= OPEN_CTREE_WRITES;
9563                                 break;
9564                         case GETOPT_VAL_FORCE:
9565                                 force = 1;
9566                                 break;
9567                 }
9568         }
9569
9570         if (check_argc_exact(argc - optind, 1))
9571                 usage(cmd_check_usage);
9572
9573         if (ctx.progress_enabled) {
9574                 ctx.tp = TASK_NOTHING;
9575                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9576         }
9577
9578         /* This check is the only reason for --readonly to exist */
9579         if (readonly && repair) {
9580                 error("repair options are not compatible with --readonly");
9581                 exit(1);
9582         }
9583
9584         /*
9585          * experimental and dangerous
9586          */
9587         if (repair && check_mode == CHECK_MODE_LOWMEM)
9588                 warning("low-memory mode repair support is only partial");
9589
9590         radix_tree_init();
9591         cache_tree_init(&root_cache);
9592
9593         ret = check_mounted(argv[optind]);
9594         if (!force) {
9595                 if (ret < 0) {
9596                         error("could not check mount status: %s",
9597                                         strerror(-ret));
9598                         err |= !!ret;
9599                         goto err_out;
9600                 } else if (ret) {
9601                         error(
9602 "%s is currently mounted, use --force if you really intend to check the filesystem",
9603                                 argv[optind]);
9604                         ret = -EBUSY;
9605                         err |= !!ret;
9606                         goto err_out;
9607                 }
9608         } else {
9609                 if (repair) {
9610                         error("repair and --force is not yet supported");
9611                         ret = 1;
9612                         err |= !!ret;
9613                         goto err_out;
9614                 }
9615                 if (ret < 0) {
9616                         warning(
9617 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
9618                                 argv[optind]);
9619                 } else if (ret) {
9620                         warning(
9621                         "filesystem mounted, continuing because of --force");
9622                 }
9623                 /* A block device is mounted in exclusive mode by kernel */
9624                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
9625         }
9626
9627         /* only allow partial opening under repair mode */
9628         if (repair)
9629                 ctree_flags |= OPEN_CTREE_PARTIAL;
9630
9631         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9632                                   chunk_root_bytenr, ctree_flags);
9633         if (!info) {
9634                 error("cannot open file system");
9635                 ret = -EIO;
9636                 err |= !!ret;
9637                 goto err_out;
9638         }
9639
9640         global_info = info;
9641         root = info->fs_root;
9642         uuid_unparse(info->super_copy->fsid, uuidbuf);
9643
9644         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9645
9646         /*
9647          * Check the bare minimum before starting anything else that could rely
9648          * on it, namely the tree roots, any local consistency checks
9649          */
9650         if (!extent_buffer_uptodate(info->tree_root->node) ||
9651             !extent_buffer_uptodate(info->dev_root->node) ||
9652             !extent_buffer_uptodate(info->chunk_root->node)) {
9653                 error("critical roots corrupted, unable to check the filesystem");
9654                 err |= !!ret;
9655                 ret = -EIO;
9656                 goto close_out;
9657         }
9658
9659         if (clear_space_cache) {
9660                 ret = do_clear_free_space_cache(info, clear_space_cache);
9661                 err |= !!ret;
9662                 goto close_out;
9663         }
9664
9665         /*
9666          * repair mode will force us to commit transaction which
9667          * will make us fail to load log tree when mounting.
9668          */
9669         if (repair && btrfs_super_log_root(info->super_copy)) {
9670                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
9671                 if (!ret) {
9672                         ret = 1;
9673                         err |= !!ret;
9674                         goto close_out;
9675                 }
9676                 ret = zero_log_tree(root);
9677                 err |= !!ret;
9678                 if (ret) {
9679                         error("failed to zero log tree: %d", ret);
9680                         goto close_out;
9681                 }
9682         }
9683
9684         if (qgroup_report) {
9685                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9686                        uuidbuf);
9687                 ret = qgroup_verify_all(info);
9688                 err |= !!ret;
9689                 if (ret == 0)
9690                         report_qgroups(1);
9691                 goto close_out;
9692         }
9693         if (subvolid) {
9694                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9695                        subvolid, argv[optind], uuidbuf);
9696                 ret = print_extent_state(info, subvolid);
9697                 err |= !!ret;
9698                 goto close_out;
9699         }
9700
9701         if (init_extent_tree || init_csum_tree) {
9702                 struct btrfs_trans_handle *trans;
9703
9704                 trans = btrfs_start_transaction(info->extent_root, 0);
9705                 if (IS_ERR(trans)) {
9706                         error("error starting transaction");
9707                         ret = PTR_ERR(trans);
9708                         err |= !!ret;
9709                         goto close_out;
9710                 }
9711
9712                 if (init_extent_tree) {
9713                         printf("Creating a new extent tree\n");
9714                         ret = reinit_extent_tree(trans, info);
9715                         err |= !!ret;
9716                         if (ret)
9717                                 goto close_out;
9718                 }
9719
9720                 if (init_csum_tree) {
9721                         printf("Reinitialize checksum tree\n");
9722                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9723                         if (ret) {
9724                                 error("checksum tree initialization failed: %d",
9725                                                 ret);
9726                                 ret = -EIO;
9727                                 err |= !!ret;
9728                                 goto close_out;
9729                         }
9730
9731                         ret = fill_csum_tree(trans, info->csum_root,
9732                                              init_extent_tree);
9733                         err |= !!ret;
9734                         if (ret) {
9735                                 error("checksum tree refilling failed: %d", ret);
9736                                 return -EIO;
9737                         }
9738                 }
9739                 /*
9740                  * Ok now we commit and run the normal fsck, which will add
9741                  * extent entries for all of the items it finds.
9742                  */
9743                 ret = btrfs_commit_transaction(trans, info->extent_root);
9744                 err |= !!ret;
9745                 if (ret)
9746                         goto close_out;
9747         }
9748         if (!extent_buffer_uptodate(info->extent_root->node)) {
9749                 error("critical: extent_root, unable to check the filesystem");
9750                 ret = -EIO;
9751                 err |= !!ret;
9752                 goto close_out;
9753         }
9754         if (!extent_buffer_uptodate(info->csum_root->node)) {
9755                 error("critical: csum_root, unable to check the filesystem");
9756                 ret = -EIO;
9757                 err |= !!ret;
9758                 goto close_out;
9759         }
9760
9761         if (!init_extent_tree) {
9762                 ret = repair_root_items(info);
9763                 if (ret < 0) {
9764                         err = !!ret;
9765                         error("failed to repair root items: %s", strerror(-ret));
9766                         goto close_out;
9767                 }
9768                 if (repair) {
9769                         fprintf(stderr, "Fixed %d roots.\n", ret);
9770                         ret = 0;
9771                 } else if (ret > 0) {
9772                         fprintf(stderr,
9773                                 "Found %d roots with an outdated root item.\n",
9774                                 ret);
9775                         fprintf(stderr,
9776         "Please run a filesystem check with the option --repair to fix them.\n");
9777                         ret = 1;
9778                         err |= ret;
9779                         goto close_out;
9780                 }
9781         }
9782
9783         ret = do_check_chunks_and_extents(info);
9784         err |= !!ret;
9785         if (ret)
9786                 error(
9787                 "errors found in extent allocation tree or chunk allocation");
9788
9789         /* Only re-check super size after we checked and repaired the fs */
9790         err |= !is_super_size_valid(info);
9791
9792         if (!ctx.progress_enabled) {
9793                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
9794                         fprintf(stderr, "checking free space tree\n");
9795                 else
9796                         fprintf(stderr, "checking free space cache\n");
9797         }
9798         ret = check_space_cache(root);
9799         err |= !!ret;
9800         if (ret) {
9801                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
9802                         error("errors found in free space tree");
9803                 else
9804                         error("errors found in free space cache");
9805                 goto out;
9806         }
9807
9808         /*
9809          * We used to require hole extents in between our real extents, so
9810          * if the NO_HOLES flag is not set we need to make sure there are no
9811          * gaps in the file extents for inodes; otherwise we can just ignore
9812          * such gaps when they happen.
9813          */
9814         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
9815         ret = do_check_fs_roots(info, &root_cache);
9816         err |= !!ret;
9817         if (ret) {
9818                 error("errors found in fs roots");
9819                 goto out;
9820         }
9821
9822         fprintf(stderr, "checking csums\n");
9823         ret = check_csums(root);
9824         err |= !!ret;
9825         if (ret) {
9826                 error("errors found in csum tree");
9827                 goto out;
9828         }
9829
9830         fprintf(stderr, "checking root refs\n");
9831         /* For low memory mode, check_fs_roots_v2 handles root refs */
9832         if (check_mode != CHECK_MODE_LOWMEM) {
9833                 ret = check_root_refs(root, &root_cache);
9834                 err |= !!ret;
9835                 if (ret) {
9836                         error("errors found in root refs");
9837                         goto out;
9838                 }
9839         }
9840
9841         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9842                 struct extent_buffer *eb;
9843
9844                 eb = list_first_entry(&root->fs_info->recow_ebs,
9845                                       struct extent_buffer, recow);
9846                 list_del_init(&eb->recow);
9847                 ret = recow_extent_buffer(root, eb);
9848                 err |= !!ret;
9849                 if (ret) {
9850                         error("fails to fix transid errors");
9851                         break;
9852                 }
9853         }
9854
9855         while (!list_empty(&delete_items)) {
9856                 struct bad_item *bad;
9857
9858                 bad = list_first_entry(&delete_items, struct bad_item, list);
9859                 list_del_init(&bad->list);
9860                 if (repair) {
9861                         ret = delete_bad_item(root, bad);
9862                         err |= !!ret;
9863                 }
9864                 free(bad);
9865         }
9866
9867         if (info->quota_enabled) {
9868                 fprintf(stderr, "checking quota groups\n");
9869                 ret = qgroup_verify_all(info);
9870                 err |= !!ret;
9871                 if (ret) {
9872                         error("failed to check quota groups");
9873                         goto out;
9874                 }
9875                 report_qgroups(0);
9876                 ret = repair_qgroups(info, &qgroups_repaired);
9877                 err |= !!ret;
9878                 if (err) {
9879                         error("failed to repair quota groups");
9880                         goto out;
9881                 }
9882                 ret = 0;
9883         }
9884
9885         if (!list_empty(&root->fs_info->recow_ebs)) {
9886                 error("transid errors in file system");
9887                 ret = 1;
9888                 err |= !!ret;
9889         }
9890 out:
9891         printf("found %llu bytes used, ",
9892                (unsigned long long)bytes_used);
9893         if (err)
9894                 printf("error(s) found\n");
9895         else
9896                 printf("no error found\n");
9897         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9898         printf("total tree bytes: %llu\n",
9899                (unsigned long long)total_btree_bytes);
9900         printf("total fs tree bytes: %llu\n",
9901                (unsigned long long)total_fs_tree_bytes);
9902         printf("total extent tree bytes: %llu\n",
9903                (unsigned long long)total_extent_tree_bytes);
9904         printf("btree space waste bytes: %llu\n",
9905                (unsigned long long)btree_space_waste);
9906         printf("file data blocks allocated: %llu\n referenced %llu\n",
9907                 (unsigned long long)data_bytes_allocated,
9908                 (unsigned long long)data_bytes_referenced);
9909
9910         free_qgroup_counts();
9911         free_root_recs_tree(&root_cache);
9912 close_out:
9913         close_ctree(root);
9914 err_out:
9915         if (ctx.progress_enabled)
9916                 task_deinit(ctx.info);
9917
9918         return err;
9919 }