btrfs-progs: fsck-tests: Introduce test case with keyed data backref with the extent...
[platform/upstream/btrfs-progs.git] / check / main.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46 #include "check/mode-common.h"
47 #include "check/mode-original.h"
48 #include "check/mode-lowmem.h"
49
/*
 * Phases shown by the progress indicator.  The first three values are used
 * as indexes into the label table inside print_status_check(), so their
 * order has to match that table.
 */
enum task_position {
	/* Label: "checking extents" */
	TASK_EXTENTS = 0,
	/* Label: "checking free space cache" */
	TASK_FREE_SPACE,
	/* Label: "checking fs roots" */
	TASK_FS_ROOTS,
	/* Nothing to report; has to be the last element */
	TASK_NOTHING,
};
56
/* State handed to the progress-indicator callbacks as their opaque pointer. */
struct task_ctx {
	/* NOTE(review): presumably non-zero when progress output is wanted - confirm at the call sites */
	int progress_enabled;
	/* Phase whose label print_status_check() prints */
	enum task_position tp;

	/* Handle passed to task_period_start() and task_period_wait() */
	struct task_info *info;
};
63
/*
 * NOTE(review): presumably byte totals accumulated while the filesystem is
 * checked - confirm against the code that updates them.
 */
u64 bytes_used = 0;
u64 total_csum_bytes = 0;
u64 total_btree_bytes = 0;
u64 total_fs_tree_bytes = 0;
u64 total_extent_tree_bytes = 0;
u64 btree_space_waste = 0;
u64 data_bytes_allocated = 0;
u64 data_bytes_referenced = 0;
/* NOTE(review): list heads; what gets queued on them should be confirmed at the users */
LIST_HEAD(duplicate_extents);
LIST_HEAD(delete_items);
/* When non-zero, maybe_free_inode_rec() skips the "file extent discount" check */
int no_holes = 0;
/* NOTE(review): presumably option flags set during argument parsing - confirm */
int init_extent_tree = 0;
int check_data_csum = 0;
struct btrfs_fs_info *global_info;
/* Progress-indicator state, starts out zeroed */
struct task_ctx ctx = { 0 };
struct cache_tree *roots_info_cache = NULL;
80
/*
 * Check implementations selectable by name, see parse_check_mode().
 * CHECK_MODE_DEFAULT is an alias for the original mode.
 */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL = 0,
	CHECK_MODE_LOWMEM,
	/* Returned by parse_check_mode() for an unrecognized name */
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode currently selected; starts as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
89
90 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
91 {
92         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
93         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
94         struct data_backref *back1 = to_data_backref(ext1);
95         struct data_backref *back2 = to_data_backref(ext2);
96
97         WARN_ON(!ext1->is_data);
98         WARN_ON(!ext2->is_data);
99
100         /* parent and root are a union, so this covers both */
101         if (back1->parent > back2->parent)
102                 return 1;
103         if (back1->parent < back2->parent)
104                 return -1;
105
106         /* This is a full backref and the parents match. */
107         if (back1->node.full_backref)
108                 return 0;
109
110         if (back1->owner > back2->owner)
111                 return 1;
112         if (back1->owner < back2->owner)
113                 return -1;
114
115         if (back1->offset > back2->offset)
116                 return 1;
117         if (back1->offset < back2->offset)
118                 return -1;
119
120         if (back1->found_ref && back2->found_ref) {
121                 if (back1->disk_bytenr > back2->disk_bytenr)
122                         return 1;
123                 if (back1->disk_bytenr < back2->disk_bytenr)
124                         return -1;
125
126                 if (back1->bytes > back2->bytes)
127                         return 1;
128                 if (back1->bytes < back2->bytes)
129                         return -1;
130         }
131
132         return 0;
133 }
134
135 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
136 {
137         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
138         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
139         struct tree_backref *back1 = to_tree_backref(ext1);
140         struct tree_backref *back2 = to_tree_backref(ext2);
141
142         WARN_ON(ext1->is_data);
143         WARN_ON(ext2->is_data);
144
145         /* parent and root are a union, so this covers both */
146         if (back1->parent > back2->parent)
147                 return 1;
148         if (back1->parent < back2->parent)
149                 return -1;
150
151         return 0;
152 }
153
154 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
155 {
156         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
157         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
158
159         if (ext1->is_data > ext2->is_data)
160                 return 1;
161
162         if (ext1->is_data < ext2->is_data)
163                 return -1;
164
165         if (ext1->full_backref > ext2->full_backref)
166                 return 1;
167         if (ext1->full_backref < ext2->full_backref)
168                 return -1;
169
170         if (ext1->is_data)
171                 return compare_data_backref(node1, node2);
172         else
173                 return compare_tree_backref(node1, node2);
174 }
175
176
177 static void *print_status_check(void *p)
178 {
179         struct task_ctx *priv = p;
180         const char work_indicator[] = { '.', 'o', 'O', 'o' };
181         uint32_t count = 0;
182         static char *task_position_string[] = {
183                 "checking extents",
184                 "checking free space cache",
185                 "checking fs roots",
186         };
187
188         task_period_start(priv->info, 1000 /* 1s */);
189
190         if (priv->tp == TASK_NOTHING)
191                 return NULL;
192
193         while (1) {
194                 printf("%s [%c]\r", task_position_string[priv->tp],
195                                 work_indicator[count % 4]);
196                 count++;
197                 fflush(stdout);
198                 task_period_wait(priv->info);
199         }
200         return NULL;
201 }
202
/*
 * Progress-indicator completion hook: emit a newline on stdout and flush it.
 * The argument is not used.  Always returns 0.
 */
static int print_status_return(void *p)
{
	(void)p;

	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
210
211 static enum btrfs_check_mode parse_check_mode(const char *str)
212 {
213         if (strcmp(str, "lowmem") == 0)
214                 return CHECK_MODE_LOWMEM;
215         if (strcmp(str, "orig") == 0)
216                 return CHECK_MODE_ORIGINAL;
217         if (strcmp(str, "original") == 0)
218                 return CHECK_MODE_ORIGINAL;
219
220         return CHECK_MODE_UNKNOWN;
221 }
222
223 /* Compatible function to allow reuse of old codes */
224 static u64 first_extent_gap(struct rb_root *holes)
225 {
226         struct file_extent_hole *hole;
227
228         if (RB_EMPTY_ROOT(holes))
229                 return (u64)-1;
230
231         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
232         return hole->start;
233 }
234
235 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
236 {
237         struct file_extent_hole *hole1;
238         struct file_extent_hole *hole2;
239
240         hole1 = rb_entry(node1, struct file_extent_hole, node);
241         hole2 = rb_entry(node2, struct file_extent_hole, node);
242
243         if (hole1->start > hole2->start)
244                 return -1;
245         if (hole1->start < hole2->start)
246                 return 1;
247         /* Now hole1->start == hole2->start */
248         if (hole1->len >= hole2->len)
249                 /*
250                  * Hole 1 will be merge center
251                  * Same hole will be merged later
252                  */
253                 return -1;
254         /* Hole 2 will be merge center */
255         return 1;
256 }
257
258 /*
259  * Add a hole to the record
260  *
261  * This will do hole merge for copy_file_extent_holes(),
262  * which will ensure there won't be continuous holes.
263  */
264 static int add_file_extent_hole(struct rb_root *holes,
265                                 u64 start, u64 len)
266 {
267         struct file_extent_hole *hole;
268         struct file_extent_hole *prev = NULL;
269         struct file_extent_hole *next = NULL;
270
271         hole = malloc(sizeof(*hole));
272         if (!hole)
273                 return -ENOMEM;
274         hole->start = start;
275         hole->len = len;
276         /* Since compare will not return 0, no -EEXIST will happen */
277         rb_insert(holes, &hole->node, compare_hole);
278
279         /* simple merge with previous hole */
280         if (rb_prev(&hole->node))
281                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
282                                 node);
283         if (prev && prev->start + prev->len >= hole->start) {
284                 hole->len = hole->start + hole->len - prev->start;
285                 hole->start = prev->start;
286                 rb_erase(&prev->node, holes);
287                 free(prev);
288                 prev = NULL;
289         }
290
291         /* iterate merge with next holes */
292         while (1) {
293                 if (!rb_next(&hole->node))
294                         break;
295                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
296                                         node);
297                 if (hole->start + hole->len >= next->start) {
298                         if (hole->start + hole->len <= next->start + next->len)
299                                 hole->len = next->start + next->len -
300                                             hole->start;
301                         rb_erase(&next->node, holes);
302                         free(next);
303                         next = NULL;
304                 } else
305                         break;
306         }
307         return 0;
308 }
309
310 static int compare_hole_range(struct rb_node *node, void *data)
311 {
312         struct file_extent_hole *hole;
313         u64 start;
314
315         hole = (struct file_extent_hole *)data;
316         start = hole->start;
317
318         hole = rb_entry(node, struct file_extent_hole, node);
319         if (start < hole->start)
320                 return -1;
321         if (start >= hole->start && start < hole->start + hole->len)
322                 return 0;
323         return 1;
324 }
325
326 /*
327  * Delete a hole in the record
328  *
329  * This will do the hole split and is much restrict than add.
330  */
331 static int del_file_extent_hole(struct rb_root *holes,
332                                 u64 start, u64 len)
333 {
334         struct file_extent_hole *hole;
335         struct file_extent_hole tmp;
336         u64 prev_start = 0;
337         u64 prev_len = 0;
338         u64 next_start = 0;
339         u64 next_len = 0;
340         struct rb_node *node;
341         int have_prev = 0;
342         int have_next = 0;
343         int ret = 0;
344
345         tmp.start = start;
346         tmp.len = len;
347         node = rb_search(holes, &tmp, compare_hole_range, NULL);
348         if (!node)
349                 return -EEXIST;
350         hole = rb_entry(node, struct file_extent_hole, node);
351         if (start + len > hole->start + hole->len)
352                 return -EEXIST;
353
354         /*
355          * Now there will be no overlap, delete the hole and re-add the
356          * split(s) if they exists.
357          */
358         if (start > hole->start) {
359                 prev_start = hole->start;
360                 prev_len = start - hole->start;
361                 have_prev = 1;
362         }
363         if (hole->start + hole->len > start + len) {
364                 next_start = start + len;
365                 next_len = hole->start + hole->len - start - len;
366                 have_next = 1;
367         }
368         rb_erase(node, holes);
369         free(hole);
370         if (have_prev) {
371                 ret = add_file_extent_hole(holes, prev_start, prev_len);
372                 if (ret < 0)
373                         return ret;
374         }
375         if (have_next) {
376                 ret = add_file_extent_hole(holes, next_start, next_len);
377                 if (ret < 0)
378                         return ret;
379         }
380         return 0;
381 }
382
383 static int copy_file_extent_holes(struct rb_root *dst,
384                                   struct rb_root *src)
385 {
386         struct file_extent_hole *hole;
387         struct rb_node *node;
388         int ret = 0;
389
390         node = rb_first(src);
391         while (node) {
392                 hole = rb_entry(node, struct file_extent_hole, node);
393                 ret = add_file_extent_hole(dst, hole->start, hole->len);
394                 if (ret)
395                         break;
396                 node = rb_next(node);
397         }
398         return ret;
399 }
400
401 static void free_file_extent_holes(struct rb_root *holes)
402 {
403         struct rb_node *node;
404         struct file_extent_hole *hole;
405
406         node = rb_first(holes);
407         while (node) {
408                 hole = rb_entry(node, struct file_extent_hole, node);
409                 rb_erase(node, holes);
410                 free(hole);
411                 node = rb_first(holes);
412         }
413 }
414
415 static void record_root_in_trans(struct btrfs_trans_handle *trans,
416                                  struct btrfs_root *root)
417 {
418         if (root->last_trans != trans->transid) {
419                 root->track_dirty = 1;
420                 root->last_trans = trans->transid;
421                 root->commit_root = root->node;
422                 extent_buffer_get(root->node);
423         }
424 }
425
426 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
427 {
428         struct device_record *rec1;
429         struct device_record *rec2;
430
431         rec1 = rb_entry(node1, struct device_record, node);
432         rec2 = rb_entry(node2, struct device_record, node);
433         if (rec1->devid > rec2->devid)
434                 return -1;
435         else if (rec1->devid < rec2->devid)
436                 return 1;
437         else
438                 return 0;
439 }
440
441 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
442 {
443         struct inode_record *rec;
444         struct inode_backref *backref;
445         struct inode_backref *orig;
446         struct inode_backref *tmp;
447         struct orphan_data_extent *src_orphan;
448         struct orphan_data_extent *dst_orphan;
449         struct rb_node *rb;
450         size_t size;
451         int ret;
452
453         rec = malloc(sizeof(*rec));
454         if (!rec)
455                 return ERR_PTR(-ENOMEM);
456         memcpy(rec, orig_rec, sizeof(*rec));
457         rec->refs = 1;
458         INIT_LIST_HEAD(&rec->backrefs);
459         INIT_LIST_HEAD(&rec->orphan_extents);
460         rec->holes = RB_ROOT;
461
462         list_for_each_entry(orig, &orig_rec->backrefs, list) {
463                 size = sizeof(*orig) + orig->namelen + 1;
464                 backref = malloc(size);
465                 if (!backref) {
466                         ret = -ENOMEM;
467                         goto cleanup;
468                 }
469                 memcpy(backref, orig, size);
470                 list_add_tail(&backref->list, &rec->backrefs);
471         }
472         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
473                 dst_orphan = malloc(sizeof(*dst_orphan));
474                 if (!dst_orphan) {
475                         ret = -ENOMEM;
476                         goto cleanup;
477                 }
478                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
479                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
480         }
481         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
482         if (ret < 0)
483                 goto cleanup_rb;
484
485         return rec;
486
487 cleanup_rb:
488         rb = rb_first(&rec->holes);
489         while (rb) {
490                 struct file_extent_hole *hole;
491
492                 hole = rb_entry(rb, struct file_extent_hole, node);
493                 rb = rb_next(rb);
494                 free(hole);
495         }
496
497 cleanup:
498         if (!list_empty(&rec->backrefs))
499                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
500                         list_del(&orig->list);
501                         free(orig);
502                 }
503
504         if (!list_empty(&rec->orphan_extents))
505                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
506                         list_del(&orig->list);
507                         free(orig);
508                 }
509
510         free(rec);
511
512         return ERR_PTR(ret);
513 }
514
515 static void print_orphan_data_extents(struct list_head *orphan_extents,
516                                       u64 objectid)
517 {
518         struct orphan_data_extent *orphan;
519
520         if (list_empty(orphan_extents))
521                 return;
522         printf("The following data extent is lost in tree %llu:\n",
523                objectid);
524         list_for_each_entry(orphan, orphan_extents, list) {
525                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
526                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
527                        orphan->disk_len);
528         }
529 }
530
531 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
532 {
533         u64 root_objectid = root->root_key.objectid;
534         int errors = rec->errors;
535
536         if (!errors)
537                 return;
538         /* reloc root errors, we print its corresponding fs root objectid*/
539         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
540                 root_objectid = root->root_key.offset;
541                 fprintf(stderr, "reloc");
542         }
543         fprintf(stderr, "root %llu inode %llu errors %x",
544                 (unsigned long long) root_objectid,
545                 (unsigned long long) rec->ino, rec->errors);
546
547         if (errors & I_ERR_NO_INODE_ITEM)
548                 fprintf(stderr, ", no inode item");
549         if (errors & I_ERR_NO_ORPHAN_ITEM)
550                 fprintf(stderr, ", no orphan item");
551         if (errors & I_ERR_DUP_INODE_ITEM)
552                 fprintf(stderr, ", dup inode item");
553         if (errors & I_ERR_DUP_DIR_INDEX)
554                 fprintf(stderr, ", dup dir index");
555         if (errors & I_ERR_ODD_DIR_ITEM)
556                 fprintf(stderr, ", odd dir item");
557         if (errors & I_ERR_ODD_FILE_EXTENT)
558                 fprintf(stderr, ", odd file extent");
559         if (errors & I_ERR_BAD_FILE_EXTENT)
560                 fprintf(stderr, ", bad file extent");
561         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
562                 fprintf(stderr, ", file extent overlap");
563         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
564                 fprintf(stderr, ", file extent discount");
565         if (errors & I_ERR_DIR_ISIZE_WRONG)
566                 fprintf(stderr, ", dir isize wrong");
567         if (errors & I_ERR_FILE_NBYTES_WRONG)
568                 fprintf(stderr, ", nbytes wrong");
569         if (errors & I_ERR_ODD_CSUM_ITEM)
570                 fprintf(stderr, ", odd csum item");
571         if (errors & I_ERR_SOME_CSUM_MISSING)
572                 fprintf(stderr, ", some csum missing");
573         if (errors & I_ERR_LINK_COUNT_WRONG)
574                 fprintf(stderr, ", link count wrong");
575         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
576                 fprintf(stderr, ", orphan file extent");
577         fprintf(stderr, "\n");
578         /* Print the orphan extents if needed */
579         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
580                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
581
582         /* Print the holes if needed */
583         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
584                 struct file_extent_hole *hole;
585                 struct rb_node *node;
586                 int found = 0;
587
588                 node = rb_first(&rec->holes);
589                 fprintf(stderr, "Found file extent holes:\n");
590                 while (node) {
591                         found = 1;
592                         hole = rb_entry(node, struct file_extent_hole, node);
593                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
594                                 hole->start, hole->len);
595                         node = rb_next(node);
596                 }
597                 if (!found)
598                         fprintf(stderr, "\tstart: 0, len: %llu\n",
599                                 round_up(rec->isize,
600                                          root->fs_info->sectorsize));
601         }
602 }
603
604 static void print_ref_error(int errors)
605 {
606         if (errors & REF_ERR_NO_DIR_ITEM)
607                 fprintf(stderr, ", no dir item");
608         if (errors & REF_ERR_NO_DIR_INDEX)
609                 fprintf(stderr, ", no dir index");
610         if (errors & REF_ERR_NO_INODE_REF)
611                 fprintf(stderr, ", no inode ref");
612         if (errors & REF_ERR_DUP_DIR_ITEM)
613                 fprintf(stderr, ", dup dir item");
614         if (errors & REF_ERR_DUP_DIR_INDEX)
615                 fprintf(stderr, ", dup dir index");
616         if (errors & REF_ERR_DUP_INODE_REF)
617                 fprintf(stderr, ", dup inode ref");
618         if (errors & REF_ERR_INDEX_UNMATCH)
619                 fprintf(stderr, ", index mismatch");
620         if (errors & REF_ERR_FILETYPE_UNMATCH)
621                 fprintf(stderr, ", filetype mismatch");
622         if (errors & REF_ERR_NAME_TOO_LONG)
623                 fprintf(stderr, ", name too long");
624         if (errors & REF_ERR_NO_ROOT_REF)
625                 fprintf(stderr, ", no root ref");
626         if (errors & REF_ERR_NO_ROOT_BACKREF)
627                 fprintf(stderr, ", no root backref");
628         if (errors & REF_ERR_DUP_ROOT_REF)
629                 fprintf(stderr, ", dup root ref");
630         if (errors & REF_ERR_DUP_ROOT_BACKREF)
631                 fprintf(stderr, ", dup root backref");
632         fprintf(stderr, "\n");
633 }
634
635 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
636                                           u64 ino, int mod)
637 {
638         struct ptr_node *node;
639         struct cache_extent *cache;
640         struct inode_record *rec = NULL;
641         int ret;
642
643         cache = lookup_cache_extent(inode_cache, ino, 1);
644         if (cache) {
645                 node = container_of(cache, struct ptr_node, cache);
646                 rec = node->data;
647                 if (mod && rec->refs > 1) {
648                         node->data = clone_inode_rec(rec);
649                         if (IS_ERR(node->data))
650                                 return node->data;
651                         rec->refs--;
652                         rec = node->data;
653                 }
654         } else if (mod) {
655                 rec = calloc(1, sizeof(*rec));
656                 if (!rec)
657                         return ERR_PTR(-ENOMEM);
658                 rec->ino = ino;
659                 rec->extent_start = (u64)-1;
660                 rec->refs = 1;
661                 INIT_LIST_HEAD(&rec->backrefs);
662                 INIT_LIST_HEAD(&rec->orphan_extents);
663                 rec->holes = RB_ROOT;
664
665                 node = malloc(sizeof(*node));
666                 if (!node) {
667                         free(rec);
668                         return ERR_PTR(-ENOMEM);
669                 }
670                 node->cache.start = ino;
671                 node->cache.size = 1;
672                 node->data = rec;
673
674                 if (ino == BTRFS_FREE_INO_OBJECTID)
675                         rec->found_link = 1;
676
677                 ret = insert_cache_extent(inode_cache, &node->cache);
678                 if (ret)
679                         return ERR_PTR(-EEXIST);
680         }
681         return rec;
682 }
683
684 static void free_orphan_data_extents(struct list_head *orphan_extents)
685 {
686         struct orphan_data_extent *orphan;
687
688         while (!list_empty(orphan_extents)) {
689                 orphan = list_entry(orphan_extents->next,
690                                     struct orphan_data_extent, list);
691                 list_del(&orphan->list);
692                 free(orphan);
693         }
694 }
695
696 static void free_inode_rec(struct inode_record *rec)
697 {
698         struct inode_backref *backref;
699
700         if (--rec->refs > 0)
701                 return;
702
703         while (!list_empty(&rec->backrefs)) {
704                 backref = to_inode_backref(rec->backrefs.next);
705                 list_del(&backref->list);
706                 free(backref);
707         }
708         free_orphan_data_extents(&rec->orphan_extents);
709         free_file_extent_holes(&rec->holes);
710         free(rec);
711 }
712
713 static int can_free_inode_rec(struct inode_record *rec)
714 {
715         if (!rec->errors && rec->checked && rec->found_inode_item &&
716             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
717                 return 1;
718         return 0;
719 }
720
721 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
722                                  struct inode_record *rec)
723 {
724         struct cache_extent *cache;
725         struct inode_backref *tmp, *backref;
726         struct ptr_node *node;
727         u8 filetype;
728
729         if (!rec->found_inode_item)
730                 return;
731
732         filetype = imode_to_type(rec->imode);
733         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
734                 if (backref->found_dir_item && backref->found_dir_index) {
735                         if (backref->filetype != filetype)
736                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
737                         if (!backref->errors && backref->found_inode_ref &&
738                             rec->nlink == rec->found_link) {
739                                 list_del(&backref->list);
740                                 free(backref);
741                         }
742                 }
743         }
744
745         if (!rec->checked || rec->merging)
746                 return;
747
748         if (S_ISDIR(rec->imode)) {
749                 if (rec->found_size != rec->isize)
750                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
751                 if (rec->found_file_extent)
752                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
753         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
754                 if (rec->found_dir_item)
755                         rec->errors |= I_ERR_ODD_DIR_ITEM;
756                 if (rec->found_size != rec->nbytes)
757                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
758                 if (rec->nlink > 0 && !no_holes &&
759                     (rec->extent_end < rec->isize ||
760                      first_extent_gap(&rec->holes) < rec->isize))
761                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
762         }
763
764         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
765                 if (rec->found_csum_item && rec->nodatasum)
766                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
767                 if (rec->some_csum_missing && !rec->nodatasum)
768                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
769         }
770
771         BUG_ON(rec->refs != 1);
772         if (can_free_inode_rec(rec)) {
773                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
774                 node = container_of(cache, struct ptr_node, cache);
775                 BUG_ON(node->data != rec);
776                 remove_cache_extent(inode_cache, &node->cache);
777                 free(node);
778                 free_inode_rec(rec);
779         }
780 }
781
782 static int check_orphan_item(struct btrfs_root *root, u64 ino)
783 {
784         struct btrfs_path path;
785         struct btrfs_key key;
786         int ret;
787
788         key.objectid = BTRFS_ORPHAN_OBJECTID;
789         key.type = BTRFS_ORPHAN_ITEM_KEY;
790         key.offset = ino;
791
792         btrfs_init_path(&path);
793         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
794         btrfs_release_path(&path);
795         if (ret > 0)
796                 ret = -ENOENT;
797         return ret;
798 }
799
800 static int process_inode_item(struct extent_buffer *eb,
801                               int slot, struct btrfs_key *key,
802                               struct shared_node *active_node)
803 {
804         struct inode_record *rec;
805         struct btrfs_inode_item *item;
806
807         rec = active_node->current;
808         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
809         if (rec->found_inode_item) {
810                 rec->errors |= I_ERR_DUP_INODE_ITEM;
811                 return 1;
812         }
813         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
814         rec->nlink = btrfs_inode_nlink(eb, item);
815         rec->isize = btrfs_inode_size(eb, item);
816         rec->nbytes = btrfs_inode_nbytes(eb, item);
817         rec->imode = btrfs_inode_mode(eb, item);
818         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
819                 rec->nodatasum = 1;
820         rec->found_inode_item = 1;
821         if (rec->nlink == 0)
822                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
823         maybe_free_inode_rec(&active_node->inode_cache, rec);
824         return 0;
825 }
826
827 static struct inode_backref *get_inode_backref(struct inode_record *rec,
828                                                 const char *name,
829                                                 int namelen, u64 dir)
830 {
831         struct inode_backref *backref;
832
833         list_for_each_entry(backref, &rec->backrefs, list) {
834                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
835                         break;
836                 if (backref->dir != dir || backref->namelen != namelen)
837                         continue;
838                 if (memcmp(name, backref->name, namelen))
839                         continue;
840                 return backref;
841         }
842
843         backref = malloc(sizeof(*backref) + namelen + 1);
844         if (!backref)
845                 return NULL;
846         memset(backref, 0, sizeof(*backref));
847         backref->dir = dir;
848         backref->namelen = namelen;
849         memcpy(backref->name, name, namelen);
850         backref->name[namelen] = '\0';
851         list_add_tail(&backref->list, &rec->backrefs);
852         return backref;
853 }
854
855 static int add_inode_backref(struct cache_tree *inode_cache,
856                              u64 ino, u64 dir, u64 index,
857                              const char *name, int namelen,
858                              u8 filetype, u8 itemtype, int errors)
859 {
860         struct inode_record *rec;
861         struct inode_backref *backref;
862
863         rec = get_inode_rec(inode_cache, ino, 1);
864         BUG_ON(IS_ERR(rec));
865         backref = get_inode_backref(rec, name, namelen, dir);
866         BUG_ON(!backref);
867         if (errors)
868                 backref->errors |= errors;
869         if (itemtype == BTRFS_DIR_INDEX_KEY) {
870                 if (backref->found_dir_index)
871                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
872                 if (backref->found_inode_ref && backref->index != index)
873                         backref->errors |= REF_ERR_INDEX_UNMATCH;
874                 if (backref->found_dir_item && backref->filetype != filetype)
875                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
876
877                 backref->index = index;
878                 backref->filetype = filetype;
879                 backref->found_dir_index = 1;
880         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
881                 rec->found_link++;
882                 if (backref->found_dir_item)
883                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
884                 if (backref->found_dir_index && backref->filetype != filetype)
885                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
886
887                 backref->filetype = filetype;
888                 backref->found_dir_item = 1;
889         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
890                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
891                 if (backref->found_inode_ref)
892                         backref->errors |= REF_ERR_DUP_INODE_REF;
893                 if (backref->found_dir_index && backref->index != index)
894                         backref->errors |= REF_ERR_INDEX_UNMATCH;
895                 else
896                         backref->index = index;
897
898                 backref->ref_type = itemtype;
899                 backref->found_inode_ref = 1;
900         } else {
901                 BUG_ON(1);
902         }
903
904         maybe_free_inode_rec(inode_cache, rec);
905         return 0;
906 }
907
908 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
909                             struct cache_tree *dst_cache)
910 {
911         struct inode_backref *backref;
912         u32 dir_count = 0;
913         int ret = 0;
914
915         dst->merging = 1;
916         list_for_each_entry(backref, &src->backrefs, list) {
917                 if (backref->found_dir_index) {
918                         add_inode_backref(dst_cache, dst->ino, backref->dir,
919                                         backref->index, backref->name,
920                                         backref->namelen, backref->filetype,
921                                         BTRFS_DIR_INDEX_KEY, backref->errors);
922                 }
923                 if (backref->found_dir_item) {
924                         dir_count++;
925                         add_inode_backref(dst_cache, dst->ino,
926                                         backref->dir, 0, backref->name,
927                                         backref->namelen, backref->filetype,
928                                         BTRFS_DIR_ITEM_KEY, backref->errors);
929                 }
930                 if (backref->found_inode_ref) {
931                         add_inode_backref(dst_cache, dst->ino,
932                                         backref->dir, backref->index,
933                                         backref->name, backref->namelen, 0,
934                                         backref->ref_type, backref->errors);
935                 }
936         }
937
938         if (src->found_dir_item)
939                 dst->found_dir_item = 1;
940         if (src->found_file_extent)
941                 dst->found_file_extent = 1;
942         if (src->found_csum_item)
943                 dst->found_csum_item = 1;
944         if (src->some_csum_missing)
945                 dst->some_csum_missing = 1;
946         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
947                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
948                 if (ret < 0)
949                         return ret;
950         }
951
952         BUG_ON(src->found_link < dir_count);
953         dst->found_link += src->found_link - dir_count;
954         dst->found_size += src->found_size;
955         if (src->extent_start != (u64)-1) {
956                 if (dst->extent_start == (u64)-1) {
957                         dst->extent_start = src->extent_start;
958                         dst->extent_end = src->extent_end;
959                 } else {
960                         if (dst->extent_end > src->extent_start)
961                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
962                         else if (dst->extent_end < src->extent_start) {
963                                 ret = add_file_extent_hole(&dst->holes,
964                                         dst->extent_end,
965                                         src->extent_start - dst->extent_end);
966                         }
967                         if (dst->extent_end < src->extent_end)
968                                 dst->extent_end = src->extent_end;
969                 }
970         }
971
972         dst->errors |= src->errors;
973         if (src->found_inode_item) {
974                 if (!dst->found_inode_item) {
975                         dst->nlink = src->nlink;
976                         dst->isize = src->isize;
977                         dst->nbytes = src->nbytes;
978                         dst->imode = src->imode;
979                         dst->nodatasum = src->nodatasum;
980                         dst->found_inode_item = 1;
981                 } else {
982                         dst->errors |= I_ERR_DUP_INODE_ITEM;
983                 }
984         }
985         dst->merging = 0;
986
987         return 0;
988 }
989
990 static int splice_shared_node(struct shared_node *src_node,
991                               struct shared_node *dst_node)
992 {
993         struct cache_extent *cache;
994         struct ptr_node *node, *ins;
995         struct cache_tree *src, *dst;
996         struct inode_record *rec, *conflict;
997         u64 current_ino = 0;
998         int splice = 0;
999         int ret;
1000
1001         if (--src_node->refs == 0)
1002                 splice = 1;
1003         if (src_node->current)
1004                 current_ino = src_node->current->ino;
1005
1006         src = &src_node->root_cache;
1007         dst = &dst_node->root_cache;
1008 again:
1009         cache = search_cache_extent(src, 0);
1010         while (cache) {
1011                 node = container_of(cache, struct ptr_node, cache);
1012                 rec = node->data;
1013                 cache = next_cache_extent(cache);
1014
1015                 if (splice) {
1016                         remove_cache_extent(src, &node->cache);
1017                         ins = node;
1018                 } else {
1019                         ins = malloc(sizeof(*ins));
1020                         BUG_ON(!ins);
1021                         ins->cache.start = node->cache.start;
1022                         ins->cache.size = node->cache.size;
1023                         ins->data = rec;
1024                         rec->refs++;
1025                 }
1026                 ret = insert_cache_extent(dst, &ins->cache);
1027                 if (ret == -EEXIST) {
1028                         conflict = get_inode_rec(dst, rec->ino, 1);
1029                         BUG_ON(IS_ERR(conflict));
1030                         merge_inode_recs(rec, conflict, dst);
1031                         if (rec->checked) {
1032                                 conflict->checked = 1;
1033                                 if (dst_node->current == conflict)
1034                                         dst_node->current = NULL;
1035                         }
1036                         maybe_free_inode_rec(dst, conflict);
1037                         free_inode_rec(rec);
1038                         free(ins);
1039                 } else {
1040                         BUG_ON(ret);
1041                 }
1042         }
1043
1044         if (src == &src_node->root_cache) {
1045                 src = &src_node->inode_cache;
1046                 dst = &dst_node->inode_cache;
1047                 goto again;
1048         }
1049
1050         if (current_ino > 0 && (!dst_node->current ||
1051             current_ino > dst_node->current->ino)) {
1052                 if (dst_node->current) {
1053                         dst_node->current->checked = 1;
1054                         maybe_free_inode_rec(dst, dst_node->current);
1055                 }
1056                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1057                 BUG_ON(IS_ERR(dst_node->current));
1058         }
1059         return 0;
1060 }
1061
1062 static void free_inode_ptr(struct cache_extent *cache)
1063 {
1064         struct ptr_node *node;
1065         struct inode_record *rec;
1066
1067         node = container_of(cache, struct ptr_node, cache);
1068         rec = node->data;
1069         free_inode_rec(rec);
1070         free(node);
1071 }
1072
1073 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1074
1075 static struct shared_node *find_shared_node(struct cache_tree *shared,
1076                                             u64 bytenr)
1077 {
1078         struct cache_extent *cache;
1079         struct shared_node *node;
1080
1081         cache = lookup_cache_extent(shared, bytenr, 1);
1082         if (cache) {
1083                 node = container_of(cache, struct shared_node, cache);
1084                 return node;
1085         }
1086         return NULL;
1087 }
1088
1089 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1090 {
1091         int ret;
1092         struct shared_node *node;
1093
1094         node = calloc(1, sizeof(*node));
1095         if (!node)
1096                 return -ENOMEM;
1097         node->cache.start = bytenr;
1098         node->cache.size = 1;
1099         cache_tree_init(&node->root_cache);
1100         cache_tree_init(&node->inode_cache);
1101         node->refs = refs;
1102
1103         ret = insert_cache_extent(shared, &node->cache);
1104
1105         return ret;
1106 }
1107
1108 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1109                              struct walk_control *wc, int level)
1110 {
1111         struct shared_node *node;
1112         struct shared_node *dest;
1113         int ret;
1114
1115         if (level == wc->active_node)
1116                 return 0;
1117
1118         BUG_ON(wc->active_node <= level);
1119         node = find_shared_node(&wc->shared, bytenr);
1120         if (!node) {
1121                 ret = add_shared_node(&wc->shared, bytenr, refs);
1122                 BUG_ON(ret);
1123                 node = find_shared_node(&wc->shared, bytenr);
1124                 wc->nodes[level] = node;
1125                 wc->active_node = level;
1126                 return 0;
1127         }
1128
1129         if (wc->root_level == wc->active_node &&
1130             btrfs_root_refs(&root->root_item) == 0) {
1131                 if (--node->refs == 0) {
1132                         free_inode_recs_tree(&node->root_cache);
1133                         free_inode_recs_tree(&node->inode_cache);
1134                         remove_cache_extent(&wc->shared, &node->cache);
1135                         free(node);
1136                 }
1137                 return 1;
1138         }
1139
1140         dest = wc->nodes[wc->active_node];
1141         splice_shared_node(node, dest);
1142         if (node->refs == 0) {
1143                 remove_cache_extent(&wc->shared, &node->cache);
1144                 free(node);
1145         }
1146         return 1;
1147 }
1148
1149 static int leave_shared_node(struct btrfs_root *root,
1150                              struct walk_control *wc, int level)
1151 {
1152         struct shared_node *node;
1153         struct shared_node *dest;
1154         int i;
1155
1156         if (level == wc->root_level)
1157                 return 0;
1158
1159         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1160                 if (wc->nodes[i])
1161                         break;
1162         }
1163         BUG_ON(i >= BTRFS_MAX_LEVEL);
1164
1165         node = wc->nodes[wc->active_node];
1166         wc->nodes[wc->active_node] = NULL;
1167         wc->active_node = i;
1168
1169         dest = wc->nodes[wc->active_node];
1170         if (wc->active_node < wc->root_level ||
1171             btrfs_root_refs(&root->root_item) > 0) {
1172                 BUG_ON(node->refs <= 1);
1173                 splice_shared_node(node, dest);
1174         } else {
1175                 BUG_ON(node->refs < 2);
1176                 node->refs--;
1177         }
1178         return 0;
1179 }
1180
1181 /*
1182  * Returns:
1183  * < 0 - on error
1184  * 1   - if the root with id child_root_id is a child of root parent_root_id
1185  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1186  *       has other root(s) as parent(s)
1187  * 2   - if the root child_root_id doesn't have any parent roots
1188  */
1189 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1190                          u64 child_root_id)
1191 {
1192         struct btrfs_path path;
1193         struct btrfs_key key;
1194         struct extent_buffer *leaf;
1195         int has_parent = 0;
1196         int ret;
1197
1198         btrfs_init_path(&path);
1199
1200         key.objectid = parent_root_id;
1201         key.type = BTRFS_ROOT_REF_KEY;
1202         key.offset = child_root_id;
1203         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1204                                 0, 0);
1205         if (ret < 0)
1206                 return ret;
1207         btrfs_release_path(&path);
1208         if (!ret)
1209                 return 1;
1210
1211         key.objectid = child_root_id;
1212         key.type = BTRFS_ROOT_BACKREF_KEY;
1213         key.offset = 0;
1214         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1215                                 0, 0);
1216         if (ret < 0)
1217                 goto out;
1218
1219         while (1) {
1220                 leaf = path.nodes[0];
1221                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1222                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1223                         if (ret)
1224                                 break;
1225                         leaf = path.nodes[0];
1226                 }
1227
1228                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1229                 if (key.objectid != child_root_id ||
1230                     key.type != BTRFS_ROOT_BACKREF_KEY)
1231                         break;
1232
1233                 has_parent = 1;
1234
1235                 if (key.offset == parent_root_id) {
1236                         btrfs_release_path(&path);
1237                         return 1;
1238                 }
1239
1240                 path.slots[0]++;
1241         }
1242 out:
1243         btrfs_release_path(&path);
1244         if (ret < 0)
1245                 return ret;
1246         return has_parent ? 0 : 2;
1247 }
1248
1249 static int process_dir_item(struct extent_buffer *eb,
1250                             int slot, struct btrfs_key *key,
1251                             struct shared_node *active_node)
1252 {
1253         u32 total;
1254         u32 cur = 0;
1255         u32 len;
1256         u32 name_len;
1257         u32 data_len;
1258         int error;
1259         int nritems = 0;
1260         u8 filetype;
1261         struct btrfs_dir_item *di;
1262         struct inode_record *rec;
1263         struct cache_tree *root_cache;
1264         struct cache_tree *inode_cache;
1265         struct btrfs_key location;
1266         char namebuf[BTRFS_NAME_LEN];
1267
1268         root_cache = &active_node->root_cache;
1269         inode_cache = &active_node->inode_cache;
1270         rec = active_node->current;
1271         rec->found_dir_item = 1;
1272
1273         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1274         total = btrfs_item_size_nr(eb, slot);
1275         while (cur < total) {
1276                 nritems++;
1277                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1278                 name_len = btrfs_dir_name_len(eb, di);
1279                 data_len = btrfs_dir_data_len(eb, di);
1280                 filetype = btrfs_dir_type(eb, di);
1281
1282                 rec->found_size += name_len;
1283                 if (cur + sizeof(*di) + name_len > total ||
1284                     name_len > BTRFS_NAME_LEN) {
1285                         error = REF_ERR_NAME_TOO_LONG;
1286
1287                         if (cur + sizeof(*di) > total)
1288                                 break;
1289                         len = min_t(u32, total - cur - sizeof(*di),
1290                                     BTRFS_NAME_LEN);
1291                 } else {
1292                         len = name_len;
1293                         error = 0;
1294                 }
1295
1296                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1297
1298                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1299                     key->offset != btrfs_name_hash(namebuf, len)) {
1300                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1301                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1302                         key->objectid, key->offset, namebuf, len, filetype,
1303                         key->offset, btrfs_name_hash(namebuf, len));
1304                 }
1305
1306                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1307                         add_inode_backref(inode_cache, location.objectid,
1308                                           key->objectid, key->offset, namebuf,
1309                                           len, filetype, key->type, error);
1310                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1311                         add_inode_backref(root_cache, location.objectid,
1312                                           key->objectid, key->offset,
1313                                           namebuf, len, filetype,
1314                                           key->type, error);
1315                 } else {
1316                         fprintf(stderr,
1317                                 "unknown location type %d in DIR_ITEM[%llu %llu]\n",
1318                                 location.type, key->objectid, key->offset);
1319                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1320                                           key->objectid, key->offset, namebuf,
1321                                           len, filetype, key->type, error);
1322                 }
1323
1324                 len = sizeof(*di) + name_len + data_len;
1325                 di = (struct btrfs_dir_item *)((char *)di + len);
1326                 cur += len;
1327         }
1328         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1329                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1330
1331         return 0;
1332 }
1333
1334 static int process_inode_ref(struct extent_buffer *eb,
1335                              int slot, struct btrfs_key *key,
1336                              struct shared_node *active_node)
1337 {
1338         u32 total;
1339         u32 cur = 0;
1340         u32 len;
1341         u32 name_len;
1342         u64 index;
1343         int error;
1344         struct cache_tree *inode_cache;
1345         struct btrfs_inode_ref *ref;
1346         char namebuf[BTRFS_NAME_LEN];
1347
1348         inode_cache = &active_node->inode_cache;
1349
1350         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1351         total = btrfs_item_size_nr(eb, slot);
1352         while (cur < total) {
1353                 name_len = btrfs_inode_ref_name_len(eb, ref);
1354                 index = btrfs_inode_ref_index(eb, ref);
1355
1356                 /* inode_ref + namelen should not cross item boundary */
1357                 if (cur + sizeof(*ref) + name_len > total ||
1358                     name_len > BTRFS_NAME_LEN) {
1359                         if (total < cur + sizeof(*ref))
1360                                 break;
1361
1362                         /* Still try to read out the remaining part */
1363                         len = min_t(u32, total - cur - sizeof(*ref),
1364                                     BTRFS_NAME_LEN);
1365                         error = REF_ERR_NAME_TOO_LONG;
1366                 } else {
1367                         len = name_len;
1368                         error = 0;
1369                 }
1370
1371                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1372                 add_inode_backref(inode_cache, key->objectid, key->offset,
1373                                   index, namebuf, len, 0, key->type, error);
1374
1375                 len = sizeof(*ref) + name_len;
1376                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1377                 cur += len;
1378         }
1379         return 0;
1380 }
1381
1382 static int process_inode_extref(struct extent_buffer *eb,
1383                                 int slot, struct btrfs_key *key,
1384                                 struct shared_node *active_node)
1385 {
1386         u32 total;
1387         u32 cur = 0;
1388         u32 len;
1389         u32 name_len;
1390         u64 index;
1391         u64 parent;
1392         int error;
1393         struct cache_tree *inode_cache;
1394         struct btrfs_inode_extref *extref;
1395         char namebuf[BTRFS_NAME_LEN];
1396
1397         inode_cache = &active_node->inode_cache;
1398
1399         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1400         total = btrfs_item_size_nr(eb, slot);
1401         while (cur < total) {
1402                 name_len = btrfs_inode_extref_name_len(eb, extref);
1403                 index = btrfs_inode_extref_index(eb, extref);
1404                 parent = btrfs_inode_extref_parent(eb, extref);
1405                 if (name_len <= BTRFS_NAME_LEN) {
1406                         len = name_len;
1407                         error = 0;
1408                 } else {
1409                         len = BTRFS_NAME_LEN;
1410                         error = REF_ERR_NAME_TOO_LONG;
1411                 }
1412                 read_extent_buffer(eb, namebuf,
1413                                    (unsigned long)(extref + 1), len);
1414                 add_inode_backref(inode_cache, key->objectid, parent,
1415                                   index, namebuf, len, 0, key->type, error);
1416
1417                 len = sizeof(*extref) + name_len;
1418                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1419                 cur += len;
1420         }
1421         return 0;
1422
1423 }
1424
1425 static int process_file_extent(struct btrfs_root *root,
1426                                 struct extent_buffer *eb,
1427                                 int slot, struct btrfs_key *key,
1428                                 struct shared_node *active_node)
1429 {
1430         struct inode_record *rec;
1431         struct btrfs_file_extent_item *fi;
1432         u64 num_bytes = 0;
1433         u64 disk_bytenr = 0;
1434         u64 extent_offset = 0;
1435         u64 mask = root->fs_info->sectorsize - 1;
1436         int extent_type;
1437         int ret;
1438
1439         rec = active_node->current;
1440         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1441         rec->found_file_extent = 1;
1442
1443         if (rec->extent_start == (u64)-1) {
1444                 rec->extent_start = key->offset;
1445                 rec->extent_end = key->offset;
1446         }
1447
1448         if (rec->extent_end > key->offset)
1449                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1450         else if (rec->extent_end < key->offset) {
1451                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1452                                            key->offset - rec->extent_end);
1453                 if (ret < 0)
1454                         return ret;
1455         }
1456
1457         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1458         extent_type = btrfs_file_extent_type(eb, fi);
1459
1460         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1461                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1462                 if (num_bytes == 0)
1463                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1464                 rec->found_size += num_bytes;
1465                 num_bytes = (num_bytes + mask) & ~mask;
1466         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1467                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1468                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1469                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1470                 extent_offset = btrfs_file_extent_offset(eb, fi);
1471                 if (num_bytes == 0 || (num_bytes & mask))
1472                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1473                 if (num_bytes + extent_offset >
1474                     btrfs_file_extent_ram_bytes(eb, fi))
1475                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1476                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1477                     (btrfs_file_extent_compression(eb, fi) ||
1478                      btrfs_file_extent_encryption(eb, fi) ||
1479                      btrfs_file_extent_other_encoding(eb, fi)))
1480                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1481                 if (disk_bytenr > 0)
1482                         rec->found_size += num_bytes;
1483         } else {
1484                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1485         }
1486         rec->extent_end = key->offset + num_bytes;
1487
1488         /*
1489          * The data reloc tree will copy full extents into its inode and then
1490          * copy the corresponding csums.  Because the extent it copied could be
1491          * a preallocated extent that hasn't been written to yet there may be no
1492          * csums to copy, ergo we won't have csums for our file extent.  This is
1493          * ok so just don't bother checking csums if the inode belongs to the
1494          * data reloc tree.
1495          */
1496         if (disk_bytenr > 0 &&
1497             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1498                 u64 found;
1499                 if (btrfs_file_extent_compression(eb, fi))
1500                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1501                 else
1502                         disk_bytenr += extent_offset;
1503
1504                 ret = count_csum_range(root->fs_info, disk_bytenr, num_bytes,
1505                                        &found);
1506                 if (ret < 0)
1507                         return ret;
1508                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1509                         if (found > 0)
1510                                 rec->found_csum_item = 1;
1511                         if (found < num_bytes)
1512                                 rec->some_csum_missing = 1;
1513                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1514                         if (found > 0)
1515                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1516                 }
1517         }
1518         return 0;
1519 }
1520
1521 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1522                             struct walk_control *wc)
1523 {
1524         struct btrfs_key key;
1525         u32 nritems;
1526         int i;
1527         int ret = 0;
1528         struct cache_tree *inode_cache;
1529         struct shared_node *active_node;
1530
1531         if (wc->root_level == wc->active_node &&
1532             btrfs_root_refs(&root->root_item) == 0)
1533                 return 0;
1534
1535         active_node = wc->nodes[wc->active_node];
1536         inode_cache = &active_node->inode_cache;
1537         nritems = btrfs_header_nritems(eb);
1538         for (i = 0; i < nritems; i++) {
1539                 btrfs_item_key_to_cpu(eb, &key, i);
1540
1541                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1542                         continue;
1543                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1544                         continue;
1545
1546                 if (active_node->current == NULL ||
1547                     active_node->current->ino < key.objectid) {
1548                         if (active_node->current) {
1549                                 active_node->current->checked = 1;
1550                                 maybe_free_inode_rec(inode_cache,
1551                                                      active_node->current);
1552                         }
1553                         active_node->current = get_inode_rec(inode_cache,
1554                                                              key.objectid, 1);
1555                         BUG_ON(IS_ERR(active_node->current));
1556                 }
1557                 switch (key.type) {
1558                 case BTRFS_DIR_ITEM_KEY:
1559                 case BTRFS_DIR_INDEX_KEY:
1560                         ret = process_dir_item(eb, i, &key, active_node);
1561                         break;
1562                 case BTRFS_INODE_REF_KEY:
1563                         ret = process_inode_ref(eb, i, &key, active_node);
1564                         break;
1565                 case BTRFS_INODE_EXTREF_KEY:
1566                         ret = process_inode_extref(eb, i, &key, active_node);
1567                         break;
1568                 case BTRFS_INODE_ITEM_KEY:
1569                         ret = process_inode_item(eb, i, &key, active_node);
1570                         break;
1571                 case BTRFS_EXTENT_DATA_KEY:
1572                         ret = process_file_extent(root, eb, i, &key,
1573                                                   active_node);
1574                         break;
1575                 default:
1576                         break;
1577                 };
1578         }
1579         return ret;
1580 }
1581
1582 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1583                           struct walk_control *wc, int *level,
1584                           struct node_refs *nrefs)
1585 {
1586         enum btrfs_tree_block_status status;
1587         u64 bytenr;
1588         u64 ptr_gen;
1589         struct btrfs_fs_info *fs_info = root->fs_info;
1590         struct extent_buffer *next;
1591         struct extent_buffer *cur;
1592         int ret, err = 0;
1593         u64 refs;
1594
1595         WARN_ON(*level < 0);
1596         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1597
1598         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1599                 refs = nrefs->refs[*level];
1600                 ret = 0;
1601         } else {
1602                 ret = btrfs_lookup_extent_info(NULL, root,
1603                                        path->nodes[*level]->start,
1604                                        *level, 1, &refs, NULL);
1605                 if (ret < 0) {
1606                         err = ret;
1607                         goto out;
1608                 }
1609                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1610                 nrefs->refs[*level] = refs;
1611         }
1612
1613         if (refs > 1) {
1614                 ret = enter_shared_node(root, path->nodes[*level]->start,
1615                                         refs, wc, *level);
1616                 if (ret > 0) {
1617                         err = ret;
1618                         goto out;
1619                 }
1620         }
1621
1622         while (*level >= 0) {
1623                 WARN_ON(*level < 0);
1624                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1625                 cur = path->nodes[*level];
1626
1627                 if (btrfs_header_level(cur) != *level)
1628                         WARN_ON(1);
1629
1630                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1631                         break;
1632                 if (*level == 0) {
1633                         ret = process_one_leaf(root, cur, wc);
1634                         if (ret < 0)
1635                                 err = ret;
1636                         break;
1637                 }
1638                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1639                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1640
1641                 if (bytenr == nrefs->bytenr[*level - 1]) {
1642                         refs = nrefs->refs[*level - 1];
1643                 } else {
1644                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1645                                         *level - 1, 1, &refs, NULL);
1646                         if (ret < 0) {
1647                                 refs = 0;
1648                         } else {
1649                                 nrefs->bytenr[*level - 1] = bytenr;
1650                                 nrefs->refs[*level - 1] = refs;
1651                         }
1652                 }
1653
1654                 if (refs > 1) {
1655                         ret = enter_shared_node(root, bytenr, refs,
1656                                                 wc, *level - 1);
1657                         if (ret > 0) {
1658                                 path->slots[*level]++;
1659                                 continue;
1660                         }
1661                 }
1662
1663                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
1664                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1665                         free_extent_buffer(next);
1666                         reada_walk_down(root, cur, path->slots[*level]);
1667                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
1668                         if (!extent_buffer_uptodate(next)) {
1669                                 struct btrfs_key node_key;
1670
1671                                 btrfs_node_key_to_cpu(path->nodes[*level],
1672                                                       &node_key,
1673                                                       path->slots[*level]);
1674                                 btrfs_add_corrupt_extent_record(root->fs_info,
1675                                                 &node_key,
1676                                                 path->nodes[*level]->start,
1677                                                 root->fs_info->nodesize,
1678                                                 *level);
1679                                 err = -EIO;
1680                                 goto out;
1681                         }
1682                 }
1683
1684                 ret = check_child_node(cur, path->slots[*level], next);
1685                 if (ret) {
1686                         free_extent_buffer(next);
1687                         err = ret;
1688                         goto out;
1689                 }
1690
1691                 if (btrfs_is_leaf(next))
1692                         status = btrfs_check_leaf(root, NULL, next);
1693                 else
1694                         status = btrfs_check_node(root, NULL, next);
1695                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1696                         free_extent_buffer(next);
1697                         err = -EIO;
1698                         goto out;
1699                 }
1700
1701                 *level = *level - 1;
1702                 free_extent_buffer(path->nodes[*level]);
1703                 path->nodes[*level] = next;
1704                 path->slots[*level] = 0;
1705         }
1706 out:
1707         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1708         return err;
1709 }
1710
1711 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1712                         struct walk_control *wc, int *level)
1713 {
1714         int i;
1715         struct extent_buffer *leaf;
1716
1717         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1718                 leaf = path->nodes[i];
1719                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1720                         path->slots[i]++;
1721                         *level = i;
1722                         return 0;
1723                 }
1724                 free_extent_buffer(path->nodes[*level]);
1725                 path->nodes[*level] = NULL;
1726                 BUG_ON(*level > wc->active_node);
1727                 if (*level == wc->active_node)
1728                         leave_shared_node(root, wc, *level);
1729                 *level = i + 1;
1730         }
1731         return 1;
1732 }
1733
1734 static int check_root_dir(struct inode_record *rec)
1735 {
1736         struct inode_backref *backref;
1737         int ret = -1;
1738
1739         if (!rec->found_inode_item || rec->errors)
1740                 goto out;
1741         if (rec->nlink != 1 || rec->found_link != 0)
1742                 goto out;
1743         if (list_empty(&rec->backrefs))
1744                 goto out;
1745         backref = to_inode_backref(rec->backrefs.next);
1746         if (!backref->found_inode_ref)
1747                 goto out;
1748         if (backref->index != 0 || backref->namelen != 2 ||
1749             memcmp(backref->name, "..", 2))
1750                 goto out;
1751         if (backref->found_dir_index || backref->found_dir_item)
1752                 goto out;
1753         ret = 0;
1754 out:
1755         return ret;
1756 }
1757
1758 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1759                               struct btrfs_root *root, struct btrfs_path *path,
1760                               struct inode_record *rec)
1761 {
1762         struct btrfs_inode_item *ei;
1763         struct btrfs_key key;
1764         int ret;
1765
1766         key.objectid = rec->ino;
1767         key.type = BTRFS_INODE_ITEM_KEY;
1768         key.offset = (u64)-1;
1769
1770         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1771         if (ret < 0)
1772                 goto out;
1773         if (ret) {
1774                 if (!path->slots[0]) {
1775                         ret = -ENOENT;
1776                         goto out;
1777                 }
1778                 path->slots[0]--;
1779                 ret = 0;
1780         }
1781         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1782         if (key.objectid != rec->ino) {
1783                 ret = -ENOENT;
1784                 goto out;
1785         }
1786
1787         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1788                             struct btrfs_inode_item);
1789         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1790         btrfs_mark_buffer_dirty(path->nodes[0]);
1791         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1792         printf("reset isize for dir %llu root %llu\n", rec->ino,
1793                root->root_key.objectid);
1794 out:
1795         btrfs_release_path(path);
1796         return ret;
1797 }
1798
1799 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1800                                     struct btrfs_root *root,
1801                                     struct btrfs_path *path,
1802                                     struct inode_record *rec)
1803 {
1804         int ret;
1805
1806         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
1807         btrfs_release_path(path);
1808         if (!ret)
1809                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1810         return ret;
1811 }
1812
1813 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
1814                                struct btrfs_root *root,
1815                                struct btrfs_path *path,
1816                                struct inode_record *rec)
1817 {
1818         struct btrfs_inode_item *ei;
1819         struct btrfs_key key;
1820         int ret = 0;
1821
1822         key.objectid = rec->ino;
1823         key.type = BTRFS_INODE_ITEM_KEY;
1824         key.offset = 0;
1825
1826         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1827         if (ret) {
1828                 if (ret > 0)
1829                         ret = -ENOENT;
1830                 goto out;
1831         }
1832
1833         /* Since ret == 0, no need to check anything */
1834         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1835                             struct btrfs_inode_item);
1836         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
1837         btrfs_mark_buffer_dirty(path->nodes[0]);
1838         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
1839         printf("reset nbytes for ino %llu root %llu\n",
1840                rec->ino, root->root_key.objectid);
1841 out:
1842         btrfs_release_path(path);
1843         return ret;
1844 }
1845
1846 static int add_missing_dir_index(struct btrfs_root *root,
1847                                  struct cache_tree *inode_cache,
1848                                  struct inode_record *rec,
1849                                  struct inode_backref *backref)
1850 {
1851         struct btrfs_path path;
1852         struct btrfs_trans_handle *trans;
1853         struct btrfs_dir_item *dir_item;
1854         struct extent_buffer *leaf;
1855         struct btrfs_key key;
1856         struct btrfs_disk_key disk_key;
1857         struct inode_record *dir_rec;
1858         unsigned long name_ptr;
1859         u32 data_size = sizeof(*dir_item) + backref->namelen;
1860         int ret;
1861
1862         trans = btrfs_start_transaction(root, 1);
1863         if (IS_ERR(trans))
1864                 return PTR_ERR(trans);
1865
1866         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
1867                 (unsigned long long)rec->ino);
1868
1869         btrfs_init_path(&path);
1870         key.objectid = backref->dir;
1871         key.type = BTRFS_DIR_INDEX_KEY;
1872         key.offset = backref->index;
1873         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
1874         BUG_ON(ret);
1875
1876         leaf = path.nodes[0];
1877         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
1878
1879         disk_key.objectid = cpu_to_le64(rec->ino);
1880         disk_key.type = BTRFS_INODE_ITEM_KEY;
1881         disk_key.offset = 0;
1882
1883         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
1884         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
1885         btrfs_set_dir_data_len(leaf, dir_item, 0);
1886         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
1887         name_ptr = (unsigned long)(dir_item + 1);
1888         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
1889         btrfs_mark_buffer_dirty(leaf);
1890         btrfs_release_path(&path);
1891         btrfs_commit_transaction(trans, root);
1892
1893         backref->found_dir_index = 1;
1894         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
1895         BUG_ON(IS_ERR(dir_rec));
1896         if (!dir_rec)
1897                 return 0;
1898         dir_rec->found_size += backref->namelen;
1899         if (dir_rec->found_size == dir_rec->isize &&
1900             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
1901                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1902         if (dir_rec->found_size != dir_rec->isize)
1903                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1904
1905         return 0;
1906 }
1907
1908 static int delete_dir_index(struct btrfs_root *root,
1909                             struct inode_backref *backref)
1910 {
1911         struct btrfs_trans_handle *trans;
1912         struct btrfs_dir_item *di;
1913         struct btrfs_path path;
1914         int ret = 0;
1915
1916         trans = btrfs_start_transaction(root, 1);
1917         if (IS_ERR(trans))
1918                 return PTR_ERR(trans);
1919
1920         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
1921                 (unsigned long long)backref->dir,
1922                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
1923                 (unsigned long long)root->objectid);
1924
1925         btrfs_init_path(&path);
1926         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
1927                                     backref->name, backref->namelen,
1928                                     backref->index, -1);
1929         if (IS_ERR(di)) {
1930                 ret = PTR_ERR(di);
1931                 btrfs_release_path(&path);
1932                 btrfs_commit_transaction(trans, root);
1933                 if (ret == -ENOENT)
1934                         return 0;
1935                 return ret;
1936         }
1937
1938         if (!di)
1939                 ret = btrfs_del_item(trans, root, &path);
1940         else
1941                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
1942         BUG_ON(ret);
1943         btrfs_release_path(&path);
1944         btrfs_commit_transaction(trans, root);
1945         return ret;
1946 }
1947
1948 static int create_inode_item(struct btrfs_root *root,
1949                              struct inode_record *rec, int root_dir)
1950 {
1951         struct btrfs_trans_handle *trans;
1952         u64 nlink = 0;
1953         u32 mode = 0;
1954         u64 size = 0;
1955         int ret;
1956
1957         trans = btrfs_start_transaction(root, 1);
1958         if (IS_ERR(trans)) {
1959                 ret = PTR_ERR(trans);
1960                 return ret;
1961         }
1962
1963         nlink = root_dir ? 1 : rec->found_link;
1964         if (rec->found_dir_item) {
1965                 if (rec->found_file_extent)
1966                         fprintf(stderr, "root %llu inode %llu has both a dir "
1967                                 "item and extents, unsure if it is a dir or a "
1968                                 "regular file so setting it as a directory\n",
1969                                 (unsigned long long)root->objectid,
1970                                 (unsigned long long)rec->ino);
1971                 mode = S_IFDIR | 0755;
1972                 size = rec->found_size;
1973         } else if (!rec->found_dir_item) {
1974                 size = rec->extent_end;
1975                 mode =  S_IFREG | 0755;
1976         }
1977
1978         ret = insert_inode_item(trans, root, rec->ino, size, rec->nbytes,
1979                                   nlink, mode);
1980         btrfs_commit_transaction(trans, root);
1981         return 0;
1982 }
1983
1984 static int repair_inode_backrefs(struct btrfs_root *root,
1985                                  struct inode_record *rec,
1986                                  struct cache_tree *inode_cache,
1987                                  int delete)
1988 {
1989         struct inode_backref *tmp, *backref;
1990         u64 root_dirid = btrfs_root_dirid(&root->root_item);
1991         int ret = 0;
1992         int repaired = 0;
1993
1994         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1995                 if (!delete && rec->ino == root_dirid) {
1996                         if (!rec->found_inode_item) {
1997                                 ret = create_inode_item(root, rec, 1);
1998                                 if (ret)
1999                                         break;
2000                                 repaired++;
2001                         }
2002                 }
2003
2004                 /* Index 0 for root dir's are special, don't mess with it */
2005                 if (rec->ino == root_dirid && backref->index == 0)
2006                         continue;
2007
2008                 if (delete &&
2009                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2010                      (backref->found_dir_index && backref->found_inode_ref &&
2011                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2012                         ret = delete_dir_index(root, backref);
2013                         if (ret)
2014                                 break;
2015                         repaired++;
2016                         list_del(&backref->list);
2017                         free(backref);
2018                         continue;
2019                 }
2020
2021                 if (!delete && !backref->found_dir_index &&
2022                     backref->found_dir_item && backref->found_inode_ref) {
2023                         ret = add_missing_dir_index(root, inode_cache, rec,
2024                                                     backref);
2025                         if (ret)
2026                                 break;
2027                         repaired++;
2028                         if (backref->found_dir_item &&
2029                             backref->found_dir_index) {
2030                                 if (!backref->errors &&
2031                                     backref->found_inode_ref) {
2032                                         list_del(&backref->list);
2033                                         free(backref);
2034                                         continue;
2035                                 }
2036                         }
2037                 }
2038
2039                 if (!delete && (!backref->found_dir_index &&
2040                                 !backref->found_dir_item &&
2041                                 backref->found_inode_ref)) {
2042                         struct btrfs_trans_handle *trans;
2043                         struct btrfs_key location;
2044
2045                         ret = check_dir_conflict(root, backref->name,
2046                                                  backref->namelen,
2047                                                  backref->dir,
2048                                                  backref->index);
2049                         if (ret) {
2050                                 /*
2051                                  * let nlink fixing routine to handle it,
2052                                  * which can do it better.
2053                                  */
2054                                 ret = 0;
2055                                 break;
2056                         }
2057                         location.objectid = rec->ino;
2058                         location.type = BTRFS_INODE_ITEM_KEY;
2059                         location.offset = 0;
2060
2061                         trans = btrfs_start_transaction(root, 1);
2062                         if (IS_ERR(trans)) {
2063                                 ret = PTR_ERR(trans);
2064                                 break;
2065                         }
2066                         fprintf(stderr, "adding missing dir index/item pair "
2067                                 "for inode %llu\n",
2068                                 (unsigned long long)rec->ino);
2069                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2070                                                     backref->namelen,
2071                                                     backref->dir, &location,
2072                                                     imode_to_type(rec->imode),
2073                                                     backref->index);
2074                         BUG_ON(ret);
2075                         btrfs_commit_transaction(trans, root);
2076                         repaired++;
2077                 }
2078
2079                 if (!delete && (backref->found_inode_ref &&
2080                                 backref->found_dir_index &&
2081                                 backref->found_dir_item &&
2082                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2083                                 !rec->found_inode_item)) {
2084                         ret = create_inode_item(root, rec, 0);
2085                         if (ret)
2086                                 break;
2087                         repaired++;
2088                 }
2089
2090         }
2091         return ret ? ret : repaired;
2092 }
2093
2094 /*
2095  * To determine the file type for nlink/inode_item repair
2096  *
2097  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2098  * Return -ENOENT if file type is not found.
2099  */
2100 static int find_file_type(struct inode_record *rec, u8 *type)
2101 {
2102         struct inode_backref *backref;
2103
2104         /* For inode item recovered case */
2105         if (rec->found_inode_item) {
2106                 *type = imode_to_type(rec->imode);
2107                 return 0;
2108         }
2109
2110         list_for_each_entry(backref, &rec->backrefs, list) {
2111                 if (backref->found_dir_index || backref->found_dir_item) {
2112                         *type = backref->filetype;
2113                         return 0;
2114                 }
2115         }
2116         return -ENOENT;
2117 }
2118
2119 /*
2120  * To determine the file name for nlink repair
2121  *
2122  * Return 0 if file name is found, set name and namelen.
2123  * Return -ENOENT if file name is not found.
2124  */
2125 static int find_file_name(struct inode_record *rec,
2126                           char *name, int *namelen)
2127 {
2128         struct inode_backref *backref;
2129
2130         list_for_each_entry(backref, &rec->backrefs, list) {
2131                 if (backref->found_dir_index || backref->found_dir_item ||
2132                     backref->found_inode_ref) {
2133                         memcpy(name, backref->name, backref->namelen);
2134                         *namelen = backref->namelen;
2135                         return 0;
2136                 }
2137         }
2138         return -ENOENT;
2139 }
2140
2141 /* Reset the nlink of the inode to the correct one */
2142 static int reset_nlink(struct btrfs_trans_handle *trans,
2143                        struct btrfs_root *root,
2144                        struct btrfs_path *path,
2145                        struct inode_record *rec)
2146 {
2147         struct inode_backref *backref;
2148         struct inode_backref *tmp;
2149         struct btrfs_key key;
2150         struct btrfs_inode_item *inode_item;
2151         int ret = 0;
2152
2153         /* We don't believe this either, reset it and iterate backref */
2154         rec->found_link = 0;
2155
2156         /* Remove all backref including the valid ones */
2157         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2158                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2159                                    backref->index, backref->name,
2160                                    backref->namelen, 0);
2161                 if (ret < 0)
2162                         goto out;
2163
2164                 /* remove invalid backref, so it won't be added back */
2165                 if (!(backref->found_dir_index &&
2166                       backref->found_dir_item &&
2167                       backref->found_inode_ref)) {
2168                         list_del(&backref->list);
2169                         free(backref);
2170                 } else {
2171                         rec->found_link++;
2172                 }
2173         }
2174
2175         /* Set nlink to 0 */
2176         key.objectid = rec->ino;
2177         key.type = BTRFS_INODE_ITEM_KEY;
2178         key.offset = 0;
2179         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2180         if (ret < 0)
2181                 goto out;
2182         if (ret > 0) {
2183                 ret = -ENOENT;
2184                 goto out;
2185         }
2186         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2187                                     struct btrfs_inode_item);
2188         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2189         btrfs_mark_buffer_dirty(path->nodes[0]);
2190         btrfs_release_path(path);
2191
2192         /*
2193          * Add back valid inode_ref/dir_item/dir_index,
2194          * add_link() will handle the nlink inc, so new nlink must be correct
2195          */
2196         list_for_each_entry(backref, &rec->backrefs, list) {
2197                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2198                                      backref->name, backref->namelen,
2199                                      backref->filetype, &backref->index, 1, 0);
2200                 if (ret < 0)
2201                         goto out;
2202         }
2203 out:
2204         btrfs_release_path(path);
2205         return ret;
2206 }
2207
2208 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2209                                struct btrfs_root *root,
2210                                struct btrfs_path *path,
2211                                struct inode_record *rec)
2212 {
2213         char namebuf[BTRFS_NAME_LEN] = {0};
2214         u8 type = 0;
2215         int namelen = 0;
2216         int name_recovered = 0;
2217         int type_recovered = 0;
2218         int ret = 0;
2219
2220         /*
2221          * Get file name and type first before these invalid inode ref
2222          * are deleted by remove_all_invalid_backref()
2223          */
2224         name_recovered = !find_file_name(rec, namebuf, &namelen);
2225         type_recovered = !find_file_type(rec, &type);
2226
2227         if (!name_recovered) {
2228                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2229                        rec->ino, rec->ino);
2230                 namelen = count_digits(rec->ino);
2231                 sprintf(namebuf, "%llu", rec->ino);
2232                 name_recovered = 1;
2233         }
2234         if (!type_recovered) {
2235                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2236                        rec->ino);
2237                 type = BTRFS_FT_REG_FILE;
2238                 type_recovered = 1;
2239         }
2240
2241         ret = reset_nlink(trans, root, path, rec);
2242         if (ret < 0) {
2243                 fprintf(stderr,
2244                         "Failed to reset nlink for inode %llu: %s\n",
2245                         rec->ino, strerror(-ret));
2246                 goto out;
2247         }
2248
2249         if (rec->found_link == 0) {
2250                 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
2251                                               namebuf, namelen, type,
2252                                               (u64 *)&rec->found_link);
2253                 if (ret)
2254                         goto out;
2255         }
2256         printf("Fixed the nlink of inode %llu\n", rec->ino);
2257 out:
2258         /*
2259          * Clear the flag anyway, or we will loop forever for the same inode
2260          * as it will not be removed from the bad inode list and the dead loop
2261          * happens.
2262          */
2263         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2264         btrfs_release_path(path);
2265         return ret;
2266 }
2267
2268 /*
2269  * Check if there is any normal(reg or prealloc) file extent for given
2270  * ino.
2271  * This is used to determine the file type when neither its dir_index/item or
2272  * inode_item exists.
2273  *
2274  * This will *NOT* report error, if any error happens, just consider it does
2275  * not have any normal file extent.
2276  */
2277 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2278 {
2279         struct btrfs_path path;
2280         struct btrfs_key key;
2281         struct btrfs_key found_key;
2282         struct btrfs_file_extent_item *fi;
2283         u8 type;
2284         int ret = 0;
2285
2286         btrfs_init_path(&path);
2287         key.objectid = ino;
2288         key.type = BTRFS_EXTENT_DATA_KEY;
2289         key.offset = 0;
2290
2291         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2292         if (ret < 0) {
2293                 ret = 0;
2294                 goto out;
2295         }
2296         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2297                 ret = btrfs_next_leaf(root, &path);
2298                 if (ret) {
2299                         ret = 0;
2300                         goto out;
2301                 }
2302         }
2303         while (1) {
2304                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2305                                       path.slots[0]);
2306                 if (found_key.objectid != ino ||
2307                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2308                         break;
2309                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2310                                     struct btrfs_file_extent_item);
2311                 type = btrfs_file_extent_type(path.nodes[0], fi);
2312                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2313                         ret = 1;
2314                         goto out;
2315                 }
2316         }
2317 out:
2318         btrfs_release_path(&path);
2319         return ret;
2320 }
2321
2322 static u32 btrfs_type_to_imode(u8 type)
2323 {
2324         static u32 imode_by_btrfs_type[] = {
2325                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2326                 [BTRFS_FT_DIR]          = S_IFDIR,
2327                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2328                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2329                 [BTRFS_FT_FIFO]         = S_IFIFO,
2330                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2331                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2332         };
2333
2334         return imode_by_btrfs_type[(type)];
2335 }
2336
2337 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2338                                 struct btrfs_root *root,
2339                                 struct btrfs_path *path,
2340                                 struct inode_record *rec)
2341 {
2342         u8 filetype;
2343         u32 mode = 0700;
2344         int type_recovered = 0;
2345         int ret = 0;
2346
2347         printf("Trying to rebuild inode:%llu\n", rec->ino);
2348
2349         type_recovered = !find_file_type(rec, &filetype);
2350
2351         /*
2352          * Try to determine inode type if type not found.
2353          *
2354          * For found regular file extent, it must be FILE.
2355          * For found dir_item/index, it must be DIR.
2356          *
2357          * For undetermined one, use FILE as fallback.
2358          *
2359          * TODO:
2360          * 1. If found backref(inode_index/item is already handled) to it,
2361          *    it must be DIR.
2362          *    Need new inode-inode ref structure to allow search for that.
2363          */
2364         if (!type_recovered) {
2365                 if (rec->found_file_extent &&
2366                     find_normal_file_extent(root, rec->ino)) {
2367                         type_recovered = 1;
2368                         filetype = BTRFS_FT_REG_FILE;
2369                 } else if (rec->found_dir_item) {
2370                         type_recovered = 1;
2371                         filetype = BTRFS_FT_DIR;
2372                 } else if (!list_empty(&rec->orphan_extents)) {
2373                         type_recovered = 1;
2374                         filetype = BTRFS_FT_REG_FILE;
2375                 } else{
2376                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2377                                rec->ino);
2378                         type_recovered = 1;
2379                         filetype = BTRFS_FT_REG_FILE;
2380                 }
2381         }
2382
2383         ret = btrfs_new_inode(trans, root, rec->ino,
2384                               mode | btrfs_type_to_imode(filetype));
2385         if (ret < 0)
2386                 goto out;
2387
2388         /*
2389          * Here inode rebuild is done, we only rebuild the inode item,
2390          * don't repair the nlink(like move to lost+found).
2391          * That is the job of nlink repair.
2392          *
2393          * We just fill the record and return
2394          */
2395         rec->found_dir_item = 1;
2396         rec->imode = mode | btrfs_type_to_imode(filetype);
2397         rec->nlink = 0;
2398         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2399         /* Ensure the inode_nlinks repair function will be called */
2400         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2401 out:
2402         return ret;
2403 }
2404
2405 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2406                                       struct btrfs_root *root,
2407                                       struct btrfs_path *path,
2408                                       struct inode_record *rec)
2409 {
2410         struct orphan_data_extent *orphan;
2411         struct orphan_data_extent *tmp;
2412         int ret = 0;
2413
2414         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2415                 /*
2416                  * Check for conflicting file extents
2417                  *
2418                  * Here we don't know whether the extents is compressed or not,
2419                  * so we can only assume it not compressed nor data offset,
2420                  * and use its disk_len as extent length.
2421                  */
2422                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2423                                        orphan->offset, orphan->disk_len, 0);
2424                 btrfs_release_path(path);
2425                 if (ret < 0)
2426                         goto out;
2427                 if (!ret) {
2428                         fprintf(stderr,
2429                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2430                                 orphan->disk_bytenr, orphan->disk_len);
2431                         ret = btrfs_free_extent(trans,
2432                                         root->fs_info->extent_root,
2433                                         orphan->disk_bytenr, orphan->disk_len,
2434                                         0, root->objectid, orphan->objectid,
2435                                         orphan->offset);
2436                         if (ret < 0)
2437                                 goto out;
2438                 }
2439                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2440                                 orphan->offset, orphan->disk_bytenr,
2441                                 orphan->disk_len, orphan->disk_len);
2442                 if (ret < 0)
2443                         goto out;
2444
2445                 /* Update file size info */
2446                 rec->found_size += orphan->disk_len;
2447                 if (rec->found_size == rec->nbytes)
2448                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2449
2450                 /* Update the file extent hole info too */
2451                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2452                                            orphan->disk_len);
2453                 if (ret < 0)
2454                         goto out;
2455                 if (RB_EMPTY_ROOT(&rec->holes))
2456                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2457
2458                 list_del(&orphan->list);
2459                 free(orphan);
2460         }
2461         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2462 out:
2463         return ret;
2464 }
2465
2466 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2467                                         struct btrfs_root *root,
2468                                         struct btrfs_path *path,
2469                                         struct inode_record *rec)
2470 {
2471         struct rb_node *node;
2472         struct file_extent_hole *hole;
2473         int found = 0;
2474         int ret = 0;
2475
2476         node = rb_first(&rec->holes);
2477
2478         while (node) {
2479                 found = 1;
2480                 hole = rb_entry(node, struct file_extent_hole, node);
2481                 ret = btrfs_punch_hole(trans, root, rec->ino,
2482                                        hole->start, hole->len);
2483                 if (ret < 0)
2484                         goto out;
2485                 ret = del_file_extent_hole(&rec->holes, hole->start,
2486                                            hole->len);
2487                 if (ret < 0)
2488                         goto out;
2489                 if (RB_EMPTY_ROOT(&rec->holes))
2490                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2491                 node = rb_first(&rec->holes);
2492         }
2493         /* special case for a file losing all its file extent */
2494         if (!found) {
2495                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2496                                        round_up(rec->isize,
2497                                                 root->fs_info->sectorsize));
2498                 if (ret < 0)
2499                         goto out;
2500         }
2501         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2502                rec->ino, root->objectid);
2503 out:
2504         return ret;
2505 }
2506
2507 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2508 {
2509         struct btrfs_trans_handle *trans;
2510         struct btrfs_path path;
2511         int ret = 0;
2512
2513         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2514                              I_ERR_NO_ORPHAN_ITEM |
2515                              I_ERR_LINK_COUNT_WRONG |
2516                              I_ERR_NO_INODE_ITEM |
2517                              I_ERR_FILE_EXTENT_ORPHAN |
2518                              I_ERR_FILE_EXTENT_DISCOUNT|
2519                              I_ERR_FILE_NBYTES_WRONG)))
2520                 return rec->errors;
2521
2522         /*
2523          * For nlink repair, it may create a dir and add link, so
2524          * 2 for parent(256)'s dir_index and dir_item
2525          * 2 for lost+found dir's inode_item and inode_ref
2526          * 1 for the new inode_ref of the file
2527          * 2 for lost+found dir's dir_index and dir_item for the file
2528          */
2529         trans = btrfs_start_transaction(root, 7);
2530         if (IS_ERR(trans))
2531                 return PTR_ERR(trans);
2532
2533         btrfs_init_path(&path);
2534         if (rec->errors & I_ERR_NO_INODE_ITEM)
2535                 ret = repair_inode_no_item(trans, root, &path, rec);
2536         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2537                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2538         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2539                 ret = repair_inode_discount_extent(trans, root, &path, rec);
2540         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2541                 ret = repair_inode_isize(trans, root, &path, rec);
2542         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2543                 ret = repair_inode_orphan_item(trans, root, &path, rec);
2544         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2545                 ret = repair_inode_nlinks(trans, root, &path, rec);
2546         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2547                 ret = repair_inode_nbytes(trans, root, &path, rec);
2548         btrfs_commit_transaction(trans, root);
2549         btrfs_release_path(&path);
2550         return ret;
2551 }
2552
2553 static int check_inode_recs(struct btrfs_root *root,
2554                             struct cache_tree *inode_cache)
2555 {
2556         struct cache_extent *cache;
2557         struct ptr_node *node;
2558         struct inode_record *rec;
2559         struct inode_backref *backref;
2560         int stage = 0;
2561         int ret = 0;
2562         int err = 0;
2563         u64 error = 0;
2564         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2565
2566         if (btrfs_root_refs(&root->root_item) == 0) {
2567                 if (!cache_tree_empty(inode_cache))
2568                         fprintf(stderr, "warning line %d\n", __LINE__);
2569                 return 0;
2570         }
2571
2572         /*
2573          * We need to repair backrefs first because we could change some of the
2574          * errors in the inode recs.
2575          *
2576          * We also need to go through and delete invalid backrefs first and then
2577          * add the correct ones second.  We do this because we may get EEXIST
2578          * when adding back the correct index because we hadn't yet deleted the
2579          * invalid index.
2580          *
2581          * For example, if we were missing a dir index then the directories
2582          * isize would be wrong, so if we fixed the isize to what we thought it
2583          * would be and then fixed the backref we'd still have a invalid fs, so
2584          * we need to add back the dir index and then check to see if the isize
2585          * is still wrong.
2586          */
2587         while (stage < 3) {
2588                 stage++;
2589                 if (stage == 3 && !err)
2590                         break;
2591
2592                 cache = search_cache_extent(inode_cache, 0);
2593                 while (repair && cache) {
2594                         node = container_of(cache, struct ptr_node, cache);
2595                         rec = node->data;
2596                         cache = next_cache_extent(cache);
2597
2598                         /* Need to free everything up and rescan */
2599                         if (stage == 3) {
2600                                 remove_cache_extent(inode_cache, &node->cache);
2601                                 free(node);
2602                                 free_inode_rec(rec);
2603                                 continue;
2604                         }
2605
2606                         if (list_empty(&rec->backrefs))
2607                                 continue;
2608
2609                         ret = repair_inode_backrefs(root, rec, inode_cache,
2610                                                     stage == 1);
2611                         if (ret < 0) {
2612                                 err = ret;
2613                                 stage = 2;
2614                                 break;
2615                         } if (ret > 0) {
2616                                 err = -EAGAIN;
2617                         }
2618                 }
2619         }
2620         if (err)
2621                 return err;
2622
2623         rec = get_inode_rec(inode_cache, root_dirid, 0);
2624         BUG_ON(IS_ERR(rec));
2625         if (rec) {
2626                 ret = check_root_dir(rec);
2627                 if (ret) {
2628                         fprintf(stderr, "root %llu root dir %llu error\n",
2629                                 (unsigned long long)root->root_key.objectid,
2630                                 (unsigned long long)root_dirid);
2631                         print_inode_error(root, rec);
2632                         error++;
2633                 }
2634         } else {
2635                 if (repair) {
2636                         struct btrfs_trans_handle *trans;
2637
2638                         trans = btrfs_start_transaction(root, 1);
2639                         if (IS_ERR(trans)) {
2640                                 err = PTR_ERR(trans);
2641                                 return err;
2642                         }
2643
2644                         fprintf(stderr,
2645                                 "root %llu missing its root dir, recreating\n",
2646                                 (unsigned long long)root->objectid);
2647
2648                         ret = btrfs_make_root_dir(trans, root, root_dirid);
2649                         BUG_ON(ret);
2650
2651                         btrfs_commit_transaction(trans, root);
2652                         return -EAGAIN;
2653                 }
2654
2655                 fprintf(stderr, "root %llu root dir %llu not found\n",
2656                         (unsigned long long)root->root_key.objectid,
2657                         (unsigned long long)root_dirid);
2658         }
2659
2660         while (1) {
2661                 cache = search_cache_extent(inode_cache, 0);
2662                 if (!cache)
2663                         break;
2664                 node = container_of(cache, struct ptr_node, cache);
2665                 rec = node->data;
2666                 remove_cache_extent(inode_cache, &node->cache);
2667                 free(node);
2668                 if (rec->ino == root_dirid ||
2669                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
2670                         free_inode_rec(rec);
2671                         continue;
2672                 }
2673
2674                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2675                         ret = check_orphan_item(root, rec->ino);
2676                         if (ret == 0)
2677                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2678                         if (can_free_inode_rec(rec)) {
2679                                 free_inode_rec(rec);
2680                                 continue;
2681                         }
2682                 }
2683
2684                 if (!rec->found_inode_item)
2685                         rec->errors |= I_ERR_NO_INODE_ITEM;
2686                 if (rec->found_link != rec->nlink)
2687                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2688                 if (repair) {
2689                         ret = try_repair_inode(root, rec);
2690                         if (ret == 0 && can_free_inode_rec(rec)) {
2691                                 free_inode_rec(rec);
2692                                 continue;
2693                         }
2694                         ret = 0;
2695                 }
2696
2697                 if (!(repair && ret == 0))
2698                         error++;
2699                 print_inode_error(root, rec);
2700                 list_for_each_entry(backref, &rec->backrefs, list) {
2701                         if (!backref->found_dir_item)
2702                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
2703                         if (!backref->found_dir_index)
2704                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
2705                         if (!backref->found_inode_ref)
2706                                 backref->errors |= REF_ERR_NO_INODE_REF;
2707                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
2708                                 " namelen %u name %s filetype %d errors %x",
2709                                 (unsigned long long)backref->dir,
2710                                 (unsigned long long)backref->index,
2711                                 backref->namelen, backref->name,
2712                                 backref->filetype, backref->errors);
2713                         print_ref_error(backref->errors);
2714                 }
2715                 free_inode_rec(rec);
2716         }
2717         return (error > 0) ? -1 : 0;
2718 }
2719
2720 static struct root_record *get_root_rec(struct cache_tree *root_cache,
2721                                         u64 objectid)
2722 {
2723         struct cache_extent *cache;
2724         struct root_record *rec = NULL;
2725         int ret;
2726
2727         cache = lookup_cache_extent(root_cache, objectid, 1);
2728         if (cache) {
2729                 rec = container_of(cache, struct root_record, cache);
2730         } else {
2731                 rec = calloc(1, sizeof(*rec));
2732                 if (!rec)
2733                         return ERR_PTR(-ENOMEM);
2734                 rec->objectid = objectid;
2735                 INIT_LIST_HEAD(&rec->backrefs);
2736                 rec->cache.start = objectid;
2737                 rec->cache.size = 1;
2738
2739                 ret = insert_cache_extent(root_cache, &rec->cache);
2740                 if (ret)
2741                         return ERR_PTR(-EEXIST);
2742         }
2743         return rec;
2744 }
2745
2746 static struct root_backref *get_root_backref(struct root_record *rec,
2747                                              u64 ref_root, u64 dir, u64 index,
2748                                              const char *name, int namelen)
2749 {
2750         struct root_backref *backref;
2751
2752         list_for_each_entry(backref, &rec->backrefs, list) {
2753                 if (backref->ref_root != ref_root || backref->dir != dir ||
2754                     backref->namelen != namelen)
2755                         continue;
2756                 if (memcmp(name, backref->name, namelen))
2757                         continue;
2758                 return backref;
2759         }
2760
2761         backref = calloc(1, sizeof(*backref) + namelen + 1);
2762         if (!backref)
2763                 return NULL;
2764         backref->ref_root = ref_root;
2765         backref->dir = dir;
2766         backref->index = index;
2767         backref->namelen = namelen;
2768         memcpy(backref->name, name, namelen);
2769         backref->name[namelen] = '\0';
2770         list_add_tail(&backref->list, &rec->backrefs);
2771         return backref;
2772 }
2773
2774 static void free_root_record(struct cache_extent *cache)
2775 {
2776         struct root_record *rec;
2777         struct root_backref *backref;
2778
2779         rec = container_of(cache, struct root_record, cache);
2780         while (!list_empty(&rec->backrefs)) {
2781                 backref = to_root_backref(rec->backrefs.next);
2782                 list_del(&backref->list);
2783                 free(backref);
2784         }
2785
2786         free(rec);
2787 }
2788
2789 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
2790
2791 static int add_root_backref(struct cache_tree *root_cache,
2792                             u64 root_id, u64 ref_root, u64 dir, u64 index,
2793                             const char *name, int namelen,
2794                             int item_type, int errors)
2795 {
2796         struct root_record *rec;
2797         struct root_backref *backref;
2798
2799         rec = get_root_rec(root_cache, root_id);
2800         BUG_ON(IS_ERR(rec));
2801         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
2802         BUG_ON(!backref);
2803
2804         backref->errors |= errors;
2805
2806         if (item_type != BTRFS_DIR_ITEM_KEY) {
2807                 if (backref->found_dir_index || backref->found_back_ref ||
2808                     backref->found_forward_ref) {
2809                         if (backref->index != index)
2810                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
2811                 } else {
2812                         backref->index = index;
2813                 }
2814         }
2815
2816         if (item_type == BTRFS_DIR_ITEM_KEY) {
2817                 if (backref->found_forward_ref)
2818                         rec->found_ref++;
2819                 backref->found_dir_item = 1;
2820         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
2821                 backref->found_dir_index = 1;
2822         } else if (item_type == BTRFS_ROOT_REF_KEY) {
2823                 if (backref->found_forward_ref)
2824                         backref->errors |= REF_ERR_DUP_ROOT_REF;
2825                 else if (backref->found_dir_item)
2826                         rec->found_ref++;
2827                 backref->found_forward_ref = 1;
2828         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
2829                 if (backref->found_back_ref)
2830                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
2831                 backref->found_back_ref = 1;
2832         } else {
2833                 BUG_ON(1);
2834         }
2835
2836         if (backref->found_forward_ref && backref->found_dir_item)
2837                 backref->reachable = 1;
2838         return 0;
2839 }
2840
2841 static int merge_root_recs(struct btrfs_root *root,
2842                            struct cache_tree *src_cache,
2843                            struct cache_tree *dst_cache)
2844 {
2845         struct cache_extent *cache;
2846         struct ptr_node *node;
2847         struct inode_record *rec;
2848         struct inode_backref *backref;
2849         int ret = 0;
2850
2851         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
2852                 free_inode_recs_tree(src_cache);
2853                 return 0;
2854         }
2855
2856         while (1) {
2857                 cache = search_cache_extent(src_cache, 0);
2858                 if (!cache)
2859                         break;
2860                 node = container_of(cache, struct ptr_node, cache);
2861                 rec = node->data;
2862                 remove_cache_extent(src_cache, &node->cache);
2863                 free(node);
2864
2865                 ret = is_child_root(root, root->objectid, rec->ino);
2866                 if (ret < 0)
2867                         break;
2868                 else if (ret == 0)
2869                         goto skip;
2870
2871                 list_for_each_entry(backref, &rec->backrefs, list) {
2872                         BUG_ON(backref->found_inode_ref);
2873                         if (backref->found_dir_item)
2874                                 add_root_backref(dst_cache, rec->ino,
2875                                         root->root_key.objectid, backref->dir,
2876                                         backref->index, backref->name,
2877                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
2878                                         backref->errors);
2879                         if (backref->found_dir_index)
2880                                 add_root_backref(dst_cache, rec->ino,
2881                                         root->root_key.objectid, backref->dir,
2882                                         backref->index, backref->name,
2883                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
2884                                         backref->errors);
2885                 }
2886 skip:
2887                 free_inode_rec(rec);
2888         }
2889         if (ret < 0)
2890                 return ret;
2891         return 0;
2892 }
2893
2894 static int check_root_refs(struct btrfs_root *root,
2895                            struct cache_tree *root_cache)
2896 {
2897         struct root_record *rec;
2898         struct root_record *ref_root;
2899         struct root_backref *backref;
2900         struct cache_extent *cache;
2901         int loop = 1;
2902         int ret;
2903         int error;
2904         int errors = 0;
2905
2906         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
2907         BUG_ON(IS_ERR(rec));
2908         rec->found_ref = 1;
2909
2910         /* fixme: this can not detect circular references */
2911         while (loop) {
2912                 loop = 0;
2913                 cache = search_cache_extent(root_cache, 0);
2914                 while (1) {
2915                         if (!cache)
2916                                 break;
2917                         rec = container_of(cache, struct root_record, cache);
2918                         cache = next_cache_extent(cache);
2919
2920                         if (rec->found_ref == 0)
2921                                 continue;
2922
2923                         list_for_each_entry(backref, &rec->backrefs, list) {
2924                                 if (!backref->reachable)
2925                                         continue;
2926
2927                                 ref_root = get_root_rec(root_cache,
2928                                                         backref->ref_root);
2929                                 BUG_ON(IS_ERR(ref_root));
2930                                 if (ref_root->found_ref > 0)
2931                                         continue;
2932
2933                                 backref->reachable = 0;
2934                                 rec->found_ref--;
2935                                 if (rec->found_ref == 0)
2936                                         loop = 1;
2937                         }
2938                 }
2939         }
2940
2941         cache = search_cache_extent(root_cache, 0);
2942         while (1) {
2943                 if (!cache)
2944                         break;
2945                 rec = container_of(cache, struct root_record, cache);
2946                 cache = next_cache_extent(cache);
2947
2948                 if (rec->found_ref == 0 &&
2949                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
2950                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
2951                         ret = check_orphan_item(root->fs_info->tree_root,
2952                                                 rec->objectid);
2953                         if (ret == 0)
2954                                 continue;
2955
2956                         /*
2957                          * If we don't have a root item then we likely just have
2958                          * a dir item in a snapshot for this root but no actual
2959                          * ref key or anything so it's meaningless.
2960                          */
2961                         if (!rec->found_root_item)
2962                                 continue;
2963                         errors++;
2964                         fprintf(stderr, "fs tree %llu not referenced\n",
2965                                 (unsigned long long)rec->objectid);
2966                 }
2967
2968                 error = 0;
2969                 if (rec->found_ref > 0 && !rec->found_root_item)
2970                         error = 1;
2971                 list_for_each_entry(backref, &rec->backrefs, list) {
2972                         if (!backref->found_dir_item)
2973                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
2974                         if (!backref->found_dir_index)
2975                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
2976                         if (!backref->found_back_ref)
2977                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
2978                         if (!backref->found_forward_ref)
2979                                 backref->errors |= REF_ERR_NO_ROOT_REF;
2980                         if (backref->reachable && backref->errors)
2981                                 error = 1;
2982                 }
2983                 if (!error)
2984                         continue;
2985
2986                 errors++;
2987                 fprintf(stderr, "fs tree %llu refs %u %s\n",
2988                         (unsigned long long)rec->objectid, rec->found_ref,
2989                          rec->found_root_item ? "" : "not found");
2990
2991                 list_for_each_entry(backref, &rec->backrefs, list) {
2992                         if (!backref->reachable)
2993                                 continue;
2994                         if (!backref->errors && rec->found_root_item)
2995                                 continue;
2996                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
2997                                 " index %llu namelen %u name %s errors %x\n",
2998                                 (unsigned long long)backref->ref_root,
2999                                 (unsigned long long)backref->dir,
3000                                 (unsigned long long)backref->index,
3001                                 backref->namelen, backref->name,
3002                                 backref->errors);
3003                         print_ref_error(backref->errors);
3004                 }
3005         }
3006         return errors > 0 ? 1 : 0;
3007 }
3008
3009 static int process_root_ref(struct extent_buffer *eb, int slot,
3010                             struct btrfs_key *key,
3011                             struct cache_tree *root_cache)
3012 {
3013         u64 dirid;
3014         u64 index;
3015         u32 len;
3016         u32 name_len;
3017         struct btrfs_root_ref *ref;
3018         char namebuf[BTRFS_NAME_LEN];
3019         int error;
3020
3021         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3022
3023         dirid = btrfs_root_ref_dirid(eb, ref);
3024         index = btrfs_root_ref_sequence(eb, ref);
3025         name_len = btrfs_root_ref_name_len(eb, ref);
3026
3027         if (name_len <= BTRFS_NAME_LEN) {
3028                 len = name_len;
3029                 error = 0;
3030         } else {
3031                 len = BTRFS_NAME_LEN;
3032                 error = REF_ERR_NAME_TOO_LONG;
3033         }
3034         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3035
3036         if (key->type == BTRFS_ROOT_REF_KEY) {
3037                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3038                                  index, namebuf, len, key->type, error);
3039         } else {
3040                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3041                                  index, namebuf, len, key->type, error);
3042         }
3043         return 0;
3044 }
3045
3046 static void free_corrupt_block(struct cache_extent *cache)
3047 {
3048         struct btrfs_corrupt_block *corrupt;
3049
3050         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3051         free(corrupt);
3052 }
3053
3054 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3055
3056 /*
3057  * Repair the btree of the given root.
3058  *
3059  * The fix is to remove the node key in corrupt_blocks cache_tree.
3060  * and rebalance the tree.
3061  * After the fix, the btree should be writeable.
3062  */
3063 static int repair_btree(struct btrfs_root *root,
3064                         struct cache_tree *corrupt_blocks)
3065 {
3066         struct btrfs_trans_handle *trans;
3067         struct btrfs_path path;
3068         struct btrfs_corrupt_block *corrupt;
3069         struct cache_extent *cache;
3070         struct btrfs_key key;
3071         u64 offset;
3072         int level;
3073         int ret = 0;
3074
3075         if (cache_tree_empty(corrupt_blocks))
3076                 return 0;
3077
3078         trans = btrfs_start_transaction(root, 1);
3079         if (IS_ERR(trans)) {
3080                 ret = PTR_ERR(trans);
3081                 fprintf(stderr, "Error starting transaction: %s\n",
3082                         strerror(-ret));
3083                 return ret;
3084         }
3085         btrfs_init_path(&path);
3086         cache = first_cache_extent(corrupt_blocks);
3087         while (cache) {
3088                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3089                                        cache);
3090                 level = corrupt->level;
3091                 path.lowest_level = level;
3092                 key.objectid = corrupt->key.objectid;
3093                 key.type = corrupt->key.type;
3094                 key.offset = corrupt->key.offset;
3095
3096                 /*
3097                  * Here we don't want to do any tree balance, since it may
3098                  * cause a balance with corrupted brother leaf/node,
3099                  * so ins_len set to 0 here.
3100                  * Balance will be done after all corrupt node/leaf is deleted.
3101                  */
3102                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3103                 if (ret < 0)
3104                         goto out;
3105                 offset = btrfs_node_blockptr(path.nodes[level],
3106                                              path.slots[level]);
3107
3108                 /* Remove the ptr */
3109                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3110                 if (ret < 0)
3111                         goto out;
3112                 /*
3113                  * Remove the corresponding extent
3114                  * return value is not concerned.
3115                  */
3116                 btrfs_release_path(&path);
3117                 ret = btrfs_free_extent(trans, root, offset,
3118                                 root->fs_info->nodesize, 0,
3119                                 root->root_key.objectid, level - 1, 0);
3120                 cache = next_cache_extent(cache);
3121         }
3122
3123         /* Balance the btree using btrfs_search_slot() */
3124         cache = first_cache_extent(corrupt_blocks);
3125         while (cache) {
3126                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3127                                        cache);
3128                 memcpy(&key, &corrupt->key, sizeof(key));
3129                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3130                 if (ret < 0)
3131                         goto out;
3132                 /* return will always >0 since it won't find the item */
3133                 ret = 0;
3134                 btrfs_release_path(&path);
3135                 cache = next_cache_extent(cache);
3136         }
3137 out:
3138         btrfs_commit_transaction(trans, root);
3139         btrfs_release_path(&path);
3140         return ret;
3141 }
3142
3143 static int check_fs_root(struct btrfs_root *root,
3144                          struct cache_tree *root_cache,
3145                          struct walk_control *wc)
3146 {
3147         int ret = 0;
3148         int err = 0;
3149         int wret;
3150         int level;
3151         struct btrfs_path path;
3152         struct shared_node root_node;
3153         struct root_record *rec;
3154         struct btrfs_root_item *root_item = &root->root_item;
3155         struct cache_tree corrupt_blocks;
3156         struct orphan_data_extent *orphan;
3157         struct orphan_data_extent *tmp;
3158         enum btrfs_tree_block_status status;
3159         struct node_refs nrefs;
3160
3161         /*
3162          * Reuse the corrupt_block cache tree to record corrupted tree block
3163          *
3164          * Unlike the usage in extent tree check, here we do it in a per
3165          * fs/subvol tree base.
3166          */
3167         cache_tree_init(&corrupt_blocks);
3168         root->fs_info->corrupt_blocks = &corrupt_blocks;
3169
3170         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3171                 rec = get_root_rec(root_cache, root->root_key.objectid);
3172                 BUG_ON(IS_ERR(rec));
3173                 if (btrfs_root_refs(root_item) > 0)
3174                         rec->found_root_item = 1;
3175         }
3176
3177         btrfs_init_path(&path);
3178         memset(&root_node, 0, sizeof(root_node));
3179         cache_tree_init(&root_node.root_cache);
3180         cache_tree_init(&root_node.inode_cache);
3181         memset(&nrefs, 0, sizeof(nrefs));
3182
3183         /* Move the orphan extent record to corresponding inode_record */
3184         list_for_each_entry_safe(orphan, tmp,
3185                                  &root->orphan_data_extents, list) {
3186                 struct inode_record *inode;
3187
3188                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3189                                       1);
3190                 BUG_ON(IS_ERR(inode));
3191                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3192                 list_move(&orphan->list, &inode->orphan_extents);
3193         }
3194
3195         level = btrfs_header_level(root->node);
3196         memset(wc->nodes, 0, sizeof(wc->nodes));
3197         wc->nodes[level] = &root_node;
3198         wc->active_node = level;
3199         wc->root_level = level;
3200
3201         /* We may not have checked the root block, lets do that now */
3202         if (btrfs_is_leaf(root->node))
3203                 status = btrfs_check_leaf(root, NULL, root->node);
3204         else
3205                 status = btrfs_check_node(root, NULL, root->node);
3206         if (status != BTRFS_TREE_BLOCK_CLEAN)
3207                 return -EIO;
3208
3209         if (btrfs_root_refs(root_item) > 0 ||
3210             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3211                 path.nodes[level] = root->node;
3212                 extent_buffer_get(root->node);
3213                 path.slots[level] = 0;
3214         } else {
3215                 struct btrfs_key key;
3216                 struct btrfs_disk_key found_key;
3217
3218                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3219                 level = root_item->drop_level;
3220                 path.lowest_level = level;
3221                 if (level > btrfs_header_level(root->node) ||
3222                     level >= BTRFS_MAX_LEVEL) {
3223                         error("ignoring invalid drop level: %u", level);
3224                         goto skip_walking;
3225                 }
3226                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3227                 if (wret < 0)
3228                         goto skip_walking;
3229                 btrfs_node_key(path.nodes[level], &found_key,
3230                                 path.slots[level]);
3231                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3232                                         sizeof(found_key)));
3233         }
3234
3235         while (1) {
3236                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3237                 if (wret < 0)
3238                         ret = wret;
3239                 if (wret != 0)
3240                         break;
3241
3242                 wret = walk_up_tree(root, &path, wc, &level);
3243                 if (wret < 0)
3244                         ret = wret;
3245                 if (wret != 0)
3246                         break;
3247         }
3248 skip_walking:
3249         btrfs_release_path(&path);
3250
3251         if (!cache_tree_empty(&corrupt_blocks)) {
3252                 struct cache_extent *cache;
3253                 struct btrfs_corrupt_block *corrupt;
3254
3255                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3256                        root->root_key.objectid);
3257                 cache = first_cache_extent(&corrupt_blocks);
3258                 while (cache) {
3259                         corrupt = container_of(cache,
3260                                                struct btrfs_corrupt_block,
3261                                                cache);
3262                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3263                                cache->start, corrupt->level,
3264                                corrupt->key.objectid, corrupt->key.type,
3265                                corrupt->key.offset);
3266                         cache = next_cache_extent(cache);
3267                 }
3268                 if (repair) {
3269                         printf("Try to repair the btree for root %llu\n",
3270                                root->root_key.objectid);
3271                         ret = repair_btree(root, &corrupt_blocks);
3272                         if (ret < 0)
3273                                 fprintf(stderr, "Failed to repair btree: %s\n",
3274                                         strerror(-ret));
3275                         if (!ret)
3276                                 printf("Btree for root %llu is fixed\n",
3277                                        root->root_key.objectid);
3278                 }
3279         }
3280
3281         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3282         if (err < 0)
3283                 ret = err;
3284
3285         if (root_node.current) {
3286                 root_node.current->checked = 1;
3287                 maybe_free_inode_rec(&root_node.inode_cache,
3288                                 root_node.current);
3289         }
3290
3291         err = check_inode_recs(root, &root_node.inode_cache);
3292         if (!ret)
3293                 ret = err;
3294
3295         free_corrupt_blocks_tree(&corrupt_blocks);
3296         root->fs_info->corrupt_blocks = NULL;
3297         free_orphan_data_extents(&root->orphan_data_extents);
3298         return ret;
3299 }
3300
3301 static int check_fs_roots(struct btrfs_fs_info *fs_info,
3302                           struct cache_tree *root_cache)
3303 {
3304         struct btrfs_path path;
3305         struct btrfs_key key;
3306         struct walk_control wc;
3307         struct extent_buffer *leaf, *tree_node;
3308         struct btrfs_root *tmp_root;
3309         struct btrfs_root *tree_root = fs_info->tree_root;
3310         int ret;
3311         int err = 0;
3312
3313         if (ctx.progress_enabled) {
3314                 ctx.tp = TASK_FS_ROOTS;
3315                 task_start(ctx.info);
3316         }
3317
3318         /*
3319          * Just in case we made any changes to the extent tree that weren't
3320          * reflected into the free space cache yet.
3321          */
3322         if (repair)
3323                 reset_cached_block_groups(fs_info);
3324         memset(&wc, 0, sizeof(wc));
3325         cache_tree_init(&wc.shared);
3326         btrfs_init_path(&path);
3327
3328 again:
3329         key.offset = 0;
3330         key.objectid = 0;
3331         key.type = BTRFS_ROOT_ITEM_KEY;
3332         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3333         if (ret < 0) {
3334                 err = 1;
3335                 goto out;
3336         }
3337         tree_node = tree_root->node;
3338         while (1) {
3339                 if (tree_node != tree_root->node) {
3340                         free_root_recs_tree(root_cache);
3341                         btrfs_release_path(&path);
3342                         goto again;
3343                 }
3344                 leaf = path.nodes[0];
3345                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3346                         ret = btrfs_next_leaf(tree_root, &path);
3347                         if (ret) {
3348                                 if (ret < 0)
3349                                         err = 1;
3350                                 break;
3351                         }
3352                         leaf = path.nodes[0];
3353                 }
3354                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3355                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3356                     fs_root_objectid(key.objectid)) {
3357                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3358                                 tmp_root = btrfs_read_fs_root_no_cache(
3359                                                 fs_info, &key);
3360                         } else {
3361                                 key.offset = (u64)-1;
3362                                 tmp_root = btrfs_read_fs_root(
3363                                                 fs_info, &key);
3364                         }
3365                         if (IS_ERR(tmp_root)) {
3366                                 err = 1;
3367                                 goto next;
3368                         }
3369                         ret = check_fs_root(tmp_root, root_cache, &wc);
3370                         if (ret == -EAGAIN) {
3371                                 free_root_recs_tree(root_cache);
3372                                 btrfs_release_path(&path);
3373                                 goto again;
3374                         }
3375                         if (ret)
3376                                 err = 1;
3377                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3378                                 btrfs_free_fs_root(tmp_root);
3379                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3380                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3381                         process_root_ref(leaf, path.slots[0], &key,
3382                                          root_cache);
3383                 }
3384 next:
3385                 path.slots[0]++;
3386         }
3387 out:
3388         btrfs_release_path(&path);
3389         if (err)
3390                 free_extent_cache_tree(&wc.shared);
3391         if (!cache_tree_empty(&wc.shared))
3392                 fprintf(stderr, "warning line %d\n", __LINE__);
3393
3394         task_stop(ctx.info);
3395
3396         return err;
3397 }
3398
3399 static struct tree_backref *find_tree_backref(struct extent_record *rec,
3400                                                 u64 parent, u64 root)
3401 {
3402         struct rb_node *node;
3403         struct tree_backref *back = NULL;
3404         struct tree_backref match = {
3405                 .node = {
3406                         .is_data = 0,
3407                 },
3408         };
3409
3410         if (parent) {
3411                 match.parent = parent;
3412                 match.node.full_backref = 1;
3413         } else {
3414                 match.root = root;
3415         }
3416
3417         node = rb_search(&rec->backref_tree, &match.node.node,
3418                          (rb_compare_keys)compare_extent_backref, NULL);
3419         if (node)
3420                 back = to_tree_backref(rb_node_to_extent_backref(node));
3421
3422         return back;
3423 }
3424
3425 static struct data_backref *find_data_backref(struct extent_record *rec,
3426                                                 u64 parent, u64 root,
3427                                                 u64 owner, u64 offset,
3428                                                 int found_ref,
3429                                                 u64 disk_bytenr, u64 bytes)
3430 {
3431         struct rb_node *node;
3432         struct data_backref *back = NULL;
3433         struct data_backref match = {
3434                 .node = {
3435                         .is_data = 1,
3436                 },
3437                 .owner = owner,
3438                 .offset = offset,
3439                 .bytes = bytes,
3440                 .found_ref = found_ref,
3441                 .disk_bytenr = disk_bytenr,
3442         };
3443
3444         if (parent) {
3445                 match.parent = parent;
3446                 match.node.full_backref = 1;
3447         } else {
3448                 match.root = root;
3449         }
3450
3451         node = rb_search(&rec->backref_tree, &match.node.node,
3452                          (rb_compare_keys)compare_extent_backref, NULL);
3453         if (node)
3454                 back = to_data_backref(rb_node_to_extent_backref(node));
3455
3456         return back;
3457 }
3458
3459 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
3460                           struct cache_tree *root_cache)
3461 {
3462         int ret;
3463
3464         if (!ctx.progress_enabled)
3465                 fprintf(stderr, "checking fs roots\n");
3466         if (check_mode == CHECK_MODE_LOWMEM)
3467                 ret = check_fs_roots_lowmem(fs_info);
3468         else
3469                 ret = check_fs_roots(fs_info, root_cache);
3470
3471         return ret;
3472 }
3473
3474 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3475 {
3476         struct extent_backref *back, *tmp;
3477         struct tree_backref *tback;
3478         struct data_backref *dback;
3479         u64 found = 0;
3480         int err = 0;
3481
3482         rbtree_postorder_for_each_entry_safe(back, tmp,
3483                                              &rec->backref_tree, node) {
3484                 if (!back->found_extent_tree) {
3485                         err = 1;
3486                         if (!print_errs)
3487                                 goto out;
3488                         if (back->is_data) {
3489                                 dback = to_data_backref(back);
3490                                 fprintf(stderr,
3491 "data backref %llu %s %llu owner %llu offset %llu num_refs %lu not found in extent tree\n",
3492                                         (unsigned long long)rec->start,
3493                                         back->full_backref ?
3494                                         "parent" : "root",
3495                                         back->full_backref ?
3496                                         (unsigned long long)dback->parent :
3497                                         (unsigned long long)dback->root,
3498                                         (unsigned long long)dback->owner,
3499                                         (unsigned long long)dback->offset,
3500                                         (unsigned long)dback->num_refs);
3501                         } else {
3502                                 tback = to_tree_backref(back);
3503                                 fprintf(stderr,
3504 "tree backref %llu parent %llu root %llu not found in extent tree\n",
3505                                         (unsigned long long)rec->start,
3506                                         (unsigned long long)tback->parent,
3507                                         (unsigned long long)tback->root);
3508                         }
3509                 }
3510                 if (!back->is_data && !back->found_ref) {
3511                         err = 1;
3512                         if (!print_errs)
3513                                 goto out;
3514                         tback = to_tree_backref(back);
3515                         fprintf(stderr,
3516                                 "backref %llu %s %llu not referenced back %p\n",
3517                                 (unsigned long long)rec->start,
3518                                 back->full_backref ? "parent" : "root",
3519                                 back->full_backref ?
3520                                 (unsigned long long)tback->parent :
3521                                 (unsigned long long)tback->root, back);
3522                 }
3523                 if (back->is_data) {
3524                         dback = to_data_backref(back);
3525                         if (dback->found_ref != dback->num_refs) {
3526                                 err = 1;
3527                                 if (!print_errs)
3528                                         goto out;
3529                                 fprintf(stderr,
3530 "incorrect local backref count on %llu %s %llu owner %llu offset %llu found %u wanted %u back %p\n",
3531                                         (unsigned long long)rec->start,
3532                                         back->full_backref ?
3533                                         "parent" : "root",
3534                                         back->full_backref ?
3535                                         (unsigned long long)dback->parent :
3536                                         (unsigned long long)dback->root,
3537                                         (unsigned long long)dback->owner,
3538                                         (unsigned long long)dback->offset,
3539                                         dback->found_ref, dback->num_refs,
3540                                         back);
3541                         }
3542                         if (dback->disk_bytenr != rec->start) {
3543                                 err = 1;
3544                                 if (!print_errs)
3545                                         goto out;
3546                                 fprintf(stderr,
3547 "backref disk bytenr does not match extent record, bytenr=%llu, ref bytenr=%llu\n",
3548                                         (unsigned long long)rec->start,
3549                                         (unsigned long long)dback->disk_bytenr);
3550                         }
3551
3552                         if (dback->bytes != rec->nr) {
3553                                 err = 1;
3554                                 if (!print_errs)
3555                                         goto out;
3556                                 fprintf(stderr,
3557 "backref bytes do not match extent backref, bytenr=%llu, ref bytes=%llu, backref bytes=%llu\n",
3558                                         (unsigned long long)rec->start,
3559                                         (unsigned long long)rec->nr,
3560                                         (unsigned long long)dback->bytes);
3561                         }
3562                 }
3563                 if (!back->is_data) {
3564                         found += 1;
3565                 } else {
3566                         dback = to_data_backref(back);
3567                         found += dback->found_ref;
3568                 }
3569         }
3570         if (found != rec->refs) {
3571                 err = 1;
3572                 if (!print_errs)
3573                         goto out;
3574                 fprintf(stderr,
3575         "incorrect global backref count on %llu found %llu wanted %llu\n",
3576                         (unsigned long long)rec->start,
3577                         (unsigned long long)found,
3578                         (unsigned long long)rec->refs);
3579         }
3580 out:
3581         return err;
3582 }
3583
/*
 * rb_free_nodes() callback: free the extent_backref that owns @node.
 */
static void __free_one_backref(struct rb_node *node)
{
	free(rb_node_to_extent_backref(node));
}
3590
3591 static void free_all_extent_backrefs(struct extent_record *rec)
3592 {
3593         rb_free_nodes(&rec->backref_tree, __free_one_backref);
3594 }
3595
3596 static void free_extent_record_cache(struct cache_tree *extent_cache)
3597 {
3598         struct cache_extent *cache;
3599         struct extent_record *rec;
3600
3601         while (1) {
3602                 cache = first_cache_extent(extent_cache);
3603                 if (!cache)
3604                         break;
3605                 rec = container_of(cache, struct extent_record, cache);
3606                 remove_cache_extent(extent_cache, cache);
3607                 free_all_extent_backrefs(rec);
3608                 free(rec);
3609         }
3610 }
3611
3612 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3613                                  struct extent_record *rec)
3614 {
3615         if (rec->content_checked && rec->owner_ref_checked &&
3616             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3617             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3618             !rec->bad_full_backref && !rec->crossing_stripes &&
3619             !rec->wrong_chunk_type) {
3620                 remove_cache_extent(extent_cache, &rec->cache);
3621                 free_all_extent_backrefs(rec);
3622                 list_del_init(&rec->list);
3623                 free(rec);
3624         }
3625         return 0;
3626 }
3627
3628 static int check_owner_ref(struct btrfs_root *root,
3629                             struct extent_record *rec,
3630                             struct extent_buffer *buf)
3631 {
3632         struct extent_backref *node, *tmp;
3633         struct tree_backref *back;
3634         struct btrfs_root *ref_root;
3635         struct btrfs_key key;
3636         struct btrfs_path path;
3637         struct extent_buffer *parent;
3638         int level;
3639         int found = 0;
3640         int ret;
3641
3642         rbtree_postorder_for_each_entry_safe(node, tmp,
3643                                              &rec->backref_tree, node) {
3644                 if (node->is_data)
3645                         continue;
3646                 if (!node->found_ref)
3647                         continue;
3648                 if (node->full_backref)
3649                         continue;
3650                 back = to_tree_backref(node);
3651                 if (btrfs_header_owner(buf) == back->root)
3652                         return 0;
3653         }
3654         BUG_ON(rec->is_root);
3655
3656         /* try to find the block by search corresponding fs tree */
3657         key.objectid = btrfs_header_owner(buf);
3658         key.type = BTRFS_ROOT_ITEM_KEY;
3659         key.offset = (u64)-1;
3660
3661         ref_root = btrfs_read_fs_root(root->fs_info, &key);
3662         if (IS_ERR(ref_root))
3663                 return 1;
3664
3665         level = btrfs_header_level(buf);
3666         if (level == 0)
3667                 btrfs_item_key_to_cpu(buf, &key, 0);
3668         else
3669                 btrfs_node_key_to_cpu(buf, &key, 0);
3670
3671         btrfs_init_path(&path);
3672         path.lowest_level = level + 1;
3673         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3674         if (ret < 0)
3675                 return 0;
3676
3677         parent = path.nodes[level + 1];
3678         if (parent && buf->start == btrfs_node_blockptr(parent,
3679                                                         path.slots[level + 1]))
3680                 found = 1;
3681
3682         btrfs_release_path(&path);
3683         return found ? 0 : 1;
3684 }
3685
3686 static int is_extent_tree_record(struct extent_record *rec)
3687 {
3688         struct extent_backref *node, *tmp;
3689         struct tree_backref *back;
3690         int is_extent = 0;
3691
3692         rbtree_postorder_for_each_entry_safe(node, tmp,
3693                                              &rec->backref_tree, node) {
3694                 if (node->is_data)
3695                         return 0;
3696                 back = to_tree_backref(node);
3697                 if (node->full_backref)
3698                         return 0;
3699                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3700                         is_extent = 1;
3701         }
3702         return is_extent;
3703 }
3704
3705
3706 static int record_bad_block_io(struct btrfs_fs_info *info,
3707                                struct cache_tree *extent_cache,
3708                                u64 start, u64 len)
3709 {
3710         struct extent_record *rec;
3711         struct cache_extent *cache;
3712         struct btrfs_key key;
3713
3714         cache = lookup_cache_extent(extent_cache, start, len);
3715         if (!cache)
3716                 return 0;
3717
3718         rec = container_of(cache, struct extent_record, cache);
3719         if (!is_extent_tree_record(rec))
3720                 return 0;
3721
3722         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3723         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
3724 }
3725
3726 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3727                        struct extent_buffer *buf, int slot)
3728 {
3729         if (btrfs_header_level(buf)) {
3730                 struct btrfs_key_ptr ptr1, ptr2;
3731
3732                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3733                                    sizeof(struct btrfs_key_ptr));
3734                 read_extent_buffer(buf, &ptr2,
3735                                    btrfs_node_key_ptr_offset(slot + 1),
3736                                    sizeof(struct btrfs_key_ptr));
3737                 write_extent_buffer(buf, &ptr1,
3738                                     btrfs_node_key_ptr_offset(slot + 1),
3739                                     sizeof(struct btrfs_key_ptr));
3740                 write_extent_buffer(buf, &ptr2,
3741                                     btrfs_node_key_ptr_offset(slot),
3742                                     sizeof(struct btrfs_key_ptr));
3743                 if (slot == 0) {
3744                         struct btrfs_disk_key key;
3745
3746                         btrfs_node_key(buf, &key, 0);
3747                         btrfs_fixup_low_keys(root, path, &key,
3748                                              btrfs_header_level(buf) + 1);
3749                 }
3750         } else {
3751                 struct btrfs_item *item1, *item2;
3752                 struct btrfs_key k1, k2;
3753                 char *item1_data, *item2_data;
3754                 u32 item1_offset, item2_offset, item1_size, item2_size;
3755
3756                 item1 = btrfs_item_nr(slot);
3757                 item2 = btrfs_item_nr(slot + 1);
3758                 btrfs_item_key_to_cpu(buf, &k1, slot);
3759                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
3760                 item1_offset = btrfs_item_offset(buf, item1);
3761                 item2_offset = btrfs_item_offset(buf, item2);
3762                 item1_size = btrfs_item_size(buf, item1);
3763                 item2_size = btrfs_item_size(buf, item2);
3764
3765                 item1_data = malloc(item1_size);
3766                 if (!item1_data)
3767                         return -ENOMEM;
3768                 item2_data = malloc(item2_size);
3769                 if (!item2_data) {
3770                         free(item1_data);
3771                         return -ENOMEM;
3772                 }
3773
3774                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
3775                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
3776
3777                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
3778                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
3779                 free(item1_data);
3780                 free(item2_data);
3781
3782                 btrfs_set_item_offset(buf, item1, item2_offset);
3783                 btrfs_set_item_offset(buf, item2, item1_offset);
3784                 btrfs_set_item_size(buf, item1, item2_size);
3785                 btrfs_set_item_size(buf, item2, item1_size);
3786
3787                 path->slots[0] = slot;
3788                 btrfs_set_item_key_unsafe(root, path, &k2);
3789                 path->slots[0] = slot + 1;
3790                 btrfs_set_item_key_unsafe(root, path, &k1);
3791         }
3792         return 0;
3793 }
3794
3795 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
3796 {
3797         struct extent_buffer *buf;
3798         struct btrfs_key k1, k2;
3799         int i;
3800         int level = path->lowest_level;
3801         int ret = -EIO;
3802
3803         buf = path->nodes[level];
3804         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
3805                 if (level) {
3806                         btrfs_node_key_to_cpu(buf, &k1, i);
3807                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
3808                 } else {
3809                         btrfs_item_key_to_cpu(buf, &k1, i);
3810                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
3811                 }
3812                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
3813                         continue;
3814                 ret = swap_values(root, path, buf, i);
3815                 if (ret)
3816                         break;
3817                 btrfs_mark_buffer_dirty(buf);
3818                 i = 0;
3819         }
3820         return ret;
3821 }
3822
3823 static int delete_bogus_item(struct btrfs_root *root,
3824                              struct btrfs_path *path,
3825                              struct extent_buffer *buf, int slot)
3826 {
3827         struct btrfs_key key;
3828         int nritems = btrfs_header_nritems(buf);
3829
3830         btrfs_item_key_to_cpu(buf, &key, slot);
3831
3832         /* These are all the keys we can deal with missing. */
3833         if (key.type != BTRFS_DIR_INDEX_KEY &&
3834             key.type != BTRFS_EXTENT_ITEM_KEY &&
3835             key.type != BTRFS_METADATA_ITEM_KEY &&
3836             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
3837             key.type != BTRFS_EXTENT_DATA_REF_KEY)
3838                 return -1;
3839
3840         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
3841                (unsigned long long)key.objectid, key.type,
3842                (unsigned long long)key.offset, slot, buf->start);
3843         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
3844                               btrfs_item_nr_offset(slot + 1),
3845                               sizeof(struct btrfs_item) *
3846                               (nritems - slot - 1));
3847         btrfs_set_header_nritems(buf, nritems - 1);
3848         if (slot == 0) {
3849                 struct btrfs_disk_key disk_key;
3850
3851                 btrfs_item_key(buf, &disk_key, 0);
3852                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
3853         }
3854         btrfs_mark_buffer_dirty(buf);
3855         return 0;
3856 }
3857
3858 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
3859 {
3860         struct extent_buffer *buf;
3861         int i;
3862         int ret = 0;
3863
3864         /* We should only get this for leaves */
3865         BUG_ON(path->lowest_level);
3866         buf = path->nodes[0];
3867 again:
3868         for (i = 0; i < btrfs_header_nritems(buf); i++) {
3869                 unsigned int shift = 0, offset;
3870
3871                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
3872                     BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
3873                         if (btrfs_item_end_nr(buf, i) >
3874                             BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
3875                                 ret = delete_bogus_item(root, path, buf, i);
3876                                 if (!ret)
3877                                         goto again;
3878                                 fprintf(stderr,
3879                                 "item is off the end of the leaf, can't fix\n");
3880                                 ret = -EIO;
3881                                 break;
3882                         }
3883                         shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
3884                                 btrfs_item_end_nr(buf, i);
3885                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
3886                            btrfs_item_offset_nr(buf, i - 1)) {
3887                         if (btrfs_item_end_nr(buf, i) >
3888                             btrfs_item_offset_nr(buf, i - 1)) {
3889                                 ret = delete_bogus_item(root, path, buf, i);
3890                                 if (!ret)
3891                                         goto again;
3892                                 fprintf(stderr, "items overlap, can't fix\n");
3893                                 ret = -EIO;
3894                                 break;
3895                         }
3896                         shift = btrfs_item_offset_nr(buf, i - 1) -
3897                                 btrfs_item_end_nr(buf, i);
3898                 }
3899                 if (!shift)
3900                         continue;
3901
3902                 printf("Shifting item nr %d by %u bytes in block %llu\n",
3903                        i, shift, (unsigned long long)buf->start);
3904                 offset = btrfs_item_offset_nr(buf, i);
3905                 memmove_extent_buffer(buf,
3906                                       btrfs_leaf_data(buf) + offset + shift,
3907                                       btrfs_leaf_data(buf) + offset,
3908                                       btrfs_item_size_nr(buf, i));
3909                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
3910                                       offset + shift);
3911                 btrfs_mark_buffer_dirty(buf);
3912         }
3913
3914         /*
3915          * We may have moved things, in which case we want to exit so we don't
3916          * write those changes out.  Once we have proper abort functionality in
3917          * progs this can be changed to something nicer.
3918          */
3919         BUG_ON(ret);
3920         return ret;
3921 }
3922
3923 /*
3924  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
3925  * then just return -EIO.
3926  */
3927 static int try_to_fix_bad_block(struct btrfs_root *root,
3928                                 struct extent_buffer *buf,
3929                                 enum btrfs_tree_block_status status)
3930 {
3931         struct btrfs_trans_handle *trans;
3932         struct ulist *roots;
3933         struct ulist_node *node;
3934         struct btrfs_root *search_root;
3935         struct btrfs_path path;
3936         struct ulist_iterator iter;
3937         struct btrfs_key root_key, key;
3938         int ret;
3939
3940         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
3941             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
3942                 return -EIO;
3943
3944         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
3945         if (ret)
3946                 return -EIO;
3947
3948         btrfs_init_path(&path);
3949         ULIST_ITER_INIT(&iter);
3950         while ((node = ulist_next(roots, &iter))) {
3951                 root_key.objectid = node->val;
3952                 root_key.type = BTRFS_ROOT_ITEM_KEY;
3953                 root_key.offset = (u64)-1;
3954
3955                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
3956                 if (IS_ERR(root)) {
3957                         ret = -EIO;
3958                         break;
3959                 }
3960
3961
3962                 trans = btrfs_start_transaction(search_root, 0);
3963                 if (IS_ERR(trans)) {
3964                         ret = PTR_ERR(trans);
3965                         break;
3966                 }
3967
3968                 path.lowest_level = btrfs_header_level(buf);
3969                 path.skip_check_block = 1;
3970                 if (path.lowest_level)
3971                         btrfs_node_key_to_cpu(buf, &key, 0);
3972                 else
3973                         btrfs_item_key_to_cpu(buf, &key, 0);
3974                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
3975                 if (ret) {
3976                         ret = -EIO;
3977                         btrfs_commit_transaction(trans, search_root);
3978                         break;
3979                 }
3980                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
3981                         ret = fix_key_order(search_root, &path);
3982                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
3983                         ret = fix_item_offset(search_root, &path);
3984                 if (ret) {
3985                         btrfs_commit_transaction(trans, search_root);
3986                         break;
3987                 }
3988                 btrfs_release_path(&path);
3989                 btrfs_commit_transaction(trans, search_root);
3990         }
3991         ulist_free(roots);
3992         btrfs_release_path(&path);
3993         return ret;
3994 }
3995
3996 static int check_block(struct btrfs_root *root,
3997                        struct cache_tree *extent_cache,
3998                        struct extent_buffer *buf, u64 flags)
3999 {
4000         struct extent_record *rec;
4001         struct cache_extent *cache;
4002         struct btrfs_key key;
4003         enum btrfs_tree_block_status status;
4004         int ret = 0;
4005         int level;
4006
4007         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4008         if (!cache)
4009                 return 1;
4010         rec = container_of(cache, struct extent_record, cache);
4011         rec->generation = btrfs_header_generation(buf);
4012
4013         level = btrfs_header_level(buf);
4014         if (btrfs_header_nritems(buf) > 0) {
4015
4016                 if (level == 0)
4017                         btrfs_item_key_to_cpu(buf, &key, 0);
4018                 else
4019                         btrfs_node_key_to_cpu(buf, &key, 0);
4020
4021                 rec->info_objectid = key.objectid;
4022         }
4023         rec->info_level = level;
4024
4025         if (btrfs_is_leaf(buf))
4026                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4027         else
4028                 status = btrfs_check_node(root, &rec->parent_key, buf);
4029
4030         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4031                 if (repair)
4032                         status = try_to_fix_bad_block(root, buf, status);
4033                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4034                         ret = -EIO;
4035                         fprintf(stderr, "bad block %llu\n",
4036                                 (unsigned long long)buf->start);
4037                 } else {
4038                         /*
4039                          * Signal to callers we need to start the scan over
4040                          * again since we'll have cowed blocks.
4041                          */
4042                         ret = -EAGAIN;
4043                 }
4044         } else {
4045                 rec->content_checked = 1;
4046                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4047                         rec->owner_ref_checked = 1;
4048                 else {
4049                         ret = check_owner_ref(root, rec, buf);
4050                         if (!ret)
4051                                 rec->owner_ref_checked = 1;
4052                 }
4053         }
4054         if (!ret)
4055                 maybe_free_extent_rec(extent_cache, rec);
4056         return ret;
4057 }
4058
#if 0
/*
 * Compiled out: lookup of a tree backref by walking the rec->backrefs list.
 *
 * With a non-zero @parent only full backrefs are considered and matched on
 * the parent; otherwise only non-full backrefs are considered and matched on
 * @root.  Returns the matching backref or NULL.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
						u64 parent, u64 root)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs;
	     pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct tree_backref *back;

		if (node->is_data)
			continue;

		back = to_tree_backref(node);
		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
		} else if (!node->full_backref && back->root == root) {
			return back;
		}
	}
	return NULL;
}
#endif
4088
4089 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4090                                                 u64 parent, u64 root)
4091 {
4092         struct tree_backref *ref = malloc(sizeof(*ref));
4093
4094         if (!ref)
4095                 return NULL;
4096         memset(&ref->node, 0, sizeof(ref->node));
4097         if (parent > 0) {
4098                 ref->parent = parent;
4099                 ref->node.full_backref = 1;
4100         } else {
4101                 ref->root = root;
4102                 ref->node.full_backref = 0;
4103         }
4104
4105         return ref;
4106 }
4107
#if 0
/*
 * Compiled out: lookup of a data backref by walking the rec->backrefs list.
 *
 * With a non-zero @parent only full backrefs are considered and matched on
 * the parent.  Otherwise a non-full backref matches on (@root, @owner,
 * @offset); when both the caller and the candidate have seen a file extent
 * reference, the candidate must additionally agree on @bytes and
 * @disk_bytenr.  Returns the matching backref or NULL.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
						u64 parent, u64 root,
						u64 owner, u64 offset,
						int found_ref,
						u64 disk_bytenr, u64 bytes)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs;
	     pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct data_backref *back;

		if (!node->is_data)
			continue;

		back = to_data_backref(node);
		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
			continue;
		}

		if (node->full_backref)
			continue;
		if (back->root != root || back->owner != owner ||
		    back->offset != offset)
			continue;
		if (found_ref && node->found_ref &&
		    (back->bytes != bytes ||
		     back->disk_bytenr != disk_bytenr))
			continue;
		return back;
	}
	return NULL;
}
#endif
4146
4147 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4148                                                 u64 parent, u64 root,
4149                                                 u64 owner, u64 offset,
4150                                                 u64 max_size)
4151 {
4152         struct data_backref *ref = malloc(sizeof(*ref));
4153
4154         if (!ref)
4155                 return NULL;
4156         memset(&ref->node, 0, sizeof(ref->node));
4157         ref->node.is_data = 1;
4158
4159         if (parent > 0) {
4160                 ref->parent = parent;
4161                 ref->owner = 0;
4162                 ref->offset = 0;
4163                 ref->node.full_backref = 1;
4164         } else {
4165                 ref->root = root;
4166                 ref->owner = owner;
4167                 ref->offset = offset;
4168                 ref->node.full_backref = 0;
4169         }
4170         ref->bytes = max_size;
4171         ref->found_ref = 0;
4172         ref->num_refs = 0;
4173         if (max_size > rec->max_size)
4174                 rec->max_size = max_size;
4175         return ref;
4176 }
4177
4178 /* Check if the type of extent matches with its chunk */
4179 static void check_extent_type(struct extent_record *rec)
4180 {
4181         struct btrfs_block_group_cache *bg_cache;
4182
4183         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4184         if (!bg_cache)
4185                 return;
4186
4187         /* data extent, check chunk directly*/
4188         if (!rec->metadata) {
4189                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4190                         rec->wrong_chunk_type = 1;
4191                 return;
4192         }
4193
4194         /* metadata extent, check the obvious case first */
4195         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4196                                  BTRFS_BLOCK_GROUP_METADATA))) {
4197                 rec->wrong_chunk_type = 1;
4198                 return;
4199         }
4200
4201         /*
4202          * Check SYSTEM extent, as it's also marked as metadata, we can only
4203          * make sure it's a SYSTEM extent by its backref
4204          */
4205         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4206                 struct extent_backref *node;
4207                 struct tree_backref *tback;
4208                 u64 bg_type;
4209
4210                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4211                 if (node->is_data) {
4212                         /* tree block shouldn't have data backref */
4213                         rec->wrong_chunk_type = 1;
4214                         return;
4215                 }
4216                 tback = container_of(node, struct tree_backref, node);
4217
4218                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4219                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4220                 else
4221                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4222                 if (!(bg_cache->flags & bg_type))
4223                         rec->wrong_chunk_type = 1;
4224         }
4225 }
4226
4227 /*
4228  * Allocate a new extent record, fill default values from @tmpl and insert int
4229  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4230  * the cache, otherwise it fails.
4231  */
4232 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4233                 struct extent_record *tmpl)
4234 {
4235         struct extent_record *rec;
4236         int ret = 0;
4237
4238         BUG_ON(tmpl->max_size == 0);
4239         rec = malloc(sizeof(*rec));
4240         if (!rec)
4241                 return -ENOMEM;
4242         rec->start = tmpl->start;
4243         rec->max_size = tmpl->max_size;
4244         rec->nr = max(tmpl->nr, tmpl->max_size);
4245         rec->found_rec = tmpl->found_rec;
4246         rec->content_checked = tmpl->content_checked;
4247         rec->owner_ref_checked = tmpl->owner_ref_checked;
4248         rec->num_duplicates = 0;
4249         rec->metadata = tmpl->metadata;
4250         rec->flag_block_full_backref = FLAG_UNSET;
4251         rec->bad_full_backref = 0;
4252         rec->crossing_stripes = 0;
4253         rec->wrong_chunk_type = 0;
4254         rec->is_root = tmpl->is_root;
4255         rec->refs = tmpl->refs;
4256         rec->extent_item_refs = tmpl->extent_item_refs;
4257         rec->parent_generation = tmpl->parent_generation;
4258         INIT_LIST_HEAD(&rec->backrefs);
4259         INIT_LIST_HEAD(&rec->dups);
4260         INIT_LIST_HEAD(&rec->list);
4261         rec->backref_tree = RB_ROOT;
4262         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4263         rec->cache.start = tmpl->start;
4264         rec->cache.size = tmpl->nr;
4265         ret = insert_cache_extent(extent_cache, &rec->cache);
4266         if (ret) {
4267                 free(rec);
4268                 return ret;
4269         }
4270         bytes_used += rec->nr;
4271
4272         if (tmpl->metadata)
4273                 rec->crossing_stripes = check_crossing_stripes(global_info,
4274                                 rec->start, global_info->nodesize);
4275         check_extent_type(rec);
4276         return ret;
4277 }
4278
4279 /*
4280  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4281  * some are hints:
4282  * - refs              - if found, increase refs
4283  * - is_root           - if found, set
4284  * - content_checked   - if found, set
4285  * - owner_ref_checked - if found, set
4286  *
4287  * If not found, create a new one, initialize and insert.
4288  */
4289 static int add_extent_rec(struct cache_tree *extent_cache,
4290                 struct extent_record *tmpl)
4291 {
4292         struct extent_record *rec;
4293         struct cache_extent *cache;
4294         int ret = 0;
4295         int dup = 0;
4296
4297         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4298         if (cache) {
4299                 rec = container_of(cache, struct extent_record, cache);
4300                 if (tmpl->refs)
4301                         rec->refs++;
4302                 if (rec->nr == 1)
4303                         rec->nr = max(tmpl->nr, tmpl->max_size);
4304
4305                 /*
4306                  * We need to make sure to reset nr to whatever the extent
4307                  * record says was the real size, this way we can compare it to
4308                  * the backrefs.
4309                  */
4310                 if (tmpl->found_rec) {
4311                         if (tmpl->start != rec->start || rec->found_rec) {
4312                                 struct extent_record *tmp;
4313
4314                                 dup = 1;
4315                                 if (list_empty(&rec->list))
4316                                         list_add_tail(&rec->list,
4317                                                       &duplicate_extents);
4318
4319                                 /*
4320                                  * We have to do this song and dance in case we
4321                                  * find an extent record that falls inside of
4322                                  * our current extent record but does not have
4323                                  * the same objectid.
4324                                  */
4325                                 tmp = malloc(sizeof(*tmp));
4326                                 if (!tmp)
4327                                         return -ENOMEM;
4328                                 tmp->start = tmpl->start;
4329                                 tmp->max_size = tmpl->max_size;
4330                                 tmp->nr = tmpl->nr;
4331                                 tmp->found_rec = 1;
4332                                 tmp->metadata = tmpl->metadata;
4333                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4334                                 INIT_LIST_HEAD(&tmp->list);
4335                                 list_add_tail(&tmp->list, &rec->dups);
4336                                 rec->num_duplicates++;
4337                         } else {
4338                                 rec->nr = tmpl->nr;
4339                                 rec->found_rec = 1;
4340                         }
4341                 }
4342
4343                 if (tmpl->extent_item_refs && !dup) {
4344                         if (rec->extent_item_refs) {
4345                                 fprintf(stderr,
4346                         "block %llu rec extent_item_refs %llu, passed %llu\n",
4347                                         (unsigned long long)tmpl->start,
4348                                         (unsigned long long)
4349                                                         rec->extent_item_refs,
4350                                         (unsigned long long)
4351                                                         tmpl->extent_item_refs);
4352                         }
4353                         rec->extent_item_refs = tmpl->extent_item_refs;
4354                 }
4355                 if (tmpl->is_root)
4356                         rec->is_root = 1;
4357                 if (tmpl->content_checked)
4358                         rec->content_checked = 1;
4359                 if (tmpl->owner_ref_checked)
4360                         rec->owner_ref_checked = 1;
4361                 memcpy(&rec->parent_key, &tmpl->parent_key,
4362                                 sizeof(tmpl->parent_key));
4363                 if (tmpl->parent_generation)
4364                         rec->parent_generation = tmpl->parent_generation;
4365                 if (rec->max_size < tmpl->max_size)
4366                         rec->max_size = tmpl->max_size;
4367
4368                 /*
4369                  * A metadata extent can't cross stripe_len boundary, otherwise
4370                  * kernel scrub won't be able to handle it.
4371                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4372                  * it.
4373                  */
4374                 if (tmpl->metadata)
4375                         rec->crossing_stripes = check_crossing_stripes(
4376                                         global_info, rec->start,
4377                                         global_info->nodesize);
4378                 check_extent_type(rec);
4379                 maybe_free_extent_rec(extent_cache, rec);
4380                 return ret;
4381         }
4382
4383         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4384
4385         return ret;
4386 }
4387
4388 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4389                             u64 parent, u64 root, int found_ref)
4390 {
4391         struct extent_record *rec;
4392         struct tree_backref *back;
4393         struct cache_extent *cache;
4394         int ret;
4395         bool insert = false;
4396
4397         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4398         if (!cache) {
4399                 struct extent_record tmpl;
4400
4401                 memset(&tmpl, 0, sizeof(tmpl));
4402                 tmpl.start = bytenr;
4403                 tmpl.nr = 1;
4404                 tmpl.metadata = 1;
4405                 tmpl.max_size = 1;
4406
4407                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4408                 if (ret)
4409                         return ret;
4410
4411                 /* really a bug in cache_extent implement now */
4412                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4413                 if (!cache)
4414                         return -ENOENT;
4415         }
4416
4417         rec = container_of(cache, struct extent_record, cache);
4418         if (rec->start != bytenr) {
4419                 /*
4420                  * Several cause, from unaligned bytenr to over lapping extents
4421                  */
4422                 return -EEXIST;
4423         }
4424
4425         back = find_tree_backref(rec, parent, root);
4426         if (!back) {
4427                 back = alloc_tree_backref(rec, parent, root);
4428                 if (!back)
4429                         return -ENOMEM;
4430                 insert = true;
4431         }
4432
4433         if (found_ref) {
4434                 if (back->node.found_ref) {
4435                         fprintf(stderr,
4436         "Extent back ref already exists for %llu parent %llu root %llu\n",
4437                                 (unsigned long long)bytenr,
4438                                 (unsigned long long)parent,
4439                                 (unsigned long long)root);
4440                 }
4441                 back->node.found_ref = 1;
4442         } else {
4443                 if (back->node.found_extent_tree) {
4444                         fprintf(stderr,
4445         "extent back ref already exists for %llu parent %llu root %llu\n",
4446                                 (unsigned long long)bytenr,
4447                                 (unsigned long long)parent,
4448                                 (unsigned long long)root);
4449                 }
4450                 back->node.found_extent_tree = 1;
4451         }
4452         if (insert)
4453                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
4454                         compare_extent_backref));
4455         check_extent_type(rec);
4456         maybe_free_extent_rec(extent_cache, rec);
4457         return 0;
4458 }
4459
4460 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4461                             u64 parent, u64 root, u64 owner, u64 offset,
4462                             u32 num_refs, int found_ref, u64 max_size)
4463 {
4464         struct extent_record *rec;
4465         struct data_backref *back;
4466         struct cache_extent *cache;
4467         int ret;
4468         bool insert = false;
4469
4470         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4471         if (!cache) {
4472                 struct extent_record tmpl;
4473
4474                 memset(&tmpl, 0, sizeof(tmpl));
4475                 tmpl.start = bytenr;
4476                 tmpl.nr = 1;
4477                 tmpl.max_size = max_size;
4478
4479                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4480                 if (ret)
4481                         return ret;
4482
4483                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4484                 if (!cache)
4485                         abort();
4486         }
4487
4488         rec = container_of(cache, struct extent_record, cache);
4489         if (rec->max_size < max_size)
4490                 rec->max_size = max_size;
4491
4492         /*
4493          * If found_ref is set then max_size is the real size and must match the
4494          * existing refs.  So if we have already found a ref then we need to
4495          * make sure that this ref matches the existing one, otherwise we need
4496          * to add a new backref so we can notice that the backrefs don't match
4497          * and we need to figure out who is telling the truth.  This is to
4498          * account for that awful fsync bug I introduced where we'd end up with
4499          * a btrfs_file_extent_item that would have its length include multiple
4500          * prealloc extents or point inside of a prealloc extent.
4501          */
4502         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4503                                  bytenr, max_size);
4504         if (!back) {
4505                 back = alloc_data_backref(rec, parent, root, owner, offset,
4506                                           max_size);
4507                 BUG_ON(!back);
4508                 insert = true;
4509         }
4510
4511         if (found_ref) {
4512                 BUG_ON(num_refs != 1);
4513                 if (back->node.found_ref)
4514                         BUG_ON(back->bytes != max_size);
4515                 back->node.found_ref = 1;
4516                 back->found_ref += 1;
4517                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
4518                         back->bytes = max_size;
4519                         back->disk_bytenr = bytenr;
4520
4521                         /* Need to reinsert if not already in the tree */
4522                         if (!insert) {
4523                                 rb_erase(&back->node.node, &rec->backref_tree);
4524                                 insert = true;
4525                         }
4526                 }
4527                 rec->refs += 1;
4528                 rec->content_checked = 1;
4529                 rec->owner_ref_checked = 1;
4530         } else {
4531                 if (back->node.found_extent_tree) {
4532                         fprintf(stderr,
4533 "Extent back ref already exists for %llu parent %llu root %llu owner %llu offset %llu num_refs %lu\n",
4534                                 (unsigned long long)bytenr,
4535                                 (unsigned long long)parent,
4536                                 (unsigned long long)root,
4537                                 (unsigned long long)owner,
4538                                 (unsigned long long)offset,
4539                                 (unsigned long)num_refs);
4540                 }
4541                 back->num_refs = num_refs;
4542                 back->node.found_extent_tree = 1;
4543         }
4544         if (insert)
4545                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
4546                         compare_extent_backref));
4547
4548         maybe_free_extent_rec(extent_cache, rec);
4549         return 0;
4550 }
4551
4552 static int add_pending(struct cache_tree *pending,
4553                        struct cache_tree *seen, u64 bytenr, u32 size)
4554 {
4555         int ret;
4556
4557         ret = add_cache_extent(seen, bytenr, size);
4558         if (ret)
4559                 return ret;
4560         add_cache_extent(pending, bytenr, size);
4561         return 0;
4562 }
4563
4564 static int pick_next_pending(struct cache_tree *pending,
4565                         struct cache_tree *reada,
4566                         struct cache_tree *nodes,
4567                         u64 last, struct block_info *bits, int bits_nr,
4568                         int *reada_bits)
4569 {
4570         unsigned long node_start = last;
4571         struct cache_extent *cache;
4572         int ret;
4573
4574         cache = search_cache_extent(reada, 0);
4575         if (cache) {
4576                 bits[0].start = cache->start;
4577                 bits[0].size = cache->size;
4578                 *reada_bits = 1;
4579                 return 1;
4580         }
4581         *reada_bits = 0;
4582         if (node_start > 32768)
4583                 node_start -= 32768;
4584
4585         cache = search_cache_extent(nodes, node_start);
4586         if (!cache)
4587                 cache = search_cache_extent(nodes, 0);
4588
4589         if (!cache) {
4590                 cache = search_cache_extent(pending, 0);
4591                 if (!cache)
4592                         return 0;
4593                 ret = 0;
4594                 do {
4595                         bits[ret].start = cache->start;
4596                         bits[ret].size = cache->size;
4597                         cache = next_cache_extent(cache);
4598                         ret++;
4599                 } while (cache && ret < bits_nr);
4600                 return ret;
4601         }
4602
4603         ret = 0;
4604         do {
4605                 bits[ret].start = cache->start;
4606                 bits[ret].size = cache->size;
4607                 cache = next_cache_extent(cache);
4608                 ret++;
4609         } while (cache && ret < bits_nr);
4610
4611         if (bits_nr - ret > 8) {
4612                 u64 lookup = bits[0].start + bits[0].size;
4613                 struct cache_extent *next;
4614
4615                 next = search_cache_extent(pending, lookup);
4616                 while (next) {
4617                         if (next->start - lookup > 32768)
4618                                 break;
4619                         bits[ret].start = next->start;
4620                         bits[ret].size = next->size;
4621                         lookup = next->start + next->size;
4622                         ret++;
4623                         if (ret == bits_nr)
4624                                 break;
4625                         next = next_cache_extent(next);
4626                         if (!next)
4627                                 break;
4628                 }
4629         }
4630         return ret;
4631 }
4632
4633 static void free_chunk_record(struct cache_extent *cache)
4634 {
4635         struct chunk_record *rec;
4636
4637         rec = container_of(cache, struct chunk_record, cache);
4638         list_del_init(&rec->list);
4639         list_del_init(&rec->dextents);
4640         free(rec);
4641 }
4642
4643 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4644 {
4645         cache_tree_free_extents(chunk_cache, free_chunk_record);
4646 }
4647
4648 static void free_device_record(struct rb_node *node)
4649 {
4650         struct device_record *rec;
4651
4652         rec = container_of(node, struct device_record, node);
4653         free(rec);
4654 }
4655
4656 FREE_RB_BASED_TREE(device_cache, free_device_record);
4657
4658 int insert_block_group_record(struct block_group_tree *tree,
4659                               struct block_group_record *bg_rec)
4660 {
4661         int ret;
4662
4663         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4664         if (ret)
4665                 return ret;
4666
4667         list_add_tail(&bg_rec->list, &tree->block_groups);
4668         return 0;
4669 }
4670
4671 static void free_block_group_record(struct cache_extent *cache)
4672 {
4673         struct block_group_record *rec;
4674
4675         rec = container_of(cache, struct block_group_record, cache);
4676         list_del_init(&rec->list);
4677         free(rec);
4678 }
4679
4680 void free_block_group_tree(struct block_group_tree *tree)
4681 {
4682         cache_tree_free_extents(&tree->tree, free_block_group_record);
4683 }
4684
4685 int insert_device_extent_record(struct device_extent_tree *tree,
4686                                 struct device_extent_record *de_rec)
4687 {
4688         int ret;
4689
4690         /*
4691          * Device extent is a bit different from the other extents, because
4692          * the extents which belong to the different devices may have the
4693          * same start and size, so we need use the special extent cache
4694          * search/insert functions.
4695          */
4696         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4697         if (ret)
4698                 return ret;
4699
4700         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4701         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4702         return 0;
4703 }
4704
4705 static void free_device_extent_record(struct cache_extent *cache)
4706 {
4707         struct device_extent_record *rec;
4708
4709         rec = container_of(cache, struct device_extent_record, cache);
4710         if (!list_empty(&rec->chunk_list))
4711                 list_del_init(&rec->chunk_list);
4712         if (!list_empty(&rec->device_list))
4713                 list_del_init(&rec->device_list);
4714         free(rec);
4715 }
4716
4717 void free_device_extent_tree(struct device_extent_tree *tree)
4718 {
4719         cache_tree_free_extents(&tree->tree, free_device_extent_record);
4720 }
4721
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Record the backref described by the v0 extent ref item at @slot of @leaf.
 *
 * The item key supplies the extent bytenr (objectid) and the parent (offset).
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref, everything else as a data backref carrying the ref count
 * stored in the item.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref_item;
	struct btrfs_key item_key;

	btrfs_item_key_to_cpu(leaf, &item_key, slot);
	ref_item = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref_item) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, item_key.objectid,
					item_key.offset, 0, 0);

	return add_data_backref(extent_cache, item_key.objectid,
				item_key.offset, 0, 0, 0,
				btrfs_ref_count_v0(leaf, ref_item), 0, 0);
}
#endif
4742
4743 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4744                                             struct btrfs_key *key,
4745                                             int slot)
4746 {
4747         struct btrfs_chunk *ptr;
4748         struct chunk_record *rec;
4749         int num_stripes, i;
4750
4751         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4752         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4753
4754         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4755         if (!rec) {
4756                 fprintf(stderr, "memory allocation failed\n");
4757                 exit(-1);
4758         }
4759
4760         INIT_LIST_HEAD(&rec->list);
4761         INIT_LIST_HEAD(&rec->dextents);
4762         rec->bg_rec = NULL;
4763
4764         rec->cache.start = key->offset;
4765         rec->cache.size = btrfs_chunk_length(leaf, ptr);
4766
4767         rec->generation = btrfs_header_generation(leaf);
4768
4769         rec->objectid = key->objectid;
4770         rec->type = key->type;
4771         rec->offset = key->offset;
4772
4773         rec->length = rec->cache.size;
4774         rec->owner = btrfs_chunk_owner(leaf, ptr);
4775         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4776         rec->type_flags = btrfs_chunk_type(leaf, ptr);
4777         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4778         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4779         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4780         rec->num_stripes = num_stripes;
4781         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4782
4783         for (i = 0; i < rec->num_stripes; ++i) {
4784                 rec->stripes[i].devid =
4785                         btrfs_stripe_devid_nr(leaf, ptr, i);
4786                 rec->stripes[i].offset =
4787                         btrfs_stripe_offset_nr(leaf, ptr, i);
4788                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4789                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
4790                                 BTRFS_UUID_SIZE);
4791         }
4792
4793         return rec;
4794 }
4795
4796 static int process_chunk_item(struct cache_tree *chunk_cache,
4797                               struct btrfs_key *key, struct extent_buffer *eb,
4798                               int slot)
4799 {
4800         struct chunk_record *rec;
4801         struct btrfs_chunk *chunk;
4802         int ret = 0;
4803
4804         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
4805         /*
4806          * Do extra check for this chunk item,
4807          *
4808          * It's still possible one can craft a leaf with CHUNK_ITEM, with
4809          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
4810          * and owner<->key_type check.
4811          */
4812         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
4813                                       key->offset);
4814         if (ret < 0) {
4815                 error("chunk(%llu, %llu) is not valid, ignore it",
4816                       key->offset, btrfs_chunk_length(eb, chunk));
4817                 return 0;
4818         }
4819         rec = btrfs_new_chunk_record(eb, key, slot);
4820         ret = insert_cache_extent(chunk_cache, &rec->cache);
4821         if (ret) {
4822                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
4823                         rec->offset, rec->length);
4824                 free(rec);
4825         }
4826
4827         return ret;
4828 }
4829
4830 static int process_device_item(struct rb_root *dev_cache,
4831                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
4832 {
4833         struct btrfs_dev_item *ptr;
4834         struct device_record *rec;
4835         int ret = 0;
4836
4837         ptr = btrfs_item_ptr(eb,
4838                 slot, struct btrfs_dev_item);
4839
4840         rec = malloc(sizeof(*rec));
4841         if (!rec) {
4842                 fprintf(stderr, "memory allocation failed\n");
4843                 return -ENOMEM;
4844         }
4845
4846         rec->devid = key->offset;
4847         rec->generation = btrfs_header_generation(eb);
4848
4849         rec->objectid = key->objectid;
4850         rec->type = key->type;
4851         rec->offset = key->offset;
4852
4853         rec->devid = btrfs_device_id(eb, ptr);
4854         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
4855         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
4856
4857         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
4858         if (ret) {
4859                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
4860                 free(rec);
4861         }
4862
4863         return ret;
4864 }
4865
4866 struct block_group_record *
4867 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
4868                              int slot)
4869 {
4870         struct btrfs_block_group_item *ptr;
4871         struct block_group_record *rec;
4872
4873         rec = calloc(1, sizeof(*rec));
4874         if (!rec) {
4875                 fprintf(stderr, "memory allocation failed\n");
4876                 exit(-1);
4877         }
4878
4879         rec->cache.start = key->objectid;
4880         rec->cache.size = key->offset;
4881
4882         rec->generation = btrfs_header_generation(leaf);
4883
4884         rec->objectid = key->objectid;
4885         rec->type = key->type;
4886         rec->offset = key->offset;
4887
4888         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
4889         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
4890
4891         INIT_LIST_HEAD(&rec->list);
4892
4893         return rec;
4894 }
4895
4896 static int process_block_group_item(struct block_group_tree *block_group_cache,
4897                                     struct btrfs_key *key,
4898                                     struct extent_buffer *eb, int slot)
4899 {
4900         struct block_group_record *rec;
4901         int ret = 0;
4902
4903         rec = btrfs_new_block_group_record(eb, key, slot);
4904         ret = insert_block_group_record(block_group_cache, rec);
4905         if (ret) {
4906                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
4907                         rec->objectid, rec->offset);
4908                 free(rec);
4909         }
4910
4911         return ret;
4912 }
4913
4914 struct device_extent_record *
4915 btrfs_new_device_extent_record(struct extent_buffer *leaf,
4916                                struct btrfs_key *key, int slot)
4917 {
4918         struct device_extent_record *rec;
4919         struct btrfs_dev_extent *ptr;
4920
4921         rec = calloc(1, sizeof(*rec));
4922         if (!rec) {
4923                 fprintf(stderr, "memory allocation failed\n");
4924                 exit(-1);
4925         }
4926
4927         rec->cache.objectid = key->objectid;
4928         rec->cache.start = key->offset;
4929
4930         rec->generation = btrfs_header_generation(leaf);
4931
4932         rec->objectid = key->objectid;
4933         rec->type = key->type;
4934         rec->offset = key->offset;
4935
4936         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
4937         rec->chunk_objecteid =
4938                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
4939         rec->chunk_offset =
4940                 btrfs_dev_extent_chunk_offset(leaf, ptr);
4941         rec->length = btrfs_dev_extent_length(leaf, ptr);
4942         rec->cache.size = rec->length;
4943
4944         INIT_LIST_HEAD(&rec->chunk_list);
4945         INIT_LIST_HEAD(&rec->device_list);
4946
4947         return rec;
4948 }
4949
4950 static int
4951 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
4952                            struct btrfs_key *key, struct extent_buffer *eb,
4953                            int slot)
4954 {
4955         struct device_extent_record *rec;
4956         int ret;
4957
4958         rec = btrfs_new_device_extent_record(eb, key, slot);
4959         ret = insert_device_extent_record(dev_extent_cache, rec);
4960         if (ret) {
4961                 fprintf(stderr,
4962                         "Device extent[%llu, %llu, %llu] existed.\n",
4963                         rec->objectid, rec->offset, rec->length);
4964                 free(rec);
4965         }
4966
4967         return ret;
4968 }
4969
4970 static int process_extent_item(struct btrfs_root *root,
4971                                struct cache_tree *extent_cache,
4972                                struct extent_buffer *eb, int slot)
4973 {
4974         struct btrfs_extent_item *ei;
4975         struct btrfs_extent_inline_ref *iref;
4976         struct btrfs_extent_data_ref *dref;
4977         struct btrfs_shared_data_ref *sref;
4978         struct btrfs_key key;
4979         struct extent_record tmpl;
4980         unsigned long end;
4981         unsigned long ptr;
4982         int ret;
4983         int type;
4984         u32 item_size = btrfs_item_size_nr(eb, slot);
4985         u64 refs = 0;
4986         u64 offset;
4987         u64 num_bytes;
4988         int metadata = 0;
4989
4990         btrfs_item_key_to_cpu(eb, &key, slot);
4991
4992         if (key.type == BTRFS_METADATA_ITEM_KEY) {
4993                 metadata = 1;
4994                 num_bytes = root->fs_info->nodesize;
4995         } else {
4996                 num_bytes = key.offset;
4997         }
4998
4999         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
5000                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5001                       key.objectid, root->fs_info->sectorsize);
5002                 return -EIO;
5003         }
5004         if (item_size < sizeof(*ei)) {
5005 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5006                 struct btrfs_extent_item_v0 *ei0;
5007
5008                 if (item_size != sizeof(*ei0)) {
5009                         error(
5010         "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
5011                                 key.objectid, key.type, key.offset,
5012                                 btrfs_header_bytenr(eb), slot);
5013                         BUG();
5014                 }
5015                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5016                 refs = btrfs_extent_refs_v0(eb, ei0);
5017 #else
5018                 BUG();
5019 #endif
5020                 memset(&tmpl, 0, sizeof(tmpl));
5021                 tmpl.start = key.objectid;
5022                 tmpl.nr = num_bytes;
5023                 tmpl.extent_item_refs = refs;
5024                 tmpl.metadata = metadata;
5025                 tmpl.found_rec = 1;
5026                 tmpl.max_size = num_bytes;
5027
5028                 return add_extent_rec(extent_cache, &tmpl);
5029         }
5030
5031         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5032         refs = btrfs_extent_refs(eb, ei);
5033         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5034                 metadata = 1;
5035         else
5036                 metadata = 0;
5037         if (metadata && num_bytes != root->fs_info->nodesize) {
5038                 error("ignore invalid metadata extent, length %llu does not equal to %u",
5039                       num_bytes, root->fs_info->nodesize);
5040                 return -EIO;
5041         }
5042         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
5043                 error("ignore invalid data extent, length %llu is not aligned to %u",
5044                       num_bytes, root->fs_info->sectorsize);
5045                 return -EIO;
5046         }
5047
5048         memset(&tmpl, 0, sizeof(tmpl));
5049         tmpl.start = key.objectid;
5050         tmpl.nr = num_bytes;
5051         tmpl.extent_item_refs = refs;
5052         tmpl.metadata = metadata;
5053         tmpl.found_rec = 1;
5054         tmpl.max_size = num_bytes;
5055         add_extent_rec(extent_cache, &tmpl);
5056
5057         ptr = (unsigned long)(ei + 1);
5058         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5059             key.type == BTRFS_EXTENT_ITEM_KEY)
5060                 ptr += sizeof(struct btrfs_tree_block_info);
5061
5062         end = (unsigned long)ei + item_size;
5063         while (ptr < end) {
5064                 iref = (struct btrfs_extent_inline_ref *)ptr;
5065                 type = btrfs_extent_inline_ref_type(eb, iref);
5066                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5067                 switch (type) {
5068                 case BTRFS_TREE_BLOCK_REF_KEY:
5069                         ret = add_tree_backref(extent_cache, key.objectid,
5070                                         0, offset, 0);
5071                         if (ret < 0)
5072                                 error(
5073                         "add_tree_backref failed (extent items tree block): %s",
5074                                       strerror(-ret));
5075                         break;
5076                 case BTRFS_SHARED_BLOCK_REF_KEY:
5077                         ret = add_tree_backref(extent_cache, key.objectid,
5078                                         offset, 0, 0);
5079                         if (ret < 0)
5080                                 error(
5081                         "add_tree_backref failed (extent items shared block): %s",
5082                                       strerror(-ret));
5083                         break;
5084                 case BTRFS_EXTENT_DATA_REF_KEY:
5085                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5086                         add_data_backref(extent_cache, key.objectid, 0,
5087                                         btrfs_extent_data_ref_root(eb, dref),
5088                                         btrfs_extent_data_ref_objectid(eb,
5089                                                                        dref),
5090                                         btrfs_extent_data_ref_offset(eb, dref),
5091                                         btrfs_extent_data_ref_count(eb, dref),
5092                                         0, num_bytes);
5093                         break;
5094                 case BTRFS_SHARED_DATA_REF_KEY:
5095                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5096                         add_data_backref(extent_cache, key.objectid, offset,
5097                                         0, 0, 0,
5098                                         btrfs_shared_data_ref_count(eb, sref),
5099                                         0, num_bytes);
5100                         break;
5101                 default:
5102                         fprintf(stderr,
5103                                 "corrupt extent record: key [%llu,%u,%llu]\n",
5104                                 key.objectid, key.type, num_bytes);
5105                         goto out;
5106                 }
5107                 ptr += btrfs_extent_inline_ref_size(type);
5108         }
5109         WARN_ON(ptr > end);
5110 out:
5111         return 0;
5112 }
5113
5114 static int check_cache_range(struct btrfs_root *root,
5115                              struct btrfs_block_group_cache *cache,
5116                              u64 offset, u64 bytes)
5117 {
5118         struct btrfs_free_space *entry;
5119         u64 *logical;
5120         u64 bytenr;
5121         int stripe_len;
5122         int i, nr, ret;
5123
5124         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5125                 bytenr = btrfs_sb_offset(i);
5126                 ret = btrfs_rmap_block(root->fs_info,
5127                                        cache->key.objectid, bytenr, 0,
5128                                        &logical, &nr, &stripe_len);
5129                 if (ret)
5130                         return ret;
5131
5132                 while (nr--) {
5133                         if (logical[nr] + stripe_len <= offset)
5134                                 continue;
5135                         if (offset + bytes <= logical[nr])
5136                                 continue;
5137                         if (logical[nr] == offset) {
5138                                 if (stripe_len >= bytes) {
5139                                         free(logical);
5140                                         return 0;
5141                                 }
5142                                 bytes -= stripe_len;
5143                                 offset += stripe_len;
5144                         } else if (logical[nr] < offset) {
5145                                 if (logical[nr] + stripe_len >=
5146                                     offset + bytes) {
5147                                         free(logical);
5148                                         return 0;
5149                                 }
5150                                 bytes = (offset + bytes) -
5151                                         (logical[nr] + stripe_len);
5152                                 offset = logical[nr] + stripe_len;
5153                         } else {
5154                                 /*
5155                                  * Could be tricky, the super may land in the
5156                                  * middle of the area we're checking.  First
5157                                  * check the easiest case, it's at the end.
5158                                  */
5159                                 if (logical[nr] + stripe_len >=
5160                                     bytes + offset) {
5161                                         bytes = logical[nr] - offset;
5162                                         continue;
5163                                 }
5164
5165                                 /* Check the left side */
5166                                 ret = check_cache_range(root, cache,
5167                                                         offset,
5168                                                         logical[nr] - offset);
5169                                 if (ret) {
5170                                         free(logical);
5171                                         return ret;
5172                                 }
5173
5174                                 /* Now we continue with the right side */
5175                                 bytes = (offset + bytes) -
5176                                         (logical[nr] + stripe_len);
5177                                 offset = logical[nr] + stripe_len;
5178                         }
5179                 }
5180
5181                 free(logical);
5182         }
5183
5184         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5185         if (!entry) {
5186                 fprintf(stderr, "there is no free space entry for %llu-%llu\n",
5187                         offset, offset+bytes);
5188                 return -EINVAL;
5189         }
5190
5191         if (entry->offset != offset) {
5192                 fprintf(stderr, "wanted offset %llu, found %llu\n", offset,
5193                         entry->offset);
5194                 return -EINVAL;
5195         }
5196
5197         if (entry->bytes != bytes) {
5198                 fprintf(stderr, "wanted bytes %llu, found %llu for off %llu\n",
5199                         bytes, entry->bytes, offset);
5200                 return -EINVAL;
5201         }
5202
5203         unlink_free_space(cache->free_space_ctl, entry);
5204         free(entry);
5205         return 0;
5206 }
5207
5208 static int verify_space_cache(struct btrfs_root *root,
5209                               struct btrfs_block_group_cache *cache)
5210 {
5211         struct btrfs_path path;
5212         struct extent_buffer *leaf;
5213         struct btrfs_key key;
5214         u64 last;
5215         int ret = 0;
5216
5217         root = root->fs_info->extent_root;
5218
5219         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5220
5221         btrfs_init_path(&path);
5222         key.objectid = last;
5223         key.offset = 0;
5224         key.type = BTRFS_EXTENT_ITEM_KEY;
5225         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5226         if (ret < 0)
5227                 goto out;
5228         ret = 0;
5229         while (1) {
5230                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
5231                         ret = btrfs_next_leaf(root, &path);
5232                         if (ret < 0)
5233                                 goto out;
5234                         if (ret > 0) {
5235                                 ret = 0;
5236                                 break;
5237                         }
5238                 }
5239                 leaf = path.nodes[0];
5240                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
5241                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5242                         break;
5243                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5244                     key.type != BTRFS_METADATA_ITEM_KEY) {
5245                         path.slots[0]++;
5246                         continue;
5247                 }
5248
5249                 if (last == key.objectid) {
5250                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5251                                 last = key.objectid + key.offset;
5252                         else
5253                                 last = key.objectid + root->fs_info->nodesize;
5254                         path.slots[0]++;
5255                         continue;
5256                 }
5257
5258                 ret = check_cache_range(root, cache, last,
5259                                         key.objectid - last);
5260                 if (ret)
5261                         break;
5262                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5263                         last = key.objectid + key.offset;
5264                 else
5265                         last = key.objectid + root->fs_info->nodesize;
5266                 path.slots[0]++;
5267         }
5268
5269         if (last < cache->key.objectid + cache->key.offset)
5270                 ret = check_cache_range(root, cache, last,
5271                                         cache->key.objectid +
5272                                         cache->key.offset - last);
5273
5274 out:
5275         btrfs_release_path(&path);
5276
5277         if (!ret &&
5278             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5279                 fprintf(stderr, "There are still entries left in the space "
5280                         "cache\n");
5281                 ret = -EINVAL;
5282         }
5283
5284         return ret;
5285 }
5286
5287 static int check_space_cache(struct btrfs_root *root)
5288 {
5289         struct btrfs_block_group_cache *cache;
5290         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5291         int ret;
5292         int error = 0;
5293
5294         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5295             btrfs_super_generation(root->fs_info->super_copy) !=
5296             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5297                 printf("cache and super generation don't match, space cache "
5298                        "will be invalidated\n");
5299                 return 0;
5300         }
5301
5302         if (ctx.progress_enabled) {
5303                 ctx.tp = TASK_FREE_SPACE;
5304                 task_start(ctx.info);
5305         }
5306
5307         while (1) {
5308                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5309                 if (!cache)
5310                         break;
5311
5312                 start = cache->key.objectid + cache->key.offset;
5313                 if (!cache->free_space_ctl) {
5314                         if (btrfs_init_free_space_ctl(cache,
5315                                                 root->fs_info->sectorsize)) {
5316                                 ret = -ENOMEM;
5317                                 break;
5318                         }
5319                 } else {
5320                         btrfs_remove_free_space_cache(cache);
5321                 }
5322
5323                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
5324                         ret = exclude_super_stripes(root, cache);
5325                         if (ret) {
5326                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5327                                         strerror(-ret));
5328                                 error++;
5329                                 continue;
5330                         }
5331                         ret = load_free_space_tree(root->fs_info, cache);
5332                         free_excluded_extents(root, cache);
5333                         if (ret < 0) {
5334                                 fprintf(stderr, "could not load free space tree: %s\n",
5335                                         strerror(-ret));
5336                                 error++;
5337                                 continue;
5338                         }
5339                         error += ret;
5340                 } else {
5341                         ret = load_free_space_cache(root->fs_info, cache);
5342                         if (ret < 0)
5343                                 error++;
5344                         if (ret <= 0)
5345                                 continue;
5346                 }
5347
5348                 ret = verify_space_cache(root, cache);
5349                 if (ret) {
5350                         fprintf(stderr, "cache appears valid but isn't %llu\n",
5351                                 cache->key.objectid);
5352                         error++;
5353                 }
5354         }
5355
5356         task_stop(ctx.info);
5357
5358         return error ? -EINVAL : 0;
5359 }
5360
5361 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5362                         u64 num_bytes, unsigned long leaf_offset,
5363                         struct extent_buffer *eb)
5364 {
5365         struct btrfs_fs_info *fs_info = root->fs_info;
5366         u64 offset = 0;
5367         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
5368         char *data;
5369         unsigned long csum_offset;
5370         u32 csum;
5371         u32 csum_expected;
5372         u64 read_len;
5373         u64 data_checked = 0;
5374         u64 tmp;
5375         int ret = 0;
5376         int mirror;
5377         int num_copies;
5378
5379         if (num_bytes % fs_info->sectorsize)
5380                 return -EINVAL;
5381
5382         data = malloc(num_bytes);
5383         if (!data)
5384                 return -ENOMEM;
5385
5386         while (offset < num_bytes) {
5387                 mirror = 0;
5388 again:
5389                 read_len = num_bytes - offset;
5390                 /* read as much space once a time */
5391                 ret = read_extent_data(fs_info, data + offset,
5392                                 bytenr + offset, &read_len, mirror);
5393                 if (ret)
5394                         goto out;
5395                 data_checked = 0;
5396                 /* verify every 4k data's checksum */
5397                 while (data_checked < read_len) {
5398                         csum = ~(u32)0;
5399                         tmp = offset + data_checked;
5400
5401                         csum = btrfs_csum_data((char *)data + tmp,
5402                                                csum, fs_info->sectorsize);
5403                         btrfs_csum_final(csum, (u8 *)&csum);
5404
5405                         csum_offset = leaf_offset +
5406                                  tmp / fs_info->sectorsize * csum_size;
5407                         read_extent_buffer(eb, (char *)&csum_expected,
5408                                            csum_offset, csum_size);
5409                         /* try another mirror */
5410                         if (csum != csum_expected) {
5411                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5412                                                 mirror, bytenr + tmp,
5413                                                 csum, csum_expected);
5414                                 num_copies = btrfs_num_copies(root->fs_info,
5415                                                 bytenr, num_bytes);
5416                                 if (mirror < num_copies - 1) {
5417                                         mirror += 1;
5418                                         goto again;
5419                                 }
5420                         }
5421                         data_checked += fs_info->sectorsize;
5422                 }
5423                 offset += read_len;
5424         }
5425 out:
5426         free(data);
5427         return ret;
5428 }
5429
5430 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5431                                u64 num_bytes)
5432 {
5433         struct btrfs_path path;
5434         struct extent_buffer *leaf;
5435         struct btrfs_key key;
5436         int ret;
5437
5438         btrfs_init_path(&path);
5439         key.objectid = bytenr;
5440         key.type = BTRFS_EXTENT_ITEM_KEY;
5441         key.offset = (u64)-1;
5442
5443 again:
5444         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
5445                                 0, 0);
5446         if (ret < 0) {
5447                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5448                 btrfs_release_path(&path);
5449                 return ret;
5450         } else if (ret) {
5451                 if (path.slots[0] > 0) {
5452                         path.slots[0]--;
5453                 } else {
5454                         ret = btrfs_prev_leaf(root, &path);
5455                         if (ret < 0) {
5456                                 goto out;
5457                         } else if (ret > 0) {
5458                                 ret = 0;
5459                                 goto out;
5460                         }
5461                 }
5462         }
5463
5464         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
5465
5466         /*
5467          * Block group items come before extent items if they have the same
5468          * bytenr, so walk back one more just in case.  Dear future traveller,
5469          * first congrats on mastering time travel.  Now if it's not too much
5470          * trouble could you go back to 2006 and tell Chris to make the
5471          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5472          * EXTENT_ITEM_KEY please?
5473          */
5474         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5475                 if (path.slots[0] > 0) {
5476                         path.slots[0]--;
5477                 } else {
5478                         ret = btrfs_prev_leaf(root, &path);
5479                         if (ret < 0) {
5480                                 goto out;
5481                         } else if (ret > 0) {
5482                                 ret = 0;
5483                                 goto out;
5484                         }
5485                 }
5486                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
5487         }
5488
5489         while (num_bytes) {
5490                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
5491                         ret = btrfs_next_leaf(root, &path);
5492                         if (ret < 0) {
5493                                 fprintf(stderr, "Error going to next leaf "
5494                                         "%d\n", ret);
5495                                 btrfs_release_path(&path);
5496                                 return ret;
5497                         } else if (ret) {
5498                                 break;
5499                         }
5500                 }
5501                 leaf = path.nodes[0];
5502                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
5503                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5504                         path.slots[0]++;
5505                         continue;
5506                 }
5507                 if (key.objectid + key.offset < bytenr) {
5508                         path.slots[0]++;
5509                         continue;
5510                 }
5511                 if (key.objectid > bytenr + num_bytes)
5512                         break;
5513
5514                 if (key.objectid == bytenr) {
5515                         if (key.offset >= num_bytes) {
5516                                 num_bytes = 0;
5517                                 break;
5518                         }
5519                         num_bytes -= key.offset;
5520                         bytenr += key.offset;
5521                 } else if (key.objectid < bytenr) {
5522                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5523                                 num_bytes = 0;
5524                                 break;
5525                         }
5526                         num_bytes = (bytenr + num_bytes) -
5527                                 (key.objectid + key.offset);
5528                         bytenr = key.objectid + key.offset;
5529                 } else {
5530                         if (key.objectid + key.offset < bytenr + num_bytes) {
5531                                 u64 new_start = key.objectid + key.offset;
5532                                 u64 new_bytes = bytenr + num_bytes - new_start;
5533
5534                                 /*
5535                                  * Weird case, the extent is in the middle of
5536                                  * our range, we'll have to search one side
5537                                  * and then the other.  Not sure if this happens
5538                                  * in real life, but no harm in coding it up
5539                                  * anyway just in case.
5540                                  */
5541                                 btrfs_release_path(&path);
5542                                 ret = check_extent_exists(root, new_start,
5543                                                           new_bytes);
5544                                 if (ret) {
5545                                         fprintf(stderr, "Right section didn't "
5546                                                 "have a record\n");
5547                                         break;
5548                                 }
5549                                 num_bytes = key.objectid - bytenr;
5550                                 goto again;
5551                         }
5552                         num_bytes = key.objectid - bytenr;
5553                 }
5554                 path.slots[0]++;
5555         }
5556         ret = 0;
5557
5558 out:
5559         if (num_bytes && !ret) {
5560                 fprintf(stderr,
5561                         "there are no extents for csum range %llu-%llu\n",
5562                         bytenr, bytenr+num_bytes);
5563                 ret = 1;
5564         }
5565
5566         btrfs_release_path(&path);
5567         return ret;
5568 }
5569
5570 static int check_csums(struct btrfs_root *root)
5571 {
5572         struct btrfs_path path;
5573         struct extent_buffer *leaf;
5574         struct btrfs_key key;
5575         u64 offset = 0, num_bytes = 0;
5576         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5577         int errors = 0;
5578         int ret;
5579         u64 data_len;
5580         unsigned long leaf_offset;
5581
5582         root = root->fs_info->csum_root;
5583         if (!extent_buffer_uptodate(root->node)) {
5584                 fprintf(stderr, "No valid csum tree found\n");
5585                 return -ENOENT;
5586         }
5587
5588         btrfs_init_path(&path);
5589         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5590         key.type = BTRFS_EXTENT_CSUM_KEY;
5591         key.offset = 0;
5592         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5593         if (ret < 0) {
5594                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5595                 btrfs_release_path(&path);
5596                 return ret;
5597         }
5598
5599         if (ret > 0 && path.slots[0])
5600                 path.slots[0]--;
5601         ret = 0;
5602
5603         while (1) {
5604                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
5605                         ret = btrfs_next_leaf(root, &path);
5606                         if (ret < 0) {
5607                                 fprintf(stderr, "Error going to next leaf "
5608                                         "%d\n", ret);
5609                                 break;
5610                         }
5611                         if (ret)
5612                                 break;
5613                 }
5614                 leaf = path.nodes[0];
5615
5616                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
5617                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5618                         path.slots[0]++;
5619                         continue;
5620                 }
5621
5622                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
5623                               csum_size) * root->fs_info->sectorsize;
5624                 if (!check_data_csum)
5625                         goto skip_csum_check;
5626                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
5627                 ret = check_extent_csums(root, key.offset, data_len,
5628                                          leaf_offset, leaf);
5629                 if (ret)
5630                         break;
5631 skip_csum_check:
5632                 if (!num_bytes) {
5633                         offset = key.offset;
5634                 } else if (key.offset != offset + num_bytes) {
5635                         ret = check_extent_exists(root, offset, num_bytes);
5636                         if (ret) {
5637                                 fprintf(stderr,
5638                 "csum exists for %llu-%llu but there is no extent record\n",
5639                                         offset, offset+num_bytes);
5640                                 errors++;
5641                         }
5642                         offset = key.offset;
5643                         num_bytes = 0;
5644                 }
5645                 num_bytes += data_len;
5646                 path.slots[0]++;
5647         }
5648
5649         btrfs_release_path(&path);
5650         return errors;
5651 }
5652
5653 static int is_dropped_key(struct btrfs_key *key,
5654                           struct btrfs_key *drop_key)
5655 {
5656         if (key->objectid < drop_key->objectid)
5657                 return 1;
5658         else if (key->objectid == drop_key->objectid) {
5659                 if (key->type < drop_key->type)
5660                         return 1;
5661                 else if (key->type == drop_key->type) {
5662                         if (key->offset < drop_key->offset)
5663                                 return 1;
5664                 }
5665         }
5666         return 0;
5667 }
5668
5669 /*
5670  * Here are the rules for FULL_BACKREF.
5671  *
5672  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5673  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5674  *      FULL_BACKREF set.
5675  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
5676  *    if it happened after the relocation occurred since we'll have dropped the
5677  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5678  *    have no real way to know for sure.
5679  *
5680  * We process the blocks one root at a time, and we start from the lowest root
5681  * objectid and go to the highest.  So we can just lookup the owner backref for
5682  * the record and if we don't find it then we know it doesn't exist and we have
5683  * a FULL BACKREF.
5684  *
5685  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5686  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5687  * be set or not and then we can check later once we've gathered all the refs.
5688  */
5689 static int calc_extent_flag(struct cache_tree *extent_cache,
5690                            struct extent_buffer *buf,
5691                            struct root_item_record *ri,
5692                            u64 *flags)
5693 {
5694         struct extent_record *rec;
5695         struct cache_extent *cache;
5696         struct tree_backref *tback;
5697         u64 owner = 0;
5698
5699         cache = lookup_cache_extent(extent_cache, buf->start, 1);
5700         /* we have added this extent before */
5701         if (!cache)
5702                 return -ENOENT;
5703
5704         rec = container_of(cache, struct extent_record, cache);
5705
5706         /*
5707          * Except file/reloc tree, we can not have
5708          * FULL BACKREF MODE
5709          */
5710         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5711                 goto normal;
5712         /*
5713          * root node
5714          */
5715         if (buf->start == ri->bytenr)
5716                 goto normal;
5717
5718         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5719                 goto full_backref;
5720
5721         owner = btrfs_header_owner(buf);
5722         if (owner == ri->objectid)
5723                 goto normal;
5724
5725         tback = find_tree_backref(rec, 0, owner);
5726         if (!tback)
5727                 goto full_backref;
5728 normal:
5729         *flags = 0;
5730         if (rec->flag_block_full_backref != FLAG_UNSET &&
5731             rec->flag_block_full_backref != 0)
5732                 rec->bad_full_backref = 1;
5733         return 0;
5734 full_backref:
5735         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5736         if (rec->flag_block_full_backref != FLAG_UNSET &&
5737             rec->flag_block_full_backref != 1)
5738                 rec->bad_full_backref = 1;
5739         return 0;
5740 }
5741
5742 static void report_mismatch_key_root(u8 key_type, u64 rootid)
5743 {
5744         fprintf(stderr, "Invalid key type(");
5745         print_key_type(stderr, 0, key_type);
5746         fprintf(stderr, ") found in root(");
5747         print_objectid(stderr, rootid, 0);
5748         fprintf(stderr, ")\n");
5749 }
5750
5751 /*
5752  * Check if the key is valid with its extent buffer.
5753  *
5754  * This is a early check in case invalid key exists in a extent buffer
5755  * This is not comprehensive yet, but should prevent wrong key/item passed
5756  * further
5757  */
5758 static int check_type_with_root(u64 rootid, u8 key_type)
5759 {
5760         switch (key_type) {
5761         /* Only valid in chunk tree */
5762         case BTRFS_DEV_ITEM_KEY:
5763         case BTRFS_CHUNK_ITEM_KEY:
5764                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
5765                         goto err;
5766                 break;
5767         /* valid in csum and log tree */
5768         case BTRFS_CSUM_TREE_OBJECTID:
5769                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
5770                       is_fstree(rootid)))
5771                         goto err;
5772                 break;
5773         case BTRFS_EXTENT_ITEM_KEY:
5774         case BTRFS_METADATA_ITEM_KEY:
5775         case BTRFS_BLOCK_GROUP_ITEM_KEY:
5776                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
5777                         goto err;
5778                 break;
5779         case BTRFS_ROOT_ITEM_KEY:
5780                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
5781                         goto err;
5782                 break;
5783         case BTRFS_DEV_EXTENT_KEY:
5784                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
5785                         goto err;
5786                 break;
5787         }
5788         return 0;
5789 err:
5790         report_mismatch_key_root(key_type, rootid);
5791         return -EINVAL;
5792 }
5793
5794 static int run_next_block(struct btrfs_root *root,
5795                           struct block_info *bits,
5796                           int bits_nr,
5797                           u64 *last,
5798                           struct cache_tree *pending,
5799                           struct cache_tree *seen,
5800                           struct cache_tree *reada,
5801                           struct cache_tree *nodes,
5802                           struct cache_tree *extent_cache,
5803                           struct cache_tree *chunk_cache,
5804                           struct rb_root *dev_cache,
5805                           struct block_group_tree *block_group_cache,
5806                           struct device_extent_tree *dev_extent_cache,
5807                           struct root_item_record *ri)
5808 {
5809         struct btrfs_fs_info *fs_info = root->fs_info;
5810         struct extent_buffer *buf;
5811         struct extent_record *rec = NULL;
5812         u64 bytenr;
5813         u32 size;
5814         u64 parent;
5815         u64 owner;
5816         u64 flags;
5817         u64 ptr;
5818         u64 gen = 0;
5819         int ret = 0;
5820         int i;
5821         int nritems;
5822         struct btrfs_key key;
5823         struct cache_extent *cache;
5824         int reada_bits;
5825
5826         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5827                                     bits_nr, &reada_bits);
5828         if (nritems == 0)
5829                 return 1;
5830
5831         if (!reada_bits) {
5832                 for (i = 0; i < nritems; i++) {
5833                         ret = add_cache_extent(reada, bits[i].start,
5834                                                bits[i].size);
5835                         if (ret == -EEXIST)
5836                                 continue;
5837
5838                         /* fixme, get the parent transid */
5839                         readahead_tree_block(fs_info, bits[i].start, 0);
5840                 }
5841         }
5842         *last = bits[0].start;
5843         bytenr = bits[0].start;
5844         size = bits[0].size;
5845
5846         cache = lookup_cache_extent(pending, bytenr, size);
5847         if (cache) {
5848                 remove_cache_extent(pending, cache);
5849                 free(cache);
5850         }
5851         cache = lookup_cache_extent(reada, bytenr, size);
5852         if (cache) {
5853                 remove_cache_extent(reada, cache);
5854                 free(cache);
5855         }
5856         cache = lookup_cache_extent(nodes, bytenr, size);
5857         if (cache) {
5858                 remove_cache_extent(nodes, cache);
5859                 free(cache);
5860         }
5861         cache = lookup_cache_extent(extent_cache, bytenr, size);
5862         if (cache) {
5863                 rec = container_of(cache, struct extent_record, cache);
5864                 gen = rec->parent_generation;
5865         }
5866
5867         /* fixme, get the real parent transid */
5868         buf = read_tree_block(root->fs_info, bytenr, gen);
5869         if (!extent_buffer_uptodate(buf)) {
5870                 record_bad_block_io(root->fs_info,
5871                                     extent_cache, bytenr, size);
5872                 goto out;
5873         }
5874
5875         nritems = btrfs_header_nritems(buf);
5876
5877         flags = 0;
5878         if (!init_extent_tree) {
5879                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5880                                        btrfs_header_level(buf), 1, NULL,
5881                                        &flags);
5882                 if (ret < 0) {
5883                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
5884                         if (ret < 0) {
5885                                 fprintf(stderr, "Couldn't calc extent flags\n");
5886                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5887                         }
5888                 }
5889         } else {
5890                 flags = 0;
5891                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
5892                 if (ret < 0) {
5893                         fprintf(stderr, "Couldn't calc extent flags\n");
5894                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5895                 }
5896         }
5897
5898         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5899                 if (ri != NULL &&
5900                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5901                     ri->objectid == btrfs_header_owner(buf)) {
5902                         /*
5903                          * Ok we got to this block from it's original owner and
5904                          * we have FULL_BACKREF set.  Relocation can leave
5905                          * converted blocks over so this is altogether possible,
5906                          * however it's not possible if the generation > the
5907                          * last snapshot, so check for this case.
5908                          */
5909                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5910                             btrfs_header_generation(buf) > ri->last_snapshot) {
5911                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5912                                 rec->bad_full_backref = 1;
5913                         }
5914                 }
5915         } else {
5916                 if (ri != NULL &&
5917                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
5918                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
5919                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5920                         rec->bad_full_backref = 1;
5921                 }
5922         }
5923
5924         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5925                 rec->flag_block_full_backref = 1;
5926                 parent = bytenr;
5927                 owner = 0;
5928         } else {
5929                 rec->flag_block_full_backref = 0;
5930                 parent = 0;
5931                 owner = btrfs_header_owner(buf);
5932         }
5933
5934         ret = check_block(root, extent_cache, buf, flags);
5935         if (ret)
5936                 goto out;
5937
5938         if (btrfs_is_leaf(buf)) {
5939                 btree_space_waste += btrfs_leaf_free_space(root, buf);
5940                 for (i = 0; i < nritems; i++) {
5941                         struct btrfs_file_extent_item *fi;
5942
5943                         btrfs_item_key_to_cpu(buf, &key, i);
5944                         /*
5945                          * Check key type against the leaf owner.
5946                          * Could filter quite a lot of early error if
5947                          * owner is correct
5948                          */
5949                         if (check_type_with_root(btrfs_header_owner(buf),
5950                                                  key.type)) {
5951                                 fprintf(stderr, "ignoring invalid key\n");
5952                                 continue;
5953                         }
5954                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
5955                                 process_extent_item(root, extent_cache, buf,
5956                                                     i);
5957                                 continue;
5958                         }
5959                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5960                                 process_extent_item(root, extent_cache, buf,
5961                                                     i);
5962                                 continue;
5963                         }
5964                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
5965                                 total_csum_bytes +=
5966                                         btrfs_item_size_nr(buf, i);
5967                                 continue;
5968                         }
5969                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
5970                                 process_chunk_item(chunk_cache, &key, buf, i);
5971                                 continue;
5972                         }
5973                         if (key.type == BTRFS_DEV_ITEM_KEY) {
5974                                 process_device_item(dev_cache, &key, buf, i);
5975                                 continue;
5976                         }
5977                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
5978                                 process_block_group_item(block_group_cache,
5979                                         &key, buf, i);
5980                                 continue;
5981                         }
5982                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
5983                                 process_device_extent_item(dev_extent_cache,
5984                                         &key, buf, i);
5985                                 continue;
5986
5987                         }
5988                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
5989 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5990                                 process_extent_ref_v0(extent_cache, buf, i);
5991 #else
5992                                 BUG();
5993 #endif
5994                                 continue;
5995                         }
5996
5997                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
5998                                 ret = add_tree_backref(extent_cache,
5999                                                 key.objectid, 0, key.offset, 0);
6000                                 if (ret < 0)
6001                                         error(
6002                                 "add_tree_backref failed (leaf tree block): %s",
6003                                               strerror(-ret));
6004                                 continue;
6005                         }
6006                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6007                                 ret = add_tree_backref(extent_cache,
6008                                                 key.objectid, key.offset, 0, 0);
6009                                 if (ret < 0)
6010                                         error(
6011                                 "add_tree_backref failed (leaf shared block): %s",
6012                                               strerror(-ret));
6013                                 continue;
6014                         }
6015                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6016                                 struct btrfs_extent_data_ref *ref;
6017
6018                                 ref = btrfs_item_ptr(buf, i,
6019                                                 struct btrfs_extent_data_ref);
6020                                 add_data_backref(extent_cache,
6021                                         key.objectid, 0,
6022                                         btrfs_extent_data_ref_root(buf, ref),
6023                                         btrfs_extent_data_ref_objectid(buf,
6024                                                                        ref),
6025                                         btrfs_extent_data_ref_offset(buf, ref),
6026                                         btrfs_extent_data_ref_count(buf, ref),
6027                                         0, root->fs_info->sectorsize);
6028                                 continue;
6029                         }
6030                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6031                                 struct btrfs_shared_data_ref *ref;
6032
6033                                 ref = btrfs_item_ptr(buf, i,
6034                                                 struct btrfs_shared_data_ref);
6035                                 add_data_backref(extent_cache,
6036                                         key.objectid, key.offset, 0, 0, 0,
6037                                         btrfs_shared_data_ref_count(buf, ref),
6038                                         0, root->fs_info->sectorsize);
6039                                 continue;
6040                         }
6041                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6042                                 struct bad_item *bad;
6043
6044                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6045                                         continue;
6046                                 if (!owner)
6047                                         continue;
6048                                 bad = malloc(sizeof(struct bad_item));
6049                                 if (!bad)
6050                                         continue;
6051                                 INIT_LIST_HEAD(&bad->list);
6052                                 memcpy(&bad->key, &key,
6053                                        sizeof(struct btrfs_key));
6054                                 bad->root_id = owner;
6055                                 list_add_tail(&bad->list, &delete_items);
6056                                 continue;
6057                         }
6058                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6059                                 continue;
6060                         fi = btrfs_item_ptr(buf, i,
6061                                             struct btrfs_file_extent_item);
6062                         if (btrfs_file_extent_type(buf, fi) ==
6063                             BTRFS_FILE_EXTENT_INLINE)
6064                                 continue;
6065                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6066                                 continue;
6067
6068                         data_bytes_allocated +=
6069                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6070                         if (data_bytes_allocated < root->fs_info->sectorsize)
6071                                 abort();
6072
6073                         data_bytes_referenced +=
6074                                 btrfs_file_extent_num_bytes(buf, fi);
6075                         add_data_backref(extent_cache,
6076                                 btrfs_file_extent_disk_bytenr(buf, fi),
6077                                 parent, owner, key.objectid, key.offset -
6078                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6079                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6080                 }
6081         } else {
6082                 int level;
6083                 struct btrfs_key first_key;
6084
6085                 first_key.objectid = 0;
6086
6087                 if (nritems > 0)
6088                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6089                 level = btrfs_header_level(buf);
6090                 for (i = 0; i < nritems; i++) {
6091                         struct extent_record tmpl;
6092
6093                         ptr = btrfs_node_blockptr(buf, i);
6094                         size = root->fs_info->nodesize;
6095                         btrfs_node_key_to_cpu(buf, &key, i);
6096                         if (ri != NULL) {
6097                                 if ((level == ri->drop_level)
6098                                     && is_dropped_key(&key, &ri->drop_key)) {
6099                                         continue;
6100                                 }
6101                         }
6102
6103                         memset(&tmpl, 0, sizeof(tmpl));
6104                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6105                         tmpl.parent_generation =
6106                                 btrfs_node_ptr_generation(buf, i);
6107                         tmpl.start = ptr;
6108                         tmpl.nr = size;
6109                         tmpl.refs = 1;
6110                         tmpl.metadata = 1;
6111                         tmpl.max_size = size;
6112                         ret = add_extent_rec(extent_cache, &tmpl);
6113                         if (ret < 0)
6114                                 goto out;
6115
6116                         ret = add_tree_backref(extent_cache, ptr, parent,
6117                                         owner, 1);
6118                         if (ret < 0) {
6119                                 error(
6120                                 "add_tree_backref failed (non-leaf block): %s",
6121                                       strerror(-ret));
6122                                 continue;
6123                         }
6124
6125                         if (level > 1)
6126                                 add_pending(nodes, seen, ptr, size);
6127                         else
6128                                 add_pending(pending, seen, ptr, size);
6129                 }
6130                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
6131                                       nritems) * sizeof(struct btrfs_key_ptr);
6132         }
6133         total_btree_bytes += buf->len;
6134         if (fs_root_objectid(btrfs_header_owner(buf)))
6135                 total_fs_tree_bytes += buf->len;
6136         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6137                 total_extent_tree_bytes += buf->len;
6138 out:
6139         free_extent_buffer(buf);
6140         return ret;
6141 }
6142
6143 static int add_root_to_pending(struct extent_buffer *buf,
6144                                struct cache_tree *extent_cache,
6145                                struct cache_tree *pending,
6146                                struct cache_tree *seen,
6147                                struct cache_tree *nodes,
6148                                u64 objectid)
6149 {
6150         struct extent_record tmpl;
6151         int ret;
6152
6153         if (btrfs_header_level(buf) > 0)
6154                 add_pending(nodes, seen, buf->start, buf->len);
6155         else
6156                 add_pending(pending, seen, buf->start, buf->len);
6157
6158         memset(&tmpl, 0, sizeof(tmpl));
6159         tmpl.start = buf->start;
6160         tmpl.nr = buf->len;
6161         tmpl.is_root = 1;
6162         tmpl.refs = 1;
6163         tmpl.metadata = 1;
6164         tmpl.max_size = buf->len;
6165         add_extent_rec(extent_cache, &tmpl);
6166
6167         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6168             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6169                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6170                                 0, 1);
6171         else
6172                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6173                                 1);
6174         return ret;
6175 }
6176
6177 /* as we fix the tree, we might be deleting blocks that
6178  * we're tracking for repair.  This hook makes sure we
6179  * remove any backrefs for blocks as we are fixing them.
6180  */
6181 static int free_extent_hook(struct btrfs_trans_handle *trans,
6182                             struct btrfs_root *root,
6183                             u64 bytenr, u64 num_bytes, u64 parent,
6184                             u64 root_objectid, u64 owner, u64 offset,
6185                             int refs_to_drop)
6186 {
6187         struct extent_record *rec;
6188         struct cache_extent *cache;
6189         int is_data;
6190         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6191
6192         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6193         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6194         if (!cache)
6195                 return 0;
6196
6197         rec = container_of(cache, struct extent_record, cache);
6198         if (is_data) {
6199                 struct data_backref *back;
6200
6201                 back = find_data_backref(rec, parent, root_objectid, owner,
6202                                          offset, 1, bytenr, num_bytes);
6203                 if (!back)
6204                         goto out;
6205                 if (back->node.found_ref) {
6206                         back->found_ref -= refs_to_drop;
6207                         if (rec->refs)
6208                                 rec->refs -= refs_to_drop;
6209                 }
6210                 if (back->node.found_extent_tree) {
6211                         back->num_refs -= refs_to_drop;
6212                         if (rec->extent_item_refs)
6213                                 rec->extent_item_refs -= refs_to_drop;
6214                 }
6215                 if (back->found_ref == 0)
6216                         back->node.found_ref = 0;
6217                 if (back->num_refs == 0)
6218                         back->node.found_extent_tree = 0;
6219
6220                 if (!back->node.found_extent_tree && back->node.found_ref) {
6221                         rb_erase(&back->node.node, &rec->backref_tree);
6222                         free(back);
6223                 }
6224         } else {
6225                 struct tree_backref *back;
6226
6227                 back = find_tree_backref(rec, parent, root_objectid);
6228                 if (!back)
6229                         goto out;
6230                 if (back->node.found_ref) {
6231                         if (rec->refs)
6232                                 rec->refs--;
6233                         back->node.found_ref = 0;
6234                 }
6235                 if (back->node.found_extent_tree) {
6236                         if (rec->extent_item_refs)
6237                                 rec->extent_item_refs--;
6238                         back->node.found_extent_tree = 0;
6239                 }
6240                 if (!back->node.found_extent_tree && back->node.found_ref) {
6241                         rb_erase(&back->node.node, &rec->backref_tree);
6242                         free(back);
6243                 }
6244         }
6245         maybe_free_extent_rec(extent_cache, rec);
6246 out:
6247         return 0;
6248 }
6249
6250 static int delete_extent_records(struct btrfs_trans_handle *trans,
6251                                  struct btrfs_root *root,
6252                                  struct btrfs_path *path,
6253                                  u64 bytenr)
6254 {
6255         struct btrfs_key key;
6256         struct btrfs_key found_key;
6257         struct extent_buffer *leaf;
6258         int ret;
6259         int slot;
6260
6261
6262         key.objectid = bytenr;
6263         key.type = (u8)-1;
6264         key.offset = (u64)-1;
6265
6266         while (1) {
6267                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6268                                         &key, path, 0, 1);
6269                 if (ret < 0)
6270                         break;
6271
6272                 if (ret > 0) {
6273                         ret = 0;
6274                         if (path->slots[0] == 0)
6275                                 break;
6276                         path->slots[0]--;
6277                 }
6278                 ret = 0;
6279
6280                 leaf = path->nodes[0];
6281                 slot = path->slots[0];
6282
6283                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6284                 if (found_key.objectid != bytenr)
6285                         break;
6286
6287                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6288                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6289                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6290                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6291                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6292                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6293                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6294                         btrfs_release_path(path);
6295                         if (found_key.type == 0) {
6296                                 if (found_key.offset == 0)
6297                                         break;
6298                                 key.offset = found_key.offset - 1;
6299                                 key.type = found_key.type;
6300                         }
6301                         key.type = found_key.type - 1;
6302                         key.offset = (u64)-1;
6303                         continue;
6304                 }
6305
6306                 fprintf(stderr,
6307                         "repair deleting extent record: key [%llu,%u,%llu]\n",
6308                         found_key.objectid, found_key.type, found_key.offset);
6309
6310                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6311                 if (ret)
6312                         break;
6313                 btrfs_release_path(path);
6314
6315                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6316                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6317                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6318                                 found_key.offset : root->fs_info->nodesize;
6319
6320                         ret = btrfs_update_block_group(root, bytenr,
6321                                                        bytes, 0, 0);
6322                         if (ret)
6323                                 break;
6324                 }
6325         }
6326
6327         btrfs_release_path(path);
6328         return ret;
6329 }
6330
6331 /*
6332  * for a single backref, this will allocate a new extent
6333  * and add the backref to it.
6334  */
6335 static int record_extent(struct btrfs_trans_handle *trans,
6336                          struct btrfs_fs_info *info,
6337                          struct btrfs_path *path,
6338                          struct extent_record *rec,
6339                          struct extent_backref *back,
6340                          int allocated, u64 flags)
6341 {
6342         int ret = 0;
6343         struct btrfs_root *extent_root = info->extent_root;
6344         struct extent_buffer *leaf;
6345         struct btrfs_key ins_key;
6346         struct btrfs_extent_item *ei;
6347         struct data_backref *dback;
6348         struct btrfs_tree_block_info *bi;
6349
6350         if (!back->is_data)
6351                 rec->max_size = max_t(u64, rec->max_size,
6352                                     info->nodesize);
6353
6354         if (!allocated) {
6355                 u32 item_size = sizeof(*ei);
6356
6357                 if (!back->is_data)
6358                         item_size += sizeof(*bi);
6359
6360                 ins_key.objectid = rec->start;
6361                 ins_key.offset = rec->max_size;
6362                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6363
6364                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6365                                         &ins_key, item_size);
6366                 if (ret)
6367                         goto fail;
6368
6369                 leaf = path->nodes[0];
6370                 ei = btrfs_item_ptr(leaf, path->slots[0],
6371                                     struct btrfs_extent_item);
6372
6373                 btrfs_set_extent_refs(leaf, ei, 0);
6374                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6375
6376                 if (back->is_data) {
6377                         btrfs_set_extent_flags(leaf, ei,
6378                                                BTRFS_EXTENT_FLAG_DATA);
6379                 } else {
6380                         struct btrfs_disk_key copy_key;
6381
6382                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6383                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6384                                              sizeof(*bi));
6385
6386                         btrfs_set_disk_key_objectid(&copy_key,
6387                                                     rec->info_objectid);
6388                         btrfs_set_disk_key_type(&copy_key, 0);
6389                         btrfs_set_disk_key_offset(&copy_key, 0);
6390
6391                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6392                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6393
6394                         btrfs_set_extent_flags(leaf, ei,
6395                                         flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
6396                 }
6397
6398                 btrfs_mark_buffer_dirty(leaf);
6399                 ret = btrfs_update_block_group(extent_root, rec->start,
6400                                                rec->max_size, 1, 0);
6401                 if (ret)
6402                         goto fail;
6403                 btrfs_release_path(path);
6404         }
6405
6406         if (back->is_data) {
6407                 u64 parent;
6408                 int i;
6409
6410                 dback = to_data_backref(back);
6411                 if (back->full_backref)
6412                         parent = dback->parent;
6413                 else
6414                         parent = 0;
6415
6416                 for (i = 0; i < dback->found_ref; i++) {
6417                         /* if parent != 0, we're doing a full backref
6418                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6419                          * just makes the backref allocator create a data
6420                          * backref
6421                          */
6422                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6423                                                    rec->start, rec->max_size,
6424                                                    parent,
6425                                                    dback->root,
6426                                                    parent ?
6427                                                    BTRFS_FIRST_FREE_OBJECTID :
6428                                                    dback->owner,
6429                                                    dback->offset);
6430                         if (ret)
6431                                 break;
6432                 }
6433                 fprintf(stderr,
6434 "adding new data backref on %llu %s %llu owner %llu offset %llu found %d\n",
6435                         (unsigned long long)rec->start,
6436                         back->full_backref ? "parent" : "root",
6437                         back->full_backref ? (unsigned long long)parent :
6438                                              (unsigned long long)dback->root,
6439                         (unsigned long long)dback->owner,
6440                         (unsigned long long)dback->offset, dback->found_ref);
6441         } else {
6442                 u64 parent;
6443                 struct tree_backref *tback;
6444
6445                 tback = to_tree_backref(back);
6446                 if (back->full_backref)
6447                         parent = tback->parent;
6448                 else
6449                         parent = 0;
6450
6451                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6452                                            rec->start, rec->max_size,
6453                                            parent, tback->root, 0, 0);
6454                 fprintf(stderr,
6455 "adding new tree backref on start %llu len %llu parent %llu root %llu\n",
6456                         rec->start, rec->max_size, parent, tback->root);
6457         }
6458 fail:
6459         btrfs_release_path(path);
6460         return ret;
6461 }
6462
6463 static struct extent_entry *find_entry(struct list_head *entries,
6464                                        u64 bytenr, u64 bytes)
6465 {
6466         struct extent_entry *entry = NULL;
6467
6468         list_for_each_entry(entry, entries, list) {
6469                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6470                         return entry;
6471         }
6472
6473         return NULL;
6474 }
6475
6476 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6477 {
6478         struct extent_entry *entry, *best = NULL, *prev = NULL;
6479
6480         list_for_each_entry(entry, entries, list) {
6481                 /*
6482                  * If there are as many broken entries as entries then we know
6483                  * not to trust this particular entry.
6484                  */
6485                 if (entry->broken == entry->count)
6486                         continue;
6487
6488                 /*
6489                  * Special case, when there are only two entries and 'best' is
6490                  * the first one
6491                  */
6492                 if (!prev) {
6493                         best = entry;
6494                         prev = entry;
6495                         continue;
6496                 }
6497
6498                 /*
6499                  * If our current entry == best then we can't be sure our best
6500                  * is really the best, so we need to keep searching.
6501                  */
6502                 if (best && best->count == entry->count) {
6503                         prev = entry;
6504                         best = NULL;
6505                         continue;
6506                 }
6507
6508                 /* Prev == entry, not good enough, have to keep searching */
6509                 if (!prev->broken && prev->count == entry->count)
6510                         continue;
6511
6512                 if (!best)
6513                         best = (prev->count > entry->count) ? prev : entry;
6514                 else if (best->count < entry->count)
6515                         best = entry;
6516                 prev = entry;
6517         }
6518
6519         return best;
6520 }
6521
6522 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6523                       struct data_backref *dback, struct extent_entry *entry)
6524 {
6525         struct btrfs_trans_handle *trans;
6526         struct btrfs_root *root;
6527         struct btrfs_file_extent_item *fi;
6528         struct extent_buffer *leaf;
6529         struct btrfs_key key;
6530         u64 bytenr, bytes;
6531         int ret, err;
6532
6533         key.objectid = dback->root;
6534         key.type = BTRFS_ROOT_ITEM_KEY;
6535         key.offset = (u64)-1;
6536         root = btrfs_read_fs_root(info, &key);
6537         if (IS_ERR(root)) {
6538                 fprintf(stderr, "Couldn't find root for our ref\n");
6539                 return -EINVAL;
6540         }
6541
6542         /*
6543          * The backref points to the original offset of the extent if it was
6544          * split, so we need to search down to the offset we have and then walk
6545          * forward until we find the backref we're looking for.
6546          */
6547         key.objectid = dback->owner;
6548         key.type = BTRFS_EXTENT_DATA_KEY;
6549         key.offset = dback->offset;
6550         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6551         if (ret < 0) {
6552                 fprintf(stderr, "Error looking up ref %d\n", ret);
6553                 return ret;
6554         }
6555
6556         while (1) {
6557                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6558                         ret = btrfs_next_leaf(root, path);
6559                         if (ret) {
6560                                 fprintf(stderr, "Couldn't find our ref, next\n");
6561                                 return -EINVAL;
6562                         }
6563                 }
6564                 leaf = path->nodes[0];
6565                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6566                 if (key.objectid != dback->owner ||
6567                     key.type != BTRFS_EXTENT_DATA_KEY) {
6568                         fprintf(stderr, "Couldn't find our ref, search\n");
6569                         return -EINVAL;
6570                 }
6571                 fi = btrfs_item_ptr(leaf, path->slots[0],
6572                                     struct btrfs_file_extent_item);
6573                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6574                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6575
6576                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6577                         break;
6578                 path->slots[0]++;
6579         }
6580
6581         btrfs_release_path(path);
6582
6583         trans = btrfs_start_transaction(root, 1);
6584         if (IS_ERR(trans))
6585                 return PTR_ERR(trans);
6586
6587         /*
6588          * Ok we have the key of the file extent we want to fix, now we can cow
6589          * down to the thing and fix it.
6590          */
6591         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6592         if (ret < 0) {
6593                 fprintf(stderr, "error cowing down to ref [%llu,%u,%llu]: %d\n",
6594                         key.objectid, key.type, key.offset, ret);
6595                 goto out;
6596         }
6597         if (ret > 0) {
6598                 fprintf(stderr,
6599                 "well that's odd, we just found this key [%llu,%u,%llu]\n",
6600                         key.objectid, key.type, key.offset);
6601                 ret = -EINVAL;
6602                 goto out;
6603         }
6604         leaf = path->nodes[0];
6605         fi = btrfs_item_ptr(leaf, path->slots[0],
6606                             struct btrfs_file_extent_item);
6607
6608         if (btrfs_file_extent_compression(leaf, fi) &&
6609             dback->disk_bytenr != entry->bytenr) {
6610                 fprintf(stderr,
6611 "ref doesn't match the record start and is compressed, please take a btrfs-image of this file system and send it to a btrfs developer so they can complete this functionality for bytenr %llu\n",
6612                         dback->disk_bytenr);
6613                 ret = -EINVAL;
6614                 goto out;
6615         }
6616
6617         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6618                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6619         } else if (dback->disk_bytenr > entry->bytenr) {
6620                 u64 off_diff, offset;
6621
6622                 off_diff = dback->disk_bytenr - entry->bytenr;
6623                 offset = btrfs_file_extent_offset(leaf, fi);
6624                 if (dback->disk_bytenr + offset +
6625                     btrfs_file_extent_num_bytes(leaf, fi) >
6626                     entry->bytenr + entry->bytes) {
6627                         fprintf(stderr,
6628 "ref is past the entry end, please take a btrfs-image of this file system and send it to a btrfs developer, ref %llu\n",
6629                                 dback->disk_bytenr);
6630                         ret = -EINVAL;
6631                         goto out;
6632                 }
6633                 offset += off_diff;
6634                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6635                 btrfs_set_file_extent_offset(leaf, fi, offset);
6636         } else if (dback->disk_bytenr < entry->bytenr) {
6637                 u64 offset;
6638
6639                 offset = btrfs_file_extent_offset(leaf, fi);
6640                 if (dback->disk_bytenr + offset < entry->bytenr) {
6641                         fprintf(stderr,
6642 "ref is before the entry start, please take a btrfs-image of this file system and send it to a btrfs developer, ref %llu\n",
6643                                 dback->disk_bytenr);
6644                         ret = -EINVAL;
6645                         goto out;
6646                 }
6647
6648                 offset += dback->disk_bytenr;
6649                 offset -= entry->bytenr;
6650                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6651                 btrfs_set_file_extent_offset(leaf, fi, offset);
6652         }
6653
6654         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6655
6656         /*
6657          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6658          * only do this if we aren't using compression, otherwise it's a
6659          * trickier case.
6660          */
6661         if (!btrfs_file_extent_compression(leaf, fi))
6662                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6663         else
6664                 printf("ram bytes may be wrong?\n");
6665         btrfs_mark_buffer_dirty(leaf);
6666 out:
6667         err = btrfs_commit_transaction(trans, root);
6668         btrfs_release_path(path);
6669         return ret ? ret : err;
6670 }
6671
6672 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6673                            struct extent_record *rec)
6674 {
6675         struct extent_backref *back, *tmp;
6676         struct data_backref *dback;
6677         struct extent_entry *entry, *best = NULL;
6678         LIST_HEAD(entries);
6679         int nr_entries = 0;
6680         int broken_entries = 0;
6681         int ret = 0;
6682         short mismatch = 0;
6683
6684         /*
6685          * Metadata is easy and the backrefs should always agree on bytenr and
6686          * size, if not we've got bigger issues.
6687          */
6688         if (rec->metadata)
6689                 return 0;
6690
6691         rbtree_postorder_for_each_entry_safe(back, tmp,
6692                                              &rec->backref_tree, node) {
6693                 if (back->full_backref || !back->is_data)
6694                         continue;
6695
6696                 dback = to_data_backref(back);
6697
6698                 /*
6699                  * We only pay attention to backrefs that we found a real
6700                  * backref for.
6701                  */
6702                 if (dback->found_ref == 0)
6703                         continue;
6704
6705                 /*
6706                  * For now we only catch when the bytes don't match, not the
6707                  * bytenr.  We can easily do this at the same time, but I want
6708                  * to have a fs image to test on before we just add repair
6709                  * functionality willy-nilly so we know we won't screw up the
6710                  * repair.
6711                  */
6712
6713                 entry = find_entry(&entries, dback->disk_bytenr,
6714                                    dback->bytes);
6715                 if (!entry) {
6716                         entry = malloc(sizeof(struct extent_entry));
6717                         if (!entry) {
6718                                 ret = -ENOMEM;
6719                                 goto out;
6720                         }
6721                         memset(entry, 0, sizeof(*entry));
6722                         entry->bytenr = dback->disk_bytenr;
6723                         entry->bytes = dback->bytes;
6724                         list_add_tail(&entry->list, &entries);
6725                         nr_entries++;
6726                 }
6727
6728                 /*
6729                  * If we only have on entry we may think the entries agree when
6730                  * in reality they don't so we have to do some extra checking.
6731                  */
6732                 if (dback->disk_bytenr != rec->start ||
6733                     dback->bytes != rec->nr || back->broken)
6734                         mismatch = 1;
6735
6736                 if (back->broken) {
6737                         entry->broken++;
6738                         broken_entries++;
6739                 }
6740
6741                 entry->count++;
6742         }
6743
6744         /* Yay all the backrefs agree, carry on good sir */
6745         if (nr_entries <= 1 && !mismatch)
6746                 goto out;
6747
6748         fprintf(stderr,
6749                 "attempting to repair backref discrepency for bytenr %llu\n",
6750                 rec->start);
6751
6752         /*
6753          * First we want to see if the backrefs can agree amongst themselves who
6754          * is right, so figure out which one of the entries has the highest
6755          * count.
6756          */
6757         best = find_most_right_entry(&entries);
6758
6759         /*
6760          * Ok so we may have an even split between what the backrefs think, so
6761          * this is where we use the extent ref to see what it thinks.
6762          */
6763         if (!best) {
6764                 entry = find_entry(&entries, rec->start, rec->nr);
6765                 if (!entry && (!broken_entries || !rec->found_rec)) {
6766                         fprintf(stderr,
6767 "backrefs don't agree with each other and extent record doesn't agree with anybody, so we can't fix bytenr %llu bytes %llu\n",
6768                                 rec->start, rec->nr);
6769                         ret = -EINVAL;
6770                         goto out;
6771                 } else if (!entry) {
6772                         /*
6773                          * Ok our backrefs were broken, we'll assume this is the
6774                          * correct value and add an entry for this range.
6775                          */
6776                         entry = malloc(sizeof(struct extent_entry));
6777                         if (!entry) {
6778                                 ret = -ENOMEM;
6779                                 goto out;
6780                         }
6781                         memset(entry, 0, sizeof(*entry));
6782                         entry->bytenr = rec->start;
6783                         entry->bytes = rec->nr;
6784                         list_add_tail(&entry->list, &entries);
6785                         nr_entries++;
6786                 }
6787                 entry->count++;
6788                 best = find_most_right_entry(&entries);
6789                 if (!best) {
6790                         fprintf(stderr,
6791 "backrefs and extent record evenly split on who is right, this is going to require user input to fix bytenr %llu bytes %llu\n",
6792                                 rec->start, rec->nr);
6793                         ret = -EINVAL;
6794                         goto out;
6795                 }
6796         }
6797
6798         /*
6799          * I don't think this can happen currently as we'll abort() if we catch
6800          * this case higher up, but in case somebody removes that we still can't
6801          * deal with it properly here yet, so just bail out of that's the case.
6802          */
6803         if (best->bytenr != rec->start) {
6804                 fprintf(stderr,
6805 "extent start and backref starts don't match, please use btrfs-image on this file system and send it to a btrfs developer so they can make fsck fix this particular case.  bytenr is %llu, bytes is %llu\n",
6806                         rec->start, rec->nr);
6807                 ret = -EINVAL;
6808                 goto out;
6809         }
6810
6811         /*
6812          * Ok great we all agreed on an extent record, let's go find the real
6813          * references and fix up the ones that don't match.
6814          */
6815         rbtree_postorder_for_each_entry_safe(back, tmp,
6816                                              &rec->backref_tree, node) {
6817                 if (back->full_backref || !back->is_data)
6818                         continue;
6819
6820                 dback = to_data_backref(back);
6821
6822                 /*
6823                  * Still ignoring backrefs that don't have a real ref attached
6824                  * to them.
6825                  */
6826                 if (dback->found_ref == 0)
6827                         continue;
6828
6829                 if (dback->bytes == best->bytes &&
6830                     dback->disk_bytenr == best->bytenr)
6831                         continue;
6832
6833                 ret = repair_ref(info, path, dback, best);
6834                 if (ret)
6835                         goto out;
6836         }
6837
6838         /*
6839          * Ok we messed with the actual refs, which means we need to drop our
6840          * entire cache and go back and rescan.  I know this is a huge pain and
6841          * adds a lot of extra work, but it's the only way to be safe.  Once all
6842          * the backrefs agree we may not need to do anything to the extent
6843          * record itself.
6844          */
6845         ret = -EAGAIN;
6846 out:
6847         while (!list_empty(&entries)) {
6848                 entry = list_entry(entries.next, struct extent_entry, list);
6849                 list_del_init(&entry->list);
6850                 free(entry);
6851         }
6852         return ret;
6853 }
6854
6855 static int process_duplicates(struct cache_tree *extent_cache,
6856                               struct extent_record *rec)
6857 {
6858         struct extent_record *good, *tmp;
6859         struct cache_extent *cache;
6860         int ret;
6861
6862         /*
6863          * If we found a extent record for this extent then return, or if we
6864          * have more than one duplicate we are likely going to need to delete
6865          * something.
6866          */
6867         if (rec->found_rec || rec->num_duplicates > 1)
6868                 return 0;
6869
6870         /* Shouldn't happen but just in case */
6871         BUG_ON(!rec->num_duplicates);
6872
6873         /*
6874          * So this happens if we end up with a backref that doesn't match the
6875          * actual extent entry.  So either the backref is bad or the extent
6876          * entry is bad.  Either way we want to have the extent_record actually
6877          * reflect what we found in the extent_tree, so we need to take the
6878          * duplicate out and use that as the extent_record since the only way we
6879          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6880          */
6881         remove_cache_extent(extent_cache, &rec->cache);
6882
6883         good = to_extent_record(rec->dups.next);
6884         list_del_init(&good->list);
6885         INIT_LIST_HEAD(&good->backrefs);
6886         INIT_LIST_HEAD(&good->dups);
6887         good->cache.start = good->start;
6888         good->cache.size = good->nr;
6889         good->content_checked = 0;
6890         good->owner_ref_checked = 0;
6891         good->num_duplicates = 0;
6892         good->refs = rec->refs;
6893         list_splice_init(&rec->backrefs, &good->backrefs);
6894         while (1) {
6895                 cache = lookup_cache_extent(extent_cache, good->start,
6896                                             good->nr);
6897                 if (!cache)
6898                         break;
6899                 tmp = container_of(cache, struct extent_record, cache);
6900
6901                 /*
6902                  * If we find another overlapping extent and it's found_rec is
6903                  * set then it's a duplicate and we need to try and delete
6904                  * something.
6905                  */
6906                 if (tmp->found_rec || tmp->num_duplicates > 0) {
6907                         if (list_empty(&good->list))
6908                                 list_add_tail(&good->list,
6909                                               &duplicate_extents);
6910                         good->num_duplicates += tmp->num_duplicates + 1;
6911                         list_splice_init(&tmp->dups, &good->dups);
6912                         list_del_init(&tmp->list);
6913                         list_add_tail(&tmp->list, &good->dups);
6914                         remove_cache_extent(extent_cache, &tmp->cache);
6915                         continue;
6916                 }
6917
6918                 /*
6919                  * Ok we have another non extent item backed extent rec, so lets
6920                  * just add it to this extent and carry on like we did above.
6921                  */
6922                 good->refs += tmp->refs;
6923                 list_splice_init(&tmp->backrefs, &good->backrefs);
6924                 remove_cache_extent(extent_cache, &tmp->cache);
6925                 free(tmp);
6926         }
6927         ret = insert_cache_extent(extent_cache, &good->cache);
6928         BUG_ON(ret);
6929         free(rec);
6930         return good->num_duplicates ? 0 : 1;
6931 }
6932
6933 static int delete_duplicate_records(struct btrfs_root *root,
6934                                     struct extent_record *rec)
6935 {
6936         struct btrfs_trans_handle *trans;
6937         LIST_HEAD(delete_list);
6938         struct btrfs_path path;
6939         struct extent_record *tmp, *good, *n;
6940         int nr_del = 0;
6941         int ret = 0, err;
6942         struct btrfs_key key;
6943
6944         btrfs_init_path(&path);
6945
6946         good = rec;
6947         /* Find the record that covers all of the duplicates. */
6948         list_for_each_entry(tmp, &rec->dups, list) {
6949                 if (good->start < tmp->start)
6950                         continue;
6951                 if (good->nr > tmp->nr)
6952                         continue;
6953
6954                 if (tmp->start + tmp->nr < good->start + good->nr) {
6955                         fprintf(stderr,
6956 "Ok we have overlapping extents that aren't completely covered by each other, this is going to require more careful thought. The extents are [%llu-%llu] and [%llu-%llu]\n",
6957                                 tmp->start, tmp->nr, good->start, good->nr);
6958                         abort();
6959                 }
6960                 good = tmp;
6961         }
6962
6963         if (good != rec)
6964                 list_add_tail(&rec->list, &delete_list);
6965
6966         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
6967                 if (tmp == good)
6968                         continue;
6969                 list_move_tail(&tmp->list, &delete_list);
6970         }
6971
6972         root = root->fs_info->extent_root;
6973         trans = btrfs_start_transaction(root, 1);
6974         if (IS_ERR(trans)) {
6975                 ret = PTR_ERR(trans);
6976                 goto out;
6977         }
6978
6979         list_for_each_entry(tmp, &delete_list, list) {
6980                 if (tmp->found_rec == 0)
6981                         continue;
6982                 key.objectid = tmp->start;
6983                 key.type = BTRFS_EXTENT_ITEM_KEY;
6984                 key.offset = tmp->nr;
6985
6986                 /* Shouldn't happen but just in case */
6987                 if (tmp->metadata) {
6988                         fprintf(stderr,
6989 "well this shouldn't happen, extent record overlaps but is metadata? [%llu, %llu]\n",
6990                                 tmp->start, tmp->nr);
6991                         abort();
6992                 }
6993
6994                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
6995                 if (ret) {
6996                         if (ret > 0)
6997                                 ret = -EINVAL;
6998                         break;
6999                 }
7000                 ret = btrfs_del_item(trans, root, &path);
7001                 if (ret)
7002                         break;
7003                 btrfs_release_path(&path);
7004                 nr_del++;
7005         }
7006         err = btrfs_commit_transaction(trans, root);
7007         if (err && !ret)
7008                 ret = err;
7009 out:
7010         while (!list_empty(&delete_list)) {
7011                 tmp = to_extent_record(delete_list.next);
7012                 list_del_init(&tmp->list);
7013                 if (tmp == rec)
7014                         continue;
7015                 free(tmp);
7016         }
7017
7018         while (!list_empty(&rec->dups)) {
7019                 tmp = to_extent_record(rec->dups.next);
7020                 list_del_init(&tmp->list);
7021                 free(tmp);
7022         }
7023
7024         btrfs_release_path(&path);
7025
7026         if (!ret && !nr_del)
7027                 rec->num_duplicates = 0;
7028
7029         return ret ? ret : nr_del;
7030 }
7031
7032 static int find_possible_backrefs(struct btrfs_fs_info *info,
7033                                   struct btrfs_path *path,
7034                                   struct cache_tree *extent_cache,
7035                                   struct extent_record *rec)
7036 {
7037         struct btrfs_root *root;
7038         struct extent_backref *back, *tmp;
7039         struct data_backref *dback;
7040         struct cache_extent *cache;
7041         struct btrfs_file_extent_item *fi;
7042         struct btrfs_key key;
7043         u64 bytenr, bytes;
7044         int ret;
7045
7046         rbtree_postorder_for_each_entry_safe(back, tmp,
7047                                              &rec->backref_tree, node) {
7048                 /* Don't care about full backrefs (poor unloved backrefs) */
7049                 if (back->full_backref || !back->is_data)
7050                         continue;
7051
7052                 dback = to_data_backref(back);
7053
7054                 /* We found this one, we don't need to do a lookup */
7055                 if (dback->found_ref)
7056                         continue;
7057
7058                 key.objectid = dback->root;
7059                 key.type = BTRFS_ROOT_ITEM_KEY;
7060                 key.offset = (u64)-1;
7061
7062                 root = btrfs_read_fs_root(info, &key);
7063
7064                 /* No root, definitely a bad ref, skip */
7065                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7066                         continue;
7067                 /* Other err, exit */
7068                 if (IS_ERR(root))
7069                         return PTR_ERR(root);
7070
7071                 key.objectid = dback->owner;
7072                 key.type = BTRFS_EXTENT_DATA_KEY;
7073                 key.offset = dback->offset;
7074                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7075                 if (ret) {
7076                         btrfs_release_path(path);
7077                         if (ret < 0)
7078                                 return ret;
7079                         /* Didn't find it, we can carry on */
7080                         ret = 0;
7081                         continue;
7082                 }
7083
7084                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7085                                     struct btrfs_file_extent_item);
7086                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7087                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7088                 btrfs_release_path(path);
7089                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7090                 if (cache) {
7091                         struct extent_record *tmp;
7092
7093                         tmp = container_of(cache, struct extent_record, cache);
7094
7095                         /*
7096                          * If we found an extent record for the bytenr for this
7097                          * particular backref then we can't add it to our
7098                          * current extent record.  We only want to add backrefs
7099                          * that don't have a corresponding extent item in the
7100                          * extent tree since they likely belong to this record
7101                          * and we need to fix it if it doesn't match bytenrs.
7102                          */
7103                         if  (tmp->found_rec)
7104                                 continue;
7105                 }
7106
7107                 dback->found_ref += 1;
7108                 dback->disk_bytenr = bytenr;
7109                 dback->bytes = bytes;
7110
7111                 /*
7112                  * Set this so the verify backref code knows not to trust the
7113                  * values in this backref.
7114                  */
7115                 back->broken = 1;
7116         }
7117
7118         return 0;
7119 }
7120
7121 /*
7122  * Record orphan data ref into corresponding root.
7123  *
7124  * Return 0 if the extent item contains data ref and recorded.
7125  * Return 1 if the extent item contains no useful data ref
7126  *   On that case, it may contains only shared_dataref or metadata backref
7127  *   or the file extent exists(this should be handled by the extent bytenr
7128  *   recovery routine)
7129  * Return <0 if something goes wrong.
7130  */
7131 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7132                                       struct extent_record *rec)
7133 {
7134         struct btrfs_key key;
7135         struct btrfs_root *dest_root;
7136         struct extent_backref *back, *tmp;
7137         struct data_backref *dback;
7138         struct orphan_data_extent *orphan;
7139         struct btrfs_path path;
7140         int recorded_data_ref = 0;
7141         int ret = 0;
7142
7143         if (rec->metadata)
7144                 return 1;
7145         btrfs_init_path(&path);
7146         rbtree_postorder_for_each_entry_safe(back, tmp,
7147                                              &rec->backref_tree, node) {
7148                 if (back->full_backref || !back->is_data ||
7149                     !back->found_extent_tree)
7150                         continue;
7151                 dback = to_data_backref(back);
7152                 if (dback->found_ref)
7153                         continue;
7154                 key.objectid = dback->root;
7155                 key.type = BTRFS_ROOT_ITEM_KEY;
7156                 key.offset = (u64)-1;
7157
7158                 dest_root = btrfs_read_fs_root(fs_info, &key);
7159
7160                 /* For non-exist root we just skip it */
7161                 if (IS_ERR(dest_root) || !dest_root)
7162                         continue;
7163
7164                 key.objectid = dback->owner;
7165                 key.type = BTRFS_EXTENT_DATA_KEY;
7166                 key.offset = dback->offset;
7167
7168                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
7169                 btrfs_release_path(&path);
7170                 /*
7171                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7172                  * we need to record it for inode/file extent rebuild.
7173                  * For ret > 0, we record it only for file extent rebuild.
7174                  * For ret == 0, the file extent exists but only bytenr
7175                  * mismatch, let the original bytenr fix routine to handle,
7176                  * don't record it.
7177                  */
7178                 if (ret == 0)
7179                         continue;
7180                 ret = 0;
7181                 orphan = malloc(sizeof(*orphan));
7182                 if (!orphan) {
7183                         ret = -ENOMEM;
7184                         goto out;
7185                 }
7186                 INIT_LIST_HEAD(&orphan->list);
7187                 orphan->root = dback->root;
7188                 orphan->objectid = dback->owner;
7189                 orphan->offset = dback->offset;
7190                 orphan->disk_bytenr = rec->cache.start;
7191                 orphan->disk_len = rec->cache.size;
7192                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7193                 recorded_data_ref = 1;
7194         }
7195 out:
7196         btrfs_release_path(&path);
7197         if (!ret)
7198                 return !recorded_data_ref;
7199         else
7200                 return ret;
7201 }
7202
7203 /*
7204  * when an incorrect extent item is found, this will delete
7205  * all of the existing entries for it and recreate them
7206  * based on what the tree scan found.
7207  */
7208 static int fixup_extent_refs(struct btrfs_fs_info *info,
7209                              struct cache_tree *extent_cache,
7210                              struct extent_record *rec)
7211 {
7212         struct btrfs_trans_handle *trans = NULL;
7213         int ret;
7214         struct btrfs_path path;
7215         struct cache_extent *cache;
7216         struct extent_backref *back, *tmp;
7217         int allocated = 0;
7218         u64 flags = 0;
7219
7220         if (rec->flag_block_full_backref)
7221                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7222
7223         btrfs_init_path(&path);
7224         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7225                 /*
7226                  * Sometimes the backrefs themselves are so broken they don't
7227                  * get attached to any meaningful rec, so first go back and
7228                  * check any of our backrefs that we couldn't find and throw
7229                  * them into the list if we find the backref so that
7230                  * verify_backrefs can figure out what to do.
7231                  */
7232                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
7233                 if (ret < 0)
7234                         goto out;
7235         }
7236
7237         /* step one, make sure all of the backrefs agree */
7238         ret = verify_backrefs(info, &path, rec);
7239         if (ret < 0)
7240                 goto out;
7241
7242         trans = btrfs_start_transaction(info->extent_root, 1);
7243         if (IS_ERR(trans)) {
7244                 ret = PTR_ERR(trans);
7245                 goto out;
7246         }
7247
7248         /* step two, delete all the existing records */
7249         ret = delete_extent_records(trans, info->extent_root, &path,
7250                                     rec->start);
7251
7252         if (ret < 0)
7253                 goto out;
7254
7255         /* was this block corrupt?  If so, don't add references to it */
7256         cache = lookup_cache_extent(info->corrupt_blocks,
7257                                     rec->start, rec->max_size);
7258         if (cache) {
7259                 ret = 0;
7260                 goto out;
7261         }
7262
7263         /* step three, recreate all the refs we did find */
7264         rbtree_postorder_for_each_entry_safe(back, tmp,
7265                                              &rec->backref_tree, node) {
7266                 /*
7267                  * if we didn't find any references, don't create a
7268                  * new extent record
7269                  */
7270                 if (!back->found_ref)
7271                         continue;
7272
7273                 rec->bad_full_backref = 0;
7274                 ret = record_extent(trans, info, &path, rec, back, allocated,
7275                                     flags);
7276                 allocated = 1;
7277
7278                 if (ret)
7279                         goto out;
7280         }
7281 out:
7282         if (trans) {
7283                 int err = btrfs_commit_transaction(trans, info->extent_root);
7284
7285                 if (!ret)
7286                         ret = err;
7287         }
7288
7289         if (!ret)
7290                 fprintf(stderr, "Repaired extent references for %llu\n",
7291                                 (unsigned long long)rec->start);
7292
7293         btrfs_release_path(&path);
7294         return ret;
7295 }
7296
7297 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7298                               struct extent_record *rec)
7299 {
7300         struct btrfs_trans_handle *trans;
7301         struct btrfs_root *root = fs_info->extent_root;
7302         struct btrfs_path path;
7303         struct btrfs_extent_item *ei;
7304         struct btrfs_key key;
7305         u64 flags;
7306         int ret = 0;
7307
7308         key.objectid = rec->start;
7309         if (rec->metadata) {
7310                 key.type = BTRFS_METADATA_ITEM_KEY;
7311                 key.offset = rec->info_level;
7312         } else {
7313                 key.type = BTRFS_EXTENT_ITEM_KEY;
7314                 key.offset = rec->max_size;
7315         }
7316
7317         trans = btrfs_start_transaction(root, 0);
7318         if (IS_ERR(trans))
7319                 return PTR_ERR(trans);
7320
7321         btrfs_init_path(&path);
7322         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
7323         if (ret < 0) {
7324                 btrfs_release_path(&path);
7325                 btrfs_commit_transaction(trans, root);
7326                 return ret;
7327         } else if (ret) {
7328                 fprintf(stderr, "Didn't find extent for %llu\n",
7329                         (unsigned long long)rec->start);
7330                 btrfs_release_path(&path);
7331                 btrfs_commit_transaction(trans, root);
7332                 return -ENOENT;
7333         }
7334
7335         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
7336                             struct btrfs_extent_item);
7337         flags = btrfs_extent_flags(path.nodes[0], ei);
7338         if (rec->flag_block_full_backref) {
7339                 fprintf(stderr, "setting full backref on %llu\n",
7340                         (unsigned long long)key.objectid);
7341                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7342         } else {
7343                 fprintf(stderr, "clearing full backref on %llu\n",
7344                         (unsigned long long)key.objectid);
7345                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7346         }
7347         btrfs_set_extent_flags(path.nodes[0], ei, flags);
7348         btrfs_mark_buffer_dirty(path.nodes[0]);
7349         btrfs_release_path(&path);
7350         ret = btrfs_commit_transaction(trans, root);
7351         if (!ret)
7352                 fprintf(stderr, "Repaired extent flags for %llu\n",
7353                                 (unsigned long long)rec->start);
7354
7355         return ret;
7356 }
7357
7358 /* right now we only prune from the extent allocation tree */
7359 static int prune_one_block(struct btrfs_trans_handle *trans,
7360                            struct btrfs_fs_info *info,
7361                            struct btrfs_corrupt_block *corrupt)
7362 {
7363         int ret;
7364         struct btrfs_path path;
7365         struct extent_buffer *eb;
7366         u64 found;
7367         int slot;
7368         int nritems;
7369         int level = corrupt->level + 1;
7370
7371         btrfs_init_path(&path);
7372 again:
7373         /* we want to stop at the parent to our busted block */
7374         path.lowest_level = level;
7375
7376         ret = btrfs_search_slot(trans, info->extent_root,
7377                                 &corrupt->key, &path, -1, 1);
7378
7379         if (ret < 0)
7380                 goto out;
7381
7382         eb = path.nodes[level];
7383         if (!eb) {
7384                 ret = -ENOENT;
7385                 goto out;
7386         }
7387
7388         /*
7389          * hopefully the search gave us the block we want to prune,
7390          * lets try that first
7391          */
7392         slot = path.slots[level];
7393         found =  btrfs_node_blockptr(eb, slot);
7394         if (found == corrupt->cache.start)
7395                 goto del_ptr;
7396
7397         nritems = btrfs_header_nritems(eb);
7398
7399         /* the search failed, lets scan this node and hope we find it */
7400         for (slot = 0; slot < nritems; slot++) {
7401                 found =  btrfs_node_blockptr(eb, slot);
7402                 if (found == corrupt->cache.start)
7403                         goto del_ptr;
7404         }
7405         /*
7406          * We couldn't find the bad block.
7407          * TODO: search all the nodes for pointers to this block
7408          */
7409         if (eb == info->extent_root->node) {
7410                 ret = -ENOENT;
7411                 goto out;
7412         } else {
7413                 level++;
7414                 btrfs_release_path(&path);
7415                 goto again;
7416         }
7417
7418 del_ptr:
7419         printk("deleting pointer to block %llu\n", corrupt->cache.start);
7420         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
7421
7422 out:
7423         btrfs_release_path(&path);
7424         return ret;
7425 }
7426
7427 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7428 {
7429         struct btrfs_trans_handle *trans = NULL;
7430         struct cache_extent *cache;
7431         struct btrfs_corrupt_block *corrupt;
7432
7433         while (1) {
7434                 cache = search_cache_extent(info->corrupt_blocks, 0);
7435                 if (!cache)
7436                         break;
7437                 if (!trans) {
7438                         trans = btrfs_start_transaction(info->extent_root, 1);
7439                         if (IS_ERR(trans))
7440                                 return PTR_ERR(trans);
7441                 }
7442                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7443                 prune_one_block(trans, info, corrupt);
7444                 remove_cache_extent(info->corrupt_blocks, cache);
7445         }
7446         if (trans)
7447                 return btrfs_commit_transaction(trans, info->extent_root);
7448         return 0;
7449 }
7450
7451 static int check_extent_refs(struct btrfs_root *root,
7452                              struct cache_tree *extent_cache)
7453 {
7454         struct extent_record *rec;
7455         struct cache_extent *cache;
7456         int ret = 0;
7457         int had_dups = 0;
7458         int err = 0;
7459
7460         if (repair) {
7461                 /*
7462                  * if we're doing a repair, we have to make sure
7463                  * we don't allocate from the problem extents.
7464                  * In the worst case, this will be all the
7465                  * extents in the FS
7466                  */
7467                 cache = search_cache_extent(extent_cache, 0);
7468                 while (cache) {
7469                         rec = container_of(cache, struct extent_record, cache);
7470                         set_extent_dirty(root->fs_info->excluded_extents,
7471                                          rec->start,
7472                                          rec->start + rec->max_size - 1);
7473                         cache = next_cache_extent(cache);
7474                 }
7475
7476                 /* pin down all the corrupted blocks too */
7477                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7478                 while (cache) {
7479                         set_extent_dirty(root->fs_info->excluded_extents,
7480                                          cache->start,
7481                                          cache->start + cache->size - 1);
7482                         cache = next_cache_extent(cache);
7483                 }
7484                 prune_corrupt_blocks(root->fs_info);
7485                 reset_cached_block_groups(root->fs_info);
7486         }
7487
7488         reset_cached_block_groups(root->fs_info);
7489
7490         /*
7491          * We need to delete any duplicate entries we find first otherwise we
7492          * could mess up the extent tree when we have backrefs that actually
7493          * belong to a different extent item and not the weird duplicate one.
7494          */
7495         while (repair && !list_empty(&duplicate_extents)) {
7496                 rec = to_extent_record(duplicate_extents.next);
7497                 list_del_init(&rec->list);
7498
7499                 /* Sometimes we can find a backref before we find an actual
7500                  * extent, so we need to process it a little bit to see if there
7501                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7502                  * if this is a backref screwup.  If we need to delete stuff
7503                  * process_duplicates() will return 0, otherwise it will return
7504                  * 1 and we
7505                  */
7506                 if (process_duplicates(extent_cache, rec))
7507                         continue;
7508                 ret = delete_duplicate_records(root, rec);
7509                 if (ret < 0)
7510                         return ret;
7511                 /*
7512                  * delete_duplicate_records will return the number of entries
7513                  * deleted, so if it's greater than 0 then we know we actually
7514                  * did something and we need to remove.
7515                  */
7516                 if (ret)
7517                         had_dups = 1;
7518         }
7519
7520         if (had_dups)
7521                 return -EAGAIN;
7522
7523         while (1) {
7524                 int cur_err = 0;
7525                 int fix = 0;
7526
7527                 cache = search_cache_extent(extent_cache, 0);
7528                 if (!cache)
7529                         break;
7530                 rec = container_of(cache, struct extent_record, cache);
7531                 if (rec->num_duplicates) {
7532                         fprintf(stderr,
7533                                 "extent item %llu has multiple extent items\n",
7534                                 (unsigned long long)rec->start);
7535                         cur_err = 1;
7536                 }
7537
7538                 if (rec->refs != rec->extent_item_refs) {
7539                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7540                                 (unsigned long long)rec->start,
7541                                 (unsigned long long)rec->nr);
7542                         fprintf(stderr, "extent item %llu, found %llu\n",
7543                                 (unsigned long long)rec->extent_item_refs,
7544                                 (unsigned long long)rec->refs);
7545                         ret = record_orphan_data_extents(root->fs_info, rec);
7546                         if (ret < 0)
7547                                 goto repair_abort;
7548                         fix = ret;
7549                         cur_err = 1;
7550                 }
7551                 if (all_backpointers_checked(rec, 1)) {
7552                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7553                                 (unsigned long long)rec->start,
7554                                 (unsigned long long)rec->nr);
7555                         fix = 1;
7556                         cur_err = 1;
7557                 }
7558                 if (!rec->owner_ref_checked) {
7559                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7560                                 (unsigned long long)rec->start,
7561                                 (unsigned long long)rec->nr);
7562                         fix = 1;
7563                         cur_err = 1;
7564                 }
7565
7566                 if (repair && fix) {
7567                         ret = fixup_extent_refs(root->fs_info, extent_cache,
7568                                                 rec);
7569                         if (ret)
7570                                 goto repair_abort;
7571                 }
7572
7573
7574                 if (rec->bad_full_backref) {
7575                         fprintf(stderr, "bad full backref, on [%llu]\n",
7576                                 (unsigned long long)rec->start);
7577                         if (repair) {
7578                                 ret = fixup_extent_flags(root->fs_info, rec);
7579                                 if (ret)
7580                                         goto repair_abort;
7581                                 fix = 1;
7582                         }
7583                         cur_err = 1;
7584                 }
7585                 /*
7586                  * Although it's not a extent ref's problem, we reuse this
7587                  * routine for error reporting.
7588                  * No repair function yet.
7589                  */
7590                 if (rec->crossing_stripes) {
7591                         fprintf(stderr,
7592                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7593                                 rec->start, rec->start + rec->max_size);
7594                         cur_err = 1;
7595                 }
7596
7597                 if (rec->wrong_chunk_type) {
7598                         fprintf(stderr,
7599                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7600                                 rec->start, rec->start + rec->max_size);
7601                         cur_err = 1;
7602                 }
7603
7604                 err = cur_err;
7605                 remove_cache_extent(extent_cache, cache);
7606                 free_all_extent_backrefs(rec);
7607                 if (!init_extent_tree && repair && (!cur_err || fix))
7608                         clear_extent_dirty(root->fs_info->excluded_extents,
7609                                            rec->start,
7610                                            rec->start + rec->max_size - 1);
7611                 free(rec);
7612         }
7613 repair_abort:
7614         if (repair) {
7615                 if (ret && ret != -EAGAIN) {
7616                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7617                         exit(1);
7618                 } else if (!ret) {
7619                         struct btrfs_trans_handle *trans;
7620
7621                         root = root->fs_info->extent_root;
7622                         trans = btrfs_start_transaction(root, 1);
7623                         if (IS_ERR(trans)) {
7624                                 ret = PTR_ERR(trans);
7625                                 goto repair_abort;
7626                         }
7627
7628                         ret = btrfs_fix_block_accounting(trans, root);
7629                         if (ret)
7630                                 goto repair_abort;
7631                         ret = btrfs_commit_transaction(trans, root);
7632                         if (ret)
7633                                 goto repair_abort;
7634                 }
7635                 return ret;
7636         }
7637
7638         if (err)
7639                 err = -EIO;
7640         return err;
7641 }
7642
7643 /*
7644  * Check the chunk with its block group/dev list ref:
7645  * Return 0 if all refs seems valid.
7646  * Return 1 if part of refs seems valid, need later check for rebuild ref
7647  * like missing block group and needs to search extent tree to rebuild them.
7648  * Return -1 if essential refs are missing and unable to rebuild.
7649  */
7650 static int check_chunk_refs(struct chunk_record *chunk_rec,
7651                             struct block_group_tree *block_group_cache,
7652                             struct device_extent_tree *dev_extent_cache,
7653                             int silent)
7654 {
7655         struct cache_extent *block_group_item;
7656         struct block_group_record *block_group_rec;
7657         struct cache_extent *dev_extent_item;
7658         struct device_extent_record *dev_extent_rec;
7659         u64 devid;
7660         u64 offset;
7661         u64 length;
7662         int metadump_v2 = 0;
7663         int i;
7664         int ret = 0;
7665
7666         block_group_item = lookup_cache_extent(&block_group_cache->tree,
7667                                                chunk_rec->offset,
7668                                                chunk_rec->length);
7669         if (block_group_item) {
7670                 block_group_rec = container_of(block_group_item,
7671                                                struct block_group_record,
7672                                                cache);
7673                 if (chunk_rec->length != block_group_rec->offset ||
7674                     chunk_rec->offset != block_group_rec->objectid ||
7675                     (!metadump_v2 &&
7676                      chunk_rec->type_flags != block_group_rec->flags)) {
7677                         if (!silent)
7678                                 fprintf(stderr,
7679                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7680                                         chunk_rec->objectid,
7681                                         chunk_rec->type,
7682                                         chunk_rec->offset,
7683                                         chunk_rec->length,
7684                                         chunk_rec->offset,
7685                                         chunk_rec->type_flags,
7686                                         block_group_rec->objectid,
7687                                         block_group_rec->type,
7688                                         block_group_rec->offset,
7689                                         block_group_rec->offset,
7690                                         block_group_rec->objectid,
7691                                         block_group_rec->flags);
7692                         ret = -1;
7693                 } else {
7694                         list_del_init(&block_group_rec->list);
7695                         chunk_rec->bg_rec = block_group_rec;
7696                 }
7697         } else {
7698                 if (!silent)
7699                         fprintf(stderr,
7700                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7701                                 chunk_rec->objectid,
7702                                 chunk_rec->type,
7703                                 chunk_rec->offset,
7704                                 chunk_rec->length,
7705                                 chunk_rec->offset,
7706                                 chunk_rec->type_flags);
7707                 ret = 1;
7708         }
7709
7710         if (metadump_v2)
7711                 return ret;
7712
7713         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7714                                     chunk_rec->num_stripes);
7715         for (i = 0; i < chunk_rec->num_stripes; ++i) {
7716                 devid = chunk_rec->stripes[i].devid;
7717                 offset = chunk_rec->stripes[i].offset;
7718                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7719                                                        devid, offset, length);
7720                 if (dev_extent_item) {
7721                         dev_extent_rec = container_of(dev_extent_item,
7722                                                 struct device_extent_record,
7723                                                 cache);
7724                         if (dev_extent_rec->objectid != devid ||
7725                             dev_extent_rec->offset != offset ||
7726                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
7727                             dev_extent_rec->length != length) {
7728                                 if (!silent)
7729                                         fprintf(stderr,
7730                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7731                                                 chunk_rec->objectid,
7732                                                 chunk_rec->type,
7733                                                 chunk_rec->offset,
7734                                                 chunk_rec->stripes[i].devid,
7735                                                 chunk_rec->stripes[i].offset,
7736                                                 dev_extent_rec->objectid,
7737                                                 dev_extent_rec->offset,
7738                                                 dev_extent_rec->length);
7739                                 ret = -1;
7740                         } else {
7741                                 list_move(&dev_extent_rec->chunk_list,
7742                                           &chunk_rec->dextents);
7743                         }
7744                 } else {
7745                         if (!silent)
7746                                 fprintf(stderr,
7747                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7748                                         chunk_rec->objectid,
7749                                         chunk_rec->type,
7750                                         chunk_rec->offset,
7751                                         chunk_rec->stripes[i].devid,
7752                                         chunk_rec->stripes[i].offset);
7753                         ret = -1;
7754                 }
7755         }
7756         return ret;
7757 }
7758
7759 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7760 int check_chunks(struct cache_tree *chunk_cache,
7761                  struct block_group_tree *block_group_cache,
7762                  struct device_extent_tree *dev_extent_cache,
7763                  struct list_head *good, struct list_head *bad,
7764                  struct list_head *rebuild, int silent)
7765 {
7766         struct cache_extent *chunk_item;
7767         struct chunk_record *chunk_rec;
7768         struct block_group_record *bg_rec;
7769         struct device_extent_record *dext_rec;
7770         int err;
7771         int ret = 0;
7772
7773         chunk_item = first_cache_extent(chunk_cache);
7774         while (chunk_item) {
7775                 chunk_rec = container_of(chunk_item, struct chunk_record,
7776                                          cache);
7777                 err = check_chunk_refs(chunk_rec, block_group_cache,
7778                                        dev_extent_cache, silent);
7779                 if (err < 0)
7780                         ret = err;
7781                 if (err == 0 && good)
7782                         list_add_tail(&chunk_rec->list, good);
7783                 if (err > 0 && rebuild)
7784                         list_add_tail(&chunk_rec->list, rebuild);
7785                 if (err < 0 && bad)
7786                         list_add_tail(&chunk_rec->list, bad);
7787                 chunk_item = next_cache_extent(chunk_item);
7788         }
7789
7790         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7791                 if (!silent)
7792                         fprintf(stderr,
7793                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7794                                 bg_rec->objectid,
7795                                 bg_rec->offset,
7796                                 bg_rec->flags);
7797                 if (!ret)
7798                         ret = 1;
7799         }
7800
7801         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7802                             chunk_list) {
7803                 if (!silent)
7804                         fprintf(stderr,
7805                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7806                                 dext_rec->objectid,
7807                                 dext_rec->offset,
7808                                 dext_rec->length);
7809                 if (!ret)
7810                         ret = 1;
7811         }
7812         return ret;
7813 }
7814
7815
7816 static int check_device_used(struct device_record *dev_rec,
7817                              struct device_extent_tree *dext_cache)
7818 {
7819         struct cache_extent *cache;
7820         struct device_extent_record *dev_extent_rec;
7821         u64 total_byte = 0;
7822
7823         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7824         while (cache) {
7825                 dev_extent_rec = container_of(cache,
7826                                               struct device_extent_record,
7827                                               cache);
7828                 if (dev_extent_rec->objectid != dev_rec->devid)
7829                         break;
7830
7831                 list_del_init(&dev_extent_rec->device_list);
7832                 total_byte += dev_extent_rec->length;
7833                 cache = next_cache_extent(cache);
7834         }
7835
7836         if (total_byte != dev_rec->byte_used) {
7837                 fprintf(stderr,
7838                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7839                         total_byte, dev_rec->byte_used, dev_rec->objectid,
7840                         dev_rec->type, dev_rec->offset);
7841                 return -1;
7842         } else {
7843                 return 0;
7844         }
7845 }
7846
7847 /*
7848  * Unlike device size alignment check above, some super total_bytes check
7849  * failure can lead to mount failure for newer kernel.
7850  *
7851  * So this function will return the error for a fatal super total_bytes problem.
7852  */
7853 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
7854 {
7855         struct btrfs_device *dev;
7856         struct list_head *dev_list = &fs_info->fs_devices->devices;
7857         u64 total_bytes = 0;
7858         u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
7859
7860         list_for_each_entry(dev, dev_list, dev_list)
7861                 total_bytes += dev->total_bytes;
7862
7863         /* Important check, which can cause unmountable fs */
7864         if (super_bytes < total_bytes) {
7865                 error("super total bytes %llu smaller than real device(s) size %llu",
7866                         super_bytes, total_bytes);
7867                 error("mounting this fs may fail for newer kernels");
7868                 error("this can be fixed by 'btrfs rescue fix-device-size'");
7869                 return false;
7870         }
7871
7872         /*
7873          * Optional check, just to make everything aligned and match with each
7874          * other.
7875          *
7876          * For a btrfs-image restored fs, we don't need to check it anyway.
7877          */
7878         if (btrfs_super_flags(fs_info->super_copy) &
7879             (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
7880                 return true;
7881         if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
7882             !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
7883             super_bytes != total_bytes) {
7884                 warning("minor unaligned/mismatch device size detected");
7885                 warning(
7886                 "recommended to use 'btrfs rescue fix-device-size' to fix it");
7887         }
7888         return true;
7889 }
7890
7891 /* check btrfs_dev_item -> btrfs_dev_extent */
7892 static int check_devices(struct rb_root *dev_cache,
7893                          struct device_extent_tree *dev_extent_cache)
7894 {
7895         struct rb_node *dev_node;
7896         struct device_record *dev_rec;
7897         struct device_extent_record *dext_rec;
7898         int err;
7899         int ret = 0;
7900
7901         dev_node = rb_first(dev_cache);
7902         while (dev_node) {
7903                 dev_rec = container_of(dev_node, struct device_record, node);
7904                 err = check_device_used(dev_rec, dev_extent_cache);
7905                 if (err)
7906                         ret = err;
7907
7908                 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
7909                                          global_info->sectorsize);
7910                 dev_node = rb_next(dev_node);
7911         }
7912         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7913                             device_list) {
7914                 fprintf(stderr,
7915                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7916                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
7917                 if (!ret)
7918                         ret = 1;
7919         }
7920         return ret;
7921 }
7922
7923 static int add_root_item_to_list(struct list_head *head,
7924                                   u64 objectid, u64 bytenr, u64 last_snapshot,
7925                                   u8 level, u8 drop_level,
7926                                   struct btrfs_key *drop_key)
7927 {
7928         struct root_item_record *ri_rec;
7929
7930         ri_rec = malloc(sizeof(*ri_rec));
7931         if (!ri_rec)
7932                 return -ENOMEM;
7933         ri_rec->bytenr = bytenr;
7934         ri_rec->objectid = objectid;
7935         ri_rec->level = level;
7936         ri_rec->drop_level = drop_level;
7937         ri_rec->last_snapshot = last_snapshot;
7938         if (drop_key)
7939                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
7940         list_add_tail(&ri_rec->list, head);
7941
7942         return 0;
7943 }
7944
7945 static void free_root_item_list(struct list_head *list)
7946 {
7947         struct root_item_record *ri_rec;
7948
7949         while (!list_empty(list)) {
7950                 ri_rec = list_first_entry(list, struct root_item_record,
7951                                           list);
7952                 list_del_init(&ri_rec->list);
7953                 free(ri_rec);
7954         }
7955 }
7956
7957 static int deal_root_from_list(struct list_head *list,
7958                                struct btrfs_root *root,
7959                                struct block_info *bits,
7960                                int bits_nr,
7961                                struct cache_tree *pending,
7962                                struct cache_tree *seen,
7963                                struct cache_tree *reada,
7964                                struct cache_tree *nodes,
7965                                struct cache_tree *extent_cache,
7966                                struct cache_tree *chunk_cache,
7967                                struct rb_root *dev_cache,
7968                                struct block_group_tree *block_group_cache,
7969                                struct device_extent_tree *dev_extent_cache)
7970 {
7971         int ret = 0;
7972         u64 last;
7973
7974         while (!list_empty(list)) {
7975                 struct root_item_record *rec;
7976                 struct extent_buffer *buf;
7977
7978                 rec = list_entry(list->next,
7979                                  struct root_item_record, list);
7980                 last = 0;
7981                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
7982                 if (!extent_buffer_uptodate(buf)) {
7983                         free_extent_buffer(buf);
7984                         ret = -EIO;
7985                         break;
7986                 }
7987                 ret = add_root_to_pending(buf, extent_cache, pending,
7988                                     seen, nodes, rec->objectid);
7989                 if (ret < 0)
7990                         break;
7991                 /*
7992                  * To rebuild extent tree, we need deal with snapshot
7993                  * one by one, otherwise we deal with node firstly which
7994                  * can maximize readahead.
7995                  */
7996                 while (1) {
7997                         ret = run_next_block(root, bits, bits_nr, &last,
7998                                              pending, seen, reada, nodes,
7999                                              extent_cache, chunk_cache,
8000                                              dev_cache, block_group_cache,
8001                                              dev_extent_cache, rec);
8002                         if (ret != 0)
8003                                 break;
8004                 }
8005                 free_extent_buffer(buf);
8006                 list_del(&rec->list);
8007                 free(rec);
8008                 if (ret < 0)
8009                         break;
8010         }
8011         while (ret >= 0) {
8012                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8013                                      reada, nodes, extent_cache, chunk_cache,
8014                                      dev_cache, block_group_cache,
8015                                      dev_extent_cache, NULL);
8016                 if (ret != 0) {
8017                         if (ret > 0)
8018                                 ret = 0;
8019                         break;
8020                 }
8021         }
8022         return ret;
8023 }
8024
8025 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
8026 {
8027         struct rb_root dev_cache;
8028         struct cache_tree chunk_cache;
8029         struct block_group_tree block_group_cache;
8030         struct device_extent_tree dev_extent_cache;
8031         struct cache_tree extent_cache;
8032         struct cache_tree seen;
8033         struct cache_tree pending;
8034         struct cache_tree reada;
8035         struct cache_tree nodes;
8036         struct extent_io_tree excluded_extents;
8037         struct cache_tree corrupt_blocks;
8038         struct btrfs_path path;
8039         struct btrfs_key key;
8040         struct btrfs_key found_key;
8041         int ret, err = 0;
8042         struct block_info *bits;
8043         int bits_nr;
8044         struct extent_buffer *leaf;
8045         int slot;
8046         struct btrfs_root_item ri;
8047         struct list_head dropping_trees;
8048         struct list_head normal_trees;
8049         struct btrfs_root *root1;
8050         struct btrfs_root *root;
8051         u64 objectid;
8052         u8 level;
8053
8054         root = fs_info->fs_root;
8055         dev_cache = RB_ROOT;
8056         cache_tree_init(&chunk_cache);
8057         block_group_tree_init(&block_group_cache);
8058         device_extent_tree_init(&dev_extent_cache);
8059
8060         cache_tree_init(&extent_cache);
8061         cache_tree_init(&seen);
8062         cache_tree_init(&pending);
8063         cache_tree_init(&nodes);
8064         cache_tree_init(&reada);
8065         cache_tree_init(&corrupt_blocks);
8066         extent_io_tree_init(&excluded_extents);
8067         INIT_LIST_HEAD(&dropping_trees);
8068         INIT_LIST_HEAD(&normal_trees);
8069
8070         if (repair) {
8071                 fs_info->excluded_extents = &excluded_extents;
8072                 fs_info->fsck_extent_cache = &extent_cache;
8073                 fs_info->free_extent_hook = free_extent_hook;
8074                 fs_info->corrupt_blocks = &corrupt_blocks;
8075         }
8076
8077         bits_nr = 1024;
8078         bits = malloc(bits_nr * sizeof(struct block_info));
8079         if (!bits) {
8080                 perror("malloc");
8081                 exit(1);
8082         }
8083
8084         if (ctx.progress_enabled) {
8085                 ctx.tp = TASK_EXTENTS;
8086                 task_start(ctx.info);
8087         }
8088
8089 again:
8090         root1 = fs_info->tree_root;
8091         level = btrfs_header_level(root1->node);
8092         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8093                                     root1->node->start, 0, level, 0, NULL);
8094         if (ret < 0)
8095                 goto out;
8096         root1 = fs_info->chunk_root;
8097         level = btrfs_header_level(root1->node);
8098         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8099                                     root1->node->start, 0, level, 0, NULL);
8100         if (ret < 0)
8101                 goto out;
8102         btrfs_init_path(&path);
8103         key.offset = 0;
8104         key.objectid = 0;
8105         key.type = BTRFS_ROOT_ITEM_KEY;
8106         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
8107         if (ret < 0)
8108                 goto out;
8109         while (1) {
8110                 leaf = path.nodes[0];
8111                 slot = path.slots[0];
8112                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8113                         ret = btrfs_next_leaf(root, &path);
8114                         if (ret != 0)
8115                                 break;
8116                         leaf = path.nodes[0];
8117                         slot = path.slots[0];
8118                 }
8119                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8120                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8121                         unsigned long offset;
8122                         u64 last_snapshot;
8123
8124                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8125                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8126                         last_snapshot = btrfs_root_last_snapshot(&ri);
8127                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8128                                 level = btrfs_root_level(&ri);
8129                                 ret = add_root_item_to_list(&normal_trees,
8130                                                 found_key.objectid,
8131                                                 btrfs_root_bytenr(&ri),
8132                                                 last_snapshot, level,
8133                                                 0, NULL);
8134                                 if (ret < 0)
8135                                         goto out;
8136                         } else {
8137                                 level = btrfs_root_level(&ri);
8138                                 objectid = found_key.objectid;
8139                                 btrfs_disk_key_to_cpu(&found_key,
8140                                                       &ri.drop_progress);
8141                                 ret = add_root_item_to_list(&dropping_trees,
8142                                                 objectid,
8143                                                 btrfs_root_bytenr(&ri),
8144                                                 last_snapshot, level,
8145                                                 ri.drop_level, &found_key);
8146                                 if (ret < 0)
8147                                         goto out;
8148                         }
8149                 }
8150                 path.slots[0]++;
8151         }
8152         btrfs_release_path(&path);
8153
8154         /*
8155          * check_block can return -EAGAIN if it fixes something, please keep
8156          * this in mind when dealing with return values from these functions, if
8157          * we get -EAGAIN we want to fall through and restart the loop.
8158          */
8159         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8160                                   &seen, &reada, &nodes, &extent_cache,
8161                                   &chunk_cache, &dev_cache, &block_group_cache,
8162                                   &dev_extent_cache);
8163         if (ret < 0) {
8164                 if (ret == -EAGAIN)
8165                         goto loop;
8166                 goto out;
8167         }
8168         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8169                                   &pending, &seen, &reada, &nodes,
8170                                   &extent_cache, &chunk_cache, &dev_cache,
8171                                   &block_group_cache, &dev_extent_cache);
8172         if (ret < 0) {
8173                 if (ret == -EAGAIN)
8174                         goto loop;
8175                 goto out;
8176         }
8177
8178         ret = check_chunks(&chunk_cache, &block_group_cache,
8179                            &dev_extent_cache, NULL, NULL, NULL, 0);
8180         if (ret) {
8181                 if (ret == -EAGAIN)
8182                         goto loop;
8183                 err = ret;
8184         }
8185
8186         ret = check_extent_refs(root, &extent_cache);
8187         if (ret < 0) {
8188                 if (ret == -EAGAIN)
8189                         goto loop;
8190                 goto out;
8191         }
8192
8193         ret = check_devices(&dev_cache, &dev_extent_cache);
8194         if (ret && err)
8195                 ret = err;
8196
8197 out:
8198         task_stop(ctx.info);
8199         if (repair) {
8200                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
8201                 extent_io_tree_cleanup(&excluded_extents);
8202                 fs_info->fsck_extent_cache = NULL;
8203                 fs_info->free_extent_hook = NULL;
8204                 fs_info->corrupt_blocks = NULL;
8205                 fs_info->excluded_extents = NULL;
8206         }
8207         free(bits);
8208         free_chunk_cache_tree(&chunk_cache);
8209         free_device_cache_tree(&dev_cache);
8210         free_block_group_tree(&block_group_cache);
8211         free_device_extent_tree(&dev_extent_cache);
8212         free_extent_cache_tree(&seen);
8213         free_extent_cache_tree(&pending);
8214         free_extent_cache_tree(&reada);
8215         free_extent_cache_tree(&nodes);
8216         free_root_item_list(&normal_trees);
8217         free_root_item_list(&dropping_trees);
8218         return ret;
8219 loop:
8220         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
8221         free_extent_cache_tree(&seen);
8222         free_extent_cache_tree(&pending);
8223         free_extent_cache_tree(&reada);
8224         free_extent_cache_tree(&nodes);
8225         free_chunk_cache_tree(&chunk_cache);
8226         free_block_group_tree(&block_group_cache);
8227         free_device_cache_tree(&dev_cache);
8228         free_device_extent_tree(&dev_extent_cache);
8229         free_extent_record_cache(&extent_cache);
8230         free_root_item_list(&normal_trees);
8231         free_root_item_list(&dropping_trees);
8232         extent_io_tree_cleanup(&excluded_extents);
8233         goto again;
8234 }
8235
8236 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
8237 {
8238         int ret;
8239
8240         if (!ctx.progress_enabled)
8241                 fprintf(stderr, "checking extents\n");
8242         if (check_mode == CHECK_MODE_LOWMEM)
8243                 ret = check_chunks_and_extents_lowmem(fs_info);
8244         else
8245                 ret = check_chunks_and_extents(fs_info);
8246
8247         /* Also repair device size related problems */
8248         if (repair && !ret) {
8249                 ret = btrfs_fix_device_and_super_size(fs_info);
8250                 if (ret > 0)
8251                         ret = 0;
8252         }
8253         return ret;
8254 }
8255
8256 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8257                            struct btrfs_root *root, int overwrite)
8258 {
8259         struct extent_buffer *c;
8260         struct extent_buffer *old = root->node;
8261         int level;
8262         int ret;
8263         struct btrfs_disk_key disk_key = {0,0,0};
8264
8265         level = 0;
8266
8267         if (overwrite) {
8268                 c = old;
8269                 extent_buffer_get(c);
8270                 goto init;
8271         }
8272         c = btrfs_alloc_free_block(trans, root,
8273                                    root->fs_info->nodesize,
8274                                    root->root_key.objectid,
8275                                    &disk_key, level, 0, 0);
8276         if (IS_ERR(c)) {
8277                 c = old;
8278                 extent_buffer_get(c);
8279                 overwrite = 1;
8280         }
8281 init:
8282         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8283         btrfs_set_header_level(c, level);
8284         btrfs_set_header_bytenr(c, c->start);
8285         btrfs_set_header_generation(c, trans->transid);
8286         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8287         btrfs_set_header_owner(c, root->root_key.objectid);
8288
8289         write_extent_buffer(c, root->fs_info->fsid,
8290                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8291
8292         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8293                             btrfs_header_chunk_tree_uuid(c),
8294                             BTRFS_UUID_SIZE);
8295
8296         btrfs_mark_buffer_dirty(c);
8297         /*
8298          * this case can happen in the following case:
8299          *
8300          * 1.overwrite previous root.
8301          *
8302          * 2.reinit reloc data root, this is because we skip pin
8303          * down reloc data tree before which means we can allocate
8304          * same block bytenr here.
8305          */
8306         if (old->start == c->start) {
8307                 btrfs_set_root_generation(&root->root_item,
8308                                           trans->transid);
8309                 root->root_item.level = btrfs_header_level(root->node);
8310                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8311                                         &root->root_key, &root->root_item);
8312                 if (ret) {
8313                         free_extent_buffer(c);
8314                         return ret;
8315                 }
8316         }
8317         free_extent_buffer(old);
8318         root->node = c;
8319         add_root_to_dirty_list(root);
8320         return 0;
8321 }
8322
8323 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8324                                 struct extent_buffer *eb, int tree_root)
8325 {
8326         struct extent_buffer *tmp;
8327         struct btrfs_root_item *ri;
8328         struct btrfs_key key;
8329         u64 bytenr;
8330         int level = btrfs_header_level(eb);
8331         int nritems;
8332         int ret;
8333         int i;
8334
8335         /*
8336          * If we have pinned this block before, don't pin it again.
8337          * This can not only avoid forever loop with broken filesystem
8338          * but also give us some speedups.
8339          */
8340         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8341                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8342                 return 0;
8343
8344         btrfs_pin_extent(fs_info, eb->start, eb->len);
8345
8346         nritems = btrfs_header_nritems(eb);
8347         for (i = 0; i < nritems; i++) {
8348                 if (level == 0) {
8349                         btrfs_item_key_to_cpu(eb, &key, i);
8350                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8351                                 continue;
8352                         /* Skip the extent root and reloc roots */
8353                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8354                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8355                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8356                                 continue;
8357                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8358                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8359
8360                         /*
8361                          * If at any point we start needing the real root we
8362                          * will have to build a stump root for the root we are
8363                          * in, but for now this doesn't actually use the root so
8364                          * just pass in extent_root.
8365                          */
8366                         tmp = read_tree_block(fs_info, bytenr, 0);
8367                         if (!extent_buffer_uptodate(tmp)) {
8368                                 fprintf(stderr, "Error reading root block\n");
8369                                 return -EIO;
8370                         }
8371                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8372                         free_extent_buffer(tmp);
8373                         if (ret)
8374                                 return ret;
8375                 } else {
8376                         bytenr = btrfs_node_blockptr(eb, i);
8377
8378                         /* If we aren't the tree root don't read the block */
8379                         if (level == 1 && !tree_root) {
8380                                 btrfs_pin_extent(fs_info, bytenr,
8381                                                 fs_info->nodesize);
8382                                 continue;
8383                         }
8384
8385                         tmp = read_tree_block(fs_info, bytenr, 0);
8386                         if (!extent_buffer_uptodate(tmp)) {
8387                                 fprintf(stderr, "Error reading tree block\n");
8388                                 return -EIO;
8389                         }
8390                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8391                         free_extent_buffer(tmp);
8392                         if (ret)
8393                                 return ret;
8394                 }
8395         }
8396
8397         return 0;
8398 }
8399
8400 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8401 {
8402         int ret;
8403
8404         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8405         if (ret)
8406                 return ret;
8407
8408         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8409 }
8410
8411 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8412 {
8413         struct btrfs_block_group_cache *cache;
8414         struct btrfs_path path;
8415         struct extent_buffer *leaf;
8416         struct btrfs_chunk *chunk;
8417         struct btrfs_key key;
8418         int ret;
8419         u64 start;
8420
8421         btrfs_init_path(&path);
8422         key.objectid = 0;
8423         key.type = BTRFS_CHUNK_ITEM_KEY;
8424         key.offset = 0;
8425         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
8426         if (ret < 0) {
8427                 btrfs_release_path(&path);
8428                 return ret;
8429         }
8430
8431         /*
8432          * We do this in case the block groups were screwed up and had alloc
8433          * bits that aren't actually set on the chunks.  This happens with
8434          * restored images every time and could happen in real life I guess.
8435          */
8436         fs_info->avail_data_alloc_bits = 0;
8437         fs_info->avail_metadata_alloc_bits = 0;
8438         fs_info->avail_system_alloc_bits = 0;
8439
8440         /* First we need to create the in-memory block groups */
8441         while (1) {
8442                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8443                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
8444                         if (ret < 0) {
8445                                 btrfs_release_path(&path);
8446                                 return ret;
8447                         }
8448                         if (ret) {
8449                                 ret = 0;
8450                                 break;
8451                         }
8452                 }
8453                 leaf = path.nodes[0];
8454                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8455                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8456                         path.slots[0]++;
8457                         continue;
8458                 }
8459
8460                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
8461                 btrfs_add_block_group(fs_info, 0,
8462                                       btrfs_chunk_type(leaf, chunk), key.offset,
8463                                       btrfs_chunk_length(leaf, chunk));
8464                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8465                                  key.offset + btrfs_chunk_length(leaf, chunk));
8466                 path.slots[0]++;
8467         }
8468         start = 0;
8469         while (1) {
8470                 cache = btrfs_lookup_first_block_group(fs_info, start);
8471                 if (!cache)
8472                         break;
8473                 cache->cached = 1;
8474                 start = cache->key.objectid + cache->key.offset;
8475         }
8476
8477         btrfs_release_path(&path);
8478         return 0;
8479 }
8480
8481 static int reset_balance(struct btrfs_trans_handle *trans,
8482                          struct btrfs_fs_info *fs_info)
8483 {
8484         struct btrfs_root *root = fs_info->tree_root;
8485         struct btrfs_path path;
8486         struct extent_buffer *leaf;
8487         struct btrfs_key key;
8488         int del_slot, del_nr = 0;
8489         int ret;
8490         int found = 0;
8491
8492         btrfs_init_path(&path);
8493         key.objectid = BTRFS_BALANCE_OBJECTID;
8494         key.type = BTRFS_BALANCE_ITEM_KEY;
8495         key.offset = 0;
8496         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8497         if (ret) {
8498                 if (ret > 0)
8499                         ret = 0;
8500                 if (!ret)
8501                         goto reinit_data_reloc;
8502                 else
8503                         goto out;
8504         }
8505
8506         ret = btrfs_del_item(trans, root, &path);
8507         if (ret)
8508                 goto out;
8509         btrfs_release_path(&path);
8510
8511         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8512         key.type = BTRFS_ROOT_ITEM_KEY;
8513         key.offset = 0;
8514         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8515         if (ret < 0)
8516                 goto out;
8517         while (1) {
8518                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8519                         if (!found)
8520                                 break;
8521
8522                         if (del_nr) {
8523                                 ret = btrfs_del_items(trans, root, &path,
8524                                                       del_slot, del_nr);
8525                                 del_nr = 0;
8526                                 if (ret)
8527                                         goto out;
8528                         }
8529                         key.offset++;
8530                         btrfs_release_path(&path);
8531
8532                         found = 0;
8533                         ret = btrfs_search_slot(trans, root, &key, &path,
8534                                                 -1, 1);
8535                         if (ret < 0)
8536                                 goto out;
8537                         continue;
8538                 }
8539                 found = 1;
8540                 leaf = path.nodes[0];
8541                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8542                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8543                         break;
8544                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8545                         path.slots[0]++;
8546                         continue;
8547                 }
8548                 if (!del_nr) {
8549                         del_slot = path.slots[0];
8550                         del_nr = 1;
8551                 } else {
8552                         del_nr++;
8553                 }
8554                 path.slots[0]++;
8555         }
8556
8557         if (del_nr) {
8558                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
8559                 if (ret)
8560                         goto out;
8561         }
8562         btrfs_release_path(&path);
8563
8564 reinit_data_reloc:
8565         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8566         key.type = BTRFS_ROOT_ITEM_KEY;
8567         key.offset = (u64)-1;
8568         root = btrfs_read_fs_root(fs_info, &key);
8569         if (IS_ERR(root)) {
8570                 fprintf(stderr, "Error reading data reloc tree\n");
8571                 ret = PTR_ERR(root);
8572                 goto out;
8573         }
8574         record_root_in_trans(trans, root);
8575         ret = btrfs_fsck_reinit_root(trans, root, 0);
8576         if (ret)
8577                 goto out;
8578         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8579 out:
8580         btrfs_release_path(&path);
8581         return ret;
8582 }
8583
8584 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8585                               struct btrfs_fs_info *fs_info)
8586 {
8587         u64 start = 0;
8588         int ret;
8589
8590         /*
8591          * The only reason we don't do this is because right now we're just
8592          * walking the trees we find and pinning down their bytes, we don't look
8593          * at any of the leaves.  In order to do mixed groups we'd have to check
8594          * the leaves of any fs roots and pin down the bytes for any file
8595          * extents we find.  Not hard but why do it if we don't have to?
8596          */
8597         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
8598                 fprintf(stderr, "We don't support re-initing the extent tree "
8599                         "for mixed block groups yet, please notify a btrfs "
8600                         "developer you want to do this so they can add this "
8601                         "functionality.\n");
8602                 return -EINVAL;
8603         }
8604
8605         /*
8606          * first we need to walk all of the trees except the extent tree and pin
8607          * down the bytes that are in use so we don't overwrite any existing
8608          * metadata.
8609          */
8610         ret = pin_metadata_blocks(fs_info);
8611         if (ret) {
8612                 fprintf(stderr, "error pinning down used bytes\n");
8613                 return ret;
8614         }
8615
8616         /*
8617          * Need to drop all the block groups since we're going to recreate all
8618          * of them again.
8619          */
8620         btrfs_free_block_groups(fs_info);
8621         ret = reset_block_groups(fs_info);
8622         if (ret) {
8623                 fprintf(stderr, "error resetting the block groups\n");
8624                 return ret;
8625         }
8626
8627         /* Ok we can allocate now, reinit the extent root */
8628         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8629         if (ret) {
8630                 fprintf(stderr, "extent root initialization failed\n");
8631                 /*
8632                  * When the transaction code is updated we should end the
8633                  * transaction, but for now progs only knows about commit so
8634                  * just return an error.
8635                  */
8636                 return ret;
8637         }
8638
8639         /*
8640          * Now we have all the in-memory block groups setup so we can make
8641          * allocations properly, and the metadata we care about is safe since we
8642          * pinned all of it above.
8643          */
8644         while (1) {
8645                 struct btrfs_block_group_cache *cache;
8646
8647                 cache = btrfs_lookup_first_block_group(fs_info, start);
8648                 if (!cache)
8649                         break;
8650                 start = cache->key.objectid + cache->key.offset;
8651                 ret = btrfs_insert_item(trans, fs_info->extent_root,
8652                                         &cache->key, &cache->item,
8653                                         sizeof(cache->item));
8654                 if (ret) {
8655                         fprintf(stderr, "Error adding block group\n");
8656                         return ret;
8657                 }
8658                 btrfs_extent_post_op(trans, fs_info->extent_root);
8659         }
8660
8661         ret = reset_balance(trans, fs_info);
8662         if (ret)
8663                 fprintf(stderr, "error resetting the pending balance\n");
8664
8665         return ret;
8666 }
8667
8668 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8669 {
8670         struct btrfs_path path;
8671         struct btrfs_trans_handle *trans;
8672         struct btrfs_key key;
8673         int ret;
8674
8675         printf("Recowing metadata block %llu\n", eb->start);
8676         key.objectid = btrfs_header_owner(eb);
8677         key.type = BTRFS_ROOT_ITEM_KEY;
8678         key.offset = (u64)-1;
8679
8680         root = btrfs_read_fs_root(root->fs_info, &key);
8681         if (IS_ERR(root)) {
8682                 fprintf(stderr, "Couldn't find owner root %llu\n",
8683                         key.objectid);
8684                 return PTR_ERR(root);
8685         }
8686
8687         trans = btrfs_start_transaction(root, 1);
8688         if (IS_ERR(trans))
8689                 return PTR_ERR(trans);
8690
8691         btrfs_init_path(&path);
8692         path.lowest_level = btrfs_header_level(eb);
8693         if (path.lowest_level)
8694                 btrfs_node_key_to_cpu(eb, &key, 0);
8695         else
8696                 btrfs_item_key_to_cpu(eb, &key, 0);
8697
8698         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
8699         btrfs_commit_transaction(trans, root);
8700         btrfs_release_path(&path);
8701         return ret;
8702 }
8703
8704 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8705 {
8706         struct btrfs_path path;
8707         struct btrfs_trans_handle *trans;
8708         struct btrfs_key key;
8709         int ret;
8710
8711         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8712                bad->key.type, bad->key.offset);
8713         key.objectid = bad->root_id;
8714         key.type = BTRFS_ROOT_ITEM_KEY;
8715         key.offset = (u64)-1;
8716
8717         root = btrfs_read_fs_root(root->fs_info, &key);
8718         if (IS_ERR(root)) {
8719                 fprintf(stderr, "Couldn't find owner root %llu\n",
8720                         key.objectid);
8721                 return PTR_ERR(root);
8722         }
8723
8724         trans = btrfs_start_transaction(root, 1);
8725         if (IS_ERR(trans))
8726                 return PTR_ERR(trans);
8727
8728         btrfs_init_path(&path);
8729         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
8730         if (ret) {
8731                 if (ret > 0)
8732                         ret = 0;
8733                 goto out;
8734         }
8735         ret = btrfs_del_item(trans, root, &path);
8736 out:
8737         btrfs_commit_transaction(trans, root);
8738         btrfs_release_path(&path);
8739         return ret;
8740 }
8741
8742 static int zero_log_tree(struct btrfs_root *root)
8743 {
8744         struct btrfs_trans_handle *trans;
8745         int ret;
8746
8747         trans = btrfs_start_transaction(root, 1);
8748         if (IS_ERR(trans)) {
8749                 ret = PTR_ERR(trans);
8750                 return ret;
8751         }
8752         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8753         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8754         ret = btrfs_commit_transaction(trans, root);
8755         return ret;
8756 }
8757
8758 static int populate_csum(struct btrfs_trans_handle *trans,
8759                          struct btrfs_root *csum_root, char *buf, u64 start,
8760                          u64 len)
8761 {
8762         struct btrfs_fs_info *fs_info = csum_root->fs_info;
8763         u64 offset = 0;
8764         u64 sectorsize;
8765         int ret = 0;
8766
8767         while (offset < len) {
8768                 sectorsize = fs_info->sectorsize;
8769                 ret = read_extent_data(fs_info, buf, start + offset,
8770                                        &sectorsize, 0);
8771                 if (ret)
8772                         break;
8773                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8774                                             start + offset, buf, sectorsize);
8775                 if (ret)
8776                         break;
8777                 offset += sectorsize;
8778         }
8779         return ret;
8780 }
8781
8782 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8783                                       struct btrfs_root *csum_root,
8784                                       struct btrfs_root *cur_root)
8785 {
8786         struct btrfs_path path;
8787         struct btrfs_key key;
8788         struct extent_buffer *node;
8789         struct btrfs_file_extent_item *fi;
8790         char *buf = NULL;
8791         u64 start = 0;
8792         u64 len = 0;
8793         int slot = 0;
8794         int ret = 0;
8795
8796         buf = malloc(cur_root->fs_info->sectorsize);
8797         if (!buf)
8798                 return -ENOMEM;
8799
8800         btrfs_init_path(&path);
8801         key.objectid = 0;
8802         key.offset = 0;
8803         key.type = 0;
8804         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
8805         if (ret < 0)
8806                 goto out;
8807         /* Iterate all regular file extents and fill its csum */
8808         while (1) {
8809                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8810
8811                 if (key.type != BTRFS_EXTENT_DATA_KEY)
8812                         goto next;
8813                 node = path.nodes[0];
8814                 slot = path.slots[0];
8815                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8816                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8817                         goto next;
8818                 start = btrfs_file_extent_disk_bytenr(node, fi);
8819                 len = btrfs_file_extent_disk_num_bytes(node, fi);
8820
8821                 ret = populate_csum(trans, csum_root, buf, start, len);
8822                 if (ret == -EEXIST)
8823                         ret = 0;
8824                 if (ret < 0)
8825                         goto out;
8826 next:
8827                 /*
8828                  * TODO: if next leaf is corrupted, jump to nearest next valid
8829                  * leaf.
8830                  */
8831                 ret = btrfs_next_item(cur_root, &path);
8832                 if (ret < 0)
8833                         goto out;
8834                 if (ret > 0) {
8835                         ret = 0;
8836                         goto out;
8837                 }
8838         }
8839
8840 out:
8841         btrfs_release_path(&path);
8842         free(buf);
8843         return ret;
8844 }
8845
8846 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8847                                   struct btrfs_root *csum_root)
8848 {
8849         struct btrfs_fs_info *fs_info = csum_root->fs_info;
8850         struct btrfs_path path;
8851         struct btrfs_root *tree_root = fs_info->tree_root;
8852         struct btrfs_root *cur_root;
8853         struct extent_buffer *node;
8854         struct btrfs_key key;
8855         int slot = 0;
8856         int ret = 0;
8857
8858         btrfs_init_path(&path);
8859         key.objectid = BTRFS_FS_TREE_OBJECTID;
8860         key.offset = 0;
8861         key.type = BTRFS_ROOT_ITEM_KEY;
8862         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
8863         if (ret < 0)
8864                 goto out;
8865         if (ret > 0) {
8866                 ret = -ENOENT;
8867                 goto out;
8868         }
8869
8870         while (1) {
8871                 node = path.nodes[0];
8872                 slot = path.slots[0];
8873                 btrfs_item_key_to_cpu(node, &key, slot);
8874                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8875                         goto out;
8876                 if (key.type != BTRFS_ROOT_ITEM_KEY)
8877                         goto next;
8878                 if (!is_fstree(key.objectid))
8879                         goto next;
8880                 key.offset = (u64)-1;
8881
8882                 cur_root = btrfs_read_fs_root(fs_info, &key);
8883                 if (IS_ERR(cur_root) || !cur_root) {
8884                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8885                                 key.objectid);
8886                         goto out;
8887                 }
8888                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8889                                 cur_root);
8890                 if (ret < 0)
8891                         goto out;
8892 next:
8893                 ret = btrfs_next_item(tree_root, &path);
8894                 if (ret > 0) {
8895                         ret = 0;
8896                         goto out;
8897                 }
8898                 if (ret < 0)
8899                         goto out;
8900         }
8901
8902 out:
8903         btrfs_release_path(&path);
8904         return ret;
8905 }
8906
8907 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
8908                                       struct btrfs_root *csum_root)
8909 {
8910         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
8911         struct btrfs_path path;
8912         struct btrfs_extent_item *ei;
8913         struct extent_buffer *leaf;
8914         char *buf;
8915         struct btrfs_key key;
8916         int ret;
8917
8918         btrfs_init_path(&path);
8919         key.objectid = 0;
8920         key.type = BTRFS_EXTENT_ITEM_KEY;
8921         key.offset = 0;
8922         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8923         if (ret < 0) {
8924                 btrfs_release_path(&path);
8925                 return ret;
8926         }
8927
8928         buf = malloc(csum_root->fs_info->sectorsize);
8929         if (!buf) {
8930                 btrfs_release_path(&path);
8931                 return -ENOMEM;
8932         }
8933
8934         while (1) {
8935                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8936                         ret = btrfs_next_leaf(extent_root, &path);
8937                         if (ret < 0)
8938                                 break;
8939                         if (ret) {
8940                                 ret = 0;
8941                                 break;
8942                         }
8943                 }
8944                 leaf = path.nodes[0];
8945
8946                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8947                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8948                         path.slots[0]++;
8949                         continue;
8950                 }
8951
8952                 ei = btrfs_item_ptr(leaf, path.slots[0],
8953                                     struct btrfs_extent_item);
8954                 if (!(btrfs_extent_flags(leaf, ei) &
8955                       BTRFS_EXTENT_FLAG_DATA)) {
8956                         path.slots[0]++;
8957                         continue;
8958                 }
8959
8960                 ret = populate_csum(trans, csum_root, buf, key.objectid,
8961                                     key.offset);
8962                 if (ret)
8963                         break;
8964                 path.slots[0]++;
8965         }
8966
8967         btrfs_release_path(&path);
8968         free(buf);
8969         return ret;
8970 }
8971
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * An extent tree init wipes all extent info, so the extent tree cannot serve
 * as the source in that case.  When search_fs_tree is set the fs/subvolume
 * trees are used to find the data; otherwise the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root) :
				fill_csum_tree_from_extent(trans, csum_root);
}
8988
8989 static void free_roots_info_cache(void)
8990 {
8991         if (!roots_info_cache)
8992                 return;
8993
8994         while (!cache_tree_empty(roots_info_cache)) {
8995                 struct cache_extent *entry;
8996                 struct root_item_info *rii;
8997
8998                 entry = first_cache_extent(roots_info_cache);
8999                 if (!entry)
9000                         break;
9001                 remove_cache_extent(roots_info_cache, entry);
9002                 rii = container_of(entry, struct root_item_info, cache_extent);
9003                 free(rii);
9004         }
9005
9006         free(roots_info_cache);
9007         roots_info_cache = NULL;
9008 }
9009
9010 static int build_roots_info_cache(struct btrfs_fs_info *info)
9011 {
9012         int ret = 0;
9013         struct btrfs_key key;
9014         struct extent_buffer *leaf;
9015         struct btrfs_path path;
9016
9017         if (!roots_info_cache) {
9018                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9019                 if (!roots_info_cache)
9020                         return -ENOMEM;
9021                 cache_tree_init(roots_info_cache);
9022         }
9023
9024         btrfs_init_path(&path);
9025         key.objectid = 0;
9026         key.type = BTRFS_EXTENT_ITEM_KEY;
9027         key.offset = 0;
9028         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
9029         if (ret < 0)
9030                 goto out;
9031         leaf = path.nodes[0];
9032
9033         while (1) {
9034                 struct btrfs_key found_key;
9035                 struct btrfs_extent_item *ei;
9036                 struct btrfs_extent_inline_ref *iref;
9037                 unsigned long item_end;
9038                 int slot = path.slots[0];
9039                 int type;
9040                 u64 flags;
9041                 u64 root_id;
9042                 u8 level;
9043                 struct cache_extent *entry;
9044                 struct root_item_info *rii;
9045
9046                 if (slot >= btrfs_header_nritems(leaf)) {
9047                         ret = btrfs_next_leaf(info->extent_root, &path);
9048                         if (ret < 0) {
9049                                 break;
9050                         } else if (ret) {
9051                                 ret = 0;
9052                                 break;
9053                         }
9054                         leaf = path.nodes[0];
9055                         slot = path.slots[0];
9056                 }
9057
9058                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9059
9060                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9061                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9062                         goto next;
9063
9064                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9065                 flags = btrfs_extent_flags(leaf, ei);
9066                 item_end = (unsigned long)ei + btrfs_item_size_nr(leaf, slot);
9067
9068                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9069                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9070                         goto next;
9071
9072                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9073                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9074                         level = found_key.offset;
9075                 } else {
9076                         struct btrfs_tree_block_info *binfo;
9077
9078                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
9079                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
9080                         level = btrfs_tree_block_level(leaf, binfo);
9081                 }
9082
9083                 /*
9084                  * It's a valid extent/metadata item that has no inline ref,
9085                  * but SHARED_BLOCK_REF or other shared references.
9086                  * So we need to do extra check to avoid reading beyond leaf
9087                  * boudnary.
9088                  */
9089                 if ((unsigned long)iref >= item_end)
9090                         goto next;
9091
9092                 /*
9093                  * For a root extent, it must be of the following type and the
9094                  * first (and only one) iref in the item.
9095                  */
9096                 type = btrfs_extent_inline_ref_type(leaf, iref);
9097                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9098                         goto next;
9099
9100                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9101                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9102                 if (!entry) {
9103                         rii = malloc(sizeof(struct root_item_info));
9104                         if (!rii) {
9105                                 ret = -ENOMEM;
9106                                 goto out;
9107                         }
9108                         rii->cache_extent.start = root_id;
9109                         rii->cache_extent.size = 1;
9110                         rii->level = (u8)-1;
9111                         entry = &rii->cache_extent;
9112                         ret = insert_cache_extent(roots_info_cache, entry);
9113                         ASSERT(ret == 0);
9114                 } else {
9115                         rii = container_of(entry, struct root_item_info,
9116                                            cache_extent);
9117                 }
9118
9119                 ASSERT(rii->cache_extent.start == root_id);
9120                 ASSERT(rii->cache_extent.size == 1);
9121
9122                 if (level > rii->level || rii->level == (u8)-1) {
9123                         rii->level = level;
9124                         rii->bytenr = found_key.objectid;
9125                         rii->gen = btrfs_extent_generation(leaf, ei);
9126                         rii->node_count = 1;
9127                 } else if (level == rii->level) {
9128                         rii->node_count++;
9129                 }
9130 next:
9131                 path.slots[0]++;
9132         }
9133
9134 out:
9135         btrfs_release_path(&path);
9136
9137         return ret;
9138 }
9139
9140 static int maybe_repair_root_item(struct btrfs_path *path,
9141                                   const struct btrfs_key *root_key,
9142                                   const int read_only_mode)
9143 {
9144         const u64 root_id = root_key->objectid;
9145         struct cache_extent *entry;
9146         struct root_item_info *rii;
9147         struct btrfs_root_item ri;
9148         unsigned long offset;
9149
9150         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9151         if (!entry) {
9152                 fprintf(stderr,
9153                         "Error: could not find extent items for root %llu\n",
9154                         root_key->objectid);
9155                 return -ENOENT;
9156         }
9157
9158         rii = container_of(entry, struct root_item_info, cache_extent);
9159         ASSERT(rii->cache_extent.start == root_id);
9160         ASSERT(rii->cache_extent.size == 1);
9161
9162         if (rii->node_count != 1) {
9163                 fprintf(stderr,
9164                         "Error: could not find btree root extent for root %llu\n",
9165                         root_id);
9166                 return -ENOENT;
9167         }
9168
9169         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9170         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9171
9172         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9173             btrfs_root_level(&ri) != rii->level ||
9174             btrfs_root_generation(&ri) != rii->gen) {
9175
9176                 /*
9177                  * If we're in repair mode but our caller told us to not update
9178                  * the root item, i.e. just check if it needs to be updated, don't
9179                  * print this message, since the caller will call us again shortly
9180                  * for the same root item without read only mode (the caller will
9181                  * open a transaction first).
9182                  */
9183                 if (!(read_only_mode && repair))
9184                         fprintf(stderr,
9185                                 "%sroot item for root %llu,"
9186                                 " current bytenr %llu, current gen %llu, current level %u,"
9187                                 " new bytenr %llu, new gen %llu, new level %u\n",
9188                                 (read_only_mode ? "" : "fixing "),
9189                                 root_id,
9190                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9191                                 btrfs_root_level(&ri),
9192                                 rii->bytenr, rii->gen, rii->level);
9193
9194                 if (btrfs_root_generation(&ri) > rii->gen) {
9195                         fprintf(stderr,
9196                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9197                                 root_id, btrfs_root_generation(&ri), rii->gen);
9198                         return -EINVAL;
9199                 }
9200
9201                 if (!read_only_mode) {
9202                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9203                         btrfs_set_root_level(&ri, rii->level);
9204                         btrfs_set_root_generation(&ri, rii->gen);
9205                         write_extent_buffer(path->nodes[0], &ri,
9206                                             offset, sizeof(ri));
9207                 }
9208
9209                 return 1;
9210         }
9211
9212         return 0;
9213 }
9214
9215 /*
9216  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9217  * caused read-only snapshots to be corrupted if they were created at a moment
9218  * when the source subvolume/snapshot had orphan items. The issue was that the
9219  * on-disk root items became incorrect, referring to the pre orphan cleanup root
9220  * node instead of the post orphan cleanup root node.
9221  * So this function, and its callees, just detects and fixes those cases. Even
9222  * though the regression was for read-only snapshots, this function applies to
9223  * any snapshot/subvolume root.
9224  * This must be run before any other repair code - not doing it so, makes other
9225  * repair code delete or modify backrefs in the extent tree for example, which
9226  * will result in an inconsistent fs after repairing the root items.
9227  */
9228 static int repair_root_items(struct btrfs_fs_info *info)
9229 {
9230         struct btrfs_path path;
9231         struct btrfs_key key;
9232         struct extent_buffer *leaf;
9233         struct btrfs_trans_handle *trans = NULL;
9234         int ret = 0;
9235         int bad_roots = 0;
9236         int need_trans = 0;
9237
9238         btrfs_init_path(&path);
9239
9240         ret = build_roots_info_cache(info);
9241         if (ret)
9242                 goto out;
9243
9244         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9245         key.type = BTRFS_ROOT_ITEM_KEY;
9246         key.offset = 0;
9247
9248 again:
9249         /*
9250          * Avoid opening and committing transactions if a leaf doesn't have
9251          * any root items that need to be fixed, so that we avoid rotating
9252          * backup roots unnecessarily.
9253          */
9254         if (need_trans) {
9255                 trans = btrfs_start_transaction(info->tree_root, 1);
9256                 if (IS_ERR(trans)) {
9257                         ret = PTR_ERR(trans);
9258                         goto out;
9259                 }
9260         }
9261
9262         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
9263                                 0, trans ? 1 : 0);
9264         if (ret < 0)
9265                 goto out;
9266         leaf = path.nodes[0];
9267
9268         while (1) {
9269                 struct btrfs_key found_key;
9270
9271                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
9272                         int no_more_keys = find_next_key(&path, &key);
9273
9274                         btrfs_release_path(&path);
9275                         if (trans) {
9276                                 ret = btrfs_commit_transaction(trans,
9277                                                                info->tree_root);
9278                                 trans = NULL;
9279                                 if (ret < 0)
9280                                         goto out;
9281                         }
9282                         need_trans = 0;
9283                         if (no_more_keys)
9284                                 break;
9285                         goto again;
9286                 }
9287
9288                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9289
9290                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9291                         goto next;
9292                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9293                         goto next;
9294
9295                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
9296                 if (ret < 0)
9297                         goto out;
9298                 if (ret) {
9299                         if (!trans && repair) {
9300                                 need_trans = 1;
9301                                 key = found_key;
9302                                 btrfs_release_path(&path);
9303                                 goto again;
9304                         }
9305                         bad_roots++;
9306                 }
9307 next:
9308                 path.slots[0]++;
9309         }
9310         ret = 0;
9311 out:
9312         free_roots_info_cache();
9313         btrfs_release_path(&path);
9314         if (trans)
9315                 btrfs_commit_transaction(trans, info->tree_root);
9316         if (ret < 0)
9317                 return ret;
9318
9319         return bad_roots;
9320 }
9321
9322 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
9323 {
9324         struct btrfs_trans_handle *trans;
9325         struct btrfs_block_group_cache *bg_cache;
9326         u64 current = 0;
9327         int ret = 0;
9328
9329         /* Clear all free space cache inodes and its extent data */
9330         while (1) {
9331                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
9332                 if (!bg_cache)
9333                         break;
9334                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
9335                 if (ret < 0)
9336                         return ret;
9337                 current = bg_cache->key.objectid + bg_cache->key.offset;
9338         }
9339
9340         /* Don't forget to set cache_generation to -1 */
9341         trans = btrfs_start_transaction(fs_info->tree_root, 0);
9342         if (IS_ERR(trans)) {
9343                 error("failed to update super block cache generation");
9344                 return PTR_ERR(trans);
9345         }
9346         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
9347         btrfs_commit_transaction(trans, fs_info->tree_root);
9348
9349         return ret;
9350 }
9351
9352 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
9353                 int clear_version)
9354 {
9355         int ret = 0;
9356
9357         if (clear_version == 1) {
9358                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
9359                         error(
9360                 "free space cache v2 detected, use --clear-space-cache v2");
9361                         ret = 1;
9362                         goto close_out;
9363                 }
9364                 printf("Clearing free space cache\n");
9365                 ret = clear_free_space_cache(fs_info);
9366                 if (ret) {
9367                         error("failed to clear free space cache");
9368                         ret = 1;
9369                 } else {
9370                         printf("Free space cache cleared\n");
9371                 }
9372         } else if (clear_version == 2) {
9373                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
9374                         printf("no free space cache v2 to clear\n");
9375                         ret = 0;
9376                         goto close_out;
9377                 }
9378                 printf("Clear free space cache v2\n");
9379                 ret = btrfs_clear_free_space_tree(fs_info);
9380                 if (ret) {
9381                         error("failed to clear free space cache v2: %d", ret);
9382                         ret = 1;
9383                 } else {
9384                         printf("free space cache v2 cleared\n");
9385                 }
9386         }
9387 close_out:
9388         return ret;
9389 }
9390
/*
 * Help text for "btrfs check".  cmd_check() hands this NULL-terminated array
 * to usage() for -h, for unknown options and for a wrong argument count.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--force                     skip mount checks, repair is not possible",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
9422
9423 int cmd_check(int argc, char **argv)
9424 {
9425         struct cache_tree root_cache;
9426         struct btrfs_root *root;
9427         struct btrfs_fs_info *info;
9428         u64 bytenr = 0;
9429         u64 subvolid = 0;
9430         u64 tree_root_bytenr = 0;
9431         u64 chunk_root_bytenr = 0;
9432         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9433         int ret = 0;
9434         int err = 0;
9435         u64 num;
9436         int init_csum_tree = 0;
9437         int readonly = 0;
9438         int clear_space_cache = 0;
9439         int qgroup_report = 0;
9440         int qgroups_repaired = 0;
9441         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
9442         int force = 0;
9443
9444         while(1) {
9445                 int c;
9446                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
9447                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
9448                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
9449                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
9450                         GETOPT_VAL_FORCE };
9451                 static const struct option long_options[] = {
9452                         { "super", required_argument, NULL, 's' },
9453                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
9454                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
9455                         { "init-csum-tree", no_argument, NULL,
9456                                 GETOPT_VAL_INIT_CSUM },
9457                         { "init-extent-tree", no_argument, NULL,
9458                                 GETOPT_VAL_INIT_EXTENT },
9459                         { "check-data-csum", no_argument, NULL,
9460                                 GETOPT_VAL_CHECK_CSUM },
9461                         { "backup", no_argument, NULL, 'b' },
9462                         { "subvol-extents", required_argument, NULL, 'E' },
9463                         { "qgroup-report", no_argument, NULL, 'Q' },
9464                         { "tree-root", required_argument, NULL, 'r' },
9465                         { "chunk-root", required_argument, NULL,
9466                                 GETOPT_VAL_CHUNK_TREE },
9467                         { "progress", no_argument, NULL, 'p' },
9468                         { "mode", required_argument, NULL,
9469                                 GETOPT_VAL_MODE },
9470                         { "clear-space-cache", required_argument, NULL,
9471                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
9472                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
9473                         { NULL, 0, NULL, 0}
9474                 };
9475
9476                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
9477                 if (c < 0)
9478                         break;
9479                 switch(c) {
9480                         case 'a': /* ignored */ break;
9481                         case 'b':
9482                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9483                                 break;
9484                         case 's':
9485                                 num = arg_strtou64(optarg);
9486                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9487                                         error(
9488                                         "super mirror should be less than %d",
9489                                                 BTRFS_SUPER_MIRROR_MAX);
9490                                         exit(1);
9491                                 }
9492                                 bytenr = btrfs_sb_offset(((int)num));
9493                                 printf("using SB copy %llu, bytenr %llu\n", num,
9494                                        (unsigned long long)bytenr);
9495                                 break;
9496                         case 'Q':
9497                                 qgroup_report = 1;
9498                                 break;
9499                         case 'E':
9500                                 subvolid = arg_strtou64(optarg);
9501                                 break;
9502                         case 'r':
9503                                 tree_root_bytenr = arg_strtou64(optarg);
9504                                 break;
9505                         case GETOPT_VAL_CHUNK_TREE:
9506                                 chunk_root_bytenr = arg_strtou64(optarg);
9507                                 break;
9508                         case 'p':
9509                                 ctx.progress_enabled = true;
9510                                 break;
9511                         case '?':
9512                         case 'h':
9513                                 usage(cmd_check_usage);
9514                         case GETOPT_VAL_REPAIR:
9515                                 printf("enabling repair mode\n");
9516                                 repair = 1;
9517                                 ctree_flags |= OPEN_CTREE_WRITES;
9518                                 break;
9519                         case GETOPT_VAL_READONLY:
9520                                 readonly = 1;
9521                                 break;
9522                         case GETOPT_VAL_INIT_CSUM:
9523                                 printf("Creating a new CRC tree\n");
9524                                 init_csum_tree = 1;
9525                                 repair = 1;
9526                                 ctree_flags |= OPEN_CTREE_WRITES;
9527                                 break;
9528                         case GETOPT_VAL_INIT_EXTENT:
9529                                 init_extent_tree = 1;
9530                                 ctree_flags |= (OPEN_CTREE_WRITES |
9531                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9532                                 repair = 1;
9533                                 break;
9534                         case GETOPT_VAL_CHECK_CSUM:
9535                                 check_data_csum = 1;
9536                                 break;
9537                         case GETOPT_VAL_MODE:
9538                                 check_mode = parse_check_mode(optarg);
9539                                 if (check_mode == CHECK_MODE_UNKNOWN) {
9540                                         error("unknown mode: %s", optarg);
9541                                         exit(1);
9542                                 }
9543                                 break;
9544                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
9545                                 if (strcmp(optarg, "v1") == 0) {
9546                                         clear_space_cache = 1;
9547                                 } else if (strcmp(optarg, "v2") == 0) {
9548                                         clear_space_cache = 2;
9549                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
9550                                 } else {
9551                                         error(
9552                 "invalid argument to --clear-space-cache, must be v1 or v2");
9553                                         exit(1);
9554                                 }
9555                                 ctree_flags |= OPEN_CTREE_WRITES;
9556                                 break;
9557                         case GETOPT_VAL_FORCE:
9558                                 force = 1;
9559                                 break;
9560                 }
9561         }
9562
9563         if (check_argc_exact(argc - optind, 1))
9564                 usage(cmd_check_usage);
9565
9566         if (ctx.progress_enabled) {
9567                 ctx.tp = TASK_NOTHING;
9568                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9569         }
9570
9571         /* This check is the only reason for --readonly to exist */
9572         if (readonly && repair) {
9573                 error("repair options are not compatible with --readonly");
9574                 exit(1);
9575         }
9576
9577         /*
9578          * experimental and dangerous
9579          */
9580         if (repair && check_mode == CHECK_MODE_LOWMEM)
9581                 warning("low-memory mode repair support is only partial");
9582
9583         radix_tree_init();
9584         cache_tree_init(&root_cache);
9585
9586         ret = check_mounted(argv[optind]);
9587         if (!force) {
9588                 if (ret < 0) {
9589                         error("could not check mount status: %s",
9590                                         strerror(-ret));
9591                         err |= !!ret;
9592                         goto err_out;
9593                 } else if (ret) {
9594                         error(
9595 "%s is currently mounted, use --force if you really intend to check the filesystem",
9596                                 argv[optind]);
9597                         ret = -EBUSY;
9598                         err |= !!ret;
9599                         goto err_out;
9600                 }
9601         } else {
9602                 if (repair) {
9603                         error("repair and --force is not yet supported");
9604                         ret = 1;
9605                         err |= !!ret;
9606                         goto err_out;
9607                 }
9608                 if (ret < 0) {
9609                         warning(
9610 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
9611                                 argv[optind]);
9612                 } else if (ret) {
9613                         warning(
9614                         "filesystem mounted, continuing because of --force");
9615                 }
9616                 /* A block device is mounted in exclusive mode by kernel */
9617                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
9618         }
9619
9620         /* only allow partial opening under repair mode */
9621         if (repair)
9622                 ctree_flags |= OPEN_CTREE_PARTIAL;
9623
9624         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9625                                   chunk_root_bytenr, ctree_flags);
9626         if (!info) {
9627                 error("cannot open file system");
9628                 ret = -EIO;
9629                 err |= !!ret;
9630                 goto err_out;
9631         }
9632
9633         global_info = info;
9634         root = info->fs_root;
9635         uuid_unparse(info->super_copy->fsid, uuidbuf);
9636
9637         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9638
9639         /*
9640          * Check the bare minimum before starting anything else that could rely
9641          * on it, namely the tree roots, any local consistency checks
9642          */
9643         if (!extent_buffer_uptodate(info->tree_root->node) ||
9644             !extent_buffer_uptodate(info->dev_root->node) ||
9645             !extent_buffer_uptodate(info->chunk_root->node)) {
9646                 error("critical roots corrupted, unable to check the filesystem");
9647                 err |= !!ret;
9648                 ret = -EIO;
9649                 goto close_out;
9650         }
9651
9652         if (clear_space_cache) {
9653                 ret = do_clear_free_space_cache(info, clear_space_cache);
9654                 err |= !!ret;
9655                 goto close_out;
9656         }
9657
9658         /*
9659          * repair mode will force us to commit transaction which
9660          * will make us fail to load log tree when mounting.
9661          */
9662         if (repair && btrfs_super_log_root(info->super_copy)) {
9663                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
9664                 if (!ret) {
9665                         ret = 1;
9666                         err |= !!ret;
9667                         goto close_out;
9668                 }
9669                 ret = zero_log_tree(root);
9670                 err |= !!ret;
9671                 if (ret) {
9672                         error("failed to zero log tree: %d", ret);
9673                         goto close_out;
9674                 }
9675         }
9676
9677         if (qgroup_report) {
9678                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9679                        uuidbuf);
9680                 ret = qgroup_verify_all(info);
9681                 err |= !!ret;
9682                 if (ret == 0)
9683                         report_qgroups(1);
9684                 goto close_out;
9685         }
9686         if (subvolid) {
9687                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9688                        subvolid, argv[optind], uuidbuf);
9689                 ret = print_extent_state(info, subvolid);
9690                 err |= !!ret;
9691                 goto close_out;
9692         }
9693
9694         if (init_extent_tree || init_csum_tree) {
9695                 struct btrfs_trans_handle *trans;
9696
9697                 trans = btrfs_start_transaction(info->extent_root, 0);
9698                 if (IS_ERR(trans)) {
9699                         error("error starting transaction");
9700                         ret = PTR_ERR(trans);
9701                         err |= !!ret;
9702                         goto close_out;
9703                 }
9704
9705                 if (init_extent_tree) {
9706                         printf("Creating a new extent tree\n");
9707                         ret = reinit_extent_tree(trans, info);
9708                         err |= !!ret;
9709                         if (ret)
9710                                 goto close_out;
9711                 }
9712
9713                 if (init_csum_tree) {
9714                         printf("Reinitialize checksum tree\n");
9715                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9716                         if (ret) {
9717                                 error("checksum tree initialization failed: %d",
9718                                                 ret);
9719                                 ret = -EIO;
9720                                 err |= !!ret;
9721                                 goto close_out;
9722                         }
9723
9724                         ret = fill_csum_tree(trans, info->csum_root,
9725                                              init_extent_tree);
9726                         err |= !!ret;
9727                         if (ret) {
9728                                 error("checksum tree refilling failed: %d", ret);
9729                                 return -EIO;
9730                         }
9731                 }
9732                 /*
9733                  * Ok now we commit and run the normal fsck, which will add
9734                  * extent entries for all of the items it finds.
9735                  */
9736                 ret = btrfs_commit_transaction(trans, info->extent_root);
9737                 err |= !!ret;
9738                 if (ret)
9739                         goto close_out;
9740         }
9741         if (!extent_buffer_uptodate(info->extent_root->node)) {
9742                 error("critical: extent_root, unable to check the filesystem");
9743                 ret = -EIO;
9744                 err |= !!ret;
9745                 goto close_out;
9746         }
9747         if (!extent_buffer_uptodate(info->csum_root->node)) {
9748                 error("critical: csum_root, unable to check the filesystem");
9749                 ret = -EIO;
9750                 err |= !!ret;
9751                 goto close_out;
9752         }
9753
9754         if (!init_extent_tree) {
9755                 ret = repair_root_items(info);
9756                 if (ret < 0) {
9757                         err = !!ret;
9758                         error("failed to repair root items: %s", strerror(-ret));
9759                         goto close_out;
9760                 }
9761                 if (repair) {
9762                         fprintf(stderr, "Fixed %d roots.\n", ret);
9763                         ret = 0;
9764                 } else if (ret > 0) {
9765                         fprintf(stderr,
9766                                 "Found %d roots with an outdated root item.\n",
9767                                 ret);
9768                         fprintf(stderr,
9769         "Please run a filesystem check with the option --repair to fix them.\n");
9770                         ret = 1;
9771                         err |= ret;
9772                         goto close_out;
9773                 }
9774         }
9775
9776         ret = do_check_chunks_and_extents(info);
9777         err |= !!ret;
9778         if (ret)
9779                 error(
9780                 "errors found in extent allocation tree or chunk allocation");
9781
9782         /* Only re-check super size after we checked and repaired the fs */
9783         err |= !is_super_size_valid(info);
9784
9785         if (!ctx.progress_enabled) {
9786                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
9787                         fprintf(stderr, "checking free space tree\n");
9788                 else
9789                         fprintf(stderr, "checking free space cache\n");
9790         }
9791         ret = check_space_cache(root);
9792         err |= !!ret;
9793         if (ret) {
9794                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
9795                         error("errors found in free space tree");
9796                 else
9797                         error("errors found in free space cache");
9798                 goto out;
9799         }
9800
9801         /*
9802          * We used to have to have these hole extents in between our real
9803          * extents so if we don't have this flag set we need to make sure there
9804          * are no gaps in the file extents for inodes, otherwise we can just
9805          * ignore it when this happens.
9806          */
9807         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
9808         ret = do_check_fs_roots(info, &root_cache);
9809         err |= !!ret;
9810         if (ret) {
9811                 error("errors found in fs roots");
9812                 goto out;
9813         }
9814
9815         fprintf(stderr, "checking csums\n");
9816         ret = check_csums(root);
9817         err |= !!ret;
9818         if (ret) {
9819                 error("errors found in csum tree");
9820                 goto out;
9821         }
9822
9823         fprintf(stderr, "checking root refs\n");
9824         /* For low memory mode, check_fs_roots_v2 handles root refs */
9825         if (check_mode != CHECK_MODE_LOWMEM) {
9826                 ret = check_root_refs(root, &root_cache);
9827                 err |= !!ret;
9828                 if (ret) {
9829                         error("errors found in root refs");
9830                         goto out;
9831                 }
9832         }
9833
9834         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9835                 struct extent_buffer *eb;
9836
9837                 eb = list_first_entry(&root->fs_info->recow_ebs,
9838                                       struct extent_buffer, recow);
9839                 list_del_init(&eb->recow);
9840                 ret = recow_extent_buffer(root, eb);
9841                 err |= !!ret;
9842                 if (ret) {
9843                         error("fails to fix transid errors");
9844                         break;
9845                 }
9846         }
9847
9848         while (!list_empty(&delete_items)) {
9849                 struct bad_item *bad;
9850
9851                 bad = list_first_entry(&delete_items, struct bad_item, list);
9852                 list_del_init(&bad->list);
9853                 if (repair) {
9854                         ret = delete_bad_item(root, bad);
9855                         err |= !!ret;
9856                 }
9857                 free(bad);
9858         }
9859
9860         if (info->quota_enabled) {
9861                 fprintf(stderr, "checking quota groups\n");
9862                 ret = qgroup_verify_all(info);
9863                 err |= !!ret;
9864                 if (ret) {
9865                         error("failed to check quota groups");
9866                         goto out;
9867                 }
9868                 report_qgroups(0);
9869                 ret = repair_qgroups(info, &qgroups_repaired);
9870                 err |= !!ret;
9871                 if (err) {
9872                         error("failed to repair quota groups");
9873                         goto out;
9874                 }
9875                 ret = 0;
9876         }
9877
9878         if (!list_empty(&root->fs_info->recow_ebs)) {
9879                 error("transid errors in file system");
9880                 ret = 1;
9881                 err |= !!ret;
9882         }
9883 out:
9884         printf("found %llu bytes used, ",
9885                (unsigned long long)bytes_used);
9886         if (err)
9887                 printf("error(s) found\n");
9888         else
9889                 printf("no error found\n");
9890         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9891         printf("total tree bytes: %llu\n",
9892                (unsigned long long)total_btree_bytes);
9893         printf("total fs tree bytes: %llu\n",
9894                (unsigned long long)total_fs_tree_bytes);
9895         printf("total extent tree bytes: %llu\n",
9896                (unsigned long long)total_extent_tree_bytes);
9897         printf("btree space waste bytes: %llu\n",
9898                (unsigned long long)btree_space_waste);
9899         printf("file data blocks allocated: %llu\n referenced %llu\n",
9900                 (unsigned long long)data_bytes_allocated,
9901                 (unsigned long long)data_bytes_referenced);
9902
9903         free_qgroup_counts();
9904         free_root_recs_tree(&root_cache);
9905 close_out:
9906         close_ctree(root);
9907 err_out:
9908         if (ctx.progress_enabled)
9909                 task_deinit(ctx.info);
9910
9911         return err;
9912 }