btrfs-progs: Unify btrfs_leaf_free_space() parameter with kernel
[platform/upstream/btrfs-progs.git] / check / main.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46 #include "check/mode-common.h"
47 #include "check/mode-original.h"
48 #include "check/mode-lowmem.h"
49
/*
 * Phase reported by the progress indicator.
 *
 * print_status_check() uses the value as an index into its
 * task_position_string[] table, so the order here has to match that table.
 * TASK_NOTHING is the "no output" sentinel and has to stay last.
 */
enum task_position {
        TASK_EXTENTS = 0,
        TASK_FREE_SPACE = 1,
        TASK_FS_ROOTS = 2,
        TASK_NOTHING = 3,
};
56
57 struct task_ctx {
58         int progress_enabled;
59         enum task_position tp;
60
61         struct task_info *info;
62 };
63
64 u64 bytes_used = 0;
65 u64 total_csum_bytes = 0;
66 u64 total_btree_bytes = 0;
67 u64 total_fs_tree_bytes = 0;
68 u64 total_extent_tree_bytes = 0;
69 u64 btree_space_waste = 0;
70 u64 data_bytes_allocated = 0;
71 u64 data_bytes_referenced = 0;
72 LIST_HEAD(duplicate_extents);
73 LIST_HEAD(delete_items);
74 int no_holes = 0;
75 int init_extent_tree = 0;
76 int check_data_csum = 0;
77 struct btrfs_fs_info *global_info;
78 struct task_ctx ctx = { 0 };
79 struct cache_tree *roots_info_cache = NULL;
80
/*
 * Checker implementations selectable by name (see parse_check_mode()).
 * CHECK_MODE_UNKNOWN is what parse_check_mode() returns for an unrecognised
 * name; CHECK_MODE_DEFAULT is an alias for the original mode.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL = 0,
        CHECK_MODE_LOWMEM = 1,
        CHECK_MODE_UNKNOWN = 2,
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL,
};

/* Mode in effect; starts out as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
89
90 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
91 {
92         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
93         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
94         struct data_backref *back1 = to_data_backref(ext1);
95         struct data_backref *back2 = to_data_backref(ext2);
96
97         WARN_ON(!ext1->is_data);
98         WARN_ON(!ext2->is_data);
99
100         /* parent and root are a union, so this covers both */
101         if (back1->parent > back2->parent)
102                 return 1;
103         if (back1->parent < back2->parent)
104                 return -1;
105
106         /* This is a full backref and the parents match. */
107         if (back1->node.full_backref)
108                 return 0;
109
110         if (back1->owner > back2->owner)
111                 return 1;
112         if (back1->owner < back2->owner)
113                 return -1;
114
115         if (back1->offset > back2->offset)
116                 return 1;
117         if (back1->offset < back2->offset)
118                 return -1;
119
120         if (back1->found_ref && back2->found_ref) {
121                 if (back1->disk_bytenr > back2->disk_bytenr)
122                         return 1;
123                 if (back1->disk_bytenr < back2->disk_bytenr)
124                         return -1;
125
126                 if (back1->bytes > back2->bytes)
127                         return 1;
128                 if (back1->bytes < back2->bytes)
129                         return -1;
130         }
131
132         return 0;
133 }
134
135 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
136 {
137         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
138         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
139         struct tree_backref *back1 = to_tree_backref(ext1);
140         struct tree_backref *back2 = to_tree_backref(ext2);
141
142         WARN_ON(ext1->is_data);
143         WARN_ON(ext2->is_data);
144
145         /* parent and root are a union, so this covers both */
146         if (back1->parent > back2->parent)
147                 return 1;
148         if (back1->parent < back2->parent)
149                 return -1;
150
151         return 0;
152 }
153
154 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
155 {
156         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
157         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
158
159         if (ext1->is_data > ext2->is_data)
160                 return 1;
161
162         if (ext1->is_data < ext2->is_data)
163                 return -1;
164
165         if (ext1->full_backref > ext2->full_backref)
166                 return 1;
167         if (ext1->full_backref < ext2->full_backref)
168                 return -1;
169
170         if (ext1->is_data)
171                 return compare_data_backref(node1, node2);
172         else
173                 return compare_tree_backref(node1, node2);
174 }
175
176
177 static void *print_status_check(void *p)
178 {
179         struct task_ctx *priv = p;
180         const char work_indicator[] = { '.', 'o', 'O', 'o' };
181         uint32_t count = 0;
182         static char *task_position_string[] = {
183                 "checking extents",
184                 "checking free space cache",
185                 "checking fs roots",
186         };
187
188         task_period_start(priv->info, 1000 /* 1s */);
189
190         if (priv->tp == TASK_NOTHING)
191                 return NULL;
192
193         while (1) {
194                 printf("%s [%c]\r", task_position_string[priv->tp],
195                                 work_indicator[count % 4]);
196                 count++;
197                 fflush(stdout);
198                 task_period_wait(priv->info);
199         }
200         return NULL;
201 }
202
/*
 * Terminate the progress line: emit a newline on stdout and flush it.
 * @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
        (void)p;

        fputs("\n", stdout);
        fflush(stdout);

        return 0;
}
210
211 static enum btrfs_check_mode parse_check_mode(const char *str)
212 {
213         if (strcmp(str, "lowmem") == 0)
214                 return CHECK_MODE_LOWMEM;
215         if (strcmp(str, "orig") == 0)
216                 return CHECK_MODE_ORIGINAL;
217         if (strcmp(str, "original") == 0)
218                 return CHECK_MODE_ORIGINAL;
219
220         return CHECK_MODE_UNKNOWN;
221 }
222
223 /* Compatible function to allow reuse of old codes */
224 static u64 first_extent_gap(struct rb_root *holes)
225 {
226         struct file_extent_hole *hole;
227
228         if (RB_EMPTY_ROOT(holes))
229                 return (u64)-1;
230
231         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
232         return hole->start;
233 }
234
235 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
236 {
237         struct file_extent_hole *hole1;
238         struct file_extent_hole *hole2;
239
240         hole1 = rb_entry(node1, struct file_extent_hole, node);
241         hole2 = rb_entry(node2, struct file_extent_hole, node);
242
243         if (hole1->start > hole2->start)
244                 return -1;
245         if (hole1->start < hole2->start)
246                 return 1;
247         /* Now hole1->start == hole2->start */
248         if (hole1->len >= hole2->len)
249                 /*
250                  * Hole 1 will be merge center
251                  * Same hole will be merged later
252                  */
253                 return -1;
254         /* Hole 2 will be merge center */
255         return 1;
256 }
257
258 /*
259  * Add a hole to the record
260  *
261  * This will do hole merge for copy_file_extent_holes(),
262  * which will ensure there won't be continuous holes.
263  */
264 static int add_file_extent_hole(struct rb_root *holes,
265                                 u64 start, u64 len)
266 {
267         struct file_extent_hole *hole;
268         struct file_extent_hole *prev = NULL;
269         struct file_extent_hole *next = NULL;
270
271         hole = malloc(sizeof(*hole));
272         if (!hole)
273                 return -ENOMEM;
274         hole->start = start;
275         hole->len = len;
276         /* Since compare will not return 0, no -EEXIST will happen */
277         rb_insert(holes, &hole->node, compare_hole);
278
279         /* simple merge with previous hole */
280         if (rb_prev(&hole->node))
281                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
282                                 node);
283         if (prev && prev->start + prev->len >= hole->start) {
284                 hole->len = hole->start + hole->len - prev->start;
285                 hole->start = prev->start;
286                 rb_erase(&prev->node, holes);
287                 free(prev);
288                 prev = NULL;
289         }
290
291         /* iterate merge with next holes */
292         while (1) {
293                 if (!rb_next(&hole->node))
294                         break;
295                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
296                                         node);
297                 if (hole->start + hole->len >= next->start) {
298                         if (hole->start + hole->len <= next->start + next->len)
299                                 hole->len = next->start + next->len -
300                                             hole->start;
301                         rb_erase(&next->node, holes);
302                         free(next);
303                         next = NULL;
304                 } else
305                         break;
306         }
307         return 0;
308 }
309
310 static int compare_hole_range(struct rb_node *node, void *data)
311 {
312         struct file_extent_hole *hole;
313         u64 start;
314
315         hole = (struct file_extent_hole *)data;
316         start = hole->start;
317
318         hole = rb_entry(node, struct file_extent_hole, node);
319         if (start < hole->start)
320                 return -1;
321         if (start >= hole->start && start < hole->start + hole->len)
322                 return 0;
323         return 1;
324 }
325
326 /*
327  * Delete a hole in the record
328  *
329  * This will do the hole split and is much restrict than add.
330  */
331 static int del_file_extent_hole(struct rb_root *holes,
332                                 u64 start, u64 len)
333 {
334         struct file_extent_hole *hole;
335         struct file_extent_hole tmp;
336         u64 prev_start = 0;
337         u64 prev_len = 0;
338         u64 next_start = 0;
339         u64 next_len = 0;
340         struct rb_node *node;
341         int have_prev = 0;
342         int have_next = 0;
343         int ret = 0;
344
345         tmp.start = start;
346         tmp.len = len;
347         node = rb_search(holes, &tmp, compare_hole_range, NULL);
348         if (!node)
349                 return -EEXIST;
350         hole = rb_entry(node, struct file_extent_hole, node);
351         if (start + len > hole->start + hole->len)
352                 return -EEXIST;
353
354         /*
355          * Now there will be no overlap, delete the hole and re-add the
356          * split(s) if they exists.
357          */
358         if (start > hole->start) {
359                 prev_start = hole->start;
360                 prev_len = start - hole->start;
361                 have_prev = 1;
362         }
363         if (hole->start + hole->len > start + len) {
364                 next_start = start + len;
365                 next_len = hole->start + hole->len - start - len;
366                 have_next = 1;
367         }
368         rb_erase(node, holes);
369         free(hole);
370         if (have_prev) {
371                 ret = add_file_extent_hole(holes, prev_start, prev_len);
372                 if (ret < 0)
373                         return ret;
374         }
375         if (have_next) {
376                 ret = add_file_extent_hole(holes, next_start, next_len);
377                 if (ret < 0)
378                         return ret;
379         }
380         return 0;
381 }
382
383 static int copy_file_extent_holes(struct rb_root *dst,
384                                   struct rb_root *src)
385 {
386         struct file_extent_hole *hole;
387         struct rb_node *node;
388         int ret = 0;
389
390         node = rb_first(src);
391         while (node) {
392                 hole = rb_entry(node, struct file_extent_hole, node);
393                 ret = add_file_extent_hole(dst, hole->start, hole->len);
394                 if (ret)
395                         break;
396                 node = rb_next(node);
397         }
398         return ret;
399 }
400
401 static void free_file_extent_holes(struct rb_root *holes)
402 {
403         struct rb_node *node;
404         struct file_extent_hole *hole;
405
406         node = rb_first(holes);
407         while (node) {
408                 hole = rb_entry(node, struct file_extent_hole, node);
409                 rb_erase(node, holes);
410                 free(hole);
411                 node = rb_first(holes);
412         }
413 }
414
415 static void record_root_in_trans(struct btrfs_trans_handle *trans,
416                                  struct btrfs_root *root)
417 {
418         if (root->last_trans != trans->transid) {
419                 root->track_dirty = 1;
420                 root->last_trans = trans->transid;
421                 root->commit_root = root->node;
422                 extent_buffer_get(root->node);
423         }
424 }
425
426 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
427 {
428         struct device_record *rec1;
429         struct device_record *rec2;
430
431         rec1 = rb_entry(node1, struct device_record, node);
432         rec2 = rb_entry(node2, struct device_record, node);
433         if (rec1->devid > rec2->devid)
434                 return -1;
435         else if (rec1->devid < rec2->devid)
436                 return 1;
437         else
438                 return 0;
439 }
440
441 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
442 {
443         struct inode_record *rec;
444         struct inode_backref *backref;
445         struct inode_backref *orig;
446         struct inode_backref *tmp;
447         struct orphan_data_extent *src_orphan;
448         struct orphan_data_extent *dst_orphan;
449         struct rb_node *rb;
450         size_t size;
451         int ret;
452
453         rec = malloc(sizeof(*rec));
454         if (!rec)
455                 return ERR_PTR(-ENOMEM);
456         memcpy(rec, orig_rec, sizeof(*rec));
457         rec->refs = 1;
458         INIT_LIST_HEAD(&rec->backrefs);
459         INIT_LIST_HEAD(&rec->orphan_extents);
460         rec->holes = RB_ROOT;
461
462         list_for_each_entry(orig, &orig_rec->backrefs, list) {
463                 size = sizeof(*orig) + orig->namelen + 1;
464                 backref = malloc(size);
465                 if (!backref) {
466                         ret = -ENOMEM;
467                         goto cleanup;
468                 }
469                 memcpy(backref, orig, size);
470                 list_add_tail(&backref->list, &rec->backrefs);
471         }
472         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
473                 dst_orphan = malloc(sizeof(*dst_orphan));
474                 if (!dst_orphan) {
475                         ret = -ENOMEM;
476                         goto cleanup;
477                 }
478                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
479                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
480         }
481         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
482         if (ret < 0)
483                 goto cleanup_rb;
484
485         return rec;
486
487 cleanup_rb:
488         rb = rb_first(&rec->holes);
489         while (rb) {
490                 struct file_extent_hole *hole;
491
492                 hole = rb_entry(rb, struct file_extent_hole, node);
493                 rb = rb_next(rb);
494                 free(hole);
495         }
496
497 cleanup:
498         if (!list_empty(&rec->backrefs))
499                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
500                         list_del(&orig->list);
501                         free(orig);
502                 }
503
504         if (!list_empty(&rec->orphan_extents))
505                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
506                         list_del(&orig->list);
507                         free(orig);
508                 }
509
510         free(rec);
511
512         return ERR_PTR(ret);
513 }
514
515 static void print_orphan_data_extents(struct list_head *orphan_extents,
516                                       u64 objectid)
517 {
518         struct orphan_data_extent *orphan;
519
520         if (list_empty(orphan_extents))
521                 return;
522         printf("The following data extent is lost in tree %llu:\n",
523                objectid);
524         list_for_each_entry(orphan, orphan_extents, list) {
525                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
526                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
527                        orphan->disk_len);
528         }
529 }
530
531 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
532 {
533         u64 root_objectid = root->root_key.objectid;
534         int errors = rec->errors;
535
536         if (!errors)
537                 return;
538         /* reloc root errors, we print its corresponding fs root objectid*/
539         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
540                 root_objectid = root->root_key.offset;
541                 fprintf(stderr, "reloc");
542         }
543         fprintf(stderr, "root %llu inode %llu errors %x",
544                 (unsigned long long) root_objectid,
545                 (unsigned long long) rec->ino, rec->errors);
546
547         if (errors & I_ERR_NO_INODE_ITEM)
548                 fprintf(stderr, ", no inode item");
549         if (errors & I_ERR_NO_ORPHAN_ITEM)
550                 fprintf(stderr, ", no orphan item");
551         if (errors & I_ERR_DUP_INODE_ITEM)
552                 fprintf(stderr, ", dup inode item");
553         if (errors & I_ERR_DUP_DIR_INDEX)
554                 fprintf(stderr, ", dup dir index");
555         if (errors & I_ERR_ODD_DIR_ITEM)
556                 fprintf(stderr, ", odd dir item");
557         if (errors & I_ERR_ODD_FILE_EXTENT)
558                 fprintf(stderr, ", odd file extent");
559         if (errors & I_ERR_BAD_FILE_EXTENT)
560                 fprintf(stderr, ", bad file extent");
561         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
562                 fprintf(stderr, ", file extent overlap");
563         if (errors & I_ERR_FILE_EXTENT_TOO_LARGE)
564                 fprintf(stderr, ", inline file extent too large");
565         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
566                 fprintf(stderr, ", file extent discount");
567         if (errors & I_ERR_DIR_ISIZE_WRONG)
568                 fprintf(stderr, ", dir isize wrong");
569         if (errors & I_ERR_FILE_NBYTES_WRONG)
570                 fprintf(stderr, ", nbytes wrong");
571         if (errors & I_ERR_ODD_CSUM_ITEM)
572                 fprintf(stderr, ", odd csum item");
573         if (errors & I_ERR_SOME_CSUM_MISSING)
574                 fprintf(stderr, ", some csum missing");
575         if (errors & I_ERR_LINK_COUNT_WRONG)
576                 fprintf(stderr, ", link count wrong");
577         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
578                 fprintf(stderr, ", orphan file extent");
579         fprintf(stderr, "\n");
580         /* Print the orphan extents if needed */
581         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
582                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
583
584         /* Print the holes if needed */
585         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
586                 struct file_extent_hole *hole;
587                 struct rb_node *node;
588                 int found = 0;
589
590                 node = rb_first(&rec->holes);
591                 fprintf(stderr, "Found file extent holes:\n");
592                 while (node) {
593                         found = 1;
594                         hole = rb_entry(node, struct file_extent_hole, node);
595                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
596                                 hole->start, hole->len);
597                         node = rb_next(node);
598                 }
599                 if (!found)
600                         fprintf(stderr, "\tstart: 0, len: %llu\n",
601                                 round_up(rec->isize,
602                                          root->fs_info->sectorsize));
603         }
604 }
605
606 static void print_ref_error(int errors)
607 {
608         if (errors & REF_ERR_NO_DIR_ITEM)
609                 fprintf(stderr, ", no dir item");
610         if (errors & REF_ERR_NO_DIR_INDEX)
611                 fprintf(stderr, ", no dir index");
612         if (errors & REF_ERR_NO_INODE_REF)
613                 fprintf(stderr, ", no inode ref");
614         if (errors & REF_ERR_DUP_DIR_ITEM)
615                 fprintf(stderr, ", dup dir item");
616         if (errors & REF_ERR_DUP_DIR_INDEX)
617                 fprintf(stderr, ", dup dir index");
618         if (errors & REF_ERR_DUP_INODE_REF)
619                 fprintf(stderr, ", dup inode ref");
620         if (errors & REF_ERR_INDEX_UNMATCH)
621                 fprintf(stderr, ", index mismatch");
622         if (errors & REF_ERR_FILETYPE_UNMATCH)
623                 fprintf(stderr, ", filetype mismatch");
624         if (errors & REF_ERR_NAME_TOO_LONG)
625                 fprintf(stderr, ", name too long");
626         if (errors & REF_ERR_NO_ROOT_REF)
627                 fprintf(stderr, ", no root ref");
628         if (errors & REF_ERR_NO_ROOT_BACKREF)
629                 fprintf(stderr, ", no root backref");
630         if (errors & REF_ERR_DUP_ROOT_REF)
631                 fprintf(stderr, ", dup root ref");
632         if (errors & REF_ERR_DUP_ROOT_BACKREF)
633                 fprintf(stderr, ", dup root backref");
634         fprintf(stderr, "\n");
635 }
636
637 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
638                                           u64 ino, int mod)
639 {
640         struct ptr_node *node;
641         struct cache_extent *cache;
642         struct inode_record *rec = NULL;
643         int ret;
644
645         cache = lookup_cache_extent(inode_cache, ino, 1);
646         if (cache) {
647                 node = container_of(cache, struct ptr_node, cache);
648                 rec = node->data;
649                 if (mod && rec->refs > 1) {
650                         node->data = clone_inode_rec(rec);
651                         if (IS_ERR(node->data))
652                                 return node->data;
653                         rec->refs--;
654                         rec = node->data;
655                 }
656         } else if (mod) {
657                 rec = calloc(1, sizeof(*rec));
658                 if (!rec)
659                         return ERR_PTR(-ENOMEM);
660                 rec->ino = ino;
661                 rec->extent_start = (u64)-1;
662                 rec->refs = 1;
663                 INIT_LIST_HEAD(&rec->backrefs);
664                 INIT_LIST_HEAD(&rec->orphan_extents);
665                 rec->holes = RB_ROOT;
666
667                 node = malloc(sizeof(*node));
668                 if (!node) {
669                         free(rec);
670                         return ERR_PTR(-ENOMEM);
671                 }
672                 node->cache.start = ino;
673                 node->cache.size = 1;
674                 node->data = rec;
675
676                 if (ino == BTRFS_FREE_INO_OBJECTID)
677                         rec->found_link = 1;
678
679                 ret = insert_cache_extent(inode_cache, &node->cache);
680                 if (ret)
681                         return ERR_PTR(-EEXIST);
682         }
683         return rec;
684 }
685
686 static void free_orphan_data_extents(struct list_head *orphan_extents)
687 {
688         struct orphan_data_extent *orphan;
689
690         while (!list_empty(orphan_extents)) {
691                 orphan = list_entry(orphan_extents->next,
692                                     struct orphan_data_extent, list);
693                 list_del(&orphan->list);
694                 free(orphan);
695         }
696 }
697
698 static void free_inode_rec(struct inode_record *rec)
699 {
700         struct inode_backref *backref;
701
702         if (--rec->refs > 0)
703                 return;
704
705         while (!list_empty(&rec->backrefs)) {
706                 backref = to_inode_backref(rec->backrefs.next);
707                 list_del(&backref->list);
708                 free(backref);
709         }
710         free_orphan_data_extents(&rec->orphan_extents);
711         free_file_extent_holes(&rec->holes);
712         free(rec);
713 }
714
715 static int can_free_inode_rec(struct inode_record *rec)
716 {
717         if (!rec->errors && rec->checked && rec->found_inode_item &&
718             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
719                 return 1;
720         return 0;
721 }
722
723 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
724                                  struct inode_record *rec)
725 {
726         struct cache_extent *cache;
727         struct inode_backref *tmp, *backref;
728         struct ptr_node *node;
729         u8 filetype;
730
731         if (!rec->found_inode_item)
732                 return;
733
734         filetype = imode_to_type(rec->imode);
735         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
736                 if (backref->found_dir_item && backref->found_dir_index) {
737                         if (backref->filetype != filetype)
738                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
739                         if (!backref->errors && backref->found_inode_ref &&
740                             rec->nlink == rec->found_link) {
741                                 list_del(&backref->list);
742                                 free(backref);
743                         }
744                 }
745         }
746
747         if (!rec->checked || rec->merging)
748                 return;
749
750         if (S_ISDIR(rec->imode)) {
751                 if (rec->found_size != rec->isize)
752                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
753                 if (rec->found_file_extent)
754                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
755         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
756                 if (rec->found_dir_item)
757                         rec->errors |= I_ERR_ODD_DIR_ITEM;
758                 if (rec->found_size != rec->nbytes)
759                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
760                 if (rec->nlink > 0 && !no_holes &&
761                     (rec->extent_end < rec->isize ||
762                      first_extent_gap(&rec->holes) < rec->isize))
763                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
764         }
765
766         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
767                 if (rec->found_csum_item && rec->nodatasum)
768                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
769                 if (rec->some_csum_missing && !rec->nodatasum)
770                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
771         }
772
773         BUG_ON(rec->refs != 1);
774         if (can_free_inode_rec(rec)) {
775                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
776                 node = container_of(cache, struct ptr_node, cache);
777                 BUG_ON(node->data != rec);
778                 remove_cache_extent(inode_cache, &node->cache);
779                 free(node);
780                 free_inode_rec(rec);
781         }
782 }
783
784 static int check_orphan_item(struct btrfs_root *root, u64 ino)
785 {
786         struct btrfs_path path;
787         struct btrfs_key key;
788         int ret;
789
790         key.objectid = BTRFS_ORPHAN_OBJECTID;
791         key.type = BTRFS_ORPHAN_ITEM_KEY;
792         key.offset = ino;
793
794         btrfs_init_path(&path);
795         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
796         btrfs_release_path(&path);
797         if (ret > 0)
798                 ret = -ENOENT;
799         return ret;
800 }
801
802 static int process_inode_item(struct extent_buffer *eb,
803                               int slot, struct btrfs_key *key,
804                               struct shared_node *active_node)
805 {
806         struct inode_record *rec;
807         struct btrfs_inode_item *item;
808
809         rec = active_node->current;
810         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
811         if (rec->found_inode_item) {
812                 rec->errors |= I_ERR_DUP_INODE_ITEM;
813                 return 1;
814         }
815         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
816         rec->nlink = btrfs_inode_nlink(eb, item);
817         rec->isize = btrfs_inode_size(eb, item);
818         rec->nbytes = btrfs_inode_nbytes(eb, item);
819         rec->imode = btrfs_inode_mode(eb, item);
820         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
821                 rec->nodatasum = 1;
822         rec->found_inode_item = 1;
823         if (rec->nlink == 0)
824                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
825         maybe_free_inode_rec(&active_node->inode_cache, rec);
826         return 0;
827 }
828
829 static struct inode_backref *get_inode_backref(struct inode_record *rec,
830                                                 const char *name,
831                                                 int namelen, u64 dir)
832 {
833         struct inode_backref *backref;
834
835         list_for_each_entry(backref, &rec->backrefs, list) {
836                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
837                         break;
838                 if (backref->dir != dir || backref->namelen != namelen)
839                         continue;
840                 if (memcmp(name, backref->name, namelen))
841                         continue;
842                 return backref;
843         }
844
845         backref = malloc(sizeof(*backref) + namelen + 1);
846         if (!backref)
847                 return NULL;
848         memset(backref, 0, sizeof(*backref));
849         backref->dir = dir;
850         backref->namelen = namelen;
851         memcpy(backref->name, name, namelen);
852         backref->name[namelen] = '\0';
853         list_add_tail(&backref->list, &rec->backrefs);
854         return backref;
855 }
856
857 static int add_inode_backref(struct cache_tree *inode_cache,
858                              u64 ino, u64 dir, u64 index,
859                              const char *name, int namelen,
860                              u8 filetype, u8 itemtype, int errors)
861 {
862         struct inode_record *rec;
863         struct inode_backref *backref;
864
865         rec = get_inode_rec(inode_cache, ino, 1);
866         BUG_ON(IS_ERR(rec));
867         backref = get_inode_backref(rec, name, namelen, dir);
868         BUG_ON(!backref);
869         if (errors)
870                 backref->errors |= errors;
871         if (itemtype == BTRFS_DIR_INDEX_KEY) {
872                 if (backref->found_dir_index)
873                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
874                 if (backref->found_inode_ref && backref->index != index)
875                         backref->errors |= REF_ERR_INDEX_UNMATCH;
876                 if (backref->found_dir_item && backref->filetype != filetype)
877                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
878
879                 backref->index = index;
880                 backref->filetype = filetype;
881                 backref->found_dir_index = 1;
882         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
883                 rec->found_link++;
884                 if (backref->found_dir_item)
885                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
886                 if (backref->found_dir_index && backref->filetype != filetype)
887                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
888
889                 backref->filetype = filetype;
890                 backref->found_dir_item = 1;
891         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
892                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
893                 if (backref->found_inode_ref)
894                         backref->errors |= REF_ERR_DUP_INODE_REF;
895                 if (backref->found_dir_index && backref->index != index)
896                         backref->errors |= REF_ERR_INDEX_UNMATCH;
897                 else
898                         backref->index = index;
899
900                 backref->ref_type = itemtype;
901                 backref->found_inode_ref = 1;
902         } else {
903                 BUG_ON(1);
904         }
905
906         maybe_free_inode_rec(inode_cache, rec);
907         return 0;
908 }
909
910 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
911                             struct cache_tree *dst_cache)
912 {
913         struct inode_backref *backref;
914         u32 dir_count = 0;
915         int ret = 0;
916
917         dst->merging = 1;
918         list_for_each_entry(backref, &src->backrefs, list) {
919                 if (backref->found_dir_index) {
920                         add_inode_backref(dst_cache, dst->ino, backref->dir,
921                                         backref->index, backref->name,
922                                         backref->namelen, backref->filetype,
923                                         BTRFS_DIR_INDEX_KEY, backref->errors);
924                 }
925                 if (backref->found_dir_item) {
926                         dir_count++;
927                         add_inode_backref(dst_cache, dst->ino,
928                                         backref->dir, 0, backref->name,
929                                         backref->namelen, backref->filetype,
930                                         BTRFS_DIR_ITEM_KEY, backref->errors);
931                 }
932                 if (backref->found_inode_ref) {
933                         add_inode_backref(dst_cache, dst->ino,
934                                         backref->dir, backref->index,
935                                         backref->name, backref->namelen, 0,
936                                         backref->ref_type, backref->errors);
937                 }
938         }
939
940         if (src->found_dir_item)
941                 dst->found_dir_item = 1;
942         if (src->found_file_extent)
943                 dst->found_file_extent = 1;
944         if (src->found_csum_item)
945                 dst->found_csum_item = 1;
946         if (src->some_csum_missing)
947                 dst->some_csum_missing = 1;
948         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
949                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
950                 if (ret < 0)
951                         return ret;
952         }
953
954         BUG_ON(src->found_link < dir_count);
955         dst->found_link += src->found_link - dir_count;
956         dst->found_size += src->found_size;
957         if (src->extent_start != (u64)-1) {
958                 if (dst->extent_start == (u64)-1) {
959                         dst->extent_start = src->extent_start;
960                         dst->extent_end = src->extent_end;
961                 } else {
962                         if (dst->extent_end > src->extent_start)
963                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
964                         else if (dst->extent_end < src->extent_start) {
965                                 ret = add_file_extent_hole(&dst->holes,
966                                         dst->extent_end,
967                                         src->extent_start - dst->extent_end);
968                         }
969                         if (dst->extent_end < src->extent_end)
970                                 dst->extent_end = src->extent_end;
971                 }
972         }
973
974         dst->errors |= src->errors;
975         if (src->found_inode_item) {
976                 if (!dst->found_inode_item) {
977                         dst->nlink = src->nlink;
978                         dst->isize = src->isize;
979                         dst->nbytes = src->nbytes;
980                         dst->imode = src->imode;
981                         dst->nodatasum = src->nodatasum;
982                         dst->found_inode_item = 1;
983                 } else {
984                         dst->errors |= I_ERR_DUP_INODE_ITEM;
985                 }
986         }
987         dst->merging = 0;
988
989         return 0;
990 }
991
992 static int splice_shared_node(struct shared_node *src_node,
993                               struct shared_node *dst_node)
994 {
995         struct cache_extent *cache;
996         struct ptr_node *node, *ins;
997         struct cache_tree *src, *dst;
998         struct inode_record *rec, *conflict;
999         u64 current_ino = 0;
1000         int splice = 0;
1001         int ret;
1002
1003         if (--src_node->refs == 0)
1004                 splice = 1;
1005         if (src_node->current)
1006                 current_ino = src_node->current->ino;
1007
1008         src = &src_node->root_cache;
1009         dst = &dst_node->root_cache;
1010 again:
1011         cache = search_cache_extent(src, 0);
1012         while (cache) {
1013                 node = container_of(cache, struct ptr_node, cache);
1014                 rec = node->data;
1015                 cache = next_cache_extent(cache);
1016
1017                 if (splice) {
1018                         remove_cache_extent(src, &node->cache);
1019                         ins = node;
1020                 } else {
1021                         ins = malloc(sizeof(*ins));
1022                         BUG_ON(!ins);
1023                         ins->cache.start = node->cache.start;
1024                         ins->cache.size = node->cache.size;
1025                         ins->data = rec;
1026                         rec->refs++;
1027                 }
1028                 ret = insert_cache_extent(dst, &ins->cache);
1029                 if (ret == -EEXIST) {
1030                         conflict = get_inode_rec(dst, rec->ino, 1);
1031                         BUG_ON(IS_ERR(conflict));
1032                         merge_inode_recs(rec, conflict, dst);
1033                         if (rec->checked) {
1034                                 conflict->checked = 1;
1035                                 if (dst_node->current == conflict)
1036                                         dst_node->current = NULL;
1037                         }
1038                         maybe_free_inode_rec(dst, conflict);
1039                         free_inode_rec(rec);
1040                         free(ins);
1041                 } else {
1042                         BUG_ON(ret);
1043                 }
1044         }
1045
1046         if (src == &src_node->root_cache) {
1047                 src = &src_node->inode_cache;
1048                 dst = &dst_node->inode_cache;
1049                 goto again;
1050         }
1051
1052         if (current_ino > 0 && (!dst_node->current ||
1053             current_ino > dst_node->current->ino)) {
1054                 if (dst_node->current) {
1055                         dst_node->current->checked = 1;
1056                         maybe_free_inode_rec(dst, dst_node->current);
1057                 }
1058                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1059                 BUG_ON(IS_ERR(dst_node->current));
1060         }
1061         return 0;
1062 }
1063
1064 static void free_inode_ptr(struct cache_extent *cache)
1065 {
1066         struct ptr_node *node;
1067         struct inode_record *rec;
1068
1069         node = container_of(cache, struct ptr_node, cache);
1070         rec = node->data;
1071         free_inode_rec(rec);
1072         free(node);
1073 }
1074
1075 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1076
1077 static struct shared_node *find_shared_node(struct cache_tree *shared,
1078                                             u64 bytenr)
1079 {
1080         struct cache_extent *cache;
1081         struct shared_node *node;
1082
1083         cache = lookup_cache_extent(shared, bytenr, 1);
1084         if (cache) {
1085                 node = container_of(cache, struct shared_node, cache);
1086                 return node;
1087         }
1088         return NULL;
1089 }
1090
1091 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1092 {
1093         int ret;
1094         struct shared_node *node;
1095
1096         node = calloc(1, sizeof(*node));
1097         if (!node)
1098                 return -ENOMEM;
1099         node->cache.start = bytenr;
1100         node->cache.size = 1;
1101         cache_tree_init(&node->root_cache);
1102         cache_tree_init(&node->inode_cache);
1103         node->refs = refs;
1104
1105         ret = insert_cache_extent(shared, &node->cache);
1106
1107         return ret;
1108 }
1109
1110 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1111                              struct walk_control *wc, int level)
1112 {
1113         struct shared_node *node;
1114         struct shared_node *dest;
1115         int ret;
1116
1117         if (level == wc->active_node)
1118                 return 0;
1119
1120         BUG_ON(wc->active_node <= level);
1121         node = find_shared_node(&wc->shared, bytenr);
1122         if (!node) {
1123                 ret = add_shared_node(&wc->shared, bytenr, refs);
1124                 BUG_ON(ret);
1125                 node = find_shared_node(&wc->shared, bytenr);
1126                 wc->nodes[level] = node;
1127                 wc->active_node = level;
1128                 return 0;
1129         }
1130
1131         if (wc->root_level == wc->active_node &&
1132             btrfs_root_refs(&root->root_item) == 0) {
1133                 if (--node->refs == 0) {
1134                         free_inode_recs_tree(&node->root_cache);
1135                         free_inode_recs_tree(&node->inode_cache);
1136                         remove_cache_extent(&wc->shared, &node->cache);
1137                         free(node);
1138                 }
1139                 return 1;
1140         }
1141
1142         dest = wc->nodes[wc->active_node];
1143         splice_shared_node(node, dest);
1144         if (node->refs == 0) {
1145                 remove_cache_extent(&wc->shared, &node->cache);
1146                 free(node);
1147         }
1148         return 1;
1149 }
1150
1151 static int leave_shared_node(struct btrfs_root *root,
1152                              struct walk_control *wc, int level)
1153 {
1154         struct shared_node *node;
1155         struct shared_node *dest;
1156         int i;
1157
1158         if (level == wc->root_level)
1159                 return 0;
1160
1161         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1162                 if (wc->nodes[i])
1163                         break;
1164         }
1165         BUG_ON(i >= BTRFS_MAX_LEVEL);
1166
1167         node = wc->nodes[wc->active_node];
1168         wc->nodes[wc->active_node] = NULL;
1169         wc->active_node = i;
1170
1171         dest = wc->nodes[wc->active_node];
1172         if (wc->active_node < wc->root_level ||
1173             btrfs_root_refs(&root->root_item) > 0) {
1174                 BUG_ON(node->refs <= 1);
1175                 splice_shared_node(node, dest);
1176         } else {
1177                 BUG_ON(node->refs < 2);
1178                 node->refs--;
1179         }
1180         return 0;
1181 }
1182
1183 /*
1184  * Returns:
1185  * < 0 - on error
1186  * 1   - if the root with id child_root_id is a child of root parent_root_id
1187  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1188  *       has other root(s) as parent(s)
1189  * 2   - if the root child_root_id doesn't have any parent roots
1190  */
1191 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1192                          u64 child_root_id)
1193 {
1194         struct btrfs_path path;
1195         struct btrfs_key key;
1196         struct extent_buffer *leaf;
1197         int has_parent = 0;
1198         int ret;
1199
1200         btrfs_init_path(&path);
1201
1202         key.objectid = parent_root_id;
1203         key.type = BTRFS_ROOT_REF_KEY;
1204         key.offset = child_root_id;
1205         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1206                                 0, 0);
1207         if (ret < 0)
1208                 return ret;
1209         btrfs_release_path(&path);
1210         if (!ret)
1211                 return 1;
1212
1213         key.objectid = child_root_id;
1214         key.type = BTRFS_ROOT_BACKREF_KEY;
1215         key.offset = 0;
1216         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1217                                 0, 0);
1218         if (ret < 0)
1219                 goto out;
1220
1221         while (1) {
1222                 leaf = path.nodes[0];
1223                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1224                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1225                         if (ret)
1226                                 break;
1227                         leaf = path.nodes[0];
1228                 }
1229
1230                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1231                 if (key.objectid != child_root_id ||
1232                     key.type != BTRFS_ROOT_BACKREF_KEY)
1233                         break;
1234
1235                 has_parent = 1;
1236
1237                 if (key.offset == parent_root_id) {
1238                         btrfs_release_path(&path);
1239                         return 1;
1240                 }
1241
1242                 path.slots[0]++;
1243         }
1244 out:
1245         btrfs_release_path(&path);
1246         if (ret < 0)
1247                 return ret;
1248         return has_parent ? 0 : 2;
1249 }
1250
1251 static int process_dir_item(struct extent_buffer *eb,
1252                             int slot, struct btrfs_key *key,
1253                             struct shared_node *active_node)
1254 {
1255         u32 total;
1256         u32 cur = 0;
1257         u32 len;
1258         u32 name_len;
1259         u32 data_len;
1260         int error;
1261         int nritems = 0;
1262         u8 filetype;
1263         struct btrfs_dir_item *di;
1264         struct inode_record *rec;
1265         struct cache_tree *root_cache;
1266         struct cache_tree *inode_cache;
1267         struct btrfs_key location;
1268         char namebuf[BTRFS_NAME_LEN];
1269
1270         root_cache = &active_node->root_cache;
1271         inode_cache = &active_node->inode_cache;
1272         rec = active_node->current;
1273         rec->found_dir_item = 1;
1274
1275         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1276         total = btrfs_item_size_nr(eb, slot);
1277         while (cur < total) {
1278                 nritems++;
1279                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1280                 name_len = btrfs_dir_name_len(eb, di);
1281                 data_len = btrfs_dir_data_len(eb, di);
1282                 filetype = btrfs_dir_type(eb, di);
1283
1284                 rec->found_size += name_len;
1285                 if (cur + sizeof(*di) + name_len > total ||
1286                     name_len > BTRFS_NAME_LEN) {
1287                         error = REF_ERR_NAME_TOO_LONG;
1288
1289                         if (cur + sizeof(*di) > total)
1290                                 break;
1291                         len = min_t(u32, total - cur - sizeof(*di),
1292                                     BTRFS_NAME_LEN);
1293                 } else {
1294                         len = name_len;
1295                         error = 0;
1296                 }
1297
1298                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1299
1300                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1301                     key->offset != btrfs_name_hash(namebuf, len)) {
1302                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1303                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1304                         key->objectid, key->offset, namebuf, len, filetype,
1305                         key->offset, btrfs_name_hash(namebuf, len));
1306                 }
1307
1308                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1309                         add_inode_backref(inode_cache, location.objectid,
1310                                           key->objectid, key->offset, namebuf,
1311                                           len, filetype, key->type, error);
1312                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1313                         add_inode_backref(root_cache, location.objectid,
1314                                           key->objectid, key->offset,
1315                                           namebuf, len, filetype,
1316                                           key->type, error);
1317                 } else {
1318                         fprintf(stderr,
1319                                 "unknown location type %d in DIR_ITEM[%llu %llu]\n",
1320                                 location.type, key->objectid, key->offset);
1321                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1322                                           key->objectid, key->offset, namebuf,
1323                                           len, filetype, key->type, error);
1324                 }
1325
1326                 len = sizeof(*di) + name_len + data_len;
1327                 di = (struct btrfs_dir_item *)((char *)di + len);
1328                 cur += len;
1329         }
1330         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1331                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1332
1333         return 0;
1334 }
1335
1336 static int process_inode_ref(struct extent_buffer *eb,
1337                              int slot, struct btrfs_key *key,
1338                              struct shared_node *active_node)
1339 {
1340         u32 total;
1341         u32 cur = 0;
1342         u32 len;
1343         u32 name_len;
1344         u64 index;
1345         int error;
1346         struct cache_tree *inode_cache;
1347         struct btrfs_inode_ref *ref;
1348         char namebuf[BTRFS_NAME_LEN];
1349
1350         inode_cache = &active_node->inode_cache;
1351
1352         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1353         total = btrfs_item_size_nr(eb, slot);
1354         while (cur < total) {
1355                 name_len = btrfs_inode_ref_name_len(eb, ref);
1356                 index = btrfs_inode_ref_index(eb, ref);
1357
1358                 /* inode_ref + namelen should not cross item boundary */
1359                 if (cur + sizeof(*ref) + name_len > total ||
1360                     name_len > BTRFS_NAME_LEN) {
1361                         if (total < cur + sizeof(*ref))
1362                                 break;
1363
1364                         /* Still try to read out the remaining part */
1365                         len = min_t(u32, total - cur - sizeof(*ref),
1366                                     BTRFS_NAME_LEN);
1367                         error = REF_ERR_NAME_TOO_LONG;
1368                 } else {
1369                         len = name_len;
1370                         error = 0;
1371                 }
1372
1373                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1374                 add_inode_backref(inode_cache, key->objectid, key->offset,
1375                                   index, namebuf, len, 0, key->type, error);
1376
1377                 len = sizeof(*ref) + name_len;
1378                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1379                 cur += len;
1380         }
1381         return 0;
1382 }
1383
1384 static int process_inode_extref(struct extent_buffer *eb,
1385                                 int slot, struct btrfs_key *key,
1386                                 struct shared_node *active_node)
1387 {
1388         u32 total;
1389         u32 cur = 0;
1390         u32 len;
1391         u32 name_len;
1392         u64 index;
1393         u64 parent;
1394         int error;
1395         struct cache_tree *inode_cache;
1396         struct btrfs_inode_extref *extref;
1397         char namebuf[BTRFS_NAME_LEN];
1398
1399         inode_cache = &active_node->inode_cache;
1400
1401         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1402         total = btrfs_item_size_nr(eb, slot);
1403         while (cur < total) {
1404                 name_len = btrfs_inode_extref_name_len(eb, extref);
1405                 index = btrfs_inode_extref_index(eb, extref);
1406                 parent = btrfs_inode_extref_parent(eb, extref);
1407                 if (name_len <= BTRFS_NAME_LEN) {
1408                         len = name_len;
1409                         error = 0;
1410                 } else {
1411                         len = BTRFS_NAME_LEN;
1412                         error = REF_ERR_NAME_TOO_LONG;
1413                 }
1414                 read_extent_buffer(eb, namebuf,
1415                                    (unsigned long)(extref + 1), len);
1416                 add_inode_backref(inode_cache, key->objectid, parent,
1417                                   index, namebuf, len, 0, key->type, error);
1418
1419                 len = sizeof(*extref) + name_len;
1420                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1421                 cur += len;
1422         }
1423         return 0;
1424
1425 }
1426
1427 static int process_file_extent(struct btrfs_root *root,
1428                                 struct extent_buffer *eb,
1429                                 int slot, struct btrfs_key *key,
1430                                 struct shared_node *active_node)
1431 {
1432         struct inode_record *rec;
1433         struct btrfs_file_extent_item *fi;
1434         u64 num_bytes = 0;
1435         u64 disk_bytenr = 0;
1436         u64 extent_offset = 0;
1437         u64 mask = root->fs_info->sectorsize - 1;
1438         u32 max_inline_size = min_t(u32, mask,
1439                                 BTRFS_MAX_INLINE_DATA_SIZE(root->fs_info));
1440         int extent_type;
1441         int ret;
1442
1443         rec = active_node->current;
1444         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1445         rec->found_file_extent = 1;
1446
1447         if (rec->extent_start == (u64)-1) {
1448                 rec->extent_start = key->offset;
1449                 rec->extent_end = key->offset;
1450         }
1451
1452         if (rec->extent_end > key->offset)
1453                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1454         else if (rec->extent_end < key->offset) {
1455                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1456                                            key->offset - rec->extent_end);
1457                 if (ret < 0)
1458                         return ret;
1459         }
1460
1461         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1462         extent_type = btrfs_file_extent_type(eb, fi);
1463
1464         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1465                 u8 compression = btrfs_file_extent_compression(eb, fi);
1466                 struct btrfs_item *item = btrfs_item_nr(slot);
1467
1468                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1469                 if (num_bytes == 0)
1470                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1471                 if (compression) {
1472                         if (btrfs_file_extent_inline_item_len(eb, item) >
1473                             max_inline_size ||
1474                             num_bytes > root->fs_info->sectorsize)
1475                                 rec->errors |= I_ERR_FILE_EXTENT_TOO_LARGE;
1476                 } else {
1477                         if (num_bytes > max_inline_size)
1478                                 rec->errors |= I_ERR_FILE_EXTENT_TOO_LARGE;
1479                 }
1480                 rec->found_size += num_bytes;
1481                 num_bytes = (num_bytes + mask) & ~mask;
1482         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1483                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1484                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1485                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1486                 extent_offset = btrfs_file_extent_offset(eb, fi);
1487                 if (num_bytes == 0 || (num_bytes & mask))
1488                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1489                 if (num_bytes + extent_offset >
1490                     btrfs_file_extent_ram_bytes(eb, fi))
1491                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1492                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1493                     (btrfs_file_extent_compression(eb, fi) ||
1494                      btrfs_file_extent_encryption(eb, fi) ||
1495                      btrfs_file_extent_other_encoding(eb, fi)))
1496                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1497                 if (disk_bytenr > 0)
1498                         rec->found_size += num_bytes;
1499         } else {
1500                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1501         }
1502         rec->extent_end = key->offset + num_bytes;
1503
1504         /*
1505          * The data reloc tree will copy full extents into its inode and then
1506          * copy the corresponding csums.  Because the extent it copied could be
1507          * a preallocated extent that hasn't been written to yet there may be no
1508          * csums to copy, ergo we won't have csums for our file extent.  This is
1509          * ok so just don't bother checking csums if the inode belongs to the
1510          * data reloc tree.
1511          */
1512         if (disk_bytenr > 0 &&
1513             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1514                 u64 found;
1515                 if (btrfs_file_extent_compression(eb, fi))
1516                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1517                 else
1518                         disk_bytenr += extent_offset;
1519
1520                 ret = count_csum_range(root->fs_info, disk_bytenr, num_bytes,
1521                                        &found);
1522                 if (ret < 0)
1523                         return ret;
1524                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1525                         if (found > 0)
1526                                 rec->found_csum_item = 1;
1527                         if (found < num_bytes)
1528                                 rec->some_csum_missing = 1;
1529                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1530                         if (found > 0) {
1531                                 ret = check_prealloc_extent_written(root->fs_info,
1532                                                                     disk_bytenr,
1533                                                                     num_bytes);
1534                                 if (ret < 0)
1535                                         return ret;
1536                                 if (ret == 0)
1537                                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1538                         }
1539                 }
1540         }
1541         return 0;
1542 }
1543
1544 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1545                             struct walk_control *wc)
1546 {
1547         struct btrfs_key key;
1548         u32 nritems;
1549         int i;
1550         int ret = 0;
1551         struct cache_tree *inode_cache;
1552         struct shared_node *active_node;
1553
1554         if (wc->root_level == wc->active_node &&
1555             btrfs_root_refs(&root->root_item) == 0)
1556                 return 0;
1557
1558         active_node = wc->nodes[wc->active_node];
1559         inode_cache = &active_node->inode_cache;
1560         nritems = btrfs_header_nritems(eb);
1561         for (i = 0; i < nritems; i++) {
1562                 btrfs_item_key_to_cpu(eb, &key, i);
1563
1564                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1565                         continue;
1566                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1567                         continue;
1568
1569                 if (active_node->current == NULL ||
1570                     active_node->current->ino < key.objectid) {
1571                         if (active_node->current) {
1572                                 active_node->current->checked = 1;
1573                                 maybe_free_inode_rec(inode_cache,
1574                                                      active_node->current);
1575                         }
1576                         active_node->current = get_inode_rec(inode_cache,
1577                                                              key.objectid, 1);
1578                         BUG_ON(IS_ERR(active_node->current));
1579                 }
1580                 switch (key.type) {
1581                 case BTRFS_DIR_ITEM_KEY:
1582                 case BTRFS_DIR_INDEX_KEY:
1583                         ret = process_dir_item(eb, i, &key, active_node);
1584                         break;
1585                 case BTRFS_INODE_REF_KEY:
1586                         ret = process_inode_ref(eb, i, &key, active_node);
1587                         break;
1588                 case BTRFS_INODE_EXTREF_KEY:
1589                         ret = process_inode_extref(eb, i, &key, active_node);
1590                         break;
1591                 case BTRFS_INODE_ITEM_KEY:
1592                         ret = process_inode_item(eb, i, &key, active_node);
1593                         break;
1594                 case BTRFS_EXTENT_DATA_KEY:
1595                         ret = process_file_extent(root, eb, i, &key,
1596                                                   active_node);
1597                         break;
1598                 default:
1599                         break;
1600                 };
1601         }
1602         return ret;
1603 }
1604
1605 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1606                           struct walk_control *wc, int *level,
1607                           struct node_refs *nrefs)
1608 {
1609         enum btrfs_tree_block_status status;
1610         u64 bytenr;
1611         u64 ptr_gen;
1612         struct btrfs_fs_info *fs_info = root->fs_info;
1613         struct extent_buffer *next;
1614         struct extent_buffer *cur;
1615         int ret, err = 0;
1616         u64 refs;
1617
1618         WARN_ON(*level < 0);
1619         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1620
1621         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1622                 refs = nrefs->refs[*level];
1623                 ret = 0;
1624         } else {
1625                 ret = btrfs_lookup_extent_info(NULL, root,
1626                                        path->nodes[*level]->start,
1627                                        *level, 1, &refs, NULL);
1628                 if (ret < 0) {
1629                         err = ret;
1630                         goto out;
1631                 }
1632                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1633                 nrefs->refs[*level] = refs;
1634         }
1635
1636         if (refs > 1) {
1637                 ret = enter_shared_node(root, path->nodes[*level]->start,
1638                                         refs, wc, *level);
1639                 if (ret > 0) {
1640                         err = ret;
1641                         goto out;
1642                 }
1643         }
1644
1645         while (*level >= 0) {
1646                 WARN_ON(*level < 0);
1647                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1648                 cur = path->nodes[*level];
1649
1650                 if (btrfs_header_level(cur) != *level)
1651                         WARN_ON(1);
1652
1653                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1654                         break;
1655                 if (*level == 0) {
1656                         ret = process_one_leaf(root, cur, wc);
1657                         if (ret < 0)
1658                                 err = ret;
1659                         break;
1660                 }
1661                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1662                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1663
1664                 if (bytenr == nrefs->bytenr[*level - 1]) {
1665                         refs = nrefs->refs[*level - 1];
1666                 } else {
1667                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1668                                         *level - 1, 1, &refs, NULL);
1669                         if (ret < 0) {
1670                                 refs = 0;
1671                         } else {
1672                                 nrefs->bytenr[*level - 1] = bytenr;
1673                                 nrefs->refs[*level - 1] = refs;
1674                         }
1675                 }
1676
1677                 if (refs > 1) {
1678                         ret = enter_shared_node(root, bytenr, refs,
1679                                                 wc, *level - 1);
1680                         if (ret > 0) {
1681                                 path->slots[*level]++;
1682                                 continue;
1683                         }
1684                 }
1685
1686                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
1687                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1688                         free_extent_buffer(next);
1689                         reada_walk_down(root, cur, path->slots[*level]);
1690                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
1691                         if (!extent_buffer_uptodate(next)) {
1692                                 struct btrfs_key node_key;
1693
1694                                 btrfs_node_key_to_cpu(path->nodes[*level],
1695                                                       &node_key,
1696                                                       path->slots[*level]);
1697                                 btrfs_add_corrupt_extent_record(root->fs_info,
1698                                                 &node_key,
1699                                                 path->nodes[*level]->start,
1700                                                 root->fs_info->nodesize,
1701                                                 *level);
1702                                 err = -EIO;
1703                                 goto out;
1704                         }
1705                 }
1706
1707                 ret = check_child_node(cur, path->slots[*level], next);
1708                 if (ret) {
1709                         free_extent_buffer(next);
1710                         err = ret;
1711                         goto out;
1712                 }
1713
1714                 if (btrfs_is_leaf(next))
1715                         status = btrfs_check_leaf(root, NULL, next);
1716                 else
1717                         status = btrfs_check_node(root, NULL, next);
1718                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1719                         free_extent_buffer(next);
1720                         err = -EIO;
1721                         goto out;
1722                 }
1723
1724                 *level = *level - 1;
1725                 free_extent_buffer(path->nodes[*level]);
1726                 path->nodes[*level] = next;
1727                 path->slots[*level] = 0;
1728         }
1729 out:
1730         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1731         return err;
1732 }
1733
1734 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1735                         struct walk_control *wc, int *level)
1736 {
1737         int i;
1738         struct extent_buffer *leaf;
1739
1740         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1741                 leaf = path->nodes[i];
1742                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1743                         path->slots[i]++;
1744                         *level = i;
1745                         return 0;
1746                 }
1747                 free_extent_buffer(path->nodes[*level]);
1748                 path->nodes[*level] = NULL;
1749                 BUG_ON(*level > wc->active_node);
1750                 if (*level == wc->active_node)
1751                         leave_shared_node(root, wc, *level);
1752                 *level = i + 1;
1753         }
1754         return 1;
1755 }
1756
1757 static int check_root_dir(struct inode_record *rec)
1758 {
1759         struct inode_backref *backref;
1760         int ret = -1;
1761
1762         if (!rec->found_inode_item || rec->errors)
1763                 goto out;
1764         if (rec->nlink != 1 || rec->found_link != 0)
1765                 goto out;
1766         if (list_empty(&rec->backrefs))
1767                 goto out;
1768         backref = to_inode_backref(rec->backrefs.next);
1769         if (!backref->found_inode_ref)
1770                 goto out;
1771         if (backref->index != 0 || backref->namelen != 2 ||
1772             memcmp(backref->name, "..", 2))
1773                 goto out;
1774         if (backref->found_dir_index || backref->found_dir_item)
1775                 goto out;
1776         ret = 0;
1777 out:
1778         return ret;
1779 }
1780
1781 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1782                               struct btrfs_root *root, struct btrfs_path *path,
1783                               struct inode_record *rec)
1784 {
1785         struct btrfs_inode_item *ei;
1786         struct btrfs_key key;
1787         int ret;
1788
1789         key.objectid = rec->ino;
1790         key.type = BTRFS_INODE_ITEM_KEY;
1791         key.offset = (u64)-1;
1792
1793         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1794         if (ret < 0)
1795                 goto out;
1796         if (ret) {
1797                 if (!path->slots[0]) {
1798                         ret = -ENOENT;
1799                         goto out;
1800                 }
1801                 path->slots[0]--;
1802                 ret = 0;
1803         }
1804         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1805         if (key.objectid != rec->ino) {
1806                 ret = -ENOENT;
1807                 goto out;
1808         }
1809
1810         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1811                             struct btrfs_inode_item);
1812         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1813         btrfs_mark_buffer_dirty(path->nodes[0]);
1814         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1815         printf("reset isize for dir %llu root %llu\n", rec->ino,
1816                root->root_key.objectid);
1817 out:
1818         btrfs_release_path(path);
1819         return ret;
1820 }
1821
1822 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1823                                     struct btrfs_root *root,
1824                                     struct btrfs_path *path,
1825                                     struct inode_record *rec)
1826 {
1827         int ret;
1828
1829         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
1830         btrfs_release_path(path);
1831         if (!ret)
1832                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1833         return ret;
1834 }
1835
1836 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
1837                                struct btrfs_root *root,
1838                                struct btrfs_path *path,
1839                                struct inode_record *rec)
1840 {
1841         struct btrfs_inode_item *ei;
1842         struct btrfs_key key;
1843         int ret = 0;
1844
1845         key.objectid = rec->ino;
1846         key.type = BTRFS_INODE_ITEM_KEY;
1847         key.offset = 0;
1848
1849         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1850         if (ret) {
1851                 if (ret > 0)
1852                         ret = -ENOENT;
1853                 goto out;
1854         }
1855
1856         /* Since ret == 0, no need to check anything */
1857         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1858                             struct btrfs_inode_item);
1859         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
1860         btrfs_mark_buffer_dirty(path->nodes[0]);
1861         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
1862         printf("reset nbytes for ino %llu root %llu\n",
1863                rec->ino, root->root_key.objectid);
1864 out:
1865         btrfs_release_path(path);
1866         return ret;
1867 }
1868
1869 static int add_missing_dir_index(struct btrfs_root *root,
1870                                  struct cache_tree *inode_cache,
1871                                  struct inode_record *rec,
1872                                  struct inode_backref *backref)
1873 {
1874         struct btrfs_path path;
1875         struct btrfs_trans_handle *trans;
1876         struct btrfs_dir_item *dir_item;
1877         struct extent_buffer *leaf;
1878         struct btrfs_key key;
1879         struct btrfs_disk_key disk_key;
1880         struct inode_record *dir_rec;
1881         unsigned long name_ptr;
1882         u32 data_size = sizeof(*dir_item) + backref->namelen;
1883         int ret;
1884
1885         trans = btrfs_start_transaction(root, 1);
1886         if (IS_ERR(trans))
1887                 return PTR_ERR(trans);
1888
1889         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
1890                 (unsigned long long)rec->ino);
1891
1892         btrfs_init_path(&path);
1893         key.objectid = backref->dir;
1894         key.type = BTRFS_DIR_INDEX_KEY;
1895         key.offset = backref->index;
1896         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
1897         BUG_ON(ret);
1898
1899         leaf = path.nodes[0];
1900         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
1901
1902         disk_key.objectid = cpu_to_le64(rec->ino);
1903         disk_key.type = BTRFS_INODE_ITEM_KEY;
1904         disk_key.offset = 0;
1905
1906         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
1907         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
1908         btrfs_set_dir_data_len(leaf, dir_item, 0);
1909         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
1910         name_ptr = (unsigned long)(dir_item + 1);
1911         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
1912         btrfs_mark_buffer_dirty(leaf);
1913         btrfs_release_path(&path);
1914         btrfs_commit_transaction(trans, root);
1915
1916         backref->found_dir_index = 1;
1917         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
1918         BUG_ON(IS_ERR(dir_rec));
1919         if (!dir_rec)
1920                 return 0;
1921         dir_rec->found_size += backref->namelen;
1922         if (dir_rec->found_size == dir_rec->isize &&
1923             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
1924                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1925         if (dir_rec->found_size != dir_rec->isize)
1926                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1927
1928         return 0;
1929 }
1930
1931 static int delete_dir_index(struct btrfs_root *root,
1932                             struct inode_backref *backref)
1933 {
1934         struct btrfs_trans_handle *trans;
1935         struct btrfs_dir_item *di;
1936         struct btrfs_path path;
1937         int ret = 0;
1938
1939         trans = btrfs_start_transaction(root, 1);
1940         if (IS_ERR(trans))
1941                 return PTR_ERR(trans);
1942
1943         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
1944                 (unsigned long long)backref->dir,
1945                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
1946                 (unsigned long long)root->objectid);
1947
1948         btrfs_init_path(&path);
1949         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
1950                                     backref->name, backref->namelen,
1951                                     backref->index, -1);
1952         if (IS_ERR(di)) {
1953                 ret = PTR_ERR(di);
1954                 btrfs_release_path(&path);
1955                 btrfs_commit_transaction(trans, root);
1956                 if (ret == -ENOENT)
1957                         return 0;
1958                 return ret;
1959         }
1960
1961         if (!di)
1962                 ret = btrfs_del_item(trans, root, &path);
1963         else
1964                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
1965         BUG_ON(ret);
1966         btrfs_release_path(&path);
1967         btrfs_commit_transaction(trans, root);
1968         return ret;
1969 }
1970
1971 static int create_inode_item(struct btrfs_root *root,
1972                              struct inode_record *rec, int root_dir)
1973 {
1974         struct btrfs_trans_handle *trans;
1975         u64 nlink = 0;
1976         u32 mode = 0;
1977         u64 size = 0;
1978         int ret;
1979
1980         trans = btrfs_start_transaction(root, 1);
1981         if (IS_ERR(trans)) {
1982                 ret = PTR_ERR(trans);
1983                 return ret;
1984         }
1985
1986         nlink = root_dir ? 1 : rec->found_link;
1987         if (rec->found_dir_item) {
1988                 if (rec->found_file_extent)
1989                         fprintf(stderr, "root %llu inode %llu has both a dir "
1990                                 "item and extents, unsure if it is a dir or a "
1991                                 "regular file so setting it as a directory\n",
1992                                 (unsigned long long)root->objectid,
1993                                 (unsigned long long)rec->ino);
1994                 mode = S_IFDIR | 0755;
1995                 size = rec->found_size;
1996         } else if (!rec->found_dir_item) {
1997                 size = rec->extent_end;
1998                 mode =  S_IFREG | 0755;
1999         }
2000
2001         ret = insert_inode_item(trans, root, rec->ino, size, rec->nbytes,
2002                                   nlink, mode);
2003         btrfs_commit_transaction(trans, root);
2004         return 0;
2005 }
2006
2007 static int repair_inode_backrefs(struct btrfs_root *root,
2008                                  struct inode_record *rec,
2009                                  struct cache_tree *inode_cache,
2010                                  int delete)
2011 {
2012         struct inode_backref *tmp, *backref;
2013         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2014         int ret = 0;
2015         int repaired = 0;
2016
2017         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2018                 if (!delete && rec->ino == root_dirid) {
2019                         if (!rec->found_inode_item) {
2020                                 ret = create_inode_item(root, rec, 1);
2021                                 if (ret)
2022                                         break;
2023                                 repaired++;
2024                         }
2025                 }
2026
2027                 /* Index 0 for root dir's are special, don't mess with it */
2028                 if (rec->ino == root_dirid && backref->index == 0)
2029                         continue;
2030
2031                 if (delete &&
2032                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2033                      (backref->found_dir_index && backref->found_inode_ref &&
2034                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2035                         ret = delete_dir_index(root, backref);
2036                         if (ret)
2037                                 break;
2038                         repaired++;
2039                         list_del(&backref->list);
2040                         free(backref);
2041                         continue;
2042                 }
2043
2044                 if (!delete && !backref->found_dir_index &&
2045                     backref->found_dir_item && backref->found_inode_ref) {
2046                         ret = add_missing_dir_index(root, inode_cache, rec,
2047                                                     backref);
2048                         if (ret)
2049                                 break;
2050                         repaired++;
2051                         if (backref->found_dir_item &&
2052                             backref->found_dir_index) {
2053                                 if (!backref->errors &&
2054                                     backref->found_inode_ref) {
2055                                         list_del(&backref->list);
2056                                         free(backref);
2057                                         continue;
2058                                 }
2059                         }
2060                 }
2061
2062                 if (!delete && (!backref->found_dir_index &&
2063                                 !backref->found_dir_item &&
2064                                 backref->found_inode_ref)) {
2065                         struct btrfs_trans_handle *trans;
2066                         struct btrfs_key location;
2067
2068                         ret = check_dir_conflict(root, backref->name,
2069                                                  backref->namelen,
2070                                                  backref->dir,
2071                                                  backref->index);
2072                         if (ret) {
2073                                 /*
2074                                  * let nlink fixing routine to handle it,
2075                                  * which can do it better.
2076                                  */
2077                                 ret = 0;
2078                                 break;
2079                         }
2080                         location.objectid = rec->ino;
2081                         location.type = BTRFS_INODE_ITEM_KEY;
2082                         location.offset = 0;
2083
2084                         trans = btrfs_start_transaction(root, 1);
2085                         if (IS_ERR(trans)) {
2086                                 ret = PTR_ERR(trans);
2087                                 break;
2088                         }
2089                         fprintf(stderr, "adding missing dir index/item pair "
2090                                 "for inode %llu\n",
2091                                 (unsigned long long)rec->ino);
2092                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2093                                                     backref->namelen,
2094                                                     backref->dir, &location,
2095                                                     imode_to_type(rec->imode),
2096                                                     backref->index);
2097                         BUG_ON(ret);
2098                         btrfs_commit_transaction(trans, root);
2099                         repaired++;
2100                 }
2101
2102                 if (!delete && (backref->found_inode_ref &&
2103                                 backref->found_dir_index &&
2104                                 backref->found_dir_item &&
2105                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2106                                 !rec->found_inode_item)) {
2107                         ret = create_inode_item(root, rec, 0);
2108                         if (ret)
2109                                 break;
2110                         repaired++;
2111                 }
2112
2113         }
2114         return ret ? ret : repaired;
2115 }
2116
2117 /*
2118  * To determine the file type for nlink/inode_item repair
2119  *
2120  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2121  * Return -ENOENT if file type is not found.
2122  */
2123 static int find_file_type(struct inode_record *rec, u8 *type)
2124 {
2125         struct inode_backref *backref;
2126
2127         /* For inode item recovered case */
2128         if (rec->found_inode_item) {
2129                 *type = imode_to_type(rec->imode);
2130                 return 0;
2131         }
2132
2133         list_for_each_entry(backref, &rec->backrefs, list) {
2134                 if (backref->found_dir_index || backref->found_dir_item) {
2135                         *type = backref->filetype;
2136                         return 0;
2137                 }
2138         }
2139         return -ENOENT;
2140 }
2141
2142 /*
2143  * To determine the file name for nlink repair
2144  *
2145  * Return 0 if file name is found, set name and namelen.
2146  * Return -ENOENT if file name is not found.
2147  */
2148 static int find_file_name(struct inode_record *rec,
2149                           char *name, int *namelen)
2150 {
2151         struct inode_backref *backref;
2152
2153         list_for_each_entry(backref, &rec->backrefs, list) {
2154                 if (backref->found_dir_index || backref->found_dir_item ||
2155                     backref->found_inode_ref) {
2156                         memcpy(name, backref->name, backref->namelen);
2157                         *namelen = backref->namelen;
2158                         return 0;
2159                 }
2160         }
2161         return -ENOENT;
2162 }
2163
2164 /* Reset the nlink of the inode to the correct one */
2165 static int reset_nlink(struct btrfs_trans_handle *trans,
2166                        struct btrfs_root *root,
2167                        struct btrfs_path *path,
2168                        struct inode_record *rec)
2169 {
2170         struct inode_backref *backref;
2171         struct inode_backref *tmp;
2172         struct btrfs_key key;
2173         struct btrfs_inode_item *inode_item;
2174         int ret = 0;
2175
2176         /* We don't believe this either, reset it and iterate backref */
2177         rec->found_link = 0;
2178
2179         /* Remove all backref including the valid ones */
2180         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2181                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2182                                    backref->index, backref->name,
2183                                    backref->namelen, 0);
2184                 if (ret < 0)
2185                         goto out;
2186
2187                 /* remove invalid backref, so it won't be added back */
2188                 if (!(backref->found_dir_index &&
2189                       backref->found_dir_item &&
2190                       backref->found_inode_ref)) {
2191                         list_del(&backref->list);
2192                         free(backref);
2193                 } else {
2194                         rec->found_link++;
2195                 }
2196         }
2197
2198         /* Set nlink to 0 */
2199         key.objectid = rec->ino;
2200         key.type = BTRFS_INODE_ITEM_KEY;
2201         key.offset = 0;
2202         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2203         if (ret < 0)
2204                 goto out;
2205         if (ret > 0) {
2206                 ret = -ENOENT;
2207                 goto out;
2208         }
2209         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2210                                     struct btrfs_inode_item);
2211         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2212         btrfs_mark_buffer_dirty(path->nodes[0]);
2213         btrfs_release_path(path);
2214
2215         /*
2216          * Add back valid inode_ref/dir_item/dir_index,
2217          * add_link() will handle the nlink inc, so new nlink must be correct
2218          */
2219         list_for_each_entry(backref, &rec->backrefs, list) {
2220                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2221                                      backref->name, backref->namelen,
2222                                      backref->filetype, &backref->index, 1, 0);
2223                 if (ret < 0)
2224                         goto out;
2225         }
2226 out:
2227         btrfs_release_path(path);
2228         return ret;
2229 }
2230
2231 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2232                                struct btrfs_root *root,
2233                                struct btrfs_path *path,
2234                                struct inode_record *rec)
2235 {
2236         char namebuf[BTRFS_NAME_LEN] = {0};
2237         u8 type = 0;
2238         int namelen = 0;
2239         int name_recovered = 0;
2240         int type_recovered = 0;
2241         int ret = 0;
2242
2243         /*
2244          * Get file name and type first before these invalid inode ref
2245          * are deleted by remove_all_invalid_backref()
2246          */
2247         name_recovered = !find_file_name(rec, namebuf, &namelen);
2248         type_recovered = !find_file_type(rec, &type);
2249
2250         if (!name_recovered) {
2251                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2252                        rec->ino, rec->ino);
2253                 namelen = count_digits(rec->ino);
2254                 sprintf(namebuf, "%llu", rec->ino);
2255                 name_recovered = 1;
2256         }
2257         if (!type_recovered) {
2258                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2259                        rec->ino);
2260                 type = BTRFS_FT_REG_FILE;
2261                 type_recovered = 1;
2262         }
2263
2264         ret = reset_nlink(trans, root, path, rec);
2265         if (ret < 0) {
2266                 fprintf(stderr,
2267                         "Failed to reset nlink for inode %llu: %s\n",
2268                         rec->ino, strerror(-ret));
2269                 goto out;
2270         }
2271
2272         if (rec->found_link == 0) {
2273                 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
2274                                               namebuf, namelen, type,
2275                                               (u64 *)&rec->found_link);
2276                 if (ret)
2277                         goto out;
2278         }
2279         printf("Fixed the nlink of inode %llu\n", rec->ino);
2280 out:
2281         /*
2282          * Clear the flag anyway, or we will loop forever for the same inode
2283          * as it will not be removed from the bad inode list and the dead loop
2284          * happens.
2285          */
2286         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2287         btrfs_release_path(path);
2288         return ret;
2289 }
2290
2291 /*
2292  * Check if there is any normal(reg or prealloc) file extent for given
2293  * ino.
2294  * This is used to determine the file type when neither its dir_index/item or
2295  * inode_item exists.
2296  *
2297  * This will *NOT* report error, if any error happens, just consider it does
2298  * not have any normal file extent.
2299  */
2300 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2301 {
2302         struct btrfs_path path;
2303         struct btrfs_key key;
2304         struct btrfs_key found_key;
2305         struct btrfs_file_extent_item *fi;
2306         u8 type;
2307         int ret = 0;
2308
2309         btrfs_init_path(&path);
2310         key.objectid = ino;
2311         key.type = BTRFS_EXTENT_DATA_KEY;
2312         key.offset = 0;
2313
2314         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2315         if (ret < 0) {
2316                 ret = 0;
2317                 goto out;
2318         }
2319         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2320                 ret = btrfs_next_leaf(root, &path);
2321                 if (ret) {
2322                         ret = 0;
2323                         goto out;
2324                 }
2325         }
2326         while (1) {
2327                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2328                                       path.slots[0]);
2329                 if (found_key.objectid != ino ||
2330                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2331                         break;
2332                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2333                                     struct btrfs_file_extent_item);
2334                 type = btrfs_file_extent_type(path.nodes[0], fi);
2335                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2336                         ret = 1;
2337                         goto out;
2338                 }
2339         }
2340 out:
2341         btrfs_release_path(&path);
2342         return ret;
2343 }
2344
2345 static u32 btrfs_type_to_imode(u8 type)
2346 {
2347         static u32 imode_by_btrfs_type[] = {
2348                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2349                 [BTRFS_FT_DIR]          = S_IFDIR,
2350                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2351                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2352                 [BTRFS_FT_FIFO]         = S_IFIFO,
2353                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2354                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2355         };
2356
2357         return imode_by_btrfs_type[(type)];
2358 }
2359
2360 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2361                                 struct btrfs_root *root,
2362                                 struct btrfs_path *path,
2363                                 struct inode_record *rec)
2364 {
2365         u8 filetype;
2366         u32 mode = 0700;
2367         int type_recovered = 0;
2368         int ret = 0;
2369
2370         printf("Trying to rebuild inode:%llu\n", rec->ino);
2371
2372         type_recovered = !find_file_type(rec, &filetype);
2373
2374         /*
2375          * Try to determine inode type if type not found.
2376          *
2377          * For found regular file extent, it must be FILE.
2378          * For found dir_item/index, it must be DIR.
2379          *
2380          * For undetermined one, use FILE as fallback.
2381          *
2382          * TODO:
2383          * 1. If found backref(inode_index/item is already handled) to it,
2384          *    it must be DIR.
2385          *    Need new inode-inode ref structure to allow search for that.
2386          */
2387         if (!type_recovered) {
2388                 if (rec->found_file_extent &&
2389                     find_normal_file_extent(root, rec->ino)) {
2390                         type_recovered = 1;
2391                         filetype = BTRFS_FT_REG_FILE;
2392                 } else if (rec->found_dir_item) {
2393                         type_recovered = 1;
2394                         filetype = BTRFS_FT_DIR;
2395                 } else if (!list_empty(&rec->orphan_extents)) {
2396                         type_recovered = 1;
2397                         filetype = BTRFS_FT_REG_FILE;
2398                 } else{
2399                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2400                                rec->ino);
2401                         type_recovered = 1;
2402                         filetype = BTRFS_FT_REG_FILE;
2403                 }
2404         }
2405
2406         ret = btrfs_new_inode(trans, root, rec->ino,
2407                               mode | btrfs_type_to_imode(filetype));
2408         if (ret < 0)
2409                 goto out;
2410
2411         /*
2412          * Here inode rebuild is done, we only rebuild the inode item,
2413          * don't repair the nlink(like move to lost+found).
2414          * That is the job of nlink repair.
2415          *
2416          * We just fill the record and return
2417          */
2418         rec->found_dir_item = 1;
2419         rec->imode = mode | btrfs_type_to_imode(filetype);
2420         rec->nlink = 0;
2421         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2422         /* Ensure the inode_nlinks repair function will be called */
2423         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2424 out:
2425         return ret;
2426 }
2427
2428 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2429                                       struct btrfs_root *root,
2430                                       struct btrfs_path *path,
2431                                       struct inode_record *rec)
2432 {
2433         struct orphan_data_extent *orphan;
2434         struct orphan_data_extent *tmp;
2435         int ret = 0;
2436
2437         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2438                 /*
2439                  * Check for conflicting file extents
2440                  *
2441                  * Here we don't know whether the extents is compressed or not,
2442                  * so we can only assume it not compressed nor data offset,
2443                  * and use its disk_len as extent length.
2444                  */
2445                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2446                                        orphan->offset, orphan->disk_len, 0);
2447                 btrfs_release_path(path);
2448                 if (ret < 0)
2449                         goto out;
2450                 if (!ret) {
2451                         fprintf(stderr,
2452                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2453                                 orphan->disk_bytenr, orphan->disk_len);
2454                         ret = btrfs_free_extent(trans,
2455                                         root->fs_info->extent_root,
2456                                         orphan->disk_bytenr, orphan->disk_len,
2457                                         0, root->objectid, orphan->objectid,
2458                                         orphan->offset);
2459                         if (ret < 0)
2460                                 goto out;
2461                 }
2462                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2463                                 orphan->offset, orphan->disk_bytenr,
2464                                 orphan->disk_len, orphan->disk_len);
2465                 if (ret < 0)
2466                         goto out;
2467
2468                 /* Update file size info */
2469                 rec->found_size += orphan->disk_len;
2470                 if (rec->found_size == rec->nbytes)
2471                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2472
2473                 /* Update the file extent hole info too */
2474                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2475                                            orphan->disk_len);
2476                 if (ret < 0)
2477                         goto out;
2478                 if (RB_EMPTY_ROOT(&rec->holes))
2479                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2480
2481                 list_del(&orphan->list);
2482                 free(orphan);
2483         }
2484         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2485 out:
2486         return ret;
2487 }
2488
2489 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2490                                         struct btrfs_root *root,
2491                                         struct btrfs_path *path,
2492                                         struct inode_record *rec)
2493 {
2494         struct rb_node *node;
2495         struct file_extent_hole *hole;
2496         int found = 0;
2497         int ret = 0;
2498
2499         node = rb_first(&rec->holes);
2500
2501         while (node) {
2502                 found = 1;
2503                 hole = rb_entry(node, struct file_extent_hole, node);
2504                 ret = btrfs_punch_hole(trans, root, rec->ino,
2505                                        hole->start, hole->len);
2506                 if (ret < 0)
2507                         goto out;
2508                 ret = del_file_extent_hole(&rec->holes, hole->start,
2509                                            hole->len);
2510                 if (ret < 0)
2511                         goto out;
2512                 if (RB_EMPTY_ROOT(&rec->holes))
2513                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2514                 node = rb_first(&rec->holes);
2515         }
2516         /* special case for a file losing all its file extent */
2517         if (!found) {
2518                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2519                                        round_up(rec->isize,
2520                                                 root->fs_info->sectorsize));
2521                 if (ret < 0)
2522                         goto out;
2523         }
2524         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2525                rec->ino, root->objectid);
2526 out:
2527         return ret;
2528 }
2529
2530 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2531 {
2532         struct btrfs_trans_handle *trans;
2533         struct btrfs_path path;
2534         int ret = 0;
2535
2536         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2537                              I_ERR_NO_ORPHAN_ITEM |
2538                              I_ERR_LINK_COUNT_WRONG |
2539                              I_ERR_NO_INODE_ITEM |
2540                              I_ERR_FILE_EXTENT_ORPHAN |
2541                              I_ERR_FILE_EXTENT_DISCOUNT|
2542                              I_ERR_FILE_NBYTES_WRONG)))
2543                 return rec->errors;
2544
2545         /*
2546          * For nlink repair, it may create a dir and add link, so
2547          * 2 for parent(256)'s dir_index and dir_item
2548          * 2 for lost+found dir's inode_item and inode_ref
2549          * 1 for the new inode_ref of the file
2550          * 2 for lost+found dir's dir_index and dir_item for the file
2551          */
2552         trans = btrfs_start_transaction(root, 7);
2553         if (IS_ERR(trans))
2554                 return PTR_ERR(trans);
2555
2556         btrfs_init_path(&path);
2557         if (rec->errors & I_ERR_NO_INODE_ITEM)
2558                 ret = repair_inode_no_item(trans, root, &path, rec);
2559         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2560                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2561         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2562                 ret = repair_inode_discount_extent(trans, root, &path, rec);
2563         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2564                 ret = repair_inode_isize(trans, root, &path, rec);
2565         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2566                 ret = repair_inode_orphan_item(trans, root, &path, rec);
2567         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2568                 ret = repair_inode_nlinks(trans, root, &path, rec);
2569         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2570                 ret = repair_inode_nbytes(trans, root, &path, rec);
2571         btrfs_commit_transaction(trans, root);
2572         btrfs_release_path(&path);
2573         return ret;
2574 }
2575
2576 static int check_inode_recs(struct btrfs_root *root,
2577                             struct cache_tree *inode_cache)
2578 {
2579         struct cache_extent *cache;
2580         struct ptr_node *node;
2581         struct inode_record *rec;
2582         struct inode_backref *backref;
2583         int stage = 0;
2584         int ret = 0;
2585         int err = 0;
2586         u64 error = 0;
2587         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2588
2589         if (btrfs_root_refs(&root->root_item) == 0) {
2590                 if (!cache_tree_empty(inode_cache))
2591                         fprintf(stderr, "warning line %d\n", __LINE__);
2592                 return 0;
2593         }
2594
2595         /*
2596          * We need to repair backrefs first because we could change some of the
2597          * errors in the inode recs.
2598          *
2599          * We also need to go through and delete invalid backrefs first and then
2600          * add the correct ones second.  We do this because we may get EEXIST
2601          * when adding back the correct index because we hadn't yet deleted the
2602          * invalid index.
2603          *
2604          * For example, if we were missing a dir index then the directories
2605          * isize would be wrong, so if we fixed the isize to what we thought it
2606          * would be and then fixed the backref we'd still have a invalid fs, so
2607          * we need to add back the dir index and then check to see if the isize
2608          * is still wrong.
2609          */
2610         while (stage < 3) {
2611                 stage++;
2612                 if (stage == 3 && !err)
2613                         break;
2614
2615                 cache = search_cache_extent(inode_cache, 0);
2616                 while (repair && cache) {
2617                         node = container_of(cache, struct ptr_node, cache);
2618                         rec = node->data;
2619                         cache = next_cache_extent(cache);
2620
2621                         /* Need to free everything up and rescan */
2622                         if (stage == 3) {
2623                                 remove_cache_extent(inode_cache, &node->cache);
2624                                 free(node);
2625                                 free_inode_rec(rec);
2626                                 continue;
2627                         }
2628
2629                         if (list_empty(&rec->backrefs))
2630                                 continue;
2631
2632                         ret = repair_inode_backrefs(root, rec, inode_cache,
2633                                                     stage == 1);
2634                         if (ret < 0) {
2635                                 err = ret;
2636                                 stage = 2;
2637                                 break;
2638                         } if (ret > 0) {
2639                                 err = -EAGAIN;
2640                         }
2641                 }
2642         }
2643         if (err)
2644                 return err;
2645
2646         rec = get_inode_rec(inode_cache, root_dirid, 0);
2647         BUG_ON(IS_ERR(rec));
2648         if (rec) {
2649                 ret = check_root_dir(rec);
2650                 if (ret) {
2651                         fprintf(stderr, "root %llu root dir %llu error\n",
2652                                 (unsigned long long)root->root_key.objectid,
2653                                 (unsigned long long)root_dirid);
2654                         print_inode_error(root, rec);
2655                         error++;
2656                 }
2657         } else {
2658                 if (repair) {
2659                         struct btrfs_trans_handle *trans;
2660
2661                         trans = btrfs_start_transaction(root, 1);
2662                         if (IS_ERR(trans)) {
2663                                 err = PTR_ERR(trans);
2664                                 return err;
2665                         }
2666
2667                         fprintf(stderr,
2668                                 "root %llu missing its root dir, recreating\n",
2669                                 (unsigned long long)root->objectid);
2670
2671                         ret = btrfs_make_root_dir(trans, root, root_dirid);
2672                         BUG_ON(ret);
2673
2674                         btrfs_commit_transaction(trans, root);
2675                         return -EAGAIN;
2676                 }
2677
2678                 fprintf(stderr, "root %llu root dir %llu not found\n",
2679                         (unsigned long long)root->root_key.objectid,
2680                         (unsigned long long)root_dirid);
2681         }
2682
2683         while (1) {
2684                 cache = search_cache_extent(inode_cache, 0);
2685                 if (!cache)
2686                         break;
2687                 node = container_of(cache, struct ptr_node, cache);
2688                 rec = node->data;
2689                 remove_cache_extent(inode_cache, &node->cache);
2690                 free(node);
2691                 if (rec->ino == root_dirid ||
2692                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
2693                         free_inode_rec(rec);
2694                         continue;
2695                 }
2696
2697                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2698                         ret = check_orphan_item(root, rec->ino);
2699                         if (ret == 0)
2700                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2701                         if (can_free_inode_rec(rec)) {
2702                                 free_inode_rec(rec);
2703                                 continue;
2704                         }
2705                 }
2706
2707                 if (!rec->found_inode_item)
2708                         rec->errors |= I_ERR_NO_INODE_ITEM;
2709                 if (rec->found_link != rec->nlink)
2710                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2711                 if (repair) {
2712                         ret = try_repair_inode(root, rec);
2713                         if (ret == 0 && can_free_inode_rec(rec)) {
2714                                 free_inode_rec(rec);
2715                                 continue;
2716                         }
2717                         ret = 0;
2718                 }
2719
2720                 if (!(repair && ret == 0))
2721                         error++;
2722                 print_inode_error(root, rec);
2723                 list_for_each_entry(backref, &rec->backrefs, list) {
2724                         if (!backref->found_dir_item)
2725                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
2726                         if (!backref->found_dir_index)
2727                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
2728                         if (!backref->found_inode_ref)
2729                                 backref->errors |= REF_ERR_NO_INODE_REF;
2730                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
2731                                 " namelen %u name %s filetype %d errors %x",
2732                                 (unsigned long long)backref->dir,
2733                                 (unsigned long long)backref->index,
2734                                 backref->namelen, backref->name,
2735                                 backref->filetype, backref->errors);
2736                         print_ref_error(backref->errors);
2737                 }
2738                 free_inode_rec(rec);
2739         }
2740         return (error > 0) ? -1 : 0;
2741 }
2742
2743 static struct root_record *get_root_rec(struct cache_tree *root_cache,
2744                                         u64 objectid)
2745 {
2746         struct cache_extent *cache;
2747         struct root_record *rec = NULL;
2748         int ret;
2749
2750         cache = lookup_cache_extent(root_cache, objectid, 1);
2751         if (cache) {
2752                 rec = container_of(cache, struct root_record, cache);
2753         } else {
2754                 rec = calloc(1, sizeof(*rec));
2755                 if (!rec)
2756                         return ERR_PTR(-ENOMEM);
2757                 rec->objectid = objectid;
2758                 INIT_LIST_HEAD(&rec->backrefs);
2759                 rec->cache.start = objectid;
2760                 rec->cache.size = 1;
2761
2762                 ret = insert_cache_extent(root_cache, &rec->cache);
2763                 if (ret)
2764                         return ERR_PTR(-EEXIST);
2765         }
2766         return rec;
2767 }
2768
2769 static struct root_backref *get_root_backref(struct root_record *rec,
2770                                              u64 ref_root, u64 dir, u64 index,
2771                                              const char *name, int namelen)
2772 {
2773         struct root_backref *backref;
2774
2775         list_for_each_entry(backref, &rec->backrefs, list) {
2776                 if (backref->ref_root != ref_root || backref->dir != dir ||
2777                     backref->namelen != namelen)
2778                         continue;
2779                 if (memcmp(name, backref->name, namelen))
2780                         continue;
2781                 return backref;
2782         }
2783
2784         backref = calloc(1, sizeof(*backref) + namelen + 1);
2785         if (!backref)
2786                 return NULL;
2787         backref->ref_root = ref_root;
2788         backref->dir = dir;
2789         backref->index = index;
2790         backref->namelen = namelen;
2791         memcpy(backref->name, name, namelen);
2792         backref->name[namelen] = '\0';
2793         list_add_tail(&backref->list, &rec->backrefs);
2794         return backref;
2795 }
2796
2797 static void free_root_record(struct cache_extent *cache)
2798 {
2799         struct root_record *rec;
2800         struct root_backref *backref;
2801
2802         rec = container_of(cache, struct root_record, cache);
2803         while (!list_empty(&rec->backrefs)) {
2804                 backref = to_root_backref(rec->backrefs.next);
2805                 list_del(&backref->list);
2806                 free(backref);
2807         }
2808
2809         free(rec);
2810 }
2811
2812 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
2813
2814 static int add_root_backref(struct cache_tree *root_cache,
2815                             u64 root_id, u64 ref_root, u64 dir, u64 index,
2816                             const char *name, int namelen,
2817                             int item_type, int errors)
2818 {
2819         struct root_record *rec;
2820         struct root_backref *backref;
2821
2822         rec = get_root_rec(root_cache, root_id);
2823         BUG_ON(IS_ERR(rec));
2824         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
2825         BUG_ON(!backref);
2826
2827         backref->errors |= errors;
2828
2829         if (item_type != BTRFS_DIR_ITEM_KEY) {
2830                 if (backref->found_dir_index || backref->found_back_ref ||
2831                     backref->found_forward_ref) {
2832                         if (backref->index != index)
2833                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
2834                 } else {
2835                         backref->index = index;
2836                 }
2837         }
2838
2839         if (item_type == BTRFS_DIR_ITEM_KEY) {
2840                 if (backref->found_forward_ref)
2841                         rec->found_ref++;
2842                 backref->found_dir_item = 1;
2843         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
2844                 backref->found_dir_index = 1;
2845         } else if (item_type == BTRFS_ROOT_REF_KEY) {
2846                 if (backref->found_forward_ref)
2847                         backref->errors |= REF_ERR_DUP_ROOT_REF;
2848                 else if (backref->found_dir_item)
2849                         rec->found_ref++;
2850                 backref->found_forward_ref = 1;
2851         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
2852                 if (backref->found_back_ref)
2853                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
2854                 backref->found_back_ref = 1;
2855         } else {
2856                 BUG_ON(1);
2857         }
2858
2859         if (backref->found_forward_ref && backref->found_dir_item)
2860                 backref->reachable = 1;
2861         return 0;
2862 }
2863
2864 static int merge_root_recs(struct btrfs_root *root,
2865                            struct cache_tree *src_cache,
2866                            struct cache_tree *dst_cache)
2867 {
2868         struct cache_extent *cache;
2869         struct ptr_node *node;
2870         struct inode_record *rec;
2871         struct inode_backref *backref;
2872         int ret = 0;
2873
2874         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
2875                 free_inode_recs_tree(src_cache);
2876                 return 0;
2877         }
2878
2879         while (1) {
2880                 cache = search_cache_extent(src_cache, 0);
2881                 if (!cache)
2882                         break;
2883                 node = container_of(cache, struct ptr_node, cache);
2884                 rec = node->data;
2885                 remove_cache_extent(src_cache, &node->cache);
2886                 free(node);
2887
2888                 ret = is_child_root(root, root->objectid, rec->ino);
2889                 if (ret < 0)
2890                         break;
2891                 else if (ret == 0)
2892                         goto skip;
2893
2894                 list_for_each_entry(backref, &rec->backrefs, list) {
2895                         BUG_ON(backref->found_inode_ref);
2896                         if (backref->found_dir_item)
2897                                 add_root_backref(dst_cache, rec->ino,
2898                                         root->root_key.objectid, backref->dir,
2899                                         backref->index, backref->name,
2900                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
2901                                         backref->errors);
2902                         if (backref->found_dir_index)
2903                                 add_root_backref(dst_cache, rec->ino,
2904                                         root->root_key.objectid, backref->dir,
2905                                         backref->index, backref->name,
2906                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
2907                                         backref->errors);
2908                 }
2909 skip:
2910                 free_inode_rec(rec);
2911         }
2912         if (ret < 0)
2913                 return ret;
2914         return 0;
2915 }
2916
2917 static int check_root_refs(struct btrfs_root *root,
2918                            struct cache_tree *root_cache)
2919 {
2920         struct root_record *rec;
2921         struct root_record *ref_root;
2922         struct root_backref *backref;
2923         struct cache_extent *cache;
2924         int loop = 1;
2925         int ret;
2926         int error;
2927         int errors = 0;
2928
2929         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
2930         BUG_ON(IS_ERR(rec));
2931         rec->found_ref = 1;
2932
2933         /* fixme: this can not detect circular references */
2934         while (loop) {
2935                 loop = 0;
2936                 cache = search_cache_extent(root_cache, 0);
2937                 while (1) {
2938                         if (!cache)
2939                                 break;
2940                         rec = container_of(cache, struct root_record, cache);
2941                         cache = next_cache_extent(cache);
2942
2943                         if (rec->found_ref == 0)
2944                                 continue;
2945
2946                         list_for_each_entry(backref, &rec->backrefs, list) {
2947                                 if (!backref->reachable)
2948                                         continue;
2949
2950                                 ref_root = get_root_rec(root_cache,
2951                                                         backref->ref_root);
2952                                 BUG_ON(IS_ERR(ref_root));
2953                                 if (ref_root->found_ref > 0)
2954                                         continue;
2955
2956                                 backref->reachable = 0;
2957                                 rec->found_ref--;
2958                                 if (rec->found_ref == 0)
2959                                         loop = 1;
2960                         }
2961                 }
2962         }
2963
2964         cache = search_cache_extent(root_cache, 0);
2965         while (1) {
2966                 if (!cache)
2967                         break;
2968                 rec = container_of(cache, struct root_record, cache);
2969                 cache = next_cache_extent(cache);
2970
2971                 if (rec->found_ref == 0 &&
2972                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
2973                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
2974                         ret = check_orphan_item(root->fs_info->tree_root,
2975                                                 rec->objectid);
2976                         if (ret == 0)
2977                                 continue;
2978
2979                         /*
2980                          * If we don't have a root item then we likely just have
2981                          * a dir item in a snapshot for this root but no actual
2982                          * ref key or anything so it's meaningless.
2983                          */
2984                         if (!rec->found_root_item)
2985                                 continue;
2986                         errors++;
2987                         fprintf(stderr, "fs tree %llu not referenced\n",
2988                                 (unsigned long long)rec->objectid);
2989                 }
2990
2991                 error = 0;
2992                 if (rec->found_ref > 0 && !rec->found_root_item)
2993                         error = 1;
2994                 list_for_each_entry(backref, &rec->backrefs, list) {
2995                         if (!backref->found_dir_item)
2996                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
2997                         if (!backref->found_dir_index)
2998                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
2999                         if (!backref->found_back_ref)
3000                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3001                         if (!backref->found_forward_ref)
3002                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3003                         if (backref->reachable && backref->errors)
3004                                 error = 1;
3005                 }
3006                 if (!error)
3007                         continue;
3008
3009                 errors++;
3010                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3011                         (unsigned long long)rec->objectid, rec->found_ref,
3012                          rec->found_root_item ? "" : "not found");
3013
3014                 list_for_each_entry(backref, &rec->backrefs, list) {
3015                         if (!backref->reachable)
3016                                 continue;
3017                         if (!backref->errors && rec->found_root_item)
3018                                 continue;
3019                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3020                                 " index %llu namelen %u name %s errors %x\n",
3021                                 (unsigned long long)backref->ref_root,
3022                                 (unsigned long long)backref->dir,
3023                                 (unsigned long long)backref->index,
3024                                 backref->namelen, backref->name,
3025                                 backref->errors);
3026                         print_ref_error(backref->errors);
3027                 }
3028         }
3029         return errors > 0 ? 1 : 0;
3030 }
3031
3032 static int process_root_ref(struct extent_buffer *eb, int slot,
3033                             struct btrfs_key *key,
3034                             struct cache_tree *root_cache)
3035 {
3036         u64 dirid;
3037         u64 index;
3038         u32 len;
3039         u32 name_len;
3040         struct btrfs_root_ref *ref;
3041         char namebuf[BTRFS_NAME_LEN];
3042         int error;
3043
3044         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3045
3046         dirid = btrfs_root_ref_dirid(eb, ref);
3047         index = btrfs_root_ref_sequence(eb, ref);
3048         name_len = btrfs_root_ref_name_len(eb, ref);
3049
3050         if (name_len <= BTRFS_NAME_LEN) {
3051                 len = name_len;
3052                 error = 0;
3053         } else {
3054                 len = BTRFS_NAME_LEN;
3055                 error = REF_ERR_NAME_TOO_LONG;
3056         }
3057         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3058
3059         if (key->type == BTRFS_ROOT_REF_KEY) {
3060                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3061                                  index, namebuf, len, key->type, error);
3062         } else {
3063                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3064                                  index, namebuf, len, key->type, error);
3065         }
3066         return 0;
3067 }
3068
3069 static void free_corrupt_block(struct cache_extent *cache)
3070 {
3071         struct btrfs_corrupt_block *corrupt;
3072
3073         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3074         free(corrupt);
3075 }
3076
3077 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3078
3079 /*
3080  * Repair the btree of the given root.
3081  *
3082  * The fix is to remove the node key in corrupt_blocks cache_tree.
3083  * and rebalance the tree.
3084  * After the fix, the btree should be writeable.
3085  */
3086 static int repair_btree(struct btrfs_root *root,
3087                         struct cache_tree *corrupt_blocks)
3088 {
3089         struct btrfs_trans_handle *trans;
3090         struct btrfs_path path;
3091         struct btrfs_corrupt_block *corrupt;
3092         struct cache_extent *cache;
3093         struct btrfs_key key;
3094         u64 offset;
3095         int level;
3096         int ret = 0;
3097
3098         if (cache_tree_empty(corrupt_blocks))
3099                 return 0;
3100
3101         trans = btrfs_start_transaction(root, 1);
3102         if (IS_ERR(trans)) {
3103                 ret = PTR_ERR(trans);
3104                 fprintf(stderr, "Error starting transaction: %s\n",
3105                         strerror(-ret));
3106                 return ret;
3107         }
3108         btrfs_init_path(&path);
3109         cache = first_cache_extent(corrupt_blocks);
3110         while (cache) {
3111                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3112                                        cache);
3113                 level = corrupt->level;
3114                 path.lowest_level = level;
3115                 key.objectid = corrupt->key.objectid;
3116                 key.type = corrupt->key.type;
3117                 key.offset = corrupt->key.offset;
3118
3119                 /*
3120                  * Here we don't want to do any tree balance, since it may
3121                  * cause a balance with corrupted brother leaf/node,
3122                  * so ins_len set to 0 here.
3123                  * Balance will be done after all corrupt node/leaf is deleted.
3124                  */
3125                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3126                 if (ret < 0)
3127                         goto out;
3128                 offset = btrfs_node_blockptr(path.nodes[level],
3129                                              path.slots[level]);
3130
3131                 /* Remove the ptr */
3132                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3133                 if (ret < 0)
3134                         goto out;
3135                 /*
3136                  * Remove the corresponding extent
3137                  * return value is not concerned.
3138                  */
3139                 btrfs_release_path(&path);
3140                 ret = btrfs_free_extent(trans, root, offset,
3141                                 root->fs_info->nodesize, 0,
3142                                 root->root_key.objectid, level - 1, 0);
3143                 cache = next_cache_extent(cache);
3144         }
3145
3146         /* Balance the btree using btrfs_search_slot() */
3147         cache = first_cache_extent(corrupt_blocks);
3148         while (cache) {
3149                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3150                                        cache);
3151                 memcpy(&key, &corrupt->key, sizeof(key));
3152                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3153                 if (ret < 0)
3154                         goto out;
3155                 /* return will always >0 since it won't find the item */
3156                 ret = 0;
3157                 btrfs_release_path(&path);
3158                 cache = next_cache_extent(cache);
3159         }
3160 out:
3161         btrfs_commit_transaction(trans, root);
3162         btrfs_release_path(&path);
3163         return ret;
3164 }
3165
3166 static int check_fs_root(struct btrfs_root *root,
3167                          struct cache_tree *root_cache,
3168                          struct walk_control *wc)
3169 {
3170         int ret = 0;
3171         int err = 0;
3172         int wret;
3173         int level;
3174         struct btrfs_path path;
3175         struct shared_node root_node;
3176         struct root_record *rec;
3177         struct btrfs_root_item *root_item = &root->root_item;
3178         struct cache_tree corrupt_blocks;
3179         struct orphan_data_extent *orphan;
3180         struct orphan_data_extent *tmp;
3181         enum btrfs_tree_block_status status;
3182         struct node_refs nrefs;
3183
3184         /*
3185          * Reuse the corrupt_block cache tree to record corrupted tree block
3186          *
3187          * Unlike the usage in extent tree check, here we do it in a per
3188          * fs/subvol tree base.
3189          */
3190         cache_tree_init(&corrupt_blocks);
3191         root->fs_info->corrupt_blocks = &corrupt_blocks;
3192
3193         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3194                 rec = get_root_rec(root_cache, root->root_key.objectid);
3195                 BUG_ON(IS_ERR(rec));
3196                 if (btrfs_root_refs(root_item) > 0)
3197                         rec->found_root_item = 1;
3198         }
3199
3200         btrfs_init_path(&path);
3201         memset(&root_node, 0, sizeof(root_node));
3202         cache_tree_init(&root_node.root_cache);
3203         cache_tree_init(&root_node.inode_cache);
3204         memset(&nrefs, 0, sizeof(nrefs));
3205
3206         /* Move the orphan extent record to corresponding inode_record */
3207         list_for_each_entry_safe(orphan, tmp,
3208                                  &root->orphan_data_extents, list) {
3209                 struct inode_record *inode;
3210
3211                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3212                                       1);
3213                 BUG_ON(IS_ERR(inode));
3214                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3215                 list_move(&orphan->list, &inode->orphan_extents);
3216         }
3217
3218         level = btrfs_header_level(root->node);
3219         memset(wc->nodes, 0, sizeof(wc->nodes));
3220         wc->nodes[level] = &root_node;
3221         wc->active_node = level;
3222         wc->root_level = level;
3223
3224         /* We may not have checked the root block, lets do that now */
3225         if (btrfs_is_leaf(root->node))
3226                 status = btrfs_check_leaf(root, NULL, root->node);
3227         else
3228                 status = btrfs_check_node(root, NULL, root->node);
3229         if (status != BTRFS_TREE_BLOCK_CLEAN)
3230                 return -EIO;
3231
3232         if (btrfs_root_refs(root_item) > 0 ||
3233             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3234                 path.nodes[level] = root->node;
3235                 extent_buffer_get(root->node);
3236                 path.slots[level] = 0;
3237         } else {
3238                 struct btrfs_key key;
3239                 struct btrfs_disk_key found_key;
3240
3241                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3242                 level = root_item->drop_level;
3243                 path.lowest_level = level;
3244                 if (level > btrfs_header_level(root->node) ||
3245                     level >= BTRFS_MAX_LEVEL) {
3246                         error("ignoring invalid drop level: %u", level);
3247                         goto skip_walking;
3248                 }
3249                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3250                 if (wret < 0)
3251                         goto skip_walking;
3252                 btrfs_node_key(path.nodes[level], &found_key,
3253                                 path.slots[level]);
3254                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3255                                         sizeof(found_key)));
3256         }
3257
3258         while (1) {
3259                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3260                 if (wret < 0)
3261                         ret = wret;
3262                 if (wret != 0)
3263                         break;
3264
3265                 wret = walk_up_tree(root, &path, wc, &level);
3266                 if (wret < 0)
3267                         ret = wret;
3268                 if (wret != 0)
3269                         break;
3270         }
3271 skip_walking:
3272         btrfs_release_path(&path);
3273
3274         if (!cache_tree_empty(&corrupt_blocks)) {
3275                 struct cache_extent *cache;
3276                 struct btrfs_corrupt_block *corrupt;
3277
3278                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3279                        root->root_key.objectid);
3280                 cache = first_cache_extent(&corrupt_blocks);
3281                 while (cache) {
3282                         corrupt = container_of(cache,
3283                                                struct btrfs_corrupt_block,
3284                                                cache);
3285                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3286                                cache->start, corrupt->level,
3287                                corrupt->key.objectid, corrupt->key.type,
3288                                corrupt->key.offset);
3289                         cache = next_cache_extent(cache);
3290                 }
3291                 if (repair) {
3292                         printf("Try to repair the btree for root %llu\n",
3293                                root->root_key.objectid);
3294                         ret = repair_btree(root, &corrupt_blocks);
3295                         if (ret < 0)
3296                                 fprintf(stderr, "Failed to repair btree: %s\n",
3297                                         strerror(-ret));
3298                         if (!ret)
3299                                 printf("Btree for root %llu is fixed\n",
3300                                        root->root_key.objectid);
3301                 }
3302         }
3303
3304         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3305         if (err < 0)
3306                 ret = err;
3307
3308         if (root_node.current) {
3309                 root_node.current->checked = 1;
3310                 maybe_free_inode_rec(&root_node.inode_cache,
3311                                 root_node.current);
3312         }
3313
3314         err = check_inode_recs(root, &root_node.inode_cache);
3315         if (!ret)
3316                 ret = err;
3317
3318         free_corrupt_blocks_tree(&corrupt_blocks);
3319         root->fs_info->corrupt_blocks = NULL;
3320         free_orphan_data_extents(&root->orphan_data_extents);
3321         return ret;
3322 }
3323
3324 static int check_fs_roots(struct btrfs_fs_info *fs_info,
3325                           struct cache_tree *root_cache)
3326 {
3327         struct btrfs_path path;
3328         struct btrfs_key key;
3329         struct walk_control wc;
3330         struct extent_buffer *leaf, *tree_node;
3331         struct btrfs_root *tmp_root;
3332         struct btrfs_root *tree_root = fs_info->tree_root;
3333         int ret;
3334         int err = 0;
3335
3336         if (ctx.progress_enabled) {
3337                 ctx.tp = TASK_FS_ROOTS;
3338                 task_start(ctx.info);
3339         }
3340
3341         /*
3342          * Just in case we made any changes to the extent tree that weren't
3343          * reflected into the free space cache yet.
3344          */
3345         if (repair)
3346                 reset_cached_block_groups(fs_info);
3347         memset(&wc, 0, sizeof(wc));
3348         cache_tree_init(&wc.shared);
3349         btrfs_init_path(&path);
3350
3351 again:
3352         key.offset = 0;
3353         key.objectid = 0;
3354         key.type = BTRFS_ROOT_ITEM_KEY;
3355         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3356         if (ret < 0) {
3357                 err = 1;
3358                 goto out;
3359         }
3360         tree_node = tree_root->node;
3361         while (1) {
3362                 if (tree_node != tree_root->node) {
3363                         free_root_recs_tree(root_cache);
3364                         btrfs_release_path(&path);
3365                         goto again;
3366                 }
3367                 leaf = path.nodes[0];
3368                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3369                         ret = btrfs_next_leaf(tree_root, &path);
3370                         if (ret) {
3371                                 if (ret < 0)
3372                                         err = 1;
3373                                 break;
3374                         }
3375                         leaf = path.nodes[0];
3376                 }
3377                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3378                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3379                     fs_root_objectid(key.objectid)) {
3380                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3381                                 tmp_root = btrfs_read_fs_root_no_cache(
3382                                                 fs_info, &key);
3383                         } else {
3384                                 key.offset = (u64)-1;
3385                                 tmp_root = btrfs_read_fs_root(
3386                                                 fs_info, &key);
3387                         }
3388                         if (IS_ERR(tmp_root)) {
3389                                 err = 1;
3390                                 goto next;
3391                         }
3392                         ret = check_fs_root(tmp_root, root_cache, &wc);
3393                         if (ret == -EAGAIN) {
3394                                 free_root_recs_tree(root_cache);
3395                                 btrfs_release_path(&path);
3396                                 goto again;
3397                         }
3398                         if (ret)
3399                                 err = 1;
3400                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3401                                 btrfs_free_fs_root(tmp_root);
3402                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3403                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3404                         process_root_ref(leaf, path.slots[0], &key,
3405                                          root_cache);
3406                 }
3407 next:
3408                 path.slots[0]++;
3409         }
3410 out:
3411         btrfs_release_path(&path);
3412         if (err)
3413                 free_extent_cache_tree(&wc.shared);
3414         if (!cache_tree_empty(&wc.shared))
3415                 fprintf(stderr, "warning line %d\n", __LINE__);
3416
3417         task_stop(ctx.info);
3418
3419         return err;
3420 }
3421
3422 static struct tree_backref *find_tree_backref(struct extent_record *rec,
3423                                                 u64 parent, u64 root)
3424 {
3425         struct rb_node *node;
3426         struct tree_backref *back = NULL;
3427         struct tree_backref match = {
3428                 .node = {
3429                         .is_data = 0,
3430                 },
3431         };
3432
3433         if (parent) {
3434                 match.parent = parent;
3435                 match.node.full_backref = 1;
3436         } else {
3437                 match.root = root;
3438         }
3439
3440         node = rb_search(&rec->backref_tree, &match.node.node,
3441                          (rb_compare_keys)compare_extent_backref, NULL);
3442         if (node)
3443                 back = to_tree_backref(rb_node_to_extent_backref(node));
3444
3445         return back;
3446 }
3447
3448 static struct data_backref *find_data_backref(struct extent_record *rec,
3449                                                 u64 parent, u64 root,
3450                                                 u64 owner, u64 offset,
3451                                                 int found_ref,
3452                                                 u64 disk_bytenr, u64 bytes)
3453 {
3454         struct rb_node *node;
3455         struct data_backref *back = NULL;
3456         struct data_backref match = {
3457                 .node = {
3458                         .is_data = 1,
3459                 },
3460                 .owner = owner,
3461                 .offset = offset,
3462                 .bytes = bytes,
3463                 .found_ref = found_ref,
3464                 .disk_bytenr = disk_bytenr,
3465         };
3466
3467         if (parent) {
3468                 match.parent = parent;
3469                 match.node.full_backref = 1;
3470         } else {
3471                 match.root = root;
3472         }
3473
3474         node = rb_search(&rec->backref_tree, &match.node.node,
3475                          (rb_compare_keys)compare_extent_backref, NULL);
3476         if (node)
3477                 back = to_data_backref(rb_node_to_extent_backref(node));
3478
3479         return back;
3480 }
3481
3482 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
3483                           struct cache_tree *root_cache)
3484 {
3485         int ret;
3486
3487         if (!ctx.progress_enabled)
3488                 fprintf(stderr, "checking fs roots\n");
3489         if (check_mode == CHECK_MODE_LOWMEM)
3490                 ret = check_fs_roots_lowmem(fs_info);
3491         else
3492                 ret = check_fs_roots(fs_info, root_cache);
3493
3494         return ret;
3495 }
3496
3497 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3498 {
3499         struct extent_backref *back, *tmp;
3500         struct tree_backref *tback;
3501         struct data_backref *dback;
3502         u64 found = 0;
3503         int err = 0;
3504
3505         rbtree_postorder_for_each_entry_safe(back, tmp,
3506                                              &rec->backref_tree, node) {
3507                 if (!back->found_extent_tree) {
3508                         err = 1;
3509                         if (!print_errs)
3510                                 goto out;
3511                         if (back->is_data) {
3512                                 dback = to_data_backref(back);
3513                                 fprintf(stderr,
3514 "data backref %llu %s %llu owner %llu offset %llu num_refs %lu not found in extent tree\n",
3515                                         (unsigned long long)rec->start,
3516                                         back->full_backref ?
3517                                         "parent" : "root",
3518                                         back->full_backref ?
3519                                         (unsigned long long)dback->parent :
3520                                         (unsigned long long)dback->root,
3521                                         (unsigned long long)dback->owner,
3522                                         (unsigned long long)dback->offset,
3523                                         (unsigned long)dback->num_refs);
3524                         } else {
3525                                 tback = to_tree_backref(back);
3526                                 fprintf(stderr,
3527 "tree backref %llu parent %llu root %llu not found in extent tree\n",
3528                                         (unsigned long long)rec->start,
3529                                         (unsigned long long)tback->parent,
3530                                         (unsigned long long)tback->root);
3531                         }
3532                 }
3533                 if (!back->is_data && !back->found_ref) {
3534                         err = 1;
3535                         if (!print_errs)
3536                                 goto out;
3537                         tback = to_tree_backref(back);
3538                         fprintf(stderr,
3539                                 "backref %llu %s %llu not referenced back %p\n",
3540                                 (unsigned long long)rec->start,
3541                                 back->full_backref ? "parent" : "root",
3542                                 back->full_backref ?
3543                                 (unsigned long long)tback->parent :
3544                                 (unsigned long long)tback->root, back);
3545                 }
3546                 if (back->is_data) {
3547                         dback = to_data_backref(back);
3548                         if (dback->found_ref != dback->num_refs) {
3549                                 err = 1;
3550                                 if (!print_errs)
3551                                         goto out;
3552                                 fprintf(stderr,
3553 "incorrect local backref count on %llu %s %llu owner %llu offset %llu found %u wanted %u back %p\n",
3554                                         (unsigned long long)rec->start,
3555                                         back->full_backref ?
3556                                         "parent" : "root",
3557                                         back->full_backref ?
3558                                         (unsigned long long)dback->parent :
3559                                         (unsigned long long)dback->root,
3560                                         (unsigned long long)dback->owner,
3561                                         (unsigned long long)dback->offset,
3562                                         dback->found_ref, dback->num_refs,
3563                                         back);
3564                         }
3565                         if (dback->disk_bytenr != rec->start) {
3566                                 err = 1;
3567                                 if (!print_errs)
3568                                         goto out;
3569                                 fprintf(stderr,
3570 "backref disk bytenr does not match extent record, bytenr=%llu, ref bytenr=%llu\n",
3571                                         (unsigned long long)rec->start,
3572                                         (unsigned long long)dback->disk_bytenr);
3573                         }
3574
3575                         if (dback->bytes != rec->nr) {
3576                                 err = 1;
3577                                 if (!print_errs)
3578                                         goto out;
3579                                 fprintf(stderr,
3580 "backref bytes do not match extent backref, bytenr=%llu, ref bytes=%llu, backref bytes=%llu\n",
3581                                         (unsigned long long)rec->start,
3582                                         (unsigned long long)rec->nr,
3583                                         (unsigned long long)dback->bytes);
3584                         }
3585                 }
3586                 if (!back->is_data) {
3587                         found += 1;
3588                 } else {
3589                         dback = to_data_backref(back);
3590                         found += dback->found_ref;
3591                 }
3592         }
3593         if (found != rec->refs) {
3594                 err = 1;
3595                 if (!print_errs)
3596                         goto out;
3597                 fprintf(stderr,
3598         "incorrect global backref count on %llu found %llu wanted %llu\n",
3599                         (unsigned long long)rec->start,
3600                         (unsigned long long)found,
3601                         (unsigned long long)rec->refs);
3602         }
3603 out:
3604         return err;
3605 }
3606
/* rb_free_nodes() callback: free the extent_backref that embeds @node. */
static void __free_one_backref(struct rb_node *node)
{
	free(rb_node_to_extent_backref(node));
}
3613
3614 static void free_all_extent_backrefs(struct extent_record *rec)
3615 {
3616         rb_free_nodes(&rec->backref_tree, __free_one_backref);
3617 }
3618
3619 static void free_extent_record_cache(struct cache_tree *extent_cache)
3620 {
3621         struct cache_extent *cache;
3622         struct extent_record *rec;
3623
3624         while (1) {
3625                 cache = first_cache_extent(extent_cache);
3626                 if (!cache)
3627                         break;
3628                 rec = container_of(cache, struct extent_record, cache);
3629                 remove_cache_extent(extent_cache, cache);
3630                 free_all_extent_backrefs(rec);
3631                 free(rec);
3632         }
3633 }
3634
3635 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3636                                  struct extent_record *rec)
3637 {
3638         if (rec->content_checked && rec->owner_ref_checked &&
3639             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3640             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3641             !rec->bad_full_backref && !rec->crossing_stripes &&
3642             !rec->wrong_chunk_type) {
3643                 remove_cache_extent(extent_cache, &rec->cache);
3644                 free_all_extent_backrefs(rec);
3645                 list_del_init(&rec->list);
3646                 free(rec);
3647         }
3648         return 0;
3649 }
3650
3651 static int check_owner_ref(struct btrfs_root *root,
3652                             struct extent_record *rec,
3653                             struct extent_buffer *buf)
3654 {
3655         struct extent_backref *node, *tmp;
3656         struct tree_backref *back;
3657         struct btrfs_root *ref_root;
3658         struct btrfs_key key;
3659         struct btrfs_path path;
3660         struct extent_buffer *parent;
3661         int level;
3662         int found = 0;
3663         int ret;
3664
3665         rbtree_postorder_for_each_entry_safe(node, tmp,
3666                                              &rec->backref_tree, node) {
3667                 if (node->is_data)
3668                         continue;
3669                 if (!node->found_ref)
3670                         continue;
3671                 if (node->full_backref)
3672                         continue;
3673                 back = to_tree_backref(node);
3674                 if (btrfs_header_owner(buf) == back->root)
3675                         return 0;
3676         }
3677         BUG_ON(rec->is_root);
3678
3679         /* try to find the block by search corresponding fs tree */
3680         key.objectid = btrfs_header_owner(buf);
3681         key.type = BTRFS_ROOT_ITEM_KEY;
3682         key.offset = (u64)-1;
3683
3684         ref_root = btrfs_read_fs_root(root->fs_info, &key);
3685         if (IS_ERR(ref_root))
3686                 return 1;
3687
3688         level = btrfs_header_level(buf);
3689         if (level == 0)
3690                 btrfs_item_key_to_cpu(buf, &key, 0);
3691         else
3692                 btrfs_node_key_to_cpu(buf, &key, 0);
3693
3694         btrfs_init_path(&path);
3695         path.lowest_level = level + 1;
3696         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3697         if (ret < 0)
3698                 return 0;
3699
3700         parent = path.nodes[level + 1];
3701         if (parent && buf->start == btrfs_node_blockptr(parent,
3702                                                         path.slots[level + 1]))
3703                 found = 1;
3704
3705         btrfs_release_path(&path);
3706         return found ? 0 : 1;
3707 }
3708
3709 static int is_extent_tree_record(struct extent_record *rec)
3710 {
3711         struct extent_backref *node, *tmp;
3712         struct tree_backref *back;
3713         int is_extent = 0;
3714
3715         rbtree_postorder_for_each_entry_safe(node, tmp,
3716                                              &rec->backref_tree, node) {
3717                 if (node->is_data)
3718                         return 0;
3719                 back = to_tree_backref(node);
3720                 if (node->full_backref)
3721                         return 0;
3722                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3723                         is_extent = 1;
3724         }
3725         return is_extent;
3726 }
3727
3728
3729 static int record_bad_block_io(struct btrfs_fs_info *info,
3730                                struct cache_tree *extent_cache,
3731                                u64 start, u64 len)
3732 {
3733         struct extent_record *rec;
3734         struct cache_extent *cache;
3735         struct btrfs_key key;
3736
3737         cache = lookup_cache_extent(extent_cache, start, len);
3738         if (!cache)
3739                 return 0;
3740
3741         rec = container_of(cache, struct extent_record, cache);
3742         if (!is_extent_tree_record(rec))
3743                 return 0;
3744
3745         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3746         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
3747 }
3748
3749 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3750                        struct extent_buffer *buf, int slot)
3751 {
3752         if (btrfs_header_level(buf)) {
3753                 struct btrfs_key_ptr ptr1, ptr2;
3754
3755                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3756                                    sizeof(struct btrfs_key_ptr));
3757                 read_extent_buffer(buf, &ptr2,
3758                                    btrfs_node_key_ptr_offset(slot + 1),
3759                                    sizeof(struct btrfs_key_ptr));
3760                 write_extent_buffer(buf, &ptr1,
3761                                     btrfs_node_key_ptr_offset(slot + 1),
3762                                     sizeof(struct btrfs_key_ptr));
3763                 write_extent_buffer(buf, &ptr2,
3764                                     btrfs_node_key_ptr_offset(slot),
3765                                     sizeof(struct btrfs_key_ptr));
3766                 if (slot == 0) {
3767                         struct btrfs_disk_key key;
3768
3769                         btrfs_node_key(buf, &key, 0);
3770                         btrfs_fixup_low_keys(root, path, &key,
3771                                              btrfs_header_level(buf) + 1);
3772                 }
3773         } else {
3774                 struct btrfs_item *item1, *item2;
3775                 struct btrfs_key k1, k2;
3776                 char *item1_data, *item2_data;
3777                 u32 item1_offset, item2_offset, item1_size, item2_size;
3778
3779                 item1 = btrfs_item_nr(slot);
3780                 item2 = btrfs_item_nr(slot + 1);
3781                 btrfs_item_key_to_cpu(buf, &k1, slot);
3782                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
3783                 item1_offset = btrfs_item_offset(buf, item1);
3784                 item2_offset = btrfs_item_offset(buf, item2);
3785                 item1_size = btrfs_item_size(buf, item1);
3786                 item2_size = btrfs_item_size(buf, item2);
3787
3788                 item1_data = malloc(item1_size);
3789                 if (!item1_data)
3790                         return -ENOMEM;
3791                 item2_data = malloc(item2_size);
3792                 if (!item2_data) {
3793                         free(item1_data);
3794                         return -ENOMEM;
3795                 }
3796
3797                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
3798                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
3799
3800                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
3801                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
3802                 free(item1_data);
3803                 free(item2_data);
3804
3805                 btrfs_set_item_offset(buf, item1, item2_offset);
3806                 btrfs_set_item_offset(buf, item2, item1_offset);
3807                 btrfs_set_item_size(buf, item1, item2_size);
3808                 btrfs_set_item_size(buf, item2, item1_size);
3809
3810                 path->slots[0] = slot;
3811                 btrfs_set_item_key_unsafe(root, path, &k2);
3812                 path->slots[0] = slot + 1;
3813                 btrfs_set_item_key_unsafe(root, path, &k1);
3814         }
3815         return 0;
3816 }
3817
3818 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
3819 {
3820         struct extent_buffer *buf;
3821         struct btrfs_key k1, k2;
3822         int i;
3823         int level = path->lowest_level;
3824         int ret = -EIO;
3825
3826         buf = path->nodes[level];
3827         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
3828                 if (level) {
3829                         btrfs_node_key_to_cpu(buf, &k1, i);
3830                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
3831                 } else {
3832                         btrfs_item_key_to_cpu(buf, &k1, i);
3833                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
3834                 }
3835                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
3836                         continue;
3837                 ret = swap_values(root, path, buf, i);
3838                 if (ret)
3839                         break;
3840                 btrfs_mark_buffer_dirty(buf);
3841                 i = 0;
3842         }
3843         return ret;
3844 }
3845
3846 static int delete_bogus_item(struct btrfs_root *root,
3847                              struct btrfs_path *path,
3848                              struct extent_buffer *buf, int slot)
3849 {
3850         struct btrfs_key key;
3851         int nritems = btrfs_header_nritems(buf);
3852
3853         btrfs_item_key_to_cpu(buf, &key, slot);
3854
3855         /* These are all the keys we can deal with missing. */
3856         if (key.type != BTRFS_DIR_INDEX_KEY &&
3857             key.type != BTRFS_EXTENT_ITEM_KEY &&
3858             key.type != BTRFS_METADATA_ITEM_KEY &&
3859             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
3860             key.type != BTRFS_EXTENT_DATA_REF_KEY)
3861                 return -1;
3862
3863         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
3864                (unsigned long long)key.objectid, key.type,
3865                (unsigned long long)key.offset, slot, buf->start);
3866         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
3867                               btrfs_item_nr_offset(slot + 1),
3868                               sizeof(struct btrfs_item) *
3869                               (nritems - slot - 1));
3870         btrfs_set_header_nritems(buf, nritems - 1);
3871         if (slot == 0) {
3872                 struct btrfs_disk_key disk_key;
3873
3874                 btrfs_item_key(buf, &disk_key, 0);
3875                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
3876         }
3877         btrfs_mark_buffer_dirty(buf);
3878         return 0;
3879 }
3880
3881 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
3882 {
3883         struct extent_buffer *buf;
3884         int i;
3885         int ret = 0;
3886
3887         /* We should only get this for leaves */
3888         BUG_ON(path->lowest_level);
3889         buf = path->nodes[0];
3890 again:
3891         for (i = 0; i < btrfs_header_nritems(buf); i++) {
3892                 unsigned int shift = 0, offset;
3893
3894                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
3895                     BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
3896                         if (btrfs_item_end_nr(buf, i) >
3897                             BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
3898                                 ret = delete_bogus_item(root, path, buf, i);
3899                                 if (!ret)
3900                                         goto again;
3901                                 fprintf(stderr,
3902                                 "item is off the end of the leaf, can't fix\n");
3903                                 ret = -EIO;
3904                                 break;
3905                         }
3906                         shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
3907                                 btrfs_item_end_nr(buf, i);
3908                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
3909                            btrfs_item_offset_nr(buf, i - 1)) {
3910                         if (btrfs_item_end_nr(buf, i) >
3911                             btrfs_item_offset_nr(buf, i - 1)) {
3912                                 ret = delete_bogus_item(root, path, buf, i);
3913                                 if (!ret)
3914                                         goto again;
3915                                 fprintf(stderr, "items overlap, can't fix\n");
3916                                 ret = -EIO;
3917                                 break;
3918                         }
3919                         shift = btrfs_item_offset_nr(buf, i - 1) -
3920                                 btrfs_item_end_nr(buf, i);
3921                 }
3922                 if (!shift)
3923                         continue;
3924
3925                 printf("Shifting item nr %d by %u bytes in block %llu\n",
3926                        i, shift, (unsigned long long)buf->start);
3927                 offset = btrfs_item_offset_nr(buf, i);
3928                 memmove_extent_buffer(buf,
3929                                       btrfs_leaf_data(buf) + offset + shift,
3930                                       btrfs_leaf_data(buf) + offset,
3931                                       btrfs_item_size_nr(buf, i));
3932                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
3933                                       offset + shift);
3934                 btrfs_mark_buffer_dirty(buf);
3935         }
3936
3937         /*
3938          * We may have moved things, in which case we want to exit so we don't
3939          * write those changes out.  Once we have proper abort functionality in
3940          * progs this can be changed to something nicer.
3941          */
3942         BUG_ON(ret);
3943         return ret;
3944 }
3945
3946 /*
3947  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
3948  * then just return -EIO.
3949  */
3950 static int try_to_fix_bad_block(struct btrfs_root *root,
3951                                 struct extent_buffer *buf,
3952                                 enum btrfs_tree_block_status status)
3953 {
3954         struct btrfs_trans_handle *trans;
3955         struct ulist *roots;
3956         struct ulist_node *node;
3957         struct btrfs_root *search_root;
3958         struct btrfs_path path;
3959         struct ulist_iterator iter;
3960         struct btrfs_key root_key, key;
3961         int ret;
3962
3963         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
3964             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
3965                 return -EIO;
3966
3967         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
3968         if (ret)
3969                 return -EIO;
3970
3971         btrfs_init_path(&path);
3972         ULIST_ITER_INIT(&iter);
3973         while ((node = ulist_next(roots, &iter))) {
3974                 root_key.objectid = node->val;
3975                 root_key.type = BTRFS_ROOT_ITEM_KEY;
3976                 root_key.offset = (u64)-1;
3977
3978                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
3979                 if (IS_ERR(root)) {
3980                         ret = -EIO;
3981                         break;
3982                 }
3983
3984
3985                 trans = btrfs_start_transaction(search_root, 0);
3986                 if (IS_ERR(trans)) {
3987                         ret = PTR_ERR(trans);
3988                         break;
3989                 }
3990
3991                 path.lowest_level = btrfs_header_level(buf);
3992                 path.skip_check_block = 1;
3993                 if (path.lowest_level)
3994                         btrfs_node_key_to_cpu(buf, &key, 0);
3995                 else
3996                         btrfs_item_key_to_cpu(buf, &key, 0);
3997                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
3998                 if (ret) {
3999                         ret = -EIO;
4000                         btrfs_commit_transaction(trans, search_root);
4001                         break;
4002                 }
4003                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4004                         ret = fix_key_order(search_root, &path);
4005                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4006                         ret = fix_item_offset(search_root, &path);
4007                 if (ret) {
4008                         btrfs_commit_transaction(trans, search_root);
4009                         break;
4010                 }
4011                 btrfs_release_path(&path);
4012                 btrfs_commit_transaction(trans, search_root);
4013         }
4014         ulist_free(roots);
4015         btrfs_release_path(&path);
4016         return ret;
4017 }
4018
4019 static int check_block(struct btrfs_root *root,
4020                        struct cache_tree *extent_cache,
4021                        struct extent_buffer *buf, u64 flags)
4022 {
4023         struct extent_record *rec;
4024         struct cache_extent *cache;
4025         struct btrfs_key key;
4026         enum btrfs_tree_block_status status;
4027         int ret = 0;
4028         int level;
4029
4030         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4031         if (!cache)
4032                 return 1;
4033         rec = container_of(cache, struct extent_record, cache);
4034         rec->generation = btrfs_header_generation(buf);
4035
4036         level = btrfs_header_level(buf);
4037         if (btrfs_header_nritems(buf) > 0) {
4038
4039                 if (level == 0)
4040                         btrfs_item_key_to_cpu(buf, &key, 0);
4041                 else
4042                         btrfs_node_key_to_cpu(buf, &key, 0);
4043
4044                 rec->info_objectid = key.objectid;
4045         }
4046         rec->info_level = level;
4047
4048         if (btrfs_is_leaf(buf))
4049                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4050         else
4051                 status = btrfs_check_node(root, &rec->parent_key, buf);
4052
4053         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4054                 if (repair)
4055                         status = try_to_fix_bad_block(root, buf, status);
4056                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4057                         ret = -EIO;
4058                         fprintf(stderr, "bad block %llu\n",
4059                                 (unsigned long long)buf->start);
4060                 } else {
4061                         /*
4062                          * Signal to callers we need to start the scan over
4063                          * again since we'll have cowed blocks.
4064                          */
4065                         ret = -EAGAIN;
4066                 }
4067         } else {
4068                 rec->content_checked = 1;
4069                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4070                         rec->owner_ref_checked = 1;
4071                 else {
4072                         ret = check_owner_ref(root, rec, buf);
4073                         if (!ret)
4074                                 rec->owner_ref_checked = 1;
4075                 }
4076         }
4077         if (!ret)
4078                 maybe_free_extent_rec(extent_cache, rec);
4079         return ret;
4080 }
4081
#if 0
/*
 * Compiled out: lookup of a tree backref by walking the rec->backrefs list.
 * With a non-zero @parent a full backref with that parent is returned,
 * otherwise a non-full backref owned by @root.  Returns NULL if none match.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
						u64 parent, u64 root)
{
	struct list_head *pos;
	struct extent_backref *node;
	struct tree_backref *back;

	for (pos = rec->backrefs.next; pos != &rec->backrefs;
	     pos = pos->next) {
		node = to_extent_backref(pos);
		if (node->is_data)
			continue;
		back = to_tree_backref(node);
		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
		} else if (!node->full_backref && back->root == root) {
			return back;
		}
	}
	return NULL;
}
#endif
4111
4112 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4113                                                 u64 parent, u64 root)
4114 {
4115         struct tree_backref *ref = malloc(sizeof(*ref));
4116
4117         if (!ref)
4118                 return NULL;
4119         memset(&ref->node, 0, sizeof(ref->node));
4120         if (parent > 0) {
4121                 ref->parent = parent;
4122                 ref->node.full_backref = 1;
4123         } else {
4124                 ref->root = root;
4125                 ref->node.full_backref = 0;
4126         }
4127
4128         return ref;
4129 }
4130
#if 0
/*
 * Compiled out: lookup of a data backref by walking the rec->backrefs list.
 * With a non-zero @parent a full backref with that parent is returned.
 * Otherwise a non-full backref matching @root, @owner and @offset is
 * returned, except that when @found_ref is set and the candidate was
 * already found, its bytes and disk_bytenr must match as well.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
						u64 parent, u64 root,
						u64 owner, u64 offset,
						int found_ref,
						u64 disk_bytenr, u64 bytes)
{
	struct list_head *pos;
	struct extent_backref *node;
	struct data_backref *back;

	for (pos = rec->backrefs.next; pos != &rec->backrefs;
	     pos = pos->next) {
		node = to_extent_backref(pos);
		if (!node->is_data)
			continue;
		back = to_data_backref(node);

		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
			continue;
		}

		if (node->full_backref)
			continue;
		if (back->root != root || back->owner != owner ||
		    back->offset != offset)
			continue;
		if (found_ref && node->found_ref &&
		    (back->bytes != bytes ||
		     back->disk_bytenr != disk_bytenr))
			continue;
		return back;
	}
	return NULL;
}
#endif
4169
4170 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4171                                                 u64 parent, u64 root,
4172                                                 u64 owner, u64 offset,
4173                                                 u64 max_size)
4174 {
4175         struct data_backref *ref = malloc(sizeof(*ref));
4176
4177         if (!ref)
4178                 return NULL;
4179         memset(&ref->node, 0, sizeof(ref->node));
4180         ref->node.is_data = 1;
4181
4182         if (parent > 0) {
4183                 ref->parent = parent;
4184                 ref->owner = 0;
4185                 ref->offset = 0;
4186                 ref->node.full_backref = 1;
4187         } else {
4188                 ref->root = root;
4189                 ref->owner = owner;
4190                 ref->offset = offset;
4191                 ref->node.full_backref = 0;
4192         }
4193         ref->bytes = max_size;
4194         ref->found_ref = 0;
4195         ref->num_refs = 0;
4196         if (max_size > rec->max_size)
4197                 rec->max_size = max_size;
4198         return ref;
4199 }
4200
4201 /* Check if the type of extent matches with its chunk */
4202 static void check_extent_type(struct extent_record *rec)
4203 {
4204         struct btrfs_block_group_cache *bg_cache;
4205
4206         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4207         if (!bg_cache)
4208                 return;
4209
4210         /* data extent, check chunk directly*/
4211         if (!rec->metadata) {
4212                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4213                         rec->wrong_chunk_type = 1;
4214                 return;
4215         }
4216
4217         /* metadata extent, check the obvious case first */
4218         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4219                                  BTRFS_BLOCK_GROUP_METADATA))) {
4220                 rec->wrong_chunk_type = 1;
4221                 return;
4222         }
4223
4224         /*
4225          * Check SYSTEM extent, as it's also marked as metadata, we can only
4226          * make sure it's a SYSTEM extent by its backref
4227          */
4228         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4229                 struct extent_backref *node;
4230                 struct tree_backref *tback;
4231                 u64 bg_type;
4232
4233                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4234                 if (node->is_data) {
4235                         /* tree block shouldn't have data backref */
4236                         rec->wrong_chunk_type = 1;
4237                         return;
4238                 }
4239                 tback = container_of(node, struct tree_backref, node);
4240
4241                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4242                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4243                 else
4244                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4245                 if (!(bg_cache->flags & bg_type))
4246                         rec->wrong_chunk_type = 1;
4247         }
4248 }
4249
4250 /*
4251  * Allocate a new extent record, fill default values from @tmpl and insert int
4252  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4253  * the cache, otherwise it fails.
4254  */
4255 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4256                 struct extent_record *tmpl)
4257 {
4258         struct extent_record *rec;
4259         int ret = 0;
4260
4261         BUG_ON(tmpl->max_size == 0);
4262         rec = malloc(sizeof(*rec));
4263         if (!rec)
4264                 return -ENOMEM;
4265         rec->start = tmpl->start;
4266         rec->max_size = tmpl->max_size;
4267         rec->nr = max(tmpl->nr, tmpl->max_size);
4268         rec->found_rec = tmpl->found_rec;
4269         rec->content_checked = tmpl->content_checked;
4270         rec->owner_ref_checked = tmpl->owner_ref_checked;
4271         rec->num_duplicates = 0;
4272         rec->metadata = tmpl->metadata;
4273         rec->flag_block_full_backref = FLAG_UNSET;
4274         rec->bad_full_backref = 0;
4275         rec->crossing_stripes = 0;
4276         rec->wrong_chunk_type = 0;
4277         rec->is_root = tmpl->is_root;
4278         rec->refs = tmpl->refs;
4279         rec->extent_item_refs = tmpl->extent_item_refs;
4280         rec->parent_generation = tmpl->parent_generation;
4281         INIT_LIST_HEAD(&rec->backrefs);
4282         INIT_LIST_HEAD(&rec->dups);
4283         INIT_LIST_HEAD(&rec->list);
4284         rec->backref_tree = RB_ROOT;
4285         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4286         rec->cache.start = tmpl->start;
4287         rec->cache.size = tmpl->nr;
4288         ret = insert_cache_extent(extent_cache, &rec->cache);
4289         if (ret) {
4290                 free(rec);
4291                 return ret;
4292         }
4293         bytes_used += rec->nr;
4294
4295         if (tmpl->metadata)
4296                 rec->crossing_stripes = check_crossing_stripes(global_info,
4297                                 rec->start, global_info->nodesize);
4298         check_extent_type(rec);
4299         return ret;
4300 }
4301
4302 /*
4303  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4304  * some are hints:
4305  * - refs              - if found, increase refs
4306  * - is_root           - if found, set
4307  * - content_checked   - if found, set
4308  * - owner_ref_checked - if found, set
4309  *
4310  * If not found, create a new one, initialize and insert.
4311  */
4312 static int add_extent_rec(struct cache_tree *extent_cache,
4313                 struct extent_record *tmpl)
4314 {
4315         struct extent_record *rec;
4316         struct cache_extent *cache;
4317         int ret = 0;
4318         int dup = 0;
4319
4320         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4321         if (cache) {
4322                 rec = container_of(cache, struct extent_record, cache);
4323                 if (tmpl->refs)
4324                         rec->refs++;
4325                 if (rec->nr == 1)
4326                         rec->nr = max(tmpl->nr, tmpl->max_size);
4327
4328                 /*
4329                  * We need to make sure to reset nr to whatever the extent
4330                  * record says was the real size, this way we can compare it to
4331                  * the backrefs.
4332                  */
4333                 if (tmpl->found_rec) {
4334                         if (tmpl->start != rec->start || rec->found_rec) {
4335                                 struct extent_record *tmp;
4336
4337                                 dup = 1;
4338                                 if (list_empty(&rec->list))
4339                                         list_add_tail(&rec->list,
4340                                                       &duplicate_extents);
4341
4342                                 /*
4343                                  * We have to do this song and dance in case we
4344                                  * find an extent record that falls inside of
4345                                  * our current extent record but does not have
4346                                  * the same objectid.
4347                                  */
4348                                 tmp = malloc(sizeof(*tmp));
4349                                 if (!tmp)
4350                                         return -ENOMEM;
4351                                 tmp->start = tmpl->start;
4352                                 tmp->max_size = tmpl->max_size;
4353                                 tmp->nr = tmpl->nr;
4354                                 tmp->found_rec = 1;
4355                                 tmp->metadata = tmpl->metadata;
4356                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4357                                 INIT_LIST_HEAD(&tmp->list);
4358                                 list_add_tail(&tmp->list, &rec->dups);
4359                                 rec->num_duplicates++;
4360                         } else {
4361                                 rec->nr = tmpl->nr;
4362                                 rec->found_rec = 1;
4363                         }
4364                 }
4365
4366                 if (tmpl->extent_item_refs && !dup) {
4367                         if (rec->extent_item_refs) {
4368                                 fprintf(stderr,
4369                         "block %llu rec extent_item_refs %llu, passed %llu\n",
4370                                         (unsigned long long)tmpl->start,
4371                                         (unsigned long long)
4372                                                         rec->extent_item_refs,
4373                                         (unsigned long long)
4374                                                         tmpl->extent_item_refs);
4375                         }
4376                         rec->extent_item_refs = tmpl->extent_item_refs;
4377                 }
4378                 if (tmpl->is_root)
4379                         rec->is_root = 1;
4380                 if (tmpl->content_checked)
4381                         rec->content_checked = 1;
4382                 if (tmpl->owner_ref_checked)
4383                         rec->owner_ref_checked = 1;
4384                 memcpy(&rec->parent_key, &tmpl->parent_key,
4385                                 sizeof(tmpl->parent_key));
4386                 if (tmpl->parent_generation)
4387                         rec->parent_generation = tmpl->parent_generation;
4388                 if (rec->max_size < tmpl->max_size)
4389                         rec->max_size = tmpl->max_size;
4390
4391                 /*
4392                  * A metadata extent can't cross stripe_len boundary, otherwise
4393                  * kernel scrub won't be able to handle it.
4394                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4395                  * it.
4396                  */
4397                 if (tmpl->metadata)
4398                         rec->crossing_stripes = check_crossing_stripes(
4399                                         global_info, rec->start,
4400                                         global_info->nodesize);
4401                 check_extent_type(rec);
4402                 maybe_free_extent_rec(extent_cache, rec);
4403                 return ret;
4404         }
4405
4406         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4407
4408         return ret;
4409 }
4410
4411 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4412                             u64 parent, u64 root, int found_ref)
4413 {
4414         struct extent_record *rec;
4415         struct tree_backref *back;
4416         struct cache_extent *cache;
4417         int ret;
4418         bool insert = false;
4419
4420         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4421         if (!cache) {
4422                 struct extent_record tmpl;
4423
4424                 memset(&tmpl, 0, sizeof(tmpl));
4425                 tmpl.start = bytenr;
4426                 tmpl.nr = 1;
4427                 tmpl.metadata = 1;
4428                 tmpl.max_size = 1;
4429
4430                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4431                 if (ret)
4432                         return ret;
4433
4434                 /* really a bug in cache_extent implement now */
4435                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4436                 if (!cache)
4437                         return -ENOENT;
4438         }
4439
4440         rec = container_of(cache, struct extent_record, cache);
4441         if (rec->start != bytenr) {
4442                 /*
4443                  * Several cause, from unaligned bytenr to over lapping extents
4444                  */
4445                 return -EEXIST;
4446         }
4447
4448         back = find_tree_backref(rec, parent, root);
4449         if (!back) {
4450                 back = alloc_tree_backref(rec, parent, root);
4451                 if (!back)
4452                         return -ENOMEM;
4453                 insert = true;
4454         }
4455
4456         if (found_ref) {
4457                 if (back->node.found_ref) {
4458                         fprintf(stderr,
4459         "Extent back ref already exists for %llu parent %llu root %llu\n",
4460                                 (unsigned long long)bytenr,
4461                                 (unsigned long long)parent,
4462                                 (unsigned long long)root);
4463                 }
4464                 back->node.found_ref = 1;
4465         } else {
4466                 if (back->node.found_extent_tree) {
4467                         fprintf(stderr,
4468         "extent back ref already exists for %llu parent %llu root %llu\n",
4469                                 (unsigned long long)bytenr,
4470                                 (unsigned long long)parent,
4471                                 (unsigned long long)root);
4472                 }
4473                 back->node.found_extent_tree = 1;
4474         }
4475         if (insert)
4476                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
4477                         compare_extent_backref));
4478         check_extent_type(rec);
4479         maybe_free_extent_rec(extent_cache, rec);
4480         return 0;
4481 }
4482
4483 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4484                             u64 parent, u64 root, u64 owner, u64 offset,
4485                             u32 num_refs, int found_ref, u64 max_size)
4486 {
4487         struct extent_record *rec;
4488         struct data_backref *back;
4489         struct cache_extent *cache;
4490         int ret;
4491         bool insert = false;
4492
4493         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4494         if (!cache) {
4495                 struct extent_record tmpl;
4496
4497                 memset(&tmpl, 0, sizeof(tmpl));
4498                 tmpl.start = bytenr;
4499                 tmpl.nr = 1;
4500                 tmpl.max_size = max_size;
4501
4502                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4503                 if (ret)
4504                         return ret;
4505
4506                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4507                 if (!cache)
4508                         abort();
4509         }
4510
4511         rec = container_of(cache, struct extent_record, cache);
4512         if (rec->max_size < max_size)
4513                 rec->max_size = max_size;
4514
4515         /*
4516          * If found_ref is set then max_size is the real size and must match the
4517          * existing refs.  So if we have already found a ref then we need to
4518          * make sure that this ref matches the existing one, otherwise we need
4519          * to add a new backref so we can notice that the backrefs don't match
4520          * and we need to figure out who is telling the truth.  This is to
4521          * account for that awful fsync bug I introduced where we'd end up with
4522          * a btrfs_file_extent_item that would have its length include multiple
4523          * prealloc extents or point inside of a prealloc extent.
4524          */
4525         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4526                                  bytenr, max_size);
4527         if (!back) {
4528                 back = alloc_data_backref(rec, parent, root, owner, offset,
4529                                           max_size);
4530                 BUG_ON(!back);
4531                 insert = true;
4532         }
4533
4534         if (found_ref) {
4535                 BUG_ON(num_refs != 1);
4536                 if (back->node.found_ref)
4537                         BUG_ON(back->bytes != max_size);
4538                 back->node.found_ref = 1;
4539                 back->found_ref += 1;
4540                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
4541                         back->bytes = max_size;
4542                         back->disk_bytenr = bytenr;
4543
4544                         /* Need to reinsert if not already in the tree */
4545                         if (!insert) {
4546                                 rb_erase(&back->node.node, &rec->backref_tree);
4547                                 insert = true;
4548                         }
4549                 }
4550                 rec->refs += 1;
4551                 rec->content_checked = 1;
4552                 rec->owner_ref_checked = 1;
4553         } else {
4554                 if (back->node.found_extent_tree) {
4555                         fprintf(stderr,
4556 "Extent back ref already exists for %llu parent %llu root %llu owner %llu offset %llu num_refs %lu\n",
4557                                 (unsigned long long)bytenr,
4558                                 (unsigned long long)parent,
4559                                 (unsigned long long)root,
4560                                 (unsigned long long)owner,
4561                                 (unsigned long long)offset,
4562                                 (unsigned long)num_refs);
4563                 }
4564                 back->num_refs = num_refs;
4565                 back->node.found_extent_tree = 1;
4566         }
4567         if (insert)
4568                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
4569                         compare_extent_backref));
4570
4571         maybe_free_extent_rec(extent_cache, rec);
4572         return 0;
4573 }
4574
4575 static int add_pending(struct cache_tree *pending,
4576                        struct cache_tree *seen, u64 bytenr, u32 size)
4577 {
4578         int ret;
4579
4580         ret = add_cache_extent(seen, bytenr, size);
4581         if (ret)
4582                 return ret;
4583         add_cache_extent(pending, bytenr, size);
4584         return 0;
4585 }
4586
4587 static int pick_next_pending(struct cache_tree *pending,
4588                         struct cache_tree *reada,
4589                         struct cache_tree *nodes,
4590                         u64 last, struct block_info *bits, int bits_nr,
4591                         int *reada_bits)
4592 {
4593         unsigned long node_start = last;
4594         struct cache_extent *cache;
4595         int ret;
4596
4597         cache = search_cache_extent(reada, 0);
4598         if (cache) {
4599                 bits[0].start = cache->start;
4600                 bits[0].size = cache->size;
4601                 *reada_bits = 1;
4602                 return 1;
4603         }
4604         *reada_bits = 0;
4605         if (node_start > 32768)
4606                 node_start -= 32768;
4607
4608         cache = search_cache_extent(nodes, node_start);
4609         if (!cache)
4610                 cache = search_cache_extent(nodes, 0);
4611
4612         if (!cache) {
4613                 cache = search_cache_extent(pending, 0);
4614                 if (!cache)
4615                         return 0;
4616                 ret = 0;
4617                 do {
4618                         bits[ret].start = cache->start;
4619                         bits[ret].size = cache->size;
4620                         cache = next_cache_extent(cache);
4621                         ret++;
4622                 } while (cache && ret < bits_nr);
4623                 return ret;
4624         }
4625
4626         ret = 0;
4627         do {
4628                 bits[ret].start = cache->start;
4629                 bits[ret].size = cache->size;
4630                 cache = next_cache_extent(cache);
4631                 ret++;
4632         } while (cache && ret < bits_nr);
4633
4634         if (bits_nr - ret > 8) {
4635                 u64 lookup = bits[0].start + bits[0].size;
4636                 struct cache_extent *next;
4637
4638                 next = search_cache_extent(pending, lookup);
4639                 while (next) {
4640                         if (next->start - lookup > 32768)
4641                                 break;
4642                         bits[ret].start = next->start;
4643                         bits[ret].size = next->size;
4644                         lookup = next->start + next->size;
4645                         ret++;
4646                         if (ret == bits_nr)
4647                                 break;
4648                         next = next_cache_extent(next);
4649                         if (!next)
4650                                 break;
4651                 }
4652         }
4653         return ret;
4654 }
4655
4656 static void free_chunk_record(struct cache_extent *cache)
4657 {
4658         struct chunk_record *rec;
4659
4660         rec = container_of(cache, struct chunk_record, cache);
4661         list_del_init(&rec->list);
4662         list_del_init(&rec->dextents);
4663         free(rec);
4664 }
4665
4666 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4667 {
4668         cache_tree_free_extents(chunk_cache, free_chunk_record);
4669 }
4670
4671 static void free_device_record(struct rb_node *node)
4672 {
4673         struct device_record *rec;
4674
4675         rec = container_of(node, struct device_record, node);
4676         free(rec);
4677 }
4678
4679 FREE_RB_BASED_TREE(device_cache, free_device_record);
4680
4681 int insert_block_group_record(struct block_group_tree *tree,
4682                               struct block_group_record *bg_rec)
4683 {
4684         int ret;
4685
4686         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4687         if (ret)
4688                 return ret;
4689
4690         list_add_tail(&bg_rec->list, &tree->block_groups);
4691         return 0;
4692 }
4693
4694 static void free_block_group_record(struct cache_extent *cache)
4695 {
4696         struct block_group_record *rec;
4697
4698         rec = container_of(cache, struct block_group_record, cache);
4699         list_del_init(&rec->list);
4700         free(rec);
4701 }
4702
4703 void free_block_group_tree(struct block_group_tree *tree)
4704 {
4705         cache_tree_free_extents(&tree->tree, free_block_group_record);
4706 }
4707
4708 int insert_device_extent_record(struct device_extent_tree *tree,
4709                                 struct device_extent_record *de_rec)
4710 {
4711         int ret;
4712
4713         /*
4714          * Device extent is a bit different from the other extents, because
4715          * the extents which belong to the different devices may have the
4716          * same start and size, so we need use the special extent cache
4717          * search/insert functions.
4718          */
4719         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4720         if (ret)
4721                 return ret;
4722
4723         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4724         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4725         return 0;
4726 }
4727
4728 static void free_device_extent_record(struct cache_extent *cache)
4729 {
4730         struct device_extent_record *rec;
4731
4732         rec = container_of(cache, struct device_extent_record, cache);
4733         if (!list_empty(&rec->chunk_list))
4734                 list_del_init(&rec->chunk_list);
4735         if (!list_empty(&rec->device_list))
4736                 list_del_init(&rec->device_list);
4737         free(rec);
4738 }
4739
4740 void free_device_extent_tree(struct device_extent_tree *tree)
4741 {
4742         cache_tree_free_extents(&tree->tree, free_device_extent_record);
4743 }
4744
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Record the back reference described by the v0 extent ref item at @slot of
 * @leaf in @extent_cache.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref; anything else is recorded as a data backref carrying the
 * item's reference count.  In both cases the key's objectid is the extent
 * byte number and the key's offset is passed as the parent.
 *
 * Returns whatever add_tree_backref() / add_data_backref() returns.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                key.offset, 0, 0, 0,
                                btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset,
                        0, 0);
}
#endif
4765
4766 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4767                                             struct btrfs_key *key,
4768                                             int slot)
4769 {
4770         struct btrfs_chunk *ptr;
4771         struct chunk_record *rec;
4772         int num_stripes, i;
4773
4774         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4775         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4776
4777         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4778         if (!rec) {
4779                 fprintf(stderr, "memory allocation failed\n");
4780                 exit(-1);
4781         }
4782
4783         INIT_LIST_HEAD(&rec->list);
4784         INIT_LIST_HEAD(&rec->dextents);
4785         rec->bg_rec = NULL;
4786
4787         rec->cache.start = key->offset;
4788         rec->cache.size = btrfs_chunk_length(leaf, ptr);
4789
4790         rec->generation = btrfs_header_generation(leaf);
4791
4792         rec->objectid = key->objectid;
4793         rec->type = key->type;
4794         rec->offset = key->offset;
4795
4796         rec->length = rec->cache.size;
4797         rec->owner = btrfs_chunk_owner(leaf, ptr);
4798         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4799         rec->type_flags = btrfs_chunk_type(leaf, ptr);
4800         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4801         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4802         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4803         rec->num_stripes = num_stripes;
4804         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4805
4806         for (i = 0; i < rec->num_stripes; ++i) {
4807                 rec->stripes[i].devid =
4808                         btrfs_stripe_devid_nr(leaf, ptr, i);
4809                 rec->stripes[i].offset =
4810                         btrfs_stripe_offset_nr(leaf, ptr, i);
4811                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4812                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
4813                                 BTRFS_UUID_SIZE);
4814         }
4815
4816         return rec;
4817 }
4818
4819 static int process_chunk_item(struct cache_tree *chunk_cache,
4820                               struct btrfs_key *key, struct extent_buffer *eb,
4821                               int slot)
4822 {
4823         struct chunk_record *rec;
4824         struct btrfs_chunk *chunk;
4825         int ret = 0;
4826
4827         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
4828         /*
4829          * Do extra check for this chunk item,
4830          *
4831          * It's still possible one can craft a leaf with CHUNK_ITEM, with
4832          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
4833          * and owner<->key_type check.
4834          */
4835         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
4836                                       key->offset);
4837         if (ret < 0) {
4838                 error("chunk(%llu, %llu) is not valid, ignore it",
4839                       key->offset, btrfs_chunk_length(eb, chunk));
4840                 return 0;
4841         }
4842         rec = btrfs_new_chunk_record(eb, key, slot);
4843         ret = insert_cache_extent(chunk_cache, &rec->cache);
4844         if (ret) {
4845                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
4846                         rec->offset, rec->length);
4847                 free(rec);
4848         }
4849
4850         return ret;
4851 }
4852
4853 static int process_device_item(struct rb_root *dev_cache,
4854                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
4855 {
4856         struct btrfs_dev_item *ptr;
4857         struct device_record *rec;
4858         int ret = 0;
4859
4860         ptr = btrfs_item_ptr(eb,
4861                 slot, struct btrfs_dev_item);
4862
4863         rec = malloc(sizeof(*rec));
4864         if (!rec) {
4865                 fprintf(stderr, "memory allocation failed\n");
4866                 return -ENOMEM;
4867         }
4868
4869         rec->devid = key->offset;
4870         rec->generation = btrfs_header_generation(eb);
4871
4872         rec->objectid = key->objectid;
4873         rec->type = key->type;
4874         rec->offset = key->offset;
4875
4876         rec->devid = btrfs_device_id(eb, ptr);
4877         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
4878         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
4879
4880         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
4881         if (ret) {
4882                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
4883                 free(rec);
4884         }
4885
4886         return ret;
4887 }
4888
4889 struct block_group_record *
4890 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
4891                              int slot)
4892 {
4893         struct btrfs_block_group_item *ptr;
4894         struct block_group_record *rec;
4895
4896         rec = calloc(1, sizeof(*rec));
4897         if (!rec) {
4898                 fprintf(stderr, "memory allocation failed\n");
4899                 exit(-1);
4900         }
4901
4902         rec->cache.start = key->objectid;
4903         rec->cache.size = key->offset;
4904
4905         rec->generation = btrfs_header_generation(leaf);
4906
4907         rec->objectid = key->objectid;
4908         rec->type = key->type;
4909         rec->offset = key->offset;
4910
4911         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
4912         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
4913
4914         INIT_LIST_HEAD(&rec->list);
4915
4916         return rec;
4917 }
4918
4919 static int process_block_group_item(struct block_group_tree *block_group_cache,
4920                                     struct btrfs_key *key,
4921                                     struct extent_buffer *eb, int slot)
4922 {
4923         struct block_group_record *rec;
4924         int ret = 0;
4925
4926         rec = btrfs_new_block_group_record(eb, key, slot);
4927         ret = insert_block_group_record(block_group_cache, rec);
4928         if (ret) {
4929                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
4930                         rec->objectid, rec->offset);
4931                 free(rec);
4932         }
4933
4934         return ret;
4935 }
4936
4937 struct device_extent_record *
4938 btrfs_new_device_extent_record(struct extent_buffer *leaf,
4939                                struct btrfs_key *key, int slot)
4940 {
4941         struct device_extent_record *rec;
4942         struct btrfs_dev_extent *ptr;
4943
4944         rec = calloc(1, sizeof(*rec));
4945         if (!rec) {
4946                 fprintf(stderr, "memory allocation failed\n");
4947                 exit(-1);
4948         }
4949
4950         rec->cache.objectid = key->objectid;
4951         rec->cache.start = key->offset;
4952
4953         rec->generation = btrfs_header_generation(leaf);
4954
4955         rec->objectid = key->objectid;
4956         rec->type = key->type;
4957         rec->offset = key->offset;
4958
4959         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
4960         rec->chunk_objecteid =
4961                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
4962         rec->chunk_offset =
4963                 btrfs_dev_extent_chunk_offset(leaf, ptr);
4964         rec->length = btrfs_dev_extent_length(leaf, ptr);
4965         rec->cache.size = rec->length;
4966
4967         INIT_LIST_HEAD(&rec->chunk_list);
4968         INIT_LIST_HEAD(&rec->device_list);
4969
4970         return rec;
4971 }
4972
4973 static int
4974 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
4975                            struct btrfs_key *key, struct extent_buffer *eb,
4976                            int slot)
4977 {
4978         struct device_extent_record *rec;
4979         int ret;
4980
4981         rec = btrfs_new_device_extent_record(eb, key, slot);
4982         ret = insert_device_extent_record(dev_extent_cache, rec);
4983         if (ret) {
4984                 fprintf(stderr,
4985                         "Device extent[%llu, %llu, %llu] existed.\n",
4986                         rec->objectid, rec->offset, rec->length);
4987                 free(rec);
4988         }
4989
4990         return ret;
4991 }
4992
4993 static int process_extent_item(struct btrfs_root *root,
4994                                struct cache_tree *extent_cache,
4995                                struct extent_buffer *eb, int slot)
4996 {
4997         struct btrfs_extent_item *ei;
4998         struct btrfs_extent_inline_ref *iref;
4999         struct btrfs_extent_data_ref *dref;
5000         struct btrfs_shared_data_ref *sref;
5001         struct btrfs_key key;
5002         struct extent_record tmpl;
5003         unsigned long end;
5004         unsigned long ptr;
5005         int ret;
5006         int type;
5007         u32 item_size = btrfs_item_size_nr(eb, slot);
5008         u64 refs = 0;
5009         u64 offset;
5010         u64 num_bytes;
5011         int metadata = 0;
5012
5013         btrfs_item_key_to_cpu(eb, &key, slot);
5014
5015         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5016                 metadata = 1;
5017                 num_bytes = root->fs_info->nodesize;
5018         } else {
5019                 num_bytes = key.offset;
5020         }
5021
5022         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
5023                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5024                       key.objectid, root->fs_info->sectorsize);
5025                 return -EIO;
5026         }
5027         if (item_size < sizeof(*ei)) {
5028 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5029                 struct btrfs_extent_item_v0 *ei0;
5030
5031                 if (item_size != sizeof(*ei0)) {
5032                         error(
5033         "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
5034                                 key.objectid, key.type, key.offset,
5035                                 btrfs_header_bytenr(eb), slot);
5036                         BUG();
5037                 }
5038                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5039                 refs = btrfs_extent_refs_v0(eb, ei0);
5040 #else
5041                 BUG();
5042 #endif
5043                 memset(&tmpl, 0, sizeof(tmpl));
5044                 tmpl.start = key.objectid;
5045                 tmpl.nr = num_bytes;
5046                 tmpl.extent_item_refs = refs;
5047                 tmpl.metadata = metadata;
5048                 tmpl.found_rec = 1;
5049                 tmpl.max_size = num_bytes;
5050
5051                 return add_extent_rec(extent_cache, &tmpl);
5052         }
5053
5054         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5055         refs = btrfs_extent_refs(eb, ei);
5056         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5057                 metadata = 1;
5058         else
5059                 metadata = 0;
5060         if (metadata && num_bytes != root->fs_info->nodesize) {
5061                 error("ignore invalid metadata extent, length %llu does not equal to %u",
5062                       num_bytes, root->fs_info->nodesize);
5063                 return -EIO;
5064         }
5065         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
5066                 error("ignore invalid data extent, length %llu is not aligned to %u",
5067                       num_bytes, root->fs_info->sectorsize);
5068                 return -EIO;
5069         }
5070
5071         memset(&tmpl, 0, sizeof(tmpl));
5072         tmpl.start = key.objectid;
5073         tmpl.nr = num_bytes;
5074         tmpl.extent_item_refs = refs;
5075         tmpl.metadata = metadata;
5076         tmpl.found_rec = 1;
5077         tmpl.max_size = num_bytes;
5078         add_extent_rec(extent_cache, &tmpl);
5079
5080         ptr = (unsigned long)(ei + 1);
5081         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5082             key.type == BTRFS_EXTENT_ITEM_KEY)
5083                 ptr += sizeof(struct btrfs_tree_block_info);
5084
5085         end = (unsigned long)ei + item_size;
5086         while (ptr < end) {
5087                 iref = (struct btrfs_extent_inline_ref *)ptr;
5088                 type = btrfs_extent_inline_ref_type(eb, iref);
5089                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5090                 switch (type) {
5091                 case BTRFS_TREE_BLOCK_REF_KEY:
5092                         ret = add_tree_backref(extent_cache, key.objectid,
5093                                         0, offset, 0);
5094                         if (ret < 0)
5095                                 error(
5096                         "add_tree_backref failed (extent items tree block): %s",
5097                                       strerror(-ret));
5098                         break;
5099                 case BTRFS_SHARED_BLOCK_REF_KEY:
5100                         ret = add_tree_backref(extent_cache, key.objectid,
5101                                         offset, 0, 0);
5102                         if (ret < 0)
5103                                 error(
5104                         "add_tree_backref failed (extent items shared block): %s",
5105                                       strerror(-ret));
5106                         break;
5107                 case BTRFS_EXTENT_DATA_REF_KEY:
5108                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5109                         add_data_backref(extent_cache, key.objectid, 0,
5110                                         btrfs_extent_data_ref_root(eb, dref),
5111                                         btrfs_extent_data_ref_objectid(eb,
5112                                                                        dref),
5113                                         btrfs_extent_data_ref_offset(eb, dref),
5114                                         btrfs_extent_data_ref_count(eb, dref),
5115                                         0, num_bytes);
5116                         break;
5117                 case BTRFS_SHARED_DATA_REF_KEY:
5118                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5119                         add_data_backref(extent_cache, key.objectid, offset,
5120                                         0, 0, 0,
5121                                         btrfs_shared_data_ref_count(eb, sref),
5122                                         0, num_bytes);
5123                         break;
5124                 default:
5125                         fprintf(stderr,
5126                                 "corrupt extent record: key [%llu,%u,%llu]\n",
5127                                 key.objectid, key.type, num_bytes);
5128                         goto out;
5129                 }
5130                 ptr += btrfs_extent_inline_ref_size(type);
5131         }
5132         WARN_ON(ptr > end);
5133 out:
5134         return 0;
5135 }
5136
5137 static int check_cache_range(struct btrfs_root *root,
5138                              struct btrfs_block_group_cache *cache,
5139                              u64 offset, u64 bytes)
5140 {
5141         struct btrfs_free_space *entry;
5142         u64 *logical;
5143         u64 bytenr;
5144         int stripe_len;
5145         int i, nr, ret;
5146
5147         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5148                 bytenr = btrfs_sb_offset(i);
5149                 ret = btrfs_rmap_block(root->fs_info,
5150                                        cache->key.objectid, bytenr, 0,
5151                                        &logical, &nr, &stripe_len);
5152                 if (ret)
5153                         return ret;
5154
5155                 while (nr--) {
5156                         if (logical[nr] + stripe_len <= offset)
5157                                 continue;
5158                         if (offset + bytes <= logical[nr])
5159                                 continue;
5160                         if (logical[nr] == offset) {
5161                                 if (stripe_len >= bytes) {
5162                                         free(logical);
5163                                         return 0;
5164                                 }
5165                                 bytes -= stripe_len;
5166                                 offset += stripe_len;
5167                         } else if (logical[nr] < offset) {
5168                                 if (logical[nr] + stripe_len >=
5169                                     offset + bytes) {
5170                                         free(logical);
5171                                         return 0;
5172                                 }
5173                                 bytes = (offset + bytes) -
5174                                         (logical[nr] + stripe_len);
5175                                 offset = logical[nr] + stripe_len;
5176                         } else {
5177                                 /*
5178                                  * Could be tricky, the super may land in the
5179                                  * middle of the area we're checking.  First
5180                                  * check the easiest case, it's at the end.
5181                                  */
5182                                 if (logical[nr] + stripe_len >=
5183                                     bytes + offset) {
5184                                         bytes = logical[nr] - offset;
5185                                         continue;
5186                                 }
5187
5188                                 /* Check the left side */
5189                                 ret = check_cache_range(root, cache,
5190                                                         offset,
5191                                                         logical[nr] - offset);
5192                                 if (ret) {
5193                                         free(logical);
5194                                         return ret;
5195                                 }
5196
5197                                 /* Now we continue with the right side */
5198                                 bytes = (offset + bytes) -
5199                                         (logical[nr] + stripe_len);
5200                                 offset = logical[nr] + stripe_len;
5201                         }
5202                 }
5203
5204                 free(logical);
5205         }
5206
5207         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5208         if (!entry) {
5209                 fprintf(stderr, "there is no free space entry for %llu-%llu\n",
5210                         offset, offset+bytes);
5211                 return -EINVAL;
5212         }
5213
5214         if (entry->offset != offset) {
5215                 fprintf(stderr, "wanted offset %llu, found %llu\n", offset,
5216                         entry->offset);
5217                 return -EINVAL;
5218         }
5219
5220         if (entry->bytes != bytes) {
5221                 fprintf(stderr, "wanted bytes %llu, found %llu for off %llu\n",
5222                         bytes, entry->bytes, offset);
5223                 return -EINVAL;
5224         }
5225
5226         unlink_free_space(cache->free_space_ctl, entry);
5227         free(entry);
5228         return 0;
5229 }
5230
5231 static int verify_space_cache(struct btrfs_root *root,
5232                               struct btrfs_block_group_cache *cache)
5233 {
5234         struct btrfs_path path;
5235         struct extent_buffer *leaf;
5236         struct btrfs_key key;
5237         u64 last;
5238         int ret = 0;
5239
5240         root = root->fs_info->extent_root;
5241
5242         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5243
5244         btrfs_init_path(&path);
5245         key.objectid = last;
5246         key.offset = 0;
5247         key.type = BTRFS_EXTENT_ITEM_KEY;
5248         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5249         if (ret < 0)
5250                 goto out;
5251         ret = 0;
5252         while (1) {
5253                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
5254                         ret = btrfs_next_leaf(root, &path);
5255                         if (ret < 0)
5256                                 goto out;
5257                         if (ret > 0) {
5258                                 ret = 0;
5259                                 break;
5260                         }
5261                 }
5262                 leaf = path.nodes[0];
5263                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
5264                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5265                         break;
5266                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5267                     key.type != BTRFS_METADATA_ITEM_KEY) {
5268                         path.slots[0]++;
5269                         continue;
5270                 }
5271
5272                 if (last == key.objectid) {
5273                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5274                                 last = key.objectid + key.offset;
5275                         else
5276                                 last = key.objectid + root->fs_info->nodesize;
5277                         path.slots[0]++;
5278                         continue;
5279                 }
5280
5281                 ret = check_cache_range(root, cache, last,
5282                                         key.objectid - last);
5283                 if (ret)
5284                         break;
5285                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5286                         last = key.objectid + key.offset;
5287                 else
5288                         last = key.objectid + root->fs_info->nodesize;
5289                 path.slots[0]++;
5290         }
5291
5292         if (last < cache->key.objectid + cache->key.offset)
5293                 ret = check_cache_range(root, cache, last,
5294                                         cache->key.objectid +
5295                                         cache->key.offset - last);
5296
5297 out:
5298         btrfs_release_path(&path);
5299
5300         if (!ret &&
5301             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5302                 fprintf(stderr, "There are still entries left in the space "
5303                         "cache\n");
5304                 ret = -EINVAL;
5305         }
5306
5307         return ret;
5308 }
5309
5310 static int check_space_cache(struct btrfs_root *root)
5311 {
5312         struct btrfs_block_group_cache *cache;
5313         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5314         int ret;
5315         int error = 0;
5316
5317         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5318             btrfs_super_generation(root->fs_info->super_copy) !=
5319             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5320                 printf("cache and super generation don't match, space cache "
5321                        "will be invalidated\n");
5322                 return 0;
5323         }
5324
5325         if (ctx.progress_enabled) {
5326                 ctx.tp = TASK_FREE_SPACE;
5327                 task_start(ctx.info);
5328         }
5329
5330         while (1) {
5331                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5332                 if (!cache)
5333                         break;
5334
5335                 start = cache->key.objectid + cache->key.offset;
5336                 if (!cache->free_space_ctl) {
5337                         if (btrfs_init_free_space_ctl(cache,
5338                                                 root->fs_info->sectorsize)) {
5339                                 ret = -ENOMEM;
5340                                 break;
5341                         }
5342                 } else {
5343                         btrfs_remove_free_space_cache(cache);
5344                 }
5345
5346                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
5347                         ret = exclude_super_stripes(root, cache);
5348                         if (ret) {
5349                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5350                                         strerror(-ret));
5351                                 error++;
5352                                 continue;
5353                         }
5354                         ret = load_free_space_tree(root->fs_info, cache);
5355                         free_excluded_extents(root, cache);
5356                         if (ret < 0) {
5357                                 fprintf(stderr, "could not load free space tree: %s\n",
5358                                         strerror(-ret));
5359                                 error++;
5360                                 continue;
5361                         }
5362                         error += ret;
5363                 } else {
5364                         ret = load_free_space_cache(root->fs_info, cache);
5365                         if (ret < 0)
5366                                 error++;
5367                         if (ret <= 0)
5368                                 continue;
5369                 }
5370
5371                 ret = verify_space_cache(root, cache);
5372                 if (ret) {
5373                         fprintf(stderr, "cache appears valid but isn't %llu\n",
5374                                 cache->key.objectid);
5375                         error++;
5376                 }
5377         }
5378
5379         task_stop(ctx.info);
5380
5381         return error ? -EINVAL : 0;
5382 }
5383
5384 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5385                         u64 num_bytes, unsigned long leaf_offset,
5386                         struct extent_buffer *eb)
5387 {
5388         struct btrfs_fs_info *fs_info = root->fs_info;
5389         u64 offset = 0;
5390         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
5391         char *data;
5392         unsigned long csum_offset;
5393         u32 csum;
5394         u32 csum_expected;
5395         u64 read_len;
5396         u64 data_checked = 0;
5397         u64 tmp;
5398         int ret = 0;
5399         int mirror;
5400         int num_copies;
5401
5402         if (num_bytes % fs_info->sectorsize)
5403                 return -EINVAL;
5404
5405         data = malloc(num_bytes);
5406         if (!data)
5407                 return -ENOMEM;
5408
5409         while (offset < num_bytes) {
5410                 mirror = 0;
5411 again:
5412                 read_len = num_bytes - offset;
5413                 /* read as much space once a time */
5414                 ret = read_extent_data(fs_info, data + offset,
5415                                 bytenr + offset, &read_len, mirror);
5416                 if (ret)
5417                         goto out;
5418                 data_checked = 0;
5419                 /* verify every 4k data's checksum */
5420                 while (data_checked < read_len) {
5421                         csum = ~(u32)0;
5422                         tmp = offset + data_checked;
5423
5424                         csum = btrfs_csum_data((char *)data + tmp,
5425                                                csum, fs_info->sectorsize);
5426                         btrfs_csum_final(csum, (u8 *)&csum);
5427
5428                         csum_offset = leaf_offset +
5429                                  tmp / fs_info->sectorsize * csum_size;
5430                         read_extent_buffer(eb, (char *)&csum_expected,
5431                                            csum_offset, csum_size);
5432                         /* try another mirror */
5433                         if (csum != csum_expected) {
5434                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5435                                                 mirror, bytenr + tmp,
5436                                                 csum, csum_expected);
5437                                 num_copies = btrfs_num_copies(root->fs_info,
5438                                                 bytenr, num_bytes);
5439                                 if (mirror < num_copies - 1) {
5440                                         mirror += 1;
5441                                         goto again;
5442                                 }
5443                         }
5444                         data_checked += fs_info->sectorsize;
5445                 }
5446                 offset += read_len;
5447         }
5448 out:
5449         free(data);
5450         return ret;
5451 }
5452
5453 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5454                                u64 num_bytes)
5455 {
5456         struct btrfs_path path;
5457         struct extent_buffer *leaf;
5458         struct btrfs_key key;
5459         int ret;
5460
5461         btrfs_init_path(&path);
5462         key.objectid = bytenr;
5463         key.type = BTRFS_EXTENT_ITEM_KEY;
5464         key.offset = (u64)-1;
5465
5466 again:
5467         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
5468                                 0, 0);
5469         if (ret < 0) {
5470                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5471                 btrfs_release_path(&path);
5472                 return ret;
5473         } else if (ret) {
5474                 if (path.slots[0] > 0) {
5475                         path.slots[0]--;
5476                 } else {
5477                         ret = btrfs_prev_leaf(root, &path);
5478                         if (ret < 0) {
5479                                 goto out;
5480                         } else if (ret > 0) {
5481                                 ret = 0;
5482                                 goto out;
5483                         }
5484                 }
5485         }
5486
5487         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
5488
5489         /*
5490          * Block group items come before extent items if they have the same
5491          * bytenr, so walk back one more just in case.  Dear future traveller,
5492          * first congrats on mastering time travel.  Now if it's not too much
5493          * trouble could you go back to 2006 and tell Chris to make the
5494          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5495          * EXTENT_ITEM_KEY please?
5496          */
5497         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5498                 if (path.slots[0] > 0) {
5499                         path.slots[0]--;
5500                 } else {
5501                         ret = btrfs_prev_leaf(root, &path);
5502                         if (ret < 0) {
5503                                 goto out;
5504                         } else if (ret > 0) {
5505                                 ret = 0;
5506                                 goto out;
5507                         }
5508                 }
5509                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
5510         }
5511
5512         while (num_bytes) {
5513                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
5514                         ret = btrfs_next_leaf(root, &path);
5515                         if (ret < 0) {
5516                                 fprintf(stderr, "Error going to next leaf "
5517                                         "%d\n", ret);
5518                                 btrfs_release_path(&path);
5519                                 return ret;
5520                         } else if (ret) {
5521                                 break;
5522                         }
5523                 }
5524                 leaf = path.nodes[0];
5525                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
5526                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5527                         path.slots[0]++;
5528                         continue;
5529                 }
5530                 if (key.objectid + key.offset < bytenr) {
5531                         path.slots[0]++;
5532                         continue;
5533                 }
5534                 if (key.objectid > bytenr + num_bytes)
5535                         break;
5536
5537                 if (key.objectid == bytenr) {
5538                         if (key.offset >= num_bytes) {
5539                                 num_bytes = 0;
5540                                 break;
5541                         }
5542                         num_bytes -= key.offset;
5543                         bytenr += key.offset;
5544                 } else if (key.objectid < bytenr) {
5545                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5546                                 num_bytes = 0;
5547                                 break;
5548                         }
5549                         num_bytes = (bytenr + num_bytes) -
5550                                 (key.objectid + key.offset);
5551                         bytenr = key.objectid + key.offset;
5552                 } else {
5553                         if (key.objectid + key.offset < bytenr + num_bytes) {
5554                                 u64 new_start = key.objectid + key.offset;
5555                                 u64 new_bytes = bytenr + num_bytes - new_start;
5556
5557                                 /*
5558                                  * Weird case, the extent is in the middle of
5559                                  * our range, we'll have to search one side
5560                                  * and then the other.  Not sure if this happens
5561                                  * in real life, but no harm in coding it up
5562                                  * anyway just in case.
5563                                  */
5564                                 btrfs_release_path(&path);
5565                                 ret = check_extent_exists(root, new_start,
5566                                                           new_bytes);
5567                                 if (ret) {
5568                                         fprintf(stderr, "Right section didn't "
5569                                                 "have a record\n");
5570                                         break;
5571                                 }
5572                                 num_bytes = key.objectid - bytenr;
5573                                 goto again;
5574                         }
5575                         num_bytes = key.objectid - bytenr;
5576                 }
5577                 path.slots[0]++;
5578         }
5579         ret = 0;
5580
5581 out:
5582         if (num_bytes && !ret) {
5583                 fprintf(stderr,
5584                         "there are no extents for csum range %llu-%llu\n",
5585                         bytenr, bytenr+num_bytes);
5586                 ret = 1;
5587         }
5588
5589         btrfs_release_path(&path);
5590         return ret;
5591 }
5592
5593 static int check_csums(struct btrfs_root *root)
5594 {
5595         struct btrfs_path path;
5596         struct extent_buffer *leaf;
5597         struct btrfs_key key;
5598         u64 offset = 0, num_bytes = 0;
5599         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5600         int errors = 0;
5601         int ret;
5602         u64 data_len;
5603         unsigned long leaf_offset;
5604         bool verify_csum = !!check_data_csum;
5605
5606         root = root->fs_info->csum_root;
5607         if (!extent_buffer_uptodate(root->node)) {
5608                 fprintf(stderr, "No valid csum tree found\n");
5609                 return -ENOENT;
5610         }
5611
5612         btrfs_init_path(&path);
5613         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5614         key.type = BTRFS_EXTENT_CSUM_KEY;
5615         key.offset = 0;
5616         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5617         if (ret < 0) {
5618                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5619                 btrfs_release_path(&path);
5620                 return ret;
5621         }
5622
5623         if (ret > 0 && path.slots[0])
5624                 path.slots[0]--;
5625         ret = 0;
5626
5627         /*
5628          * For metadata dump (btrfs-image) all data is wiped so verifying data
5629          * csum is meaningless and will always report csum error.
5630          */
5631         if (check_data_csum && (btrfs_super_flags(root->fs_info->super_copy) &
5632             (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))) {
5633                 printf("skip data csum verification for metadata dump\n");
5634                 verify_csum = false;
5635         }
5636
5637         while (1) {
5638                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
5639                         ret = btrfs_next_leaf(root, &path);
5640                         if (ret < 0) {
5641                                 fprintf(stderr, "Error going to next leaf "
5642                                         "%d\n", ret);
5643                                 break;
5644                         }
5645                         if (ret)
5646                                 break;
5647                 }
5648                 leaf = path.nodes[0];
5649
5650                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
5651                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5652                         path.slots[0]++;
5653                         continue;
5654                 }
5655
5656                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
5657                               csum_size) * root->fs_info->sectorsize;
5658                 if (!verify_csum)
5659                         goto skip_csum_check;
5660                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
5661                 ret = check_extent_csums(root, key.offset, data_len,
5662                                          leaf_offset, leaf);
5663                 if (ret)
5664                         break;
5665 skip_csum_check:
5666                 if (!num_bytes) {
5667                         offset = key.offset;
5668                 } else if (key.offset != offset + num_bytes) {
5669                         ret = check_extent_exists(root, offset, num_bytes);
5670                         if (ret) {
5671                                 fprintf(stderr,
5672                 "csum exists for %llu-%llu but there is no extent record\n",
5673                                         offset, offset+num_bytes);
5674                                 errors++;
5675                         }
5676                         offset = key.offset;
5677                         num_bytes = 0;
5678                 }
5679                 num_bytes += data_len;
5680                 path.slots[0]++;
5681         }
5682
5683         btrfs_release_path(&path);
5684         return errors;
5685 }
5686
5687 static int is_dropped_key(struct btrfs_key *key,
5688                           struct btrfs_key *drop_key)
5689 {
5690         if (key->objectid < drop_key->objectid)
5691                 return 1;
5692         else if (key->objectid == drop_key->objectid) {
5693                 if (key->type < drop_key->type)
5694                         return 1;
5695                 else if (key->type == drop_key->type) {
5696                         if (key->offset < drop_key->offset)
5697                                 return 1;
5698                 }
5699         }
5700         return 0;
5701 }
5702
5703 /*
5704  * Here are the rules for FULL_BACKREF.
5705  *
5706  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5707  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5708  *      FULL_BACKREF set.
5709  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
5710  *    if it happened after the relocation occurred since we'll have dropped the
5711  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5712  *    have no real way to know for sure.
5713  *
5714  * We process the blocks one root at a time, and we start from the lowest root
5715  * objectid and go to the highest.  So we can just lookup the owner backref for
5716  * the record and if we don't find it then we know it doesn't exist and we have
5717  * a FULL BACKREF.
5718  *
5719  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5720  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5721  * be set or not and then we can check later once we've gathered all the refs.
5722  */
5723 static int calc_extent_flag(struct cache_tree *extent_cache,
5724                            struct extent_buffer *buf,
5725                            struct root_item_record *ri,
5726                            u64 *flags)
5727 {
5728         struct extent_record *rec;
5729         struct cache_extent *cache;
5730         struct tree_backref *tback;
5731         u64 owner = 0;
5732
5733         cache = lookup_cache_extent(extent_cache, buf->start, 1);
5734         /* we have added this extent before */
5735         if (!cache)
5736                 return -ENOENT;
5737
5738         rec = container_of(cache, struct extent_record, cache);
5739
5740         /*
5741          * Except file/reloc tree, we can not have
5742          * FULL BACKREF MODE
5743          */
5744         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5745                 goto normal;
5746         /*
5747          * root node
5748          */
5749         if (buf->start == ri->bytenr)
5750                 goto normal;
5751
5752         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5753                 goto full_backref;
5754
5755         owner = btrfs_header_owner(buf);
5756         if (owner == ri->objectid)
5757                 goto normal;
5758
5759         tback = find_tree_backref(rec, 0, owner);
5760         if (!tback)
5761                 goto full_backref;
5762 normal:
5763         *flags = 0;
5764         if (rec->flag_block_full_backref != FLAG_UNSET &&
5765             rec->flag_block_full_backref != 0)
5766                 rec->bad_full_backref = 1;
5767         return 0;
5768 full_backref:
5769         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5770         if (rec->flag_block_full_backref != FLAG_UNSET &&
5771             rec->flag_block_full_backref != 1)
5772                 rec->bad_full_backref = 1;
5773         return 0;
5774 }
5775
5776 static void report_mismatch_key_root(u8 key_type, u64 rootid)
5777 {
5778         fprintf(stderr, "Invalid key type(");
5779         print_key_type(stderr, 0, key_type);
5780         fprintf(stderr, ") found in root(");
5781         print_objectid(stderr, rootid, 0);
5782         fprintf(stderr, ")\n");
5783 }
5784
5785 /*
5786  * Check if the key is valid with its extent buffer.
5787  *
5788  * This is a early check in case invalid key exists in a extent buffer
5789  * This is not comprehensive yet, but should prevent wrong key/item passed
5790  * further
5791  */
5792 static int check_type_with_root(u64 rootid, u8 key_type)
5793 {
5794         switch (key_type) {
5795         /* Only valid in chunk tree */
5796         case BTRFS_DEV_ITEM_KEY:
5797         case BTRFS_CHUNK_ITEM_KEY:
5798                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
5799                         goto err;
5800                 break;
5801         /* valid in csum and log tree */
5802         case BTRFS_CSUM_TREE_OBJECTID:
5803                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
5804                       is_fstree(rootid)))
5805                         goto err;
5806                 break;
5807         case BTRFS_EXTENT_ITEM_KEY:
5808         case BTRFS_METADATA_ITEM_KEY:
5809         case BTRFS_BLOCK_GROUP_ITEM_KEY:
5810                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
5811                         goto err;
5812                 break;
5813         case BTRFS_ROOT_ITEM_KEY:
5814                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
5815                         goto err;
5816                 break;
5817         case BTRFS_DEV_EXTENT_KEY:
5818                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
5819                         goto err;
5820                 break;
5821         }
5822         return 0;
5823 err:
5824         report_mismatch_key_root(key_type, rootid);
5825         return -EINVAL;
5826 }
5827
5828 static int run_next_block(struct btrfs_root *root,
5829                           struct block_info *bits,
5830                           int bits_nr,
5831                           u64 *last,
5832                           struct cache_tree *pending,
5833                           struct cache_tree *seen,
5834                           struct cache_tree *reada,
5835                           struct cache_tree *nodes,
5836                           struct cache_tree *extent_cache,
5837                           struct cache_tree *chunk_cache,
5838                           struct rb_root *dev_cache,
5839                           struct block_group_tree *block_group_cache,
5840                           struct device_extent_tree *dev_extent_cache,
5841                           struct root_item_record *ri)
5842 {
5843         struct btrfs_fs_info *fs_info = root->fs_info;
5844         struct extent_buffer *buf;
5845         struct extent_record *rec = NULL;
5846         u64 bytenr;
5847         u32 size;
5848         u64 parent;
5849         u64 owner;
5850         u64 flags;
5851         u64 ptr;
5852         u64 gen = 0;
5853         int ret = 0;
5854         int i;
5855         int nritems;
5856         struct btrfs_key key;
5857         struct cache_extent *cache;
5858         int reada_bits;
5859
5860         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5861                                     bits_nr, &reada_bits);
5862         if (nritems == 0)
5863                 return 1;
5864
5865         if (!reada_bits) {
5866                 for (i = 0; i < nritems; i++) {
5867                         ret = add_cache_extent(reada, bits[i].start,
5868                                                bits[i].size);
5869                         if (ret == -EEXIST)
5870                                 continue;
5871
5872                         /* fixme, get the parent transid */
5873                         readahead_tree_block(fs_info, bits[i].start, 0);
5874                 }
5875         }
5876         *last = bits[0].start;
5877         bytenr = bits[0].start;
5878         size = bits[0].size;
5879
5880         cache = lookup_cache_extent(pending, bytenr, size);
5881         if (cache) {
5882                 remove_cache_extent(pending, cache);
5883                 free(cache);
5884         }
5885         cache = lookup_cache_extent(reada, bytenr, size);
5886         if (cache) {
5887                 remove_cache_extent(reada, cache);
5888                 free(cache);
5889         }
5890         cache = lookup_cache_extent(nodes, bytenr, size);
5891         if (cache) {
5892                 remove_cache_extent(nodes, cache);
5893                 free(cache);
5894         }
5895         cache = lookup_cache_extent(extent_cache, bytenr, size);
5896         if (cache) {
5897                 rec = container_of(cache, struct extent_record, cache);
5898                 gen = rec->parent_generation;
5899         }
5900
5901         /* fixme, get the real parent transid */
5902         buf = read_tree_block(root->fs_info, bytenr, gen);
5903         if (!extent_buffer_uptodate(buf)) {
5904                 record_bad_block_io(root->fs_info,
5905                                     extent_cache, bytenr, size);
5906                 goto out;
5907         }
5908
5909         nritems = btrfs_header_nritems(buf);
5910
5911         flags = 0;
5912         if (!init_extent_tree) {
5913                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5914                                        btrfs_header_level(buf), 1, NULL,
5915                                        &flags);
5916                 if (ret < 0) {
5917                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
5918                         if (ret < 0) {
5919                                 fprintf(stderr, "Couldn't calc extent flags\n");
5920                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5921                         }
5922                 }
5923         } else {
5924                 flags = 0;
5925                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
5926                 if (ret < 0) {
5927                         fprintf(stderr, "Couldn't calc extent flags\n");
5928                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5929                 }
5930         }
5931
5932         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5933                 if (ri != NULL &&
5934                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5935                     ri->objectid == btrfs_header_owner(buf)) {
5936                         /*
5937                          * Ok we got to this block from it's original owner and
5938                          * we have FULL_BACKREF set.  Relocation can leave
5939                          * converted blocks over so this is altogether possible,
5940                          * however it's not possible if the generation > the
5941                          * last snapshot, so check for this case.
5942                          */
5943                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5944                             btrfs_header_generation(buf) > ri->last_snapshot) {
5945                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5946                                 rec->bad_full_backref = 1;
5947                         }
5948                 }
5949         } else {
5950                 if (ri != NULL &&
5951                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
5952                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
5953                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5954                         rec->bad_full_backref = 1;
5955                 }
5956         }
5957
5958         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5959                 rec->flag_block_full_backref = 1;
5960                 parent = bytenr;
5961                 owner = 0;
5962         } else {
5963                 rec->flag_block_full_backref = 0;
5964                 parent = 0;
5965                 owner = btrfs_header_owner(buf);
5966         }
5967
5968         ret = check_block(root, extent_cache, buf, flags);
5969         if (ret)
5970                 goto out;
5971
5972         if (btrfs_is_leaf(buf)) {
5973                 btree_space_waste += btrfs_leaf_free_space(fs_info, buf);
5974                 for (i = 0; i < nritems; i++) {
5975                         struct btrfs_file_extent_item *fi;
5976
5977                         btrfs_item_key_to_cpu(buf, &key, i);
5978                         /*
5979                          * Check key type against the leaf owner.
5980                          * Could filter quite a lot of early error if
5981                          * owner is correct
5982                          */
5983                         if (check_type_with_root(btrfs_header_owner(buf),
5984                                                  key.type)) {
5985                                 fprintf(stderr, "ignoring invalid key\n");
5986                                 continue;
5987                         }
5988                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
5989                                 process_extent_item(root, extent_cache, buf,
5990                                                     i);
5991                                 continue;
5992                         }
5993                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5994                                 process_extent_item(root, extent_cache, buf,
5995                                                     i);
5996                                 continue;
5997                         }
5998                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
5999                                 total_csum_bytes +=
6000                                         btrfs_item_size_nr(buf, i);
6001                                 continue;
6002                         }
6003                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6004                                 process_chunk_item(chunk_cache, &key, buf, i);
6005                                 continue;
6006                         }
6007                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6008                                 process_device_item(dev_cache, &key, buf, i);
6009                                 continue;
6010                         }
6011                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6012                                 process_block_group_item(block_group_cache,
6013                                         &key, buf, i);
6014                                 continue;
6015                         }
6016                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6017                                 process_device_extent_item(dev_extent_cache,
6018                                         &key, buf, i);
6019                                 continue;
6020
6021                         }
6022                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6023 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6024                                 process_extent_ref_v0(extent_cache, buf, i);
6025 #else
6026                                 BUG();
6027 #endif
6028                                 continue;
6029                         }
6030
6031                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6032                                 ret = add_tree_backref(extent_cache,
6033                                                 key.objectid, 0, key.offset, 0);
6034                                 if (ret < 0)
6035                                         error(
6036                                 "add_tree_backref failed (leaf tree block): %s",
6037                                               strerror(-ret));
6038                                 continue;
6039                         }
6040                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6041                                 ret = add_tree_backref(extent_cache,
6042                                                 key.objectid, key.offset, 0, 0);
6043                                 if (ret < 0)
6044                                         error(
6045                                 "add_tree_backref failed (leaf shared block): %s",
6046                                               strerror(-ret));
6047                                 continue;
6048                         }
6049                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6050                                 struct btrfs_extent_data_ref *ref;
6051
6052                                 ref = btrfs_item_ptr(buf, i,
6053                                                 struct btrfs_extent_data_ref);
6054                                 add_data_backref(extent_cache,
6055                                         key.objectid, 0,
6056                                         btrfs_extent_data_ref_root(buf, ref),
6057                                         btrfs_extent_data_ref_objectid(buf,
6058                                                                        ref),
6059                                         btrfs_extent_data_ref_offset(buf, ref),
6060                                         btrfs_extent_data_ref_count(buf, ref),
6061                                         0, root->fs_info->sectorsize);
6062                                 continue;
6063                         }
6064                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6065                                 struct btrfs_shared_data_ref *ref;
6066
6067                                 ref = btrfs_item_ptr(buf, i,
6068                                                 struct btrfs_shared_data_ref);
6069                                 add_data_backref(extent_cache,
6070                                         key.objectid, key.offset, 0, 0, 0,
6071                                         btrfs_shared_data_ref_count(buf, ref),
6072                                         0, root->fs_info->sectorsize);
6073                                 continue;
6074                         }
6075                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6076                                 struct bad_item *bad;
6077
6078                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6079                                         continue;
6080                                 if (!owner)
6081                                         continue;
6082                                 bad = malloc(sizeof(struct bad_item));
6083                                 if (!bad)
6084                                         continue;
6085                                 INIT_LIST_HEAD(&bad->list);
6086                                 memcpy(&bad->key, &key,
6087                                        sizeof(struct btrfs_key));
6088                                 bad->root_id = owner;
6089                                 list_add_tail(&bad->list, &delete_items);
6090                                 continue;
6091                         }
6092                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6093                                 continue;
6094                         fi = btrfs_item_ptr(buf, i,
6095                                             struct btrfs_file_extent_item);
6096                         if (btrfs_file_extent_type(buf, fi) ==
6097                             BTRFS_FILE_EXTENT_INLINE)
6098                                 continue;
6099                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6100                                 continue;
6101
6102                         data_bytes_allocated +=
6103                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6104                         if (data_bytes_allocated < root->fs_info->sectorsize)
6105                                 abort();
6106
6107                         data_bytes_referenced +=
6108                                 btrfs_file_extent_num_bytes(buf, fi);
6109                         add_data_backref(extent_cache,
6110                                 btrfs_file_extent_disk_bytenr(buf, fi),
6111                                 parent, owner, key.objectid, key.offset -
6112                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6113                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6114                 }
6115         } else {
6116                 int level;
6117
6118                 level = btrfs_header_level(buf);
6119                 for (i = 0; i < nritems; i++) {
6120                         struct extent_record tmpl;
6121
6122                         ptr = btrfs_node_blockptr(buf, i);
6123                         size = root->fs_info->nodesize;
6124                         btrfs_node_key_to_cpu(buf, &key, i);
6125                         if (ri != NULL) {
6126                                 if ((level == ri->drop_level)
6127                                     && is_dropped_key(&key, &ri->drop_key)) {
6128                                         continue;
6129                                 }
6130                         }
6131
6132                         memset(&tmpl, 0, sizeof(tmpl));
6133                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6134                         tmpl.parent_generation =
6135                                 btrfs_node_ptr_generation(buf, i);
6136                         tmpl.start = ptr;
6137                         tmpl.nr = size;
6138                         tmpl.refs = 1;
6139                         tmpl.metadata = 1;
6140                         tmpl.max_size = size;
6141                         ret = add_extent_rec(extent_cache, &tmpl);
6142                         if (ret < 0)
6143                                 goto out;
6144
6145                         ret = add_tree_backref(extent_cache, ptr, parent,
6146                                         owner, 1);
6147                         if (ret < 0) {
6148                                 error(
6149                                 "add_tree_backref failed (non-leaf block): %s",
6150                                       strerror(-ret));
6151                                 continue;
6152                         }
6153
6154                         if (level > 1)
6155                                 add_pending(nodes, seen, ptr, size);
6156                         else
6157                                 add_pending(pending, seen, ptr, size);
6158                 }
6159                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
6160                                       nritems) * sizeof(struct btrfs_key_ptr);
6161         }
6162         total_btree_bytes += buf->len;
6163         if (fs_root_objectid(btrfs_header_owner(buf)))
6164                 total_fs_tree_bytes += buf->len;
6165         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6166                 total_extent_tree_bytes += buf->len;
6167 out:
6168         free_extent_buffer(buf);
6169         return ret;
6170 }
6171
6172 static int add_root_to_pending(struct extent_buffer *buf,
6173                                struct cache_tree *extent_cache,
6174                                struct cache_tree *pending,
6175                                struct cache_tree *seen,
6176                                struct cache_tree *nodes,
6177                                u64 objectid)
6178 {
6179         struct extent_record tmpl;
6180         int ret;
6181
6182         if (btrfs_header_level(buf) > 0)
6183                 add_pending(nodes, seen, buf->start, buf->len);
6184         else
6185                 add_pending(pending, seen, buf->start, buf->len);
6186
6187         memset(&tmpl, 0, sizeof(tmpl));
6188         tmpl.start = buf->start;
6189         tmpl.nr = buf->len;
6190         tmpl.is_root = 1;
6191         tmpl.refs = 1;
6192         tmpl.metadata = 1;
6193         tmpl.max_size = buf->len;
6194         add_extent_rec(extent_cache, &tmpl);
6195
6196         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6197             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6198                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6199                                 0, 1);
6200         else
6201                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6202                                 1);
6203         return ret;
6204 }
6205
6206 /* as we fix the tree, we might be deleting blocks that
6207  * we're tracking for repair.  This hook makes sure we
6208  * remove any backrefs for blocks as we are fixing them.
6209  */
6210 static int free_extent_hook(struct btrfs_trans_handle *trans,
6211                             struct btrfs_root *root,
6212                             u64 bytenr, u64 num_bytes, u64 parent,
6213                             u64 root_objectid, u64 owner, u64 offset,
6214                             int refs_to_drop)
6215 {
6216         struct extent_record *rec;
6217         struct cache_extent *cache;
6218         int is_data;
6219         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6220
6221         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6222         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6223         if (!cache)
6224                 return 0;
6225
6226         rec = container_of(cache, struct extent_record, cache);
6227         if (is_data) {
6228                 struct data_backref *back;
6229
6230                 back = find_data_backref(rec, parent, root_objectid, owner,
6231                                          offset, 1, bytenr, num_bytes);
6232                 if (!back)
6233                         goto out;
6234                 if (back->node.found_ref) {
6235                         back->found_ref -= refs_to_drop;
6236                         if (rec->refs)
6237                                 rec->refs -= refs_to_drop;
6238                 }
6239                 if (back->node.found_extent_tree) {
6240                         back->num_refs -= refs_to_drop;
6241                         if (rec->extent_item_refs)
6242                                 rec->extent_item_refs -= refs_to_drop;
6243                 }
6244                 if (back->found_ref == 0)
6245                         back->node.found_ref = 0;
6246                 if (back->num_refs == 0)
6247                         back->node.found_extent_tree = 0;
6248
6249                 if (!back->node.found_extent_tree && back->node.found_ref) {
6250                         rb_erase(&back->node.node, &rec->backref_tree);
6251                         free(back);
6252                 }
6253         } else {
6254                 struct tree_backref *back;
6255
6256                 back = find_tree_backref(rec, parent, root_objectid);
6257                 if (!back)
6258                         goto out;
6259                 if (back->node.found_ref) {
6260                         if (rec->refs)
6261                                 rec->refs--;
6262                         back->node.found_ref = 0;
6263                 }
6264                 if (back->node.found_extent_tree) {
6265                         if (rec->extent_item_refs)
6266                                 rec->extent_item_refs--;
6267                         back->node.found_extent_tree = 0;
6268                 }
6269                 if (!back->node.found_extent_tree && back->node.found_ref) {
6270                         rb_erase(&back->node.node, &rec->backref_tree);
6271                         free(back);
6272                 }
6273         }
6274         maybe_free_extent_rec(extent_cache, rec);
6275 out:
6276         return 0;
6277 }
6278
6279 static int delete_extent_records(struct btrfs_trans_handle *trans,
6280                                  struct btrfs_root *root,
6281                                  struct btrfs_path *path,
6282                                  u64 bytenr)
6283 {
6284         struct btrfs_key key;
6285         struct btrfs_key found_key;
6286         struct extent_buffer *leaf;
6287         int ret;
6288         int slot;
6289
6290
6291         key.objectid = bytenr;
6292         key.type = (u8)-1;
6293         key.offset = (u64)-1;
6294
6295         while (1) {
6296                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6297                                         &key, path, 0, 1);
6298                 if (ret < 0)
6299                         break;
6300
6301                 if (ret > 0) {
6302                         ret = 0;
6303                         if (path->slots[0] == 0)
6304                                 break;
6305                         path->slots[0]--;
6306                 }
6307                 ret = 0;
6308
6309                 leaf = path->nodes[0];
6310                 slot = path->slots[0];
6311
6312                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6313                 if (found_key.objectid != bytenr)
6314                         break;
6315
6316                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6317                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6318                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6319                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6320                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6321                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6322                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6323                         btrfs_release_path(path);
6324                         if (found_key.type == 0) {
6325                                 if (found_key.offset == 0)
6326                                         break;
6327                                 key.offset = found_key.offset - 1;
6328                                 key.type = found_key.type;
6329                         }
6330                         key.type = found_key.type - 1;
6331                         key.offset = (u64)-1;
6332                         continue;
6333                 }
6334
6335                 fprintf(stderr,
6336                         "repair deleting extent record: key [%llu,%u,%llu]\n",
6337                         found_key.objectid, found_key.type, found_key.offset);
6338
6339                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6340                 if (ret)
6341                         break;
6342                 btrfs_release_path(path);
6343
6344                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6345                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6346                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6347                                 found_key.offset : root->fs_info->nodesize;
6348
6349                         ret = btrfs_update_block_group(root, bytenr,
6350                                                        bytes, 0, 0);
6351                         if (ret)
6352                                 break;
6353                 }
6354         }
6355
6356         btrfs_release_path(path);
6357         return ret;
6358 }
6359
6360 /*
6361  * for a single backref, this will allocate a new extent
6362  * and add the backref to it.
6363  */
6364 static int record_extent(struct btrfs_trans_handle *trans,
6365                          struct btrfs_fs_info *info,
6366                          struct btrfs_path *path,
6367                          struct extent_record *rec,
6368                          struct extent_backref *back,
6369                          int allocated, u64 flags)
6370 {
6371         int ret = 0;
6372         struct btrfs_root *extent_root = info->extent_root;
6373         struct extent_buffer *leaf;
6374         struct btrfs_key ins_key;
6375         struct btrfs_extent_item *ei;
6376         struct data_backref *dback;
6377         struct btrfs_tree_block_info *bi;
6378
6379         if (!back->is_data)
6380                 rec->max_size = max_t(u64, rec->max_size,
6381                                     info->nodesize);
6382
6383         if (!allocated) {
6384                 u32 item_size = sizeof(*ei);
6385
6386                 if (!back->is_data)
6387                         item_size += sizeof(*bi);
6388
6389                 ins_key.objectid = rec->start;
6390                 ins_key.offset = rec->max_size;
6391                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6392
6393                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6394                                         &ins_key, item_size);
6395                 if (ret)
6396                         goto fail;
6397
6398                 leaf = path->nodes[0];
6399                 ei = btrfs_item_ptr(leaf, path->slots[0],
6400                                     struct btrfs_extent_item);
6401
6402                 btrfs_set_extent_refs(leaf, ei, 0);
6403                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6404
6405                 if (back->is_data) {
6406                         btrfs_set_extent_flags(leaf, ei,
6407                                                BTRFS_EXTENT_FLAG_DATA);
6408                 } else {
6409                         struct btrfs_disk_key copy_key;
6410
6411                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6412                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6413                                              sizeof(*bi));
6414
6415                         btrfs_set_disk_key_objectid(&copy_key,
6416                                                     rec->info_objectid);
6417                         btrfs_set_disk_key_type(&copy_key, 0);
6418                         btrfs_set_disk_key_offset(&copy_key, 0);
6419
6420                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6421                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6422
6423                         btrfs_set_extent_flags(leaf, ei,
6424                                         flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
6425                 }
6426
6427                 btrfs_mark_buffer_dirty(leaf);
6428                 ret = btrfs_update_block_group(extent_root, rec->start,
6429                                                rec->max_size, 1, 0);
6430                 if (ret)
6431                         goto fail;
6432                 btrfs_release_path(path);
6433         }
6434
6435         if (back->is_data) {
6436                 u64 parent;
6437                 int i;
6438
6439                 dback = to_data_backref(back);
6440                 if (back->full_backref)
6441                         parent = dback->parent;
6442                 else
6443                         parent = 0;
6444
6445                 for (i = 0; i < dback->found_ref; i++) {
6446                         /* if parent != 0, we're doing a full backref
6447                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6448                          * just makes the backref allocator create a data
6449                          * backref
6450                          */
6451                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6452                                                    rec->start, rec->max_size,
6453                                                    parent,
6454                                                    dback->root,
6455                                                    parent ?
6456                                                    BTRFS_FIRST_FREE_OBJECTID :
6457                                                    dback->owner,
6458                                                    dback->offset);
6459                         if (ret)
6460                                 break;
6461                 }
6462                 fprintf(stderr,
6463 "adding new data backref on %llu %s %llu owner %llu offset %llu found %d\n",
6464                         (unsigned long long)rec->start,
6465                         back->full_backref ? "parent" : "root",
6466                         back->full_backref ? (unsigned long long)parent :
6467                                              (unsigned long long)dback->root,
6468                         (unsigned long long)dback->owner,
6469                         (unsigned long long)dback->offset, dback->found_ref);
6470         } else {
6471                 u64 parent;
6472                 struct tree_backref *tback;
6473
6474                 tback = to_tree_backref(back);
6475                 if (back->full_backref)
6476                         parent = tback->parent;
6477                 else
6478                         parent = 0;
6479
6480                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6481                                            rec->start, rec->max_size,
6482                                            parent, tback->root, 0, 0);
6483                 fprintf(stderr,
6484 "adding new tree backref on start %llu len %llu parent %llu root %llu\n",
6485                         rec->start, rec->max_size, parent, tback->root);
6486         }
6487 fail:
6488         btrfs_release_path(path);
6489         return ret;
6490 }
6491
6492 static struct extent_entry *find_entry(struct list_head *entries,
6493                                        u64 bytenr, u64 bytes)
6494 {
6495         struct extent_entry *entry = NULL;
6496
6497         list_for_each_entry(entry, entries, list) {
6498                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6499                         return entry;
6500         }
6501
6502         return NULL;
6503 }
6504
6505 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6506 {
6507         struct extent_entry *entry, *best = NULL, *prev = NULL;
6508
6509         list_for_each_entry(entry, entries, list) {
6510                 /*
6511                  * If there are as many broken entries as entries then we know
6512                  * not to trust this particular entry.
6513                  */
6514                 if (entry->broken == entry->count)
6515                         continue;
6516
6517                 /*
6518                  * Special case, when there are only two entries and 'best' is
6519                  * the first one
6520                  */
6521                 if (!prev) {
6522                         best = entry;
6523                         prev = entry;
6524                         continue;
6525                 }
6526
6527                 /*
6528                  * If our current entry == best then we can't be sure our best
6529                  * is really the best, so we need to keep searching.
6530                  */
6531                 if (best && best->count == entry->count) {
6532                         prev = entry;
6533                         best = NULL;
6534                         continue;
6535                 }
6536
6537                 /* Prev == entry, not good enough, have to keep searching */
6538                 if (!prev->broken && prev->count == entry->count)
6539                         continue;
6540
6541                 if (!best)
6542                         best = (prev->count > entry->count) ? prev : entry;
6543                 else if (best->count < entry->count)
6544                         best = entry;
6545                 prev = entry;
6546         }
6547
6548         return best;
6549 }
6550
6551 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6552                       struct data_backref *dback, struct extent_entry *entry)
6553 {
6554         struct btrfs_trans_handle *trans;
6555         struct btrfs_root *root;
6556         struct btrfs_file_extent_item *fi;
6557         struct extent_buffer *leaf;
6558         struct btrfs_key key;
6559         u64 bytenr, bytes;
6560         int ret, err;
6561
6562         key.objectid = dback->root;
6563         key.type = BTRFS_ROOT_ITEM_KEY;
6564         key.offset = (u64)-1;
6565         root = btrfs_read_fs_root(info, &key);
6566         if (IS_ERR(root)) {
6567                 fprintf(stderr, "Couldn't find root for our ref\n");
6568                 return -EINVAL;
6569         }
6570
6571         /*
6572          * The backref points to the original offset of the extent if it was
6573          * split, so we need to search down to the offset we have and then walk
6574          * forward until we find the backref we're looking for.
6575          */
6576         key.objectid = dback->owner;
6577         key.type = BTRFS_EXTENT_DATA_KEY;
6578         key.offset = dback->offset;
6579         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6580         if (ret < 0) {
6581                 fprintf(stderr, "Error looking up ref %d\n", ret);
6582                 return ret;
6583         }
6584
6585         while (1) {
6586                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6587                         ret = btrfs_next_leaf(root, path);
6588                         if (ret) {
6589                                 fprintf(stderr, "Couldn't find our ref, next\n");
6590                                 return -EINVAL;
6591                         }
6592                 }
6593                 leaf = path->nodes[0];
6594                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6595                 if (key.objectid != dback->owner ||
6596                     key.type != BTRFS_EXTENT_DATA_KEY) {
6597                         fprintf(stderr, "Couldn't find our ref, search\n");
6598                         return -EINVAL;
6599                 }
6600                 fi = btrfs_item_ptr(leaf, path->slots[0],
6601                                     struct btrfs_file_extent_item);
6602                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6603                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6604
6605                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6606                         break;
6607                 path->slots[0]++;
6608         }
6609
6610         btrfs_release_path(path);
6611
6612         trans = btrfs_start_transaction(root, 1);
6613         if (IS_ERR(trans))
6614                 return PTR_ERR(trans);
6615
6616         /*
6617          * Ok we have the key of the file extent we want to fix, now we can cow
6618          * down to the thing and fix it.
6619          */
6620         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6621         if (ret < 0) {
6622                 fprintf(stderr, "error cowing down to ref [%llu,%u,%llu]: %d\n",
6623                         key.objectid, key.type, key.offset, ret);
6624                 goto out;
6625         }
6626         if (ret > 0) {
6627                 fprintf(stderr,
6628                 "well that's odd, we just found this key [%llu,%u,%llu]\n",
6629                         key.objectid, key.type, key.offset);
6630                 ret = -EINVAL;
6631                 goto out;
6632         }
6633         leaf = path->nodes[0];
6634         fi = btrfs_item_ptr(leaf, path->slots[0],
6635                             struct btrfs_file_extent_item);
6636
6637         if (btrfs_file_extent_compression(leaf, fi) &&
6638             dback->disk_bytenr != entry->bytenr) {
6639                 fprintf(stderr,
6640 "ref doesn't match the record start and is compressed, please take a btrfs-image of this file system and send it to a btrfs developer so they can complete this functionality for bytenr %llu\n",
6641                         dback->disk_bytenr);
6642                 ret = -EINVAL;
6643                 goto out;
6644         }
6645
6646         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6647                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6648         } else if (dback->disk_bytenr > entry->bytenr) {
6649                 u64 off_diff, offset;
6650
6651                 off_diff = dback->disk_bytenr - entry->bytenr;
6652                 offset = btrfs_file_extent_offset(leaf, fi);
6653                 if (dback->disk_bytenr + offset +
6654                     btrfs_file_extent_num_bytes(leaf, fi) >
6655                     entry->bytenr + entry->bytes) {
6656                         fprintf(stderr,
6657 "ref is past the entry end, please take a btrfs-image of this file system and send it to a btrfs developer, ref %llu\n",
6658                                 dback->disk_bytenr);
6659                         ret = -EINVAL;
6660                         goto out;
6661                 }
6662                 offset += off_diff;
6663                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6664                 btrfs_set_file_extent_offset(leaf, fi, offset);
6665         } else if (dback->disk_bytenr < entry->bytenr) {
6666                 u64 offset;
6667
6668                 offset = btrfs_file_extent_offset(leaf, fi);
6669                 if (dback->disk_bytenr + offset < entry->bytenr) {
6670                         fprintf(stderr,
6671 "ref is before the entry start, please take a btrfs-image of this file system and send it to a btrfs developer, ref %llu\n",
6672                                 dback->disk_bytenr);
6673                         ret = -EINVAL;
6674                         goto out;
6675                 }
6676
6677                 offset += dback->disk_bytenr;
6678                 offset -= entry->bytenr;
6679                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6680                 btrfs_set_file_extent_offset(leaf, fi, offset);
6681         }
6682
6683         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6684
6685         /*
6686          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6687          * only do this if we aren't using compression, otherwise it's a
6688          * trickier case.
6689          */
6690         if (!btrfs_file_extent_compression(leaf, fi))
6691                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6692         else
6693                 printf("ram bytes may be wrong?\n");
6694         btrfs_mark_buffer_dirty(leaf);
6695 out:
6696         err = btrfs_commit_transaction(trans, root);
6697         btrfs_release_path(path);
6698         return ret ? ret : err;
6699 }
6700
6701 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6702                            struct extent_record *rec)
6703 {
6704         struct extent_backref *back, *tmp;
6705         struct data_backref *dback;
6706         struct extent_entry *entry, *best = NULL;
6707         LIST_HEAD(entries);
6708         int nr_entries = 0;
6709         int broken_entries = 0;
6710         int ret = 0;
6711         short mismatch = 0;
6712
6713         /*
6714          * Metadata is easy and the backrefs should always agree on bytenr and
6715          * size, if not we've got bigger issues.
6716          */
6717         if (rec->metadata)
6718                 return 0;
6719
6720         rbtree_postorder_for_each_entry_safe(back, tmp,
6721                                              &rec->backref_tree, node) {
6722                 if (back->full_backref || !back->is_data)
6723                         continue;
6724
6725                 dback = to_data_backref(back);
6726
6727                 /*
6728                  * We only pay attention to backrefs that we found a real
6729                  * backref for.
6730                  */
6731                 if (dback->found_ref == 0)
6732                         continue;
6733
6734                 /*
6735                  * For now we only catch when the bytes don't match, not the
6736                  * bytenr.  We can easily do this at the same time, but I want
6737                  * to have a fs image to test on before we just add repair
6738                  * functionality willy-nilly so we know we won't screw up the
6739                  * repair.
6740                  */
6741
6742                 entry = find_entry(&entries, dback->disk_bytenr,
6743                                    dback->bytes);
6744                 if (!entry) {
6745                         entry = malloc(sizeof(struct extent_entry));
6746                         if (!entry) {
6747                                 ret = -ENOMEM;
6748                                 goto out;
6749                         }
6750                         memset(entry, 0, sizeof(*entry));
6751                         entry->bytenr = dback->disk_bytenr;
6752                         entry->bytes = dback->bytes;
6753                         list_add_tail(&entry->list, &entries);
6754                         nr_entries++;
6755                 }
6756
6757                 /*
6758                  * If we only have one entry we may think the entries agree when
6759                  * in reality they don't so we have to do some extra checking.
6760                  */
6761                 if (dback->disk_bytenr != rec->start ||
6762                     dback->bytes != rec->nr || back->broken)
6763                         mismatch = 1;
6764
6765                 if (back->broken) {
6766                         entry->broken++;
6767                         broken_entries++;
6768                 }
6769
6770                 entry->count++;
6771         }
6772
6773         /* Yay all the backrefs agree, carry on good sir */
6774         if (nr_entries <= 1 && !mismatch)
6775                 goto out;
6776
6777         fprintf(stderr,
6778                 "attempting to repair backref discrepency for bytenr %llu\n",
6779                 rec->start);
6780
6781         /*
6782          * First we want to see if the backrefs can agree amongst themselves who
6783          * is right, so figure out which one of the entries has the highest
6784          * count.
6785          */
6786         best = find_most_right_entry(&entries);
6787
6788         /*
6789          * Ok so we may have an even split between what the backrefs think, so
6790          * this is where we use the extent ref to see what it thinks.
6791          */
6792         if (!best) {
6793                 entry = find_entry(&entries, rec->start, rec->nr);
6794                 if (!entry && (!broken_entries || !rec->found_rec)) {
6795                         fprintf(stderr,
6796 "backrefs don't agree with each other and extent record doesn't agree with anybody, so we can't fix bytenr %llu bytes %llu\n",
6797                                 rec->start, rec->nr);
6798                         ret = -EINVAL;
6799                         goto out;
6800                 } else if (!entry) {
6801                         /*
6802                          * Ok our backrefs were broken, we'll assume this is the
6803                          * correct value and add an entry for this range.
6804                          */
6805                         entry = malloc(sizeof(struct extent_entry));
6806                         if (!entry) {
6807                                 ret = -ENOMEM;
6808                                 goto out;
6809                         }
6810                         memset(entry, 0, sizeof(*entry));
6811                         entry->bytenr = rec->start;
6812                         entry->bytes = rec->nr;
6813                         list_add_tail(&entry->list, &entries);
6814                         nr_entries++;
6815                 }
6816                 entry->count++;
6817                 best = find_most_right_entry(&entries);
6818                 if (!best) {
6819                         fprintf(stderr,
6820 "backrefs and extent record evenly split on who is right, this is going to require user input to fix bytenr %llu bytes %llu\n",
6821                                 rec->start, rec->nr);
6822                         ret = -EINVAL;
6823                         goto out;
6824                 }
6825         }
6826
6827         /*
6828          * I don't think this can happen currently as we'll abort() if we catch
6829          * this case higher up, but in case somebody removes that we still can't
6830          * deal with it properly here yet, so just bail out if that's the case.
6831          */
6832         if (best->bytenr != rec->start) {
6833                 fprintf(stderr,
6834 "extent start and backref starts don't match, please use btrfs-image on this file system and send it to a btrfs developer so they can make fsck fix this particular case.  bytenr is %llu, bytes is %llu\n",
6835                         rec->start, rec->nr);
6836                 ret = -EINVAL;
6837                 goto out;
6838         }
6839
6840         /*
6841          * Ok great we all agreed on an extent record, let's go find the real
6842          * references and fix up the ones that don't match.
6843          */
6844         rbtree_postorder_for_each_entry_safe(back, tmp,
6845                                              &rec->backref_tree, node) {
6846                 if (back->full_backref || !back->is_data)
6847                         continue;
6848
6849                 dback = to_data_backref(back);
6850
6851                 /*
6852                  * Still ignoring backrefs that don't have a real ref attached
6853                  * to them.
6854                  */
6855                 if (dback->found_ref == 0)
6856                         continue;
6857
6858                 if (dback->bytes == best->bytes &&
6859                     dback->disk_bytenr == best->bytenr)
6860                         continue;
6861
6862                 ret = repair_ref(info, path, dback, best);
6863                 if (ret)
6864                         goto out;
6865         }
6866
6867         /*
6868          * Ok we messed with the actual refs, which means we need to drop our
6869          * entire cache and go back and rescan.  I know this is a huge pain and
6870          * adds a lot of extra work, but it's the only way to be safe.  Once all
6871          * the backrefs agree we may not need to do anything to the extent
6872          * record itself.
6873          */
6874         ret = -EAGAIN;
6875 out:
6876         while (!list_empty(&entries)) {
6877                 entry = list_entry(entries.next, struct extent_entry, list);
6878                 list_del_init(&entry->list);
6879                 free(entry);
6880         }
6881         return ret;
6882 }
6883
6884 static int process_duplicates(struct cache_tree *extent_cache,
6885                               struct extent_record *rec)
6886 {
6887         struct extent_record *good, *tmp;
6888         struct cache_extent *cache;
6889         int ret;
6890
6891         /*
6892          * If we found a extent record for this extent then return, or if we
6893          * have more than one duplicate we are likely going to need to delete
6894          * something.
6895          */
6896         if (rec->found_rec || rec->num_duplicates > 1)
6897                 return 0;
6898
6899         /* Shouldn't happen but just in case */
6900         BUG_ON(!rec->num_duplicates);
6901
6902         /*
6903          * So this happens if we end up with a backref that doesn't match the
6904          * actual extent entry.  So either the backref is bad or the extent
6905          * entry is bad.  Either way we want to have the extent_record actually
6906          * reflect what we found in the extent_tree, so we need to take the
6907          * duplicate out and use that as the extent_record since the only way we
6908          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6909          */
6910         remove_cache_extent(extent_cache, &rec->cache);
6911
6912         good = to_extent_record(rec->dups.next);
6913         list_del_init(&good->list);
6914         INIT_LIST_HEAD(&good->backrefs);
6915         INIT_LIST_HEAD(&good->dups);
6916         good->cache.start = good->start;
6917         good->cache.size = good->nr;
6918         good->content_checked = 0;
6919         good->owner_ref_checked = 0;
6920         good->num_duplicates = 0;
6921         good->refs = rec->refs;
6922         list_splice_init(&rec->backrefs, &good->backrefs);
6923         while (1) {
6924                 cache = lookup_cache_extent(extent_cache, good->start,
6925                                             good->nr);
6926                 if (!cache)
6927                         break;
6928                 tmp = container_of(cache, struct extent_record, cache);
6929
6930                 /*
6931                  * If we find another overlapping extent and it's found_rec is
6932                  * set then it's a duplicate and we need to try and delete
6933                  * something.
6934                  */
6935                 if (tmp->found_rec || tmp->num_duplicates > 0) {
6936                         if (list_empty(&good->list))
6937                                 list_add_tail(&good->list,
6938                                               &duplicate_extents);
6939                         good->num_duplicates += tmp->num_duplicates + 1;
6940                         list_splice_init(&tmp->dups, &good->dups);
6941                         list_del_init(&tmp->list);
6942                         list_add_tail(&tmp->list, &good->dups);
6943                         remove_cache_extent(extent_cache, &tmp->cache);
6944                         continue;
6945                 }
6946
6947                 /*
6948                  * Ok we have another non extent item backed extent rec, so lets
6949                  * just add it to this extent and carry on like we did above.
6950                  */
6951                 good->refs += tmp->refs;
6952                 list_splice_init(&tmp->backrefs, &good->backrefs);
6953                 remove_cache_extent(extent_cache, &tmp->cache);
6954                 free(tmp);
6955         }
6956         ret = insert_cache_extent(extent_cache, &good->cache);
6957         BUG_ON(ret);
6958         free(rec);
6959         return good->num_duplicates ? 0 : 1;
6960 }
6961
6962 static int delete_duplicate_records(struct btrfs_root *root,
6963                                     struct extent_record *rec)
6964 {
6965         struct btrfs_trans_handle *trans;
6966         LIST_HEAD(delete_list);
6967         struct btrfs_path path;
6968         struct extent_record *tmp, *good, *n;
6969         int nr_del = 0;
6970         int ret = 0, err;
6971         struct btrfs_key key;
6972
6973         btrfs_init_path(&path);
6974
6975         good = rec;
6976         /* Find the record that covers all of the duplicates. */
6977         list_for_each_entry(tmp, &rec->dups, list) {
6978                 if (good->start < tmp->start)
6979                         continue;
6980                 if (good->nr > tmp->nr)
6981                         continue;
6982
6983                 if (tmp->start + tmp->nr < good->start + good->nr) {
6984                         fprintf(stderr,
6985 "Ok we have overlapping extents that aren't completely covered by each other, this is going to require more careful thought. The extents are [%llu-%llu] and [%llu-%llu]\n",
6986                                 tmp->start, tmp->nr, good->start, good->nr);
6987                         abort();
6988                 }
6989                 good = tmp;
6990         }
6991
6992         if (good != rec)
6993                 list_add_tail(&rec->list, &delete_list);
6994
6995         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
6996                 if (tmp == good)
6997                         continue;
6998                 list_move_tail(&tmp->list, &delete_list);
6999         }
7000
7001         root = root->fs_info->extent_root;
7002         trans = btrfs_start_transaction(root, 1);
7003         if (IS_ERR(trans)) {
7004                 ret = PTR_ERR(trans);
7005                 goto out;
7006         }
7007
7008         list_for_each_entry(tmp, &delete_list, list) {
7009                 if (tmp->found_rec == 0)
7010                         continue;
7011                 key.objectid = tmp->start;
7012                 key.type = BTRFS_EXTENT_ITEM_KEY;
7013                 key.offset = tmp->nr;
7014
7015                 /* Shouldn't happen but just in case */
7016                 if (tmp->metadata) {
7017                         fprintf(stderr,
7018 "well this shouldn't happen, extent record overlaps but is metadata? [%llu, %llu]\n",
7019                                 tmp->start, tmp->nr);
7020                         abort();
7021                 }
7022
7023                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
7024                 if (ret) {
7025                         if (ret > 0)
7026                                 ret = -EINVAL;
7027                         break;
7028                 }
7029                 ret = btrfs_del_item(trans, root, &path);
7030                 if (ret)
7031                         break;
7032                 btrfs_release_path(&path);
7033                 nr_del++;
7034         }
7035         err = btrfs_commit_transaction(trans, root);
7036         if (err && !ret)
7037                 ret = err;
7038 out:
7039         while (!list_empty(&delete_list)) {
7040                 tmp = to_extent_record(delete_list.next);
7041                 list_del_init(&tmp->list);
7042                 if (tmp == rec)
7043                         continue;
7044                 free(tmp);
7045         }
7046
7047         while (!list_empty(&rec->dups)) {
7048                 tmp = to_extent_record(rec->dups.next);
7049                 list_del_init(&tmp->list);
7050                 free(tmp);
7051         }
7052
7053         btrfs_release_path(&path);
7054
7055         if (!ret && !nr_del)
7056                 rec->num_duplicates = 0;
7057
7058         return ret ? ret : nr_del;
7059 }
7060
7061 static int find_possible_backrefs(struct btrfs_fs_info *info,
7062                                   struct btrfs_path *path,
7063                                   struct cache_tree *extent_cache,
7064                                   struct extent_record *rec)
7065 {
7066         struct btrfs_root *root;
7067         struct extent_backref *back, *tmp;
7068         struct data_backref *dback;
7069         struct cache_extent *cache;
7070         struct btrfs_file_extent_item *fi;
7071         struct btrfs_key key;
7072         u64 bytenr, bytes;
7073         int ret;
7074
7075         rbtree_postorder_for_each_entry_safe(back, tmp,
7076                                              &rec->backref_tree, node) {
7077                 /* Don't care about full backrefs (poor unloved backrefs) */
7078                 if (back->full_backref || !back->is_data)
7079                         continue;
7080
7081                 dback = to_data_backref(back);
7082
7083                 /* We found this one, we don't need to do a lookup */
7084                 if (dback->found_ref)
7085                         continue;
7086
7087                 key.objectid = dback->root;
7088                 key.type = BTRFS_ROOT_ITEM_KEY;
7089                 key.offset = (u64)-1;
7090
7091                 root = btrfs_read_fs_root(info, &key);
7092
7093                 /* No root, definitely a bad ref, skip */
7094                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7095                         continue;
7096                 /* Other err, exit */
7097                 if (IS_ERR(root))
7098                         return PTR_ERR(root);
7099
7100                 key.objectid = dback->owner;
7101                 key.type = BTRFS_EXTENT_DATA_KEY;
7102                 key.offset = dback->offset;
7103                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7104                 if (ret) {
7105                         btrfs_release_path(path);
7106                         if (ret < 0)
7107                                 return ret;
7108                         /* Didn't find it, we can carry on */
7109                         ret = 0;
7110                         continue;
7111                 }
7112
7113                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7114                                     struct btrfs_file_extent_item);
7115                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7116                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7117                 btrfs_release_path(path);
7118                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7119                 if (cache) {
7120                         struct extent_record *tmp;
7121
7122                         tmp = container_of(cache, struct extent_record, cache);
7123
7124                         /*
7125                          * If we found an extent record for the bytenr for this
7126                          * particular backref then we can't add it to our
7127                          * current extent record.  We only want to add backrefs
7128                          * that don't have a corresponding extent item in the
7129                          * extent tree since they likely belong to this record
7130                          * and we need to fix it if it doesn't match bytenrs.
7131                          */
7132                         if  (tmp->found_rec)
7133                                 continue;
7134                 }
7135
7136                 dback->found_ref += 1;
7137                 dback->disk_bytenr = bytenr;
7138                 dback->bytes = bytes;
7139
7140                 /*
7141                  * Set this so the verify backref code knows not to trust the
7142                  * values in this backref.
7143                  */
7144                 back->broken = 1;
7145         }
7146
7147         return 0;
7148 }
7149
7150 /*
7151  * Record orphan data ref into corresponding root.
7152  *
7153  * Return 0 if the extent item contains data ref and recorded.
7154  * Return 1 if the extent item contains no useful data ref
7155  *   On that case, it may contains only shared_dataref or metadata backref
7156  *   or the file extent exists(this should be handled by the extent bytenr
7157  *   recovery routine)
7158  * Return <0 if something goes wrong.
7159  */
7160 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7161                                       struct extent_record *rec)
7162 {
7163         struct btrfs_key key;
7164         struct btrfs_root *dest_root;
7165         struct extent_backref *back, *tmp;
7166         struct data_backref *dback;
7167         struct orphan_data_extent *orphan;
7168         struct btrfs_path path;
7169         int recorded_data_ref = 0;
7170         int ret = 0;
7171
7172         if (rec->metadata)
7173                 return 1;
7174         btrfs_init_path(&path);
7175         rbtree_postorder_for_each_entry_safe(back, tmp,
7176                                              &rec->backref_tree, node) {
7177                 if (back->full_backref || !back->is_data ||
7178                     !back->found_extent_tree)
7179                         continue;
7180                 dback = to_data_backref(back);
7181                 if (dback->found_ref)
7182                         continue;
7183                 key.objectid = dback->root;
7184                 key.type = BTRFS_ROOT_ITEM_KEY;
7185                 key.offset = (u64)-1;
7186
7187                 dest_root = btrfs_read_fs_root(fs_info, &key);
7188
7189                 /* For non-exist root we just skip it */
7190                 if (IS_ERR(dest_root) || !dest_root)
7191                         continue;
7192
7193                 key.objectid = dback->owner;
7194                 key.type = BTRFS_EXTENT_DATA_KEY;
7195                 key.offset = dback->offset;
7196
7197                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
7198                 btrfs_release_path(&path);
7199                 /*
7200                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7201                  * we need to record it for inode/file extent rebuild.
7202                  * For ret > 0, we record it only for file extent rebuild.
7203                  * For ret == 0, the file extent exists but only bytenr
7204                  * mismatch, let the original bytenr fix routine to handle,
7205                  * don't record it.
7206                  */
7207                 if (ret == 0)
7208                         continue;
7209                 ret = 0;
7210                 orphan = malloc(sizeof(*orphan));
7211                 if (!orphan) {
7212                         ret = -ENOMEM;
7213                         goto out;
7214                 }
7215                 INIT_LIST_HEAD(&orphan->list);
7216                 orphan->root = dback->root;
7217                 orphan->objectid = dback->owner;
7218                 orphan->offset = dback->offset;
7219                 orphan->disk_bytenr = rec->cache.start;
7220                 orphan->disk_len = rec->cache.size;
7221                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7222                 recorded_data_ref = 1;
7223         }
7224 out:
7225         btrfs_release_path(&path);
7226         if (!ret)
7227                 return !recorded_data_ref;
7228         else
7229                 return ret;
7230 }
7231
7232 /*
7233  * when an incorrect extent item is found, this will delete
7234  * all of the existing entries for it and recreate them
7235  * based on what the tree scan found.
7236  */
7237 static int fixup_extent_refs(struct btrfs_fs_info *info,
7238                              struct cache_tree *extent_cache,
7239                              struct extent_record *rec)
7240 {
7241         struct btrfs_trans_handle *trans = NULL;
7242         int ret;
7243         struct btrfs_path path;
7244         struct cache_extent *cache;
7245         struct extent_backref *back, *tmp;
7246         int allocated = 0;
7247         u64 flags = 0;
7248
7249         if (rec->flag_block_full_backref)
7250                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7251
7252         btrfs_init_path(&path);
7253         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7254                 /*
7255                  * Sometimes the backrefs themselves are so broken they don't
7256                  * get attached to any meaningful rec, so first go back and
7257                  * check any of our backrefs that we couldn't find and throw
7258                  * them into the list if we find the backref so that
7259                  * verify_backrefs can figure out what to do.
7260                  */
7261                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
7262                 if (ret < 0)
7263                         goto out;
7264         }
7265
7266         /* step one, make sure all of the backrefs agree */
7267         ret = verify_backrefs(info, &path, rec);
7268         if (ret < 0)
7269                 goto out;
7270
7271         trans = btrfs_start_transaction(info->extent_root, 1);
7272         if (IS_ERR(trans)) {
7273                 ret = PTR_ERR(trans);
7274                 goto out;
7275         }
7276
7277         /* step two, delete all the existing records */
7278         ret = delete_extent_records(trans, info->extent_root, &path,
7279                                     rec->start);
7280
7281         if (ret < 0)
7282                 goto out;
7283
7284         /* was this block corrupt?  If so, don't add references to it */
7285         cache = lookup_cache_extent(info->corrupt_blocks,
7286                                     rec->start, rec->max_size);
7287         if (cache) {
7288                 ret = 0;
7289                 goto out;
7290         }
7291
7292         /* step three, recreate all the refs we did find */
7293         rbtree_postorder_for_each_entry_safe(back, tmp,
7294                                              &rec->backref_tree, node) {
7295                 /*
7296                  * if we didn't find any references, don't create a
7297                  * new extent record
7298                  */
7299                 if (!back->found_ref)
7300                         continue;
7301
7302                 rec->bad_full_backref = 0;
7303                 ret = record_extent(trans, info, &path, rec, back, allocated,
7304                                     flags);
7305                 allocated = 1;
7306
7307                 if (ret)
7308                         goto out;
7309         }
7310 out:
7311         if (trans) {
7312                 int err = btrfs_commit_transaction(trans, info->extent_root);
7313
7314                 if (!ret)
7315                         ret = err;
7316         }
7317
7318         if (!ret)
7319                 fprintf(stderr, "Repaired extent references for %llu\n",
7320                                 (unsigned long long)rec->start);
7321
7322         btrfs_release_path(&path);
7323         return ret;
7324 }
7325
7326 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7327                               struct extent_record *rec)
7328 {
7329         struct btrfs_trans_handle *trans;
7330         struct btrfs_root *root = fs_info->extent_root;
7331         struct btrfs_path path;
7332         struct btrfs_extent_item *ei;
7333         struct btrfs_key key;
7334         u64 flags;
7335         int ret = 0;
7336
7337         key.objectid = rec->start;
7338         if (rec->metadata) {
7339                 key.type = BTRFS_METADATA_ITEM_KEY;
7340                 key.offset = rec->info_level;
7341         } else {
7342                 key.type = BTRFS_EXTENT_ITEM_KEY;
7343                 key.offset = rec->max_size;
7344         }
7345
7346         trans = btrfs_start_transaction(root, 0);
7347         if (IS_ERR(trans))
7348                 return PTR_ERR(trans);
7349
7350         btrfs_init_path(&path);
7351         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
7352         if (ret < 0) {
7353                 btrfs_release_path(&path);
7354                 btrfs_commit_transaction(trans, root);
7355                 return ret;
7356         } else if (ret) {
7357                 fprintf(stderr, "Didn't find extent for %llu\n",
7358                         (unsigned long long)rec->start);
7359                 btrfs_release_path(&path);
7360                 btrfs_commit_transaction(trans, root);
7361                 return -ENOENT;
7362         }
7363
7364         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
7365                             struct btrfs_extent_item);
7366         flags = btrfs_extent_flags(path.nodes[0], ei);
7367         if (rec->flag_block_full_backref) {
7368                 fprintf(stderr, "setting full backref on %llu\n",
7369                         (unsigned long long)key.objectid);
7370                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7371         } else {
7372                 fprintf(stderr, "clearing full backref on %llu\n",
7373                         (unsigned long long)key.objectid);
7374                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7375         }
7376         btrfs_set_extent_flags(path.nodes[0], ei, flags);
7377         btrfs_mark_buffer_dirty(path.nodes[0]);
7378         btrfs_release_path(&path);
7379         ret = btrfs_commit_transaction(trans, root);
7380         if (!ret)
7381                 fprintf(stderr, "Repaired extent flags for %llu\n",
7382                                 (unsigned long long)rec->start);
7383
7384         return ret;
7385 }
7386
7387 /* right now we only prune from the extent allocation tree */
7388 static int prune_one_block(struct btrfs_trans_handle *trans,
7389                            struct btrfs_fs_info *info,
7390                            struct btrfs_corrupt_block *corrupt)
7391 {
7392         int ret;
7393         struct btrfs_path path;
7394         struct extent_buffer *eb;
7395         u64 found;
7396         int slot;
7397         int nritems;
7398         int level = corrupt->level + 1;
7399
7400         btrfs_init_path(&path);
7401 again:
7402         /* we want to stop at the parent to our busted block */
7403         path.lowest_level = level;
7404
7405         ret = btrfs_search_slot(trans, info->extent_root,
7406                                 &corrupt->key, &path, -1, 1);
7407
7408         if (ret < 0)
7409                 goto out;
7410
7411         eb = path.nodes[level];
7412         if (!eb) {
7413                 ret = -ENOENT;
7414                 goto out;
7415         }
7416
7417         /*
7418          * hopefully the search gave us the block we want to prune,
7419          * lets try that first
7420          */
7421         slot = path.slots[level];
7422         found =  btrfs_node_blockptr(eb, slot);
7423         if (found == corrupt->cache.start)
7424                 goto del_ptr;
7425
7426         nritems = btrfs_header_nritems(eb);
7427
7428         /* the search failed, lets scan this node and hope we find it */
7429         for (slot = 0; slot < nritems; slot++) {
7430                 found =  btrfs_node_blockptr(eb, slot);
7431                 if (found == corrupt->cache.start)
7432                         goto del_ptr;
7433         }
7434         /*
7435          * We couldn't find the bad block.
7436          * TODO: search all the nodes for pointers to this block
7437          */
7438         if (eb == info->extent_root->node) {
7439                 ret = -ENOENT;
7440                 goto out;
7441         } else {
7442                 level++;
7443                 btrfs_release_path(&path);
7444                 goto again;
7445         }
7446
7447 del_ptr:
7448         printk("deleting pointer to block %llu\n", corrupt->cache.start);
7449         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
7450
7451 out:
7452         btrfs_release_path(&path);
7453         return ret;
7454 }
7455
7456 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7457 {
7458         struct btrfs_trans_handle *trans = NULL;
7459         struct cache_extent *cache;
7460         struct btrfs_corrupt_block *corrupt;
7461
7462         while (1) {
7463                 cache = search_cache_extent(info->corrupt_blocks, 0);
7464                 if (!cache)
7465                         break;
7466                 if (!trans) {
7467                         trans = btrfs_start_transaction(info->extent_root, 1);
7468                         if (IS_ERR(trans))
7469                                 return PTR_ERR(trans);
7470                 }
7471                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7472                 prune_one_block(trans, info, corrupt);
7473                 remove_cache_extent(info->corrupt_blocks, cache);
7474         }
7475         if (trans)
7476                 return btrfs_commit_transaction(trans, info->extent_root);
7477         return 0;
7478 }
7479
7480 static int check_extent_refs(struct btrfs_root *root,
7481                              struct cache_tree *extent_cache)
7482 {
7483         struct extent_record *rec;
7484         struct cache_extent *cache;
7485         int ret = 0;
7486         int had_dups = 0;
7487         int err = 0;
7488
7489         if (repair) {
7490                 /*
7491                  * if we're doing a repair, we have to make sure
7492                  * we don't allocate from the problem extents.
7493                  * In the worst case, this will be all the
7494                  * extents in the FS
7495                  */
7496                 cache = search_cache_extent(extent_cache, 0);
7497                 while (cache) {
7498                         rec = container_of(cache, struct extent_record, cache);
7499                         set_extent_dirty(root->fs_info->excluded_extents,
7500                                          rec->start,
7501                                          rec->start + rec->max_size - 1);
7502                         cache = next_cache_extent(cache);
7503                 }
7504
7505                 /* pin down all the corrupted blocks too */
7506                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7507                 while (cache) {
7508                         set_extent_dirty(root->fs_info->excluded_extents,
7509                                          cache->start,
7510                                          cache->start + cache->size - 1);
7511                         cache = next_cache_extent(cache);
7512                 }
7513                 prune_corrupt_blocks(root->fs_info);
7514                 reset_cached_block_groups(root->fs_info);
7515         }
7516
7517         reset_cached_block_groups(root->fs_info);
7518
7519         /*
7520          * We need to delete any duplicate entries we find first otherwise we
7521          * could mess up the extent tree when we have backrefs that actually
7522          * belong to a different extent item and not the weird duplicate one.
7523          */
7524         while (repair && !list_empty(&duplicate_extents)) {
7525                 rec = to_extent_record(duplicate_extents.next);
7526                 list_del_init(&rec->list);
7527
7528                 /* Sometimes we can find a backref before we find an actual
7529                  * extent, so we need to process it a little bit to see if there
7530                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7531                  * if this is a backref screwup.  If we need to delete stuff
7532                  * process_duplicates() will return 0, otherwise it will return
7533                  * 1 and we
7534                  */
7535                 if (process_duplicates(extent_cache, rec))
7536                         continue;
7537                 ret = delete_duplicate_records(root, rec);
7538                 if (ret < 0)
7539                         return ret;
7540                 /*
7541                  * delete_duplicate_records will return the number of entries
7542                  * deleted, so if it's greater than 0 then we know we actually
7543                  * did something and we need to remove.
7544                  */
7545                 if (ret)
7546                         had_dups = 1;
7547         }
7548
7549         if (had_dups)
7550                 return -EAGAIN;
7551
7552         while (1) {
7553                 int cur_err = 0;
7554                 int fix = 0;
7555
7556                 cache = search_cache_extent(extent_cache, 0);
7557                 if (!cache)
7558                         break;
7559                 rec = container_of(cache, struct extent_record, cache);
7560                 if (rec->num_duplicates) {
7561                         fprintf(stderr,
7562                                 "extent item %llu has multiple extent items\n",
7563                                 (unsigned long long)rec->start);
7564                         cur_err = 1;
7565                 }
7566
7567                 if (rec->refs != rec->extent_item_refs) {
7568                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7569                                 (unsigned long long)rec->start,
7570                                 (unsigned long long)rec->nr);
7571                         fprintf(stderr, "extent item %llu, found %llu\n",
7572                                 (unsigned long long)rec->extent_item_refs,
7573                                 (unsigned long long)rec->refs);
7574                         ret = record_orphan_data_extents(root->fs_info, rec);
7575                         if (ret < 0)
7576                                 goto repair_abort;
7577                         fix = ret;
7578                         cur_err = 1;
7579                 }
7580                 if (all_backpointers_checked(rec, 1)) {
7581                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7582                                 (unsigned long long)rec->start,
7583                                 (unsigned long long)rec->nr);
7584                         fix = 1;
7585                         cur_err = 1;
7586                 }
7587                 if (!rec->owner_ref_checked) {
7588                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7589                                 (unsigned long long)rec->start,
7590                                 (unsigned long long)rec->nr);
7591                         fix = 1;
7592                         cur_err = 1;
7593                 }
7594
7595                 if (repair && fix) {
7596                         ret = fixup_extent_refs(root->fs_info, extent_cache,
7597                                                 rec);
7598                         if (ret)
7599                                 goto repair_abort;
7600                 }
7601
7602
7603                 if (rec->bad_full_backref) {
7604                         fprintf(stderr, "bad full backref, on [%llu]\n",
7605                                 (unsigned long long)rec->start);
7606                         if (repair) {
7607                                 ret = fixup_extent_flags(root->fs_info, rec);
7608                                 if (ret)
7609                                         goto repair_abort;
7610                                 fix = 1;
7611                         }
7612                         cur_err = 1;
7613                 }
7614                 /*
7615                  * Although it's not a extent ref's problem, we reuse this
7616                  * routine for error reporting.
7617                  * No repair function yet.
7618                  */
7619                 if (rec->crossing_stripes) {
7620                         fprintf(stderr,
7621                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7622                                 rec->start, rec->start + rec->max_size);
7623                         cur_err = 1;
7624                 }
7625
7626                 if (rec->wrong_chunk_type) {
7627                         fprintf(stderr,
7628                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7629                                 rec->start, rec->start + rec->max_size);
7630                         cur_err = 1;
7631                 }
7632
7633                 err = cur_err;
7634                 remove_cache_extent(extent_cache, cache);
7635                 free_all_extent_backrefs(rec);
7636                 if (!init_extent_tree && repair && (!cur_err || fix))
7637                         clear_extent_dirty(root->fs_info->excluded_extents,
7638                                            rec->start,
7639                                            rec->start + rec->max_size - 1);
7640                 free(rec);
7641         }
7642 repair_abort:
7643         if (repair) {
7644                 if (ret && ret != -EAGAIN) {
7645                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7646                         exit(1);
7647                 } else if (!ret) {
7648                         struct btrfs_trans_handle *trans;
7649
7650                         root = root->fs_info->extent_root;
7651                         trans = btrfs_start_transaction(root, 1);
7652                         if (IS_ERR(trans)) {
7653                                 ret = PTR_ERR(trans);
7654                                 goto repair_abort;
7655                         }
7656
7657                         ret = btrfs_fix_block_accounting(trans, root);
7658                         if (ret)
7659                                 goto repair_abort;
7660                         ret = btrfs_commit_transaction(trans, root);
7661                         if (ret)
7662                                 goto repair_abort;
7663                 }
7664                 return ret;
7665         }
7666
7667         if (err)
7668                 err = -EIO;
7669         return err;
7670 }
7671
7672 /*
7673  * Check the chunk with its block group/dev list ref:
7674  * Return 0 if all refs seems valid.
7675  * Return 1 if part of refs seems valid, need later check for rebuild ref
7676  * like missing block group and needs to search extent tree to rebuild them.
7677  * Return -1 if essential refs are missing and unable to rebuild.
7678  */
7679 static int check_chunk_refs(struct chunk_record *chunk_rec,
7680                             struct block_group_tree *block_group_cache,
7681                             struct device_extent_tree *dev_extent_cache,
7682                             int silent)
7683 {
7684         struct cache_extent *block_group_item;
7685         struct block_group_record *block_group_rec;
7686         struct cache_extent *dev_extent_item;
7687         struct device_extent_record *dev_extent_rec;
7688         u64 devid;
7689         u64 offset;
7690         u64 length;
7691         int metadump_v2 = 0;
7692         int i;
7693         int ret = 0;
7694
7695         block_group_item = lookup_cache_extent(&block_group_cache->tree,
7696                                                chunk_rec->offset,
7697                                                chunk_rec->length);
7698         if (block_group_item) {
7699                 block_group_rec = container_of(block_group_item,
7700                                                struct block_group_record,
7701                                                cache);
7702                 if (chunk_rec->length != block_group_rec->offset ||
7703                     chunk_rec->offset != block_group_rec->objectid ||
7704                     (!metadump_v2 &&
7705                      chunk_rec->type_flags != block_group_rec->flags)) {
7706                         if (!silent)
7707                                 fprintf(stderr,
7708                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7709                                         chunk_rec->objectid,
7710                                         chunk_rec->type,
7711                                         chunk_rec->offset,
7712                                         chunk_rec->length,
7713                                         chunk_rec->offset,
7714                                         chunk_rec->type_flags,
7715                                         block_group_rec->objectid,
7716                                         block_group_rec->type,
7717                                         block_group_rec->offset,
7718                                         block_group_rec->offset,
7719                                         block_group_rec->objectid,
7720                                         block_group_rec->flags);
7721                         ret = -1;
7722                 } else {
7723                         list_del_init(&block_group_rec->list);
7724                         chunk_rec->bg_rec = block_group_rec;
7725                 }
7726         } else {
7727                 if (!silent)
7728                         fprintf(stderr,
7729                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7730                                 chunk_rec->objectid,
7731                                 chunk_rec->type,
7732                                 chunk_rec->offset,
7733                                 chunk_rec->length,
7734                                 chunk_rec->offset,
7735                                 chunk_rec->type_flags);
7736                 ret = 1;
7737         }
7738
7739         if (metadump_v2)
7740                 return ret;
7741
7742         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7743                                     chunk_rec->num_stripes);
7744         for (i = 0; i < chunk_rec->num_stripes; ++i) {
7745                 devid = chunk_rec->stripes[i].devid;
7746                 offset = chunk_rec->stripes[i].offset;
7747                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7748                                                        devid, offset, length);
7749                 if (dev_extent_item) {
7750                         dev_extent_rec = container_of(dev_extent_item,
7751                                                 struct device_extent_record,
7752                                                 cache);
7753                         if (dev_extent_rec->objectid != devid ||
7754                             dev_extent_rec->offset != offset ||
7755                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
7756                             dev_extent_rec->length != length) {
7757                                 if (!silent)
7758                                         fprintf(stderr,
7759                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7760                                                 chunk_rec->objectid,
7761                                                 chunk_rec->type,
7762                                                 chunk_rec->offset,
7763                                                 chunk_rec->stripes[i].devid,
7764                                                 chunk_rec->stripes[i].offset,
7765                                                 dev_extent_rec->objectid,
7766                                                 dev_extent_rec->offset,
7767                                                 dev_extent_rec->length);
7768                                 ret = -1;
7769                         } else {
7770                                 list_move(&dev_extent_rec->chunk_list,
7771                                           &chunk_rec->dextents);
7772                         }
7773                 } else {
7774                         if (!silent)
7775                                 fprintf(stderr,
7776                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7777                                         chunk_rec->objectid,
7778                                         chunk_rec->type,
7779                                         chunk_rec->offset,
7780                                         chunk_rec->stripes[i].devid,
7781                                         chunk_rec->stripes[i].offset);
7782                         ret = -1;
7783                 }
7784         }
7785         return ret;
7786 }
7787
7788 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7789 int check_chunks(struct cache_tree *chunk_cache,
7790                  struct block_group_tree *block_group_cache,
7791                  struct device_extent_tree *dev_extent_cache,
7792                  struct list_head *good, struct list_head *bad,
7793                  struct list_head *rebuild, int silent)
7794 {
7795         struct cache_extent *chunk_item;
7796         struct chunk_record *chunk_rec;
7797         struct block_group_record *bg_rec;
7798         struct device_extent_record *dext_rec;
7799         int err;
7800         int ret = 0;
7801
7802         chunk_item = first_cache_extent(chunk_cache);
7803         while (chunk_item) {
7804                 chunk_rec = container_of(chunk_item, struct chunk_record,
7805                                          cache);
7806                 err = check_chunk_refs(chunk_rec, block_group_cache,
7807                                        dev_extent_cache, silent);
7808                 if (err < 0)
7809                         ret = err;
7810                 if (err == 0 && good)
7811                         list_add_tail(&chunk_rec->list, good);
7812                 if (err > 0 && rebuild)
7813                         list_add_tail(&chunk_rec->list, rebuild);
7814                 if (err < 0 && bad)
7815                         list_add_tail(&chunk_rec->list, bad);
7816                 chunk_item = next_cache_extent(chunk_item);
7817         }
7818
7819         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7820                 if (!silent)
7821                         fprintf(stderr,
7822                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7823                                 bg_rec->objectid,
7824                                 bg_rec->offset,
7825                                 bg_rec->flags);
7826                 if (!ret)
7827                         ret = 1;
7828         }
7829
7830         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7831                             chunk_list) {
7832                 if (!silent)
7833                         fprintf(stderr,
7834                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7835                                 dext_rec->objectid,
7836                                 dext_rec->offset,
7837                                 dext_rec->length);
7838                 if (!ret)
7839                         ret = 1;
7840         }
7841         return ret;
7842 }
7843
7844
7845 static int check_device_used(struct device_record *dev_rec,
7846                              struct device_extent_tree *dext_cache)
7847 {
7848         struct cache_extent *cache;
7849         struct device_extent_record *dev_extent_rec;
7850         u64 total_byte = 0;
7851
7852         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7853         while (cache) {
7854                 dev_extent_rec = container_of(cache,
7855                                               struct device_extent_record,
7856                                               cache);
7857                 if (dev_extent_rec->objectid != dev_rec->devid)
7858                         break;
7859
7860                 list_del_init(&dev_extent_rec->device_list);
7861                 total_byte += dev_extent_rec->length;
7862                 cache = next_cache_extent(cache);
7863         }
7864
7865         if (total_byte != dev_rec->byte_used) {
7866                 fprintf(stderr,
7867                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7868                         total_byte, dev_rec->byte_used, dev_rec->objectid,
7869                         dev_rec->type, dev_rec->offset);
7870                 return -1;
7871         } else {
7872                 return 0;
7873         }
7874 }
7875
7876 /*
7877  * Unlike device size alignment check above, some super total_bytes check
7878  * failure can lead to mount failure for newer kernel.
7879  *
7880  * So this function will return the error for a fatal super total_bytes problem.
7881  */
7882 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
7883 {
7884         struct btrfs_device *dev;
7885         struct list_head *dev_list = &fs_info->fs_devices->devices;
7886         u64 total_bytes = 0;
7887         u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
7888
7889         list_for_each_entry(dev, dev_list, dev_list)
7890                 total_bytes += dev->total_bytes;
7891
7892         /* Important check, which can cause unmountable fs */
7893         if (super_bytes < total_bytes) {
7894                 error("super total bytes %llu smaller than real device(s) size %llu",
7895                         super_bytes, total_bytes);
7896                 error("mounting this fs may fail for newer kernels");
7897                 error("this can be fixed by 'btrfs rescue fix-device-size'");
7898                 return false;
7899         }
7900
7901         /*
7902          * Optional check, just to make everything aligned and match with each
7903          * other.
7904          *
7905          * For a btrfs-image restored fs, we don't need to check it anyway.
7906          */
7907         if (btrfs_super_flags(fs_info->super_copy) &
7908             (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
7909                 return true;
7910         if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
7911             !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
7912             super_bytes != total_bytes) {
7913                 warning("minor unaligned/mismatch device size detected");
7914                 warning(
7915                 "recommended to use 'btrfs rescue fix-device-size' to fix it");
7916         }
7917         return true;
7918 }
7919
7920 /* check btrfs_dev_item -> btrfs_dev_extent */
7921 static int check_devices(struct rb_root *dev_cache,
7922                          struct device_extent_tree *dev_extent_cache)
7923 {
7924         struct rb_node *dev_node;
7925         struct device_record *dev_rec;
7926         struct device_extent_record *dext_rec;
7927         int err;
7928         int ret = 0;
7929
7930         dev_node = rb_first(dev_cache);
7931         while (dev_node) {
7932                 dev_rec = container_of(dev_node, struct device_record, node);
7933                 err = check_device_used(dev_rec, dev_extent_cache);
7934                 if (err)
7935                         ret = err;
7936
7937                 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
7938                                          global_info->sectorsize);
7939                 dev_node = rb_next(dev_node);
7940         }
7941         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7942                             device_list) {
7943                 fprintf(stderr,
7944                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7945                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
7946                 if (!ret)
7947                         ret = 1;
7948         }
7949         return ret;
7950 }
7951
7952 static int add_root_item_to_list(struct list_head *head,
7953                                   u64 objectid, u64 bytenr, u64 last_snapshot,
7954                                   u8 level, u8 drop_level,
7955                                   struct btrfs_key *drop_key)
7956 {
7957         struct root_item_record *ri_rec;
7958
7959         ri_rec = malloc(sizeof(*ri_rec));
7960         if (!ri_rec)
7961                 return -ENOMEM;
7962         ri_rec->bytenr = bytenr;
7963         ri_rec->objectid = objectid;
7964         ri_rec->level = level;
7965         ri_rec->drop_level = drop_level;
7966         ri_rec->last_snapshot = last_snapshot;
7967         if (drop_key)
7968                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
7969         list_add_tail(&ri_rec->list, head);
7970
7971         return 0;
7972 }
7973
7974 static void free_root_item_list(struct list_head *list)
7975 {
7976         struct root_item_record *ri_rec;
7977
7978         while (!list_empty(list)) {
7979                 ri_rec = list_first_entry(list, struct root_item_record,
7980                                           list);
7981                 list_del_init(&ri_rec->list);
7982                 free(ri_rec);
7983         }
7984 }
7985
7986 static int deal_root_from_list(struct list_head *list,
7987                                struct btrfs_root *root,
7988                                struct block_info *bits,
7989                                int bits_nr,
7990                                struct cache_tree *pending,
7991                                struct cache_tree *seen,
7992                                struct cache_tree *reada,
7993                                struct cache_tree *nodes,
7994                                struct cache_tree *extent_cache,
7995                                struct cache_tree *chunk_cache,
7996                                struct rb_root *dev_cache,
7997                                struct block_group_tree *block_group_cache,
7998                                struct device_extent_tree *dev_extent_cache)
7999 {
8000         int ret = 0;
8001         u64 last;
8002
8003         while (!list_empty(list)) {
8004                 struct root_item_record *rec;
8005                 struct extent_buffer *buf;
8006
8007                 rec = list_entry(list->next,
8008                                  struct root_item_record, list);
8009                 last = 0;
8010                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
8011                 if (!extent_buffer_uptodate(buf)) {
8012                         free_extent_buffer(buf);
8013                         ret = -EIO;
8014                         break;
8015                 }
8016                 ret = add_root_to_pending(buf, extent_cache, pending,
8017                                     seen, nodes, rec->objectid);
8018                 if (ret < 0)
8019                         break;
8020                 /*
8021                  * To rebuild extent tree, we need deal with snapshot
8022                  * one by one, otherwise we deal with node firstly which
8023                  * can maximize readahead.
8024                  */
8025                 while (1) {
8026                         ret = run_next_block(root, bits, bits_nr, &last,
8027                                              pending, seen, reada, nodes,
8028                                              extent_cache, chunk_cache,
8029                                              dev_cache, block_group_cache,
8030                                              dev_extent_cache, rec);
8031                         if (ret != 0)
8032                                 break;
8033                 }
8034                 free_extent_buffer(buf);
8035                 list_del(&rec->list);
8036                 free(rec);
8037                 if (ret < 0)
8038                         break;
8039         }
8040         while (ret >= 0) {
8041                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8042                                      reada, nodes, extent_cache, chunk_cache,
8043                                      dev_cache, block_group_cache,
8044                                      dev_extent_cache, NULL);
8045                 if (ret != 0) {
8046                         if (ret > 0)
8047                                 ret = 0;
8048                         break;
8049                 }
8050         }
8051         return ret;
8052 }
8053
8054 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
8055 {
8056         struct rb_root dev_cache;
8057         struct cache_tree chunk_cache;
8058         struct block_group_tree block_group_cache;
8059         struct device_extent_tree dev_extent_cache;
8060         struct cache_tree extent_cache;
8061         struct cache_tree seen;
8062         struct cache_tree pending;
8063         struct cache_tree reada;
8064         struct cache_tree nodes;
8065         struct extent_io_tree excluded_extents;
8066         struct cache_tree corrupt_blocks;
8067         struct btrfs_path path;
8068         struct btrfs_key key;
8069         struct btrfs_key found_key;
8070         int ret, err = 0;
8071         struct block_info *bits;
8072         int bits_nr;
8073         struct extent_buffer *leaf;
8074         int slot;
8075         struct btrfs_root_item ri;
8076         struct list_head dropping_trees;
8077         struct list_head normal_trees;
8078         struct btrfs_root *root1;
8079         struct btrfs_root *root;
8080         u64 objectid;
8081         u8 level;
8082
8083         root = fs_info->fs_root;
8084         dev_cache = RB_ROOT;
8085         cache_tree_init(&chunk_cache);
8086         block_group_tree_init(&block_group_cache);
8087         device_extent_tree_init(&dev_extent_cache);
8088
8089         cache_tree_init(&extent_cache);
8090         cache_tree_init(&seen);
8091         cache_tree_init(&pending);
8092         cache_tree_init(&nodes);
8093         cache_tree_init(&reada);
8094         cache_tree_init(&corrupt_blocks);
8095         extent_io_tree_init(&excluded_extents);
8096         INIT_LIST_HEAD(&dropping_trees);
8097         INIT_LIST_HEAD(&normal_trees);
8098
8099         if (repair) {
8100                 fs_info->excluded_extents = &excluded_extents;
8101                 fs_info->fsck_extent_cache = &extent_cache;
8102                 fs_info->free_extent_hook = free_extent_hook;
8103                 fs_info->corrupt_blocks = &corrupt_blocks;
8104         }
8105
8106         bits_nr = 1024;
8107         bits = malloc(bits_nr * sizeof(struct block_info));
8108         if (!bits) {
8109                 perror("malloc");
8110                 exit(1);
8111         }
8112
8113         if (ctx.progress_enabled) {
8114                 ctx.tp = TASK_EXTENTS;
8115                 task_start(ctx.info);
8116         }
8117
8118 again:
8119         root1 = fs_info->tree_root;
8120         level = btrfs_header_level(root1->node);
8121         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8122                                     root1->node->start, 0, level, 0, NULL);
8123         if (ret < 0)
8124                 goto out;
8125         root1 = fs_info->chunk_root;
8126         level = btrfs_header_level(root1->node);
8127         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8128                                     root1->node->start, 0, level, 0, NULL);
8129         if (ret < 0)
8130                 goto out;
8131         btrfs_init_path(&path);
8132         key.offset = 0;
8133         key.objectid = 0;
8134         key.type = BTRFS_ROOT_ITEM_KEY;
8135         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
8136         if (ret < 0)
8137                 goto out;
8138         while (1) {
8139                 leaf = path.nodes[0];
8140                 slot = path.slots[0];
8141                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8142                         ret = btrfs_next_leaf(root, &path);
8143                         if (ret != 0)
8144                                 break;
8145                         leaf = path.nodes[0];
8146                         slot = path.slots[0];
8147                 }
8148                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8149                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8150                         unsigned long offset;
8151                         u64 last_snapshot;
8152
8153                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8154                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8155                         last_snapshot = btrfs_root_last_snapshot(&ri);
8156                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8157                                 level = btrfs_root_level(&ri);
8158                                 ret = add_root_item_to_list(&normal_trees,
8159                                                 found_key.objectid,
8160                                                 btrfs_root_bytenr(&ri),
8161                                                 last_snapshot, level,
8162                                                 0, NULL);
8163                                 if (ret < 0)
8164                                         goto out;
8165                         } else {
8166                                 level = btrfs_root_level(&ri);
8167                                 objectid = found_key.objectid;
8168                                 btrfs_disk_key_to_cpu(&found_key,
8169                                                       &ri.drop_progress);
8170                                 ret = add_root_item_to_list(&dropping_trees,
8171                                                 objectid,
8172                                                 btrfs_root_bytenr(&ri),
8173                                                 last_snapshot, level,
8174                                                 ri.drop_level, &found_key);
8175                                 if (ret < 0)
8176                                         goto out;
8177                         }
8178                 }
8179                 path.slots[0]++;
8180         }
8181         btrfs_release_path(&path);
8182
8183         /*
8184          * check_block can return -EAGAIN if it fixes something, please keep
8185          * this in mind when dealing with return values from these functions, if
8186          * we get -EAGAIN we want to fall through and restart the loop.
8187          */
8188         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8189                                   &seen, &reada, &nodes, &extent_cache,
8190                                   &chunk_cache, &dev_cache, &block_group_cache,
8191                                   &dev_extent_cache);
8192         if (ret < 0) {
8193                 if (ret == -EAGAIN)
8194                         goto loop;
8195                 goto out;
8196         }
8197         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8198                                   &pending, &seen, &reada, &nodes,
8199                                   &extent_cache, &chunk_cache, &dev_cache,
8200                                   &block_group_cache, &dev_extent_cache);
8201         if (ret < 0) {
8202                 if (ret == -EAGAIN)
8203                         goto loop;
8204                 goto out;
8205         }
8206
8207         ret = check_chunks(&chunk_cache, &block_group_cache,
8208                            &dev_extent_cache, NULL, NULL, NULL, 0);
8209         if (ret) {
8210                 if (ret == -EAGAIN)
8211                         goto loop;
8212                 err = ret;
8213         }
8214
8215         ret = check_extent_refs(root, &extent_cache);
8216         if (ret < 0) {
8217                 if (ret == -EAGAIN)
8218                         goto loop;
8219                 goto out;
8220         }
8221
8222         ret = check_devices(&dev_cache, &dev_extent_cache);
8223         if (ret && err)
8224                 ret = err;
8225
8226 out:
8227         task_stop(ctx.info);
8228         if (repair) {
8229                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
8230                 extent_io_tree_cleanup(&excluded_extents);
8231                 fs_info->fsck_extent_cache = NULL;
8232                 fs_info->free_extent_hook = NULL;
8233                 fs_info->corrupt_blocks = NULL;
8234                 fs_info->excluded_extents = NULL;
8235         }
8236         free(bits);
8237         free_chunk_cache_tree(&chunk_cache);
8238         free_device_cache_tree(&dev_cache);
8239         free_block_group_tree(&block_group_cache);
8240         free_device_extent_tree(&dev_extent_cache);
8241         free_extent_cache_tree(&seen);
8242         free_extent_cache_tree(&pending);
8243         free_extent_cache_tree(&reada);
8244         free_extent_cache_tree(&nodes);
8245         free_root_item_list(&normal_trees);
8246         free_root_item_list(&dropping_trees);
8247         return ret;
8248 loop:
8249         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
8250         free_extent_cache_tree(&seen);
8251         free_extent_cache_tree(&pending);
8252         free_extent_cache_tree(&reada);
8253         free_extent_cache_tree(&nodes);
8254         free_chunk_cache_tree(&chunk_cache);
8255         free_block_group_tree(&block_group_cache);
8256         free_device_cache_tree(&dev_cache);
8257         free_device_extent_tree(&dev_extent_cache);
8258         free_extent_record_cache(&extent_cache);
8259         free_root_item_list(&normal_trees);
8260         free_root_item_list(&dropping_trees);
8261         extent_io_tree_cleanup(&excluded_extents);
8262         goto again;
8263 }
8264
8265 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
8266 {
8267         int ret;
8268
8269         if (!ctx.progress_enabled)
8270                 fprintf(stderr, "checking extents\n");
8271         if (check_mode == CHECK_MODE_LOWMEM)
8272                 ret = check_chunks_and_extents_lowmem(fs_info);
8273         else
8274                 ret = check_chunks_and_extents(fs_info);
8275
8276         /* Also repair device size related problems */
8277         if (repair && !ret) {
8278                 ret = btrfs_fix_device_and_super_size(fs_info);
8279                 if (ret > 0)
8280                         ret = 0;
8281         }
8282         return ret;
8283 }
8284
8285 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8286                            struct btrfs_root *root, int overwrite)
8287 {
8288         struct extent_buffer *c;
8289         struct extent_buffer *old = root->node;
8290         int level;
8291         int ret;
8292         struct btrfs_disk_key disk_key = {0,0,0};
8293
8294         level = 0;
8295
8296         if (overwrite) {
8297                 c = old;
8298                 extent_buffer_get(c);
8299                 goto init;
8300         }
8301         c = btrfs_alloc_free_block(trans, root,
8302                                    root->fs_info->nodesize,
8303                                    root->root_key.objectid,
8304                                    &disk_key, level, 0, 0);
8305         if (IS_ERR(c)) {
8306                 c = old;
8307                 extent_buffer_get(c);
8308                 overwrite = 1;
8309         }
8310 init:
8311         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8312         btrfs_set_header_level(c, level);
8313         btrfs_set_header_bytenr(c, c->start);
8314         btrfs_set_header_generation(c, trans->transid);
8315         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8316         btrfs_set_header_owner(c, root->root_key.objectid);
8317
8318         write_extent_buffer(c, root->fs_info->fsid,
8319                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8320
8321         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8322                             btrfs_header_chunk_tree_uuid(c),
8323                             BTRFS_UUID_SIZE);
8324
8325         btrfs_mark_buffer_dirty(c);
8326         /*
8327          * this case can happen in the following case:
8328          *
8329          * 1.overwrite previous root.
8330          *
8331          * 2.reinit reloc data root, this is because we skip pin
8332          * down reloc data tree before which means we can allocate
8333          * same block bytenr here.
8334          */
8335         if (old->start == c->start) {
8336                 btrfs_set_root_generation(&root->root_item,
8337                                           trans->transid);
8338                 root->root_item.level = btrfs_header_level(root->node);
8339                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8340                                         &root->root_key, &root->root_item);
8341                 if (ret) {
8342                         free_extent_buffer(c);
8343                         return ret;
8344                 }
8345         }
8346         free_extent_buffer(old);
8347         root->node = c;
8348         add_root_to_dirty_list(root);
8349         return 0;
8350 }
8351
8352 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8353                                 struct extent_buffer *eb, int tree_root)
8354 {
8355         struct extent_buffer *tmp;
8356         struct btrfs_root_item *ri;
8357         struct btrfs_key key;
8358         u64 bytenr;
8359         int level = btrfs_header_level(eb);
8360         int nritems;
8361         int ret;
8362         int i;
8363
8364         /*
8365          * If we have pinned this block before, don't pin it again.
8366          * This can not only avoid forever loop with broken filesystem
8367          * but also give us some speedups.
8368          */
8369         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8370                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8371                 return 0;
8372
8373         btrfs_pin_extent(fs_info, eb->start, eb->len);
8374
8375         nritems = btrfs_header_nritems(eb);
8376         for (i = 0; i < nritems; i++) {
8377                 if (level == 0) {
8378                         btrfs_item_key_to_cpu(eb, &key, i);
8379                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8380                                 continue;
8381                         /* Skip the extent root and reloc roots */
8382                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8383                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8384                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8385                                 continue;
8386                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8387                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8388
8389                         /*
8390                          * If at any point we start needing the real root we
8391                          * will have to build a stump root for the root we are
8392                          * in, but for now this doesn't actually use the root so
8393                          * just pass in extent_root.
8394                          */
8395                         tmp = read_tree_block(fs_info, bytenr, 0);
8396                         if (!extent_buffer_uptodate(tmp)) {
8397                                 fprintf(stderr, "Error reading root block\n");
8398                                 return -EIO;
8399                         }
8400                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8401                         free_extent_buffer(tmp);
8402                         if (ret)
8403                                 return ret;
8404                 } else {
8405                         bytenr = btrfs_node_blockptr(eb, i);
8406
8407                         /* If we aren't the tree root don't read the block */
8408                         if (level == 1 && !tree_root) {
8409                                 btrfs_pin_extent(fs_info, bytenr,
8410                                                 fs_info->nodesize);
8411                                 continue;
8412                         }
8413
8414                         tmp = read_tree_block(fs_info, bytenr, 0);
8415                         if (!extent_buffer_uptodate(tmp)) {
8416                                 fprintf(stderr, "Error reading tree block\n");
8417                                 return -EIO;
8418                         }
8419                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8420                         free_extent_buffer(tmp);
8421                         if (ret)
8422                                 return ret;
8423                 }
8424         }
8425
8426         return 0;
8427 }
8428
8429 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8430 {
8431         int ret;
8432
8433         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8434         if (ret)
8435                 return ret;
8436
8437         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8438 }
8439
8440 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8441 {
8442         struct btrfs_block_group_cache *cache;
8443         struct btrfs_path path;
8444         struct extent_buffer *leaf;
8445         struct btrfs_chunk *chunk;
8446         struct btrfs_key key;
8447         int ret;
8448         u64 start;
8449
8450         btrfs_init_path(&path);
8451         key.objectid = 0;
8452         key.type = BTRFS_CHUNK_ITEM_KEY;
8453         key.offset = 0;
8454         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
8455         if (ret < 0) {
8456                 btrfs_release_path(&path);
8457                 return ret;
8458         }
8459
8460         /*
8461          * We do this in case the block groups were screwed up and had alloc
8462          * bits that aren't actually set on the chunks.  This happens with
8463          * restored images every time and could happen in real life I guess.
8464          */
8465         fs_info->avail_data_alloc_bits = 0;
8466         fs_info->avail_metadata_alloc_bits = 0;
8467         fs_info->avail_system_alloc_bits = 0;
8468
8469         /* First we need to create the in-memory block groups */
8470         while (1) {
8471                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8472                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
8473                         if (ret < 0) {
8474                                 btrfs_release_path(&path);
8475                                 return ret;
8476                         }
8477                         if (ret) {
8478                                 ret = 0;
8479                                 break;
8480                         }
8481                 }
8482                 leaf = path.nodes[0];
8483                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8484                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8485                         path.slots[0]++;
8486                         continue;
8487                 }
8488
8489                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
8490                 btrfs_add_block_group(fs_info, 0,
8491                                       btrfs_chunk_type(leaf, chunk), key.offset,
8492                                       btrfs_chunk_length(leaf, chunk));
8493                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8494                                  key.offset + btrfs_chunk_length(leaf, chunk));
8495                 path.slots[0]++;
8496         }
8497         start = 0;
8498         while (1) {
8499                 cache = btrfs_lookup_first_block_group(fs_info, start);
8500                 if (!cache)
8501                         break;
8502                 cache->cached = 1;
8503                 start = cache->key.objectid + cache->key.offset;
8504         }
8505
8506         btrfs_release_path(&path);
8507         return 0;
8508 }
8509
8510 static int reset_balance(struct btrfs_trans_handle *trans,
8511                          struct btrfs_fs_info *fs_info)
8512 {
8513         struct btrfs_root *root = fs_info->tree_root;
8514         struct btrfs_path path;
8515         struct extent_buffer *leaf;
8516         struct btrfs_key key;
8517         int del_slot, del_nr = 0;
8518         int ret;
8519         int found = 0;
8520
8521         btrfs_init_path(&path);
8522         key.objectid = BTRFS_BALANCE_OBJECTID;
8523         key.type = BTRFS_BALANCE_ITEM_KEY;
8524         key.offset = 0;
8525         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8526         if (ret) {
8527                 if (ret > 0)
8528                         ret = 0;
8529                 if (!ret)
8530                         goto reinit_data_reloc;
8531                 else
8532                         goto out;
8533         }
8534
8535         ret = btrfs_del_item(trans, root, &path);
8536         if (ret)
8537                 goto out;
8538         btrfs_release_path(&path);
8539
8540         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8541         key.type = BTRFS_ROOT_ITEM_KEY;
8542         key.offset = 0;
8543         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8544         if (ret < 0)
8545                 goto out;
8546         while (1) {
8547                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8548                         if (!found)
8549                                 break;
8550
8551                         if (del_nr) {
8552                                 ret = btrfs_del_items(trans, root, &path,
8553                                                       del_slot, del_nr);
8554                                 del_nr = 0;
8555                                 if (ret)
8556                                         goto out;
8557                         }
8558                         key.offset++;
8559                         btrfs_release_path(&path);
8560
8561                         found = 0;
8562                         ret = btrfs_search_slot(trans, root, &key, &path,
8563                                                 -1, 1);
8564                         if (ret < 0)
8565                                 goto out;
8566                         continue;
8567                 }
8568                 found = 1;
8569                 leaf = path.nodes[0];
8570                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8571                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8572                         break;
8573                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8574                         path.slots[0]++;
8575                         continue;
8576                 }
8577                 if (!del_nr) {
8578                         del_slot = path.slots[0];
8579                         del_nr = 1;
8580                 } else {
8581                         del_nr++;
8582                 }
8583                 path.slots[0]++;
8584         }
8585
8586         if (del_nr) {
8587                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
8588                 if (ret)
8589                         goto out;
8590         }
8591         btrfs_release_path(&path);
8592
8593 reinit_data_reloc:
8594         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8595         key.type = BTRFS_ROOT_ITEM_KEY;
8596         key.offset = (u64)-1;
8597         root = btrfs_read_fs_root(fs_info, &key);
8598         if (IS_ERR(root)) {
8599                 fprintf(stderr, "Error reading data reloc tree\n");
8600                 ret = PTR_ERR(root);
8601                 goto out;
8602         }
8603         record_root_in_trans(trans, root);
8604         ret = btrfs_fsck_reinit_root(trans, root, 0);
8605         if (ret)
8606                 goto out;
8607         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8608 out:
8609         btrfs_release_path(&path);
8610         return ret;
8611 }
8612
8613 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8614                               struct btrfs_fs_info *fs_info)
8615 {
8616         u64 start = 0;
8617         int ret;
8618
8619         /*
8620          * The only reason we don't do this is because right now we're just
8621          * walking the trees we find and pinning down their bytes, we don't look
8622          * at any of the leaves.  In order to do mixed groups we'd have to check
8623          * the leaves of any fs roots and pin down the bytes for any file
8624          * extents we find.  Not hard but why do it if we don't have to?
8625          */
8626         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
8627                 fprintf(stderr, "We don't support re-initing the extent tree "
8628                         "for mixed block groups yet, please notify a btrfs "
8629                         "developer you want to do this so they can add this "
8630                         "functionality.\n");
8631                 return -EINVAL;
8632         }
8633
8634         /*
8635          * first we need to walk all of the trees except the extent tree and pin
8636          * down the bytes that are in use so we don't overwrite any existing
8637          * metadata.
8638          */
8639         ret = pin_metadata_blocks(fs_info);
8640         if (ret) {
8641                 fprintf(stderr, "error pinning down used bytes\n");
8642                 return ret;
8643         }
8644
8645         /*
8646          * Need to drop all the block groups since we're going to recreate all
8647          * of them again.
8648          */
8649         btrfs_free_block_groups(fs_info);
8650         ret = reset_block_groups(fs_info);
8651         if (ret) {
8652                 fprintf(stderr, "error resetting the block groups\n");
8653                 return ret;
8654         }
8655
8656         /* Ok we can allocate now, reinit the extent root */
8657         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8658         if (ret) {
8659                 fprintf(stderr, "extent root initialization failed\n");
8660                 /*
8661                  * When the transaction code is updated we should end the
8662                  * transaction, but for now progs only knows about commit so
8663                  * just return an error.
8664                  */
8665                 return ret;
8666         }
8667
8668         /*
8669          * Now we have all the in-memory block groups setup so we can make
8670          * allocations properly, and the metadata we care about is safe since we
8671          * pinned all of it above.
8672          */
8673         while (1) {
8674                 struct btrfs_block_group_cache *cache;
8675
8676                 cache = btrfs_lookup_first_block_group(fs_info, start);
8677                 if (!cache)
8678                         break;
8679                 start = cache->key.objectid + cache->key.offset;
8680                 ret = btrfs_insert_item(trans, fs_info->extent_root,
8681                                         &cache->key, &cache->item,
8682                                         sizeof(cache->item));
8683                 if (ret) {
8684                         fprintf(stderr, "Error adding block group\n");
8685                         return ret;
8686                 }
8687                 btrfs_extent_post_op(trans, fs_info->extent_root);
8688         }
8689
8690         ret = reset_balance(trans, fs_info);
8691         if (ret)
8692                 fprintf(stderr, "error resetting the pending balance\n");
8693
8694         return ret;
8695 }
8696
8697 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8698 {
8699         struct btrfs_path path;
8700         struct btrfs_trans_handle *trans;
8701         struct btrfs_key key;
8702         int ret;
8703
8704         printf("Recowing metadata block %llu\n", eb->start);
8705         key.objectid = btrfs_header_owner(eb);
8706         key.type = BTRFS_ROOT_ITEM_KEY;
8707         key.offset = (u64)-1;
8708
8709         root = btrfs_read_fs_root(root->fs_info, &key);
8710         if (IS_ERR(root)) {
8711                 fprintf(stderr, "Couldn't find owner root %llu\n",
8712                         key.objectid);
8713                 return PTR_ERR(root);
8714         }
8715
8716         trans = btrfs_start_transaction(root, 1);
8717         if (IS_ERR(trans))
8718                 return PTR_ERR(trans);
8719
8720         btrfs_init_path(&path);
8721         path.lowest_level = btrfs_header_level(eb);
8722         if (path.lowest_level)
8723                 btrfs_node_key_to_cpu(eb, &key, 0);
8724         else
8725                 btrfs_item_key_to_cpu(eb, &key, 0);
8726
8727         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
8728         btrfs_commit_transaction(trans, root);
8729         btrfs_release_path(&path);
8730         return ret;
8731 }
8732
8733 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8734 {
8735         struct btrfs_path path;
8736         struct btrfs_trans_handle *trans;
8737         struct btrfs_key key;
8738         int ret;
8739
8740         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8741                bad->key.type, bad->key.offset);
8742         key.objectid = bad->root_id;
8743         key.type = BTRFS_ROOT_ITEM_KEY;
8744         key.offset = (u64)-1;
8745
8746         root = btrfs_read_fs_root(root->fs_info, &key);
8747         if (IS_ERR(root)) {
8748                 fprintf(stderr, "Couldn't find owner root %llu\n",
8749                         key.objectid);
8750                 return PTR_ERR(root);
8751         }
8752
8753         trans = btrfs_start_transaction(root, 1);
8754         if (IS_ERR(trans))
8755                 return PTR_ERR(trans);
8756
8757         btrfs_init_path(&path);
8758         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
8759         if (ret) {
8760                 if (ret > 0)
8761                         ret = 0;
8762                 goto out;
8763         }
8764         ret = btrfs_del_item(trans, root, &path);
8765 out:
8766         btrfs_commit_transaction(trans, root);
8767         btrfs_release_path(&path);
8768         return ret;
8769 }
8770
8771 static int zero_log_tree(struct btrfs_root *root)
8772 {
8773         struct btrfs_trans_handle *trans;
8774         int ret;
8775
8776         trans = btrfs_start_transaction(root, 1);
8777         if (IS_ERR(trans)) {
8778                 ret = PTR_ERR(trans);
8779                 return ret;
8780         }
8781         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8782         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8783         ret = btrfs_commit_transaction(trans, root);
8784         return ret;
8785 }
8786
8787 static int populate_csum(struct btrfs_trans_handle *trans,
8788                          struct btrfs_root *csum_root, char *buf, u64 start,
8789                          u64 len)
8790 {
8791         struct btrfs_fs_info *fs_info = csum_root->fs_info;
8792         u64 offset = 0;
8793         u64 sectorsize;
8794         int ret = 0;
8795
8796         while (offset < len) {
8797                 sectorsize = fs_info->sectorsize;
8798                 ret = read_extent_data(fs_info, buf, start + offset,
8799                                        &sectorsize, 0);
8800                 if (ret)
8801                         break;
8802                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8803                                             start + offset, buf, sectorsize);
8804                 if (ret)
8805                         break;
8806                 offset += sectorsize;
8807         }
8808         return ret;
8809 }
8810
8811 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8812                                       struct btrfs_root *csum_root,
8813                                       struct btrfs_root *cur_root)
8814 {
8815         struct btrfs_path path;
8816         struct btrfs_key key;
8817         struct extent_buffer *node;
8818         struct btrfs_file_extent_item *fi;
8819         char *buf = NULL;
8820         u64 start = 0;
8821         u64 len = 0;
8822         int slot = 0;
8823         int ret = 0;
8824
8825         buf = malloc(cur_root->fs_info->sectorsize);
8826         if (!buf)
8827                 return -ENOMEM;
8828
8829         btrfs_init_path(&path);
8830         key.objectid = 0;
8831         key.offset = 0;
8832         key.type = 0;
8833         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
8834         if (ret < 0)
8835                 goto out;
8836         /* Iterate all regular file extents and fill its csum */
8837         while (1) {
8838                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8839
8840                 if (key.type != BTRFS_EXTENT_DATA_KEY)
8841                         goto next;
8842                 node = path.nodes[0];
8843                 slot = path.slots[0];
8844                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8845                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8846                         goto next;
8847                 start = btrfs_file_extent_disk_bytenr(node, fi);
8848                 len = btrfs_file_extent_disk_num_bytes(node, fi);
8849
8850                 ret = populate_csum(trans, csum_root, buf, start, len);
8851                 if (ret == -EEXIST)
8852                         ret = 0;
8853                 if (ret < 0)
8854                         goto out;
8855 next:
8856                 /*
8857                  * TODO: if next leaf is corrupted, jump to nearest next valid
8858                  * leaf.
8859                  */
8860                 ret = btrfs_next_item(cur_root, &path);
8861                 if (ret < 0)
8862                         goto out;
8863                 if (ret > 0) {
8864                         ret = 0;
8865                         goto out;
8866                 }
8867         }
8868
8869 out:
8870         btrfs_release_path(&path);
8871         free(buf);
8872         return ret;
8873 }
8874
8875 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8876                                   struct btrfs_root *csum_root)
8877 {
8878         struct btrfs_fs_info *fs_info = csum_root->fs_info;
8879         struct btrfs_path path;
8880         struct btrfs_root *tree_root = fs_info->tree_root;
8881         struct btrfs_root *cur_root;
8882         struct extent_buffer *node;
8883         struct btrfs_key key;
8884         int slot = 0;
8885         int ret = 0;
8886
8887         btrfs_init_path(&path);
8888         key.objectid = BTRFS_FS_TREE_OBJECTID;
8889         key.offset = 0;
8890         key.type = BTRFS_ROOT_ITEM_KEY;
8891         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
8892         if (ret < 0)
8893                 goto out;
8894         if (ret > 0) {
8895                 ret = -ENOENT;
8896                 goto out;
8897         }
8898
8899         while (1) {
8900                 node = path.nodes[0];
8901                 slot = path.slots[0];
8902                 btrfs_item_key_to_cpu(node, &key, slot);
8903                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8904                         goto out;
8905                 if (key.type != BTRFS_ROOT_ITEM_KEY)
8906                         goto next;
8907                 if (!is_fstree(key.objectid))
8908                         goto next;
8909                 key.offset = (u64)-1;
8910
8911                 cur_root = btrfs_read_fs_root(fs_info, &key);
8912                 if (IS_ERR(cur_root) || !cur_root) {
8913                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8914                                 key.objectid);
8915                         goto out;
8916                 }
8917                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8918                                 cur_root);
8919                 if (ret < 0)
8920                         goto out;
8921 next:
8922                 ret = btrfs_next_item(tree_root, &path);
8923                 if (ret > 0) {
8924                         ret = 0;
8925                         goto out;
8926                 }
8927                 if (ret < 0)
8928                         goto out;
8929         }
8930
8931 out:
8932         btrfs_release_path(&path);
8933         return ret;
8934 }
8935
8936 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
8937                                       struct btrfs_root *csum_root)
8938 {
8939         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
8940         struct btrfs_path path;
8941         struct btrfs_extent_item *ei;
8942         struct extent_buffer *leaf;
8943         char *buf;
8944         struct btrfs_key key;
8945         int ret;
8946
8947         btrfs_init_path(&path);
8948         key.objectid = 0;
8949         key.type = BTRFS_EXTENT_ITEM_KEY;
8950         key.offset = 0;
8951         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8952         if (ret < 0) {
8953                 btrfs_release_path(&path);
8954                 return ret;
8955         }
8956
8957         buf = malloc(csum_root->fs_info->sectorsize);
8958         if (!buf) {
8959                 btrfs_release_path(&path);
8960                 return -ENOMEM;
8961         }
8962
8963         while (1) {
8964                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8965                         ret = btrfs_next_leaf(extent_root, &path);
8966                         if (ret < 0)
8967                                 break;
8968                         if (ret) {
8969                                 ret = 0;
8970                                 break;
8971                         }
8972                 }
8973                 leaf = path.nodes[0];
8974
8975                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8976                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8977                         path.slots[0]++;
8978                         continue;
8979                 }
8980
8981                 ei = btrfs_item_ptr(leaf, path.slots[0],
8982                                     struct btrfs_extent_item);
8983                 if (!(btrfs_extent_flags(leaf, ei) &
8984                       BTRFS_EXTENT_FLAG_DATA)) {
8985                         path.slots[0]++;
8986                         continue;
8987                 }
8988
8989                 ret = populate_csum(trans, csum_root, buf, key.objectid,
8990                                     key.offset);
8991                 if (ret)
8992                         break;
8993                 path.slots[0]++;
8994         }
8995
8996         btrfs_release_path(&path);
8997         free(buf);
8998         return ret;
8999 }
9000
/*
 * Recalculate data checksums and store them in the csum tree.
 *
 * Re-initializing the extent tree wipes out all extent info, so in that
 * case the extent tree cannot be used as the source of extents.  When
 * search_fs_tree is non-zero the fs/subvolume trees are used to fill the
 * csum tree; otherwise the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ?
		fill_csum_tree_from_fs(trans, csum_root) :
		fill_csum_tree_from_extent(trans, csum_root);
}
9017
9018 static void free_roots_info_cache(void)
9019 {
9020         if (!roots_info_cache)
9021                 return;
9022
9023         while (!cache_tree_empty(roots_info_cache)) {
9024                 struct cache_extent *entry;
9025                 struct root_item_info *rii;
9026
9027                 entry = first_cache_extent(roots_info_cache);
9028                 if (!entry)
9029                         break;
9030                 remove_cache_extent(roots_info_cache, entry);
9031                 rii = container_of(entry, struct root_item_info, cache_extent);
9032                 free(rii);
9033         }
9034
9035         free(roots_info_cache);
9036         roots_info_cache = NULL;
9037 }
9038
9039 static int build_roots_info_cache(struct btrfs_fs_info *info)
9040 {
9041         int ret = 0;
9042         struct btrfs_key key;
9043         struct extent_buffer *leaf;
9044         struct btrfs_path path;
9045
9046         if (!roots_info_cache) {
9047                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9048                 if (!roots_info_cache)
9049                         return -ENOMEM;
9050                 cache_tree_init(roots_info_cache);
9051         }
9052
9053         btrfs_init_path(&path);
9054         key.objectid = 0;
9055         key.type = BTRFS_EXTENT_ITEM_KEY;
9056         key.offset = 0;
9057         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
9058         if (ret < 0)
9059                 goto out;
9060         leaf = path.nodes[0];
9061
9062         while (1) {
9063                 struct btrfs_key found_key;
9064                 struct btrfs_extent_item *ei;
9065                 struct btrfs_extent_inline_ref *iref;
9066                 unsigned long item_end;
9067                 int slot = path.slots[0];
9068                 int type;
9069                 u64 flags;
9070                 u64 root_id;
9071                 u8 level;
9072                 struct cache_extent *entry;
9073                 struct root_item_info *rii;
9074
9075                 if (slot >= btrfs_header_nritems(leaf)) {
9076                         ret = btrfs_next_leaf(info->extent_root, &path);
9077                         if (ret < 0) {
9078                                 break;
9079                         } else if (ret) {
9080                                 ret = 0;
9081                                 break;
9082                         }
9083                         leaf = path.nodes[0];
9084                         slot = path.slots[0];
9085                 }
9086
9087                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9088
9089                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9090                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9091                         goto next;
9092
9093                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9094                 flags = btrfs_extent_flags(leaf, ei);
9095                 item_end = (unsigned long)ei + btrfs_item_size_nr(leaf, slot);
9096
9097                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9098                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9099                         goto next;
9100
9101                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9102                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9103                         level = found_key.offset;
9104                 } else {
9105                         struct btrfs_tree_block_info *binfo;
9106
9107                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
9108                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
9109                         level = btrfs_tree_block_level(leaf, binfo);
9110                 }
9111
9112                 /*
9113                  * It's a valid extent/metadata item that has no inline ref,
9114                  * but SHARED_BLOCK_REF or other shared references.
9115                  * So we need to do extra check to avoid reading beyond leaf
9116                  * boudnary.
9117                  */
9118                 if ((unsigned long)iref >= item_end)
9119                         goto next;
9120
9121                 /*
9122                  * For a root extent, it must be of the following type and the
9123                  * first (and only one) iref in the item.
9124                  */
9125                 type = btrfs_extent_inline_ref_type(leaf, iref);
9126                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9127                         goto next;
9128
9129                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9130                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9131                 if (!entry) {
9132                         rii = malloc(sizeof(struct root_item_info));
9133                         if (!rii) {
9134                                 ret = -ENOMEM;
9135                                 goto out;
9136                         }
9137                         rii->cache_extent.start = root_id;
9138                         rii->cache_extent.size = 1;
9139                         rii->level = (u8)-1;
9140                         entry = &rii->cache_extent;
9141                         ret = insert_cache_extent(roots_info_cache, entry);
9142                         ASSERT(ret == 0);
9143                 } else {
9144                         rii = container_of(entry, struct root_item_info,
9145                                            cache_extent);
9146                 }
9147
9148                 ASSERT(rii->cache_extent.start == root_id);
9149                 ASSERT(rii->cache_extent.size == 1);
9150
9151                 if (level > rii->level || rii->level == (u8)-1) {
9152                         rii->level = level;
9153                         rii->bytenr = found_key.objectid;
9154                         rii->gen = btrfs_extent_generation(leaf, ei);
9155                         rii->node_count = 1;
9156                 } else if (level == rii->level) {
9157                         rii->node_count++;
9158                 }
9159 next:
9160                 path.slots[0]++;
9161         }
9162
9163 out:
9164         btrfs_release_path(&path);
9165
9166         return ret;
9167 }
9168
9169 static int maybe_repair_root_item(struct btrfs_path *path,
9170                                   const struct btrfs_key *root_key,
9171                                   const int read_only_mode)
9172 {
9173         const u64 root_id = root_key->objectid;
9174         struct cache_extent *entry;
9175         struct root_item_info *rii;
9176         struct btrfs_root_item ri;
9177         unsigned long offset;
9178
9179         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9180         if (!entry) {
9181                 fprintf(stderr,
9182                         "Error: could not find extent items for root %llu\n",
9183                         root_key->objectid);
9184                 return -ENOENT;
9185         }
9186
9187         rii = container_of(entry, struct root_item_info, cache_extent);
9188         ASSERT(rii->cache_extent.start == root_id);
9189         ASSERT(rii->cache_extent.size == 1);
9190
9191         if (rii->node_count != 1) {
9192                 fprintf(stderr,
9193                         "Error: could not find btree root extent for root %llu\n",
9194                         root_id);
9195                 return -ENOENT;
9196         }
9197
9198         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9199         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9200
9201         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9202             btrfs_root_level(&ri) != rii->level ||
9203             btrfs_root_generation(&ri) != rii->gen) {
9204
9205                 /*
9206                  * If we're in repair mode but our caller told us to not update
9207                  * the root item, i.e. just check if it needs to be updated, don't
9208                  * print this message, since the caller will call us again shortly
9209                  * for the same root item without read only mode (the caller will
9210                  * open a transaction first).
9211                  */
9212                 if (!(read_only_mode && repair))
9213                         fprintf(stderr,
9214                                 "%sroot item for root %llu,"
9215                                 " current bytenr %llu, current gen %llu, current level %u,"
9216                                 " new bytenr %llu, new gen %llu, new level %u\n",
9217                                 (read_only_mode ? "" : "fixing "),
9218                                 root_id,
9219                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9220                                 btrfs_root_level(&ri),
9221                                 rii->bytenr, rii->gen, rii->level);
9222
9223                 if (btrfs_root_generation(&ri) > rii->gen) {
9224                         fprintf(stderr,
9225                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9226                                 root_id, btrfs_root_generation(&ri), rii->gen);
9227                         return -EINVAL;
9228                 }
9229
9230                 if (!read_only_mode) {
9231                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9232                         btrfs_set_root_level(&ri, rii->level);
9233                         btrfs_set_root_generation(&ri, rii->gen);
9234                         write_extent_buffer(path->nodes[0], &ri,
9235                                             offset, sizeof(ri));
9236                 }
9237
9238                 return 1;
9239         }
9240
9241         return 0;
9242 }
9243
9244 /*
9245  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9246  * caused read-only snapshots to be corrupted if they were created at a moment
9247  * when the source subvolume/snapshot had orphan items. The issue was that the
9248  * on-disk root items became incorrect, referring to the pre orphan cleanup root
9249  * node instead of the post orphan cleanup root node.
9250  * So this function, and its callees, just detects and fixes those cases. Even
9251  * though the regression was for read-only snapshots, this function applies to
9252  * any snapshot/subvolume root.
9253  * This must be run before any other repair code - not doing it so, makes other
9254  * repair code delete or modify backrefs in the extent tree for example, which
9255  * will result in an inconsistent fs after repairing the root items.
9256  */
9257 static int repair_root_items(struct btrfs_fs_info *info)
9258 {
9259         struct btrfs_path path;
9260         struct btrfs_key key;
9261         struct extent_buffer *leaf;
9262         struct btrfs_trans_handle *trans = NULL;
9263         int ret = 0;
9264         int bad_roots = 0;
9265         int need_trans = 0;
9266
9267         btrfs_init_path(&path);
9268
9269         ret = build_roots_info_cache(info);
9270         if (ret)
9271                 goto out;
9272
9273         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9274         key.type = BTRFS_ROOT_ITEM_KEY;
9275         key.offset = 0;
9276
9277 again:
9278         /*
9279          * Avoid opening and committing transactions if a leaf doesn't have
9280          * any root items that need to be fixed, so that we avoid rotating
9281          * backup roots unnecessarily.
9282          */
9283         if (need_trans) {
9284                 trans = btrfs_start_transaction(info->tree_root, 1);
9285                 if (IS_ERR(trans)) {
9286                         ret = PTR_ERR(trans);
9287                         goto out;
9288                 }
9289         }
9290
9291         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
9292                                 0, trans ? 1 : 0);
9293         if (ret < 0)
9294                 goto out;
9295         leaf = path.nodes[0];
9296
9297         while (1) {
9298                 struct btrfs_key found_key;
9299
9300                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
9301                         int no_more_keys = find_next_key(&path, &key);
9302
9303                         btrfs_release_path(&path);
9304                         if (trans) {
9305                                 ret = btrfs_commit_transaction(trans,
9306                                                                info->tree_root);
9307                                 trans = NULL;
9308                                 if (ret < 0)
9309                                         goto out;
9310                         }
9311                         need_trans = 0;
9312                         if (no_more_keys)
9313                                 break;
9314                         goto again;
9315                 }
9316
9317                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9318
9319                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9320                         goto next;
9321                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9322                         goto next;
9323
9324                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
9325                 if (ret < 0)
9326                         goto out;
9327                 if (ret) {
9328                         if (!trans && repair) {
9329                                 need_trans = 1;
9330                                 key = found_key;
9331                                 btrfs_release_path(&path);
9332                                 goto again;
9333                         }
9334                         bad_roots++;
9335                 }
9336 next:
9337                 path.slots[0]++;
9338         }
9339         ret = 0;
9340 out:
9341         free_roots_info_cache();
9342         btrfs_release_path(&path);
9343         if (trans)
9344                 btrfs_commit_transaction(trans, info->tree_root);
9345         if (ret < 0)
9346                 return ret;
9347
9348         return bad_roots;
9349 }
9350
9351 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
9352 {
9353         struct btrfs_trans_handle *trans;
9354         struct btrfs_block_group_cache *bg_cache;
9355         u64 current = 0;
9356         int ret = 0;
9357
9358         /* Clear all free space cache inodes and its extent data */
9359         while (1) {
9360                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
9361                 if (!bg_cache)
9362                         break;
9363                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
9364                 if (ret < 0)
9365                         return ret;
9366                 current = bg_cache->key.objectid + bg_cache->key.offset;
9367         }
9368
9369         /* Don't forget to set cache_generation to -1 */
9370         trans = btrfs_start_transaction(fs_info->tree_root, 0);
9371         if (IS_ERR(trans)) {
9372                 error("failed to update super block cache generation");
9373                 return PTR_ERR(trans);
9374         }
9375         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
9376         btrfs_commit_transaction(trans, fs_info->tree_root);
9377
9378         return ret;
9379 }
9380
9381 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
9382                 int clear_version)
9383 {
9384         int ret = 0;
9385
9386         if (clear_version == 1) {
9387                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
9388                         error(
9389                 "free space cache v2 detected, use --clear-space-cache v2");
9390                         ret = 1;
9391                         goto close_out;
9392                 }
9393                 printf("Clearing free space cache\n");
9394                 ret = clear_free_space_cache(fs_info);
9395                 if (ret) {
9396                         error("failed to clear free space cache");
9397                         ret = 1;
9398                 } else {
9399                         printf("Free space cache cleared\n");
9400                 }
9401         } else if (clear_version == 2) {
9402                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
9403                         printf("no free space cache v2 to clear\n");
9404                         ret = 0;
9405                         goto close_out;
9406                 }
9407                 printf("Clear free space cache v2\n");
9408                 ret = btrfs_clear_free_space_tree(fs_info);
9409                 if (ret) {
9410                         error("failed to clear free space cache v2: %d", ret);
9411                         ret = 1;
9412                 } else {
9413                         printf("free space cache v2 cleared\n");
9414                 }
9415         }
9416 close_out:
9417         return ret;
9418 }
9419
/*
 * Help text for "btrfs check", passed to usage() by cmd_check().
 *
 * NULL-terminated array of strings: a synopsis line, a short and a longer
 * description, then one line (or more, for continuation lines) per option.
 * The option list here should be kept in sync with the long_options table
 * and the switch in cmd_check().
 */
9420 const char * const cmd_check_usage[] = {
9421         "btrfs check [options] <device>",
9422         "Check structural integrity of a filesystem (unmounted).",
9423         "Check structural integrity of an unmounted filesystem. Verify internal",
9424         "trees' consistency and item connectivity. In the repair mode try to",
9425         "fix the problems found. ",
9426         "WARNING: the repair mode is considered dangerous",
9427         "",
9428         "-s|--super <superblock>     use this superblock copy",
9429         "-b|--backup                 use the first valid backup root copy",
9430         "--force                     skip mount checks, repair is not possible",
9431         "--repair                    try to repair the filesystem",
9432         "--readonly                  run in read-only mode (default)",
9433         "--init-csum-tree            create a new CRC tree",
9434         "--init-extent-tree          create a new extent tree",
9435         "--mode <MODE>               allows choice of memory/IO trade-offs",
9436         "                            where MODE is one of:",
9437         "                            original - read inodes and extents to memory (requires",
9438         "                                       more memory, does less IO)",
9439         "                            lowmem   - try to use less memory but read blocks again",
9440         "                                       when needed",
9441         "--check-data-csum           verify checksums of data blocks",
9442         "-Q|--qgroup-report          print a report on qgroup consistency",
9443         "-E|--subvol-extents <subvolid>",
9444         "                            print subvolume extents and sharing state",
9445         "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
9446         "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
9447         "-p|--progress               indicate progress",
9448         "--clear-space-cache v1|v2   clear space cache for v1 or v2",
9449         NULL /* terminator */
9450 };
9451
9452 int cmd_check(int argc, char **argv)
9453 {
9454         struct cache_tree root_cache;
9455         struct btrfs_root *root;
9456         struct btrfs_fs_info *info;
9457         u64 bytenr = 0;
9458         u64 subvolid = 0;
9459         u64 tree_root_bytenr = 0;
9460         u64 chunk_root_bytenr = 0;
9461         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9462         int ret = 0;
9463         int err = 0;
9464         u64 num;
9465         int init_csum_tree = 0;
9466         int readonly = 0;
9467         int clear_space_cache = 0;
9468         int qgroup_report = 0;
9469         int qgroups_repaired = 0;
9470         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
9471         int force = 0;
9472
9473         while(1) {
9474                 int c;
9475                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
9476                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
9477                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
9478                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
9479                         GETOPT_VAL_FORCE };
9480                 static const struct option long_options[] = {
9481                         { "super", required_argument, NULL, 's' },
9482                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
9483                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
9484                         { "init-csum-tree", no_argument, NULL,
9485                                 GETOPT_VAL_INIT_CSUM },
9486                         { "init-extent-tree", no_argument, NULL,
9487                                 GETOPT_VAL_INIT_EXTENT },
9488                         { "check-data-csum", no_argument, NULL,
9489                                 GETOPT_VAL_CHECK_CSUM },
9490                         { "backup", no_argument, NULL, 'b' },
9491                         { "subvol-extents", required_argument, NULL, 'E' },
9492                         { "qgroup-report", no_argument, NULL, 'Q' },
9493                         { "tree-root", required_argument, NULL, 'r' },
9494                         { "chunk-root", required_argument, NULL,
9495                                 GETOPT_VAL_CHUNK_TREE },
9496                         { "progress", no_argument, NULL, 'p' },
9497                         { "mode", required_argument, NULL,
9498                                 GETOPT_VAL_MODE },
9499                         { "clear-space-cache", required_argument, NULL,
9500                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
9501                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
9502                         { NULL, 0, NULL, 0}
9503                 };
9504
9505                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
9506                 if (c < 0)
9507                         break;
9508                 switch(c) {
9509                         case 'a': /* ignored */ break;
9510                         case 'b':
9511                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9512                                 break;
9513                         case 's':
9514                                 num = arg_strtou64(optarg);
9515                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9516                                         error(
9517                                         "super mirror should be less than %d",
9518                                                 BTRFS_SUPER_MIRROR_MAX);
9519                                         exit(1);
9520                                 }
9521                                 bytenr = btrfs_sb_offset(((int)num));
9522                                 printf("using SB copy %llu, bytenr %llu\n", num,
9523                                        (unsigned long long)bytenr);
9524                                 break;
9525                         case 'Q':
9526                                 qgroup_report = 1;
9527                                 break;
9528                         case 'E':
9529                                 subvolid = arg_strtou64(optarg);
9530                                 break;
9531                         case 'r':
9532                                 tree_root_bytenr = arg_strtou64(optarg);
9533                                 break;
9534                         case GETOPT_VAL_CHUNK_TREE:
9535                                 chunk_root_bytenr = arg_strtou64(optarg);
9536                                 break;
9537                         case 'p':
9538                                 ctx.progress_enabled = true;
9539                                 break;
9540                         case '?':
9541                         case 'h':
9542                                 usage(cmd_check_usage);
9543                         case GETOPT_VAL_REPAIR:
9544                                 printf("enabling repair mode\n");
9545                                 repair = 1;
9546                                 ctree_flags |= OPEN_CTREE_WRITES;
9547                                 break;
9548                         case GETOPT_VAL_READONLY:
9549                                 readonly = 1;
9550                                 break;
9551                         case GETOPT_VAL_INIT_CSUM:
9552                                 printf("Creating a new CRC tree\n");
9553                                 init_csum_tree = 1;
9554                                 repair = 1;
9555                                 ctree_flags |= OPEN_CTREE_WRITES;
9556                                 break;
9557                         case GETOPT_VAL_INIT_EXTENT:
9558                                 init_extent_tree = 1;
9559                                 ctree_flags |= (OPEN_CTREE_WRITES |
9560                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9561                                 repair = 1;
9562                                 break;
9563                         case GETOPT_VAL_CHECK_CSUM:
9564                                 check_data_csum = 1;
9565                                 break;
9566                         case GETOPT_VAL_MODE:
9567                                 check_mode = parse_check_mode(optarg);
9568                                 if (check_mode == CHECK_MODE_UNKNOWN) {
9569                                         error("unknown mode: %s", optarg);
9570                                         exit(1);
9571                                 }
9572                                 break;
9573                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
9574                                 if (strcmp(optarg, "v1") == 0) {
9575                                         clear_space_cache = 1;
9576                                 } else if (strcmp(optarg, "v2") == 0) {
9577                                         clear_space_cache = 2;
9578                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
9579                                 } else {
9580                                         error(
9581                 "invalid argument to --clear-space-cache, must be v1 or v2");
9582                                         exit(1);
9583                                 }
9584                                 ctree_flags |= OPEN_CTREE_WRITES;
9585                                 break;
9586                         case GETOPT_VAL_FORCE:
9587                                 force = 1;
9588                                 break;
9589                 }
9590         }
9591
9592         if (check_argc_exact(argc - optind, 1))
9593                 usage(cmd_check_usage);
9594
9595         if (ctx.progress_enabled) {
9596                 ctx.tp = TASK_NOTHING;
9597                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9598         }
9599
9600         /* This check is the only reason for --readonly to exist */
9601         if (readonly && repair) {
9602                 error("repair options are not compatible with --readonly");
9603                 exit(1);
9604         }
9605
9606         /*
9607          * experimental and dangerous
9608          */
9609         if (repair && check_mode == CHECK_MODE_LOWMEM)
9610                 warning("low-memory mode repair support is only partial");
9611
9612         radix_tree_init();
9613         cache_tree_init(&root_cache);
9614
9615         ret = check_mounted(argv[optind]);
9616         if (!force) {
9617                 if (ret < 0) {
9618                         error("could not check mount status: %s",
9619                                         strerror(-ret));
9620                         err |= !!ret;
9621                         goto err_out;
9622                 } else if (ret) {
9623                         error(
9624 "%s is currently mounted, use --force if you really intend to check the filesystem",
9625                                 argv[optind]);
9626                         ret = -EBUSY;
9627                         err |= !!ret;
9628                         goto err_out;
9629                 }
9630         } else {
9631                 if (repair) {
9632                         error("repair and --force is not yet supported");
9633                         ret = 1;
9634                         err |= !!ret;
9635                         goto err_out;
9636                 }
9637                 if (ret < 0) {
9638                         warning(
9639 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
9640                                 argv[optind]);
9641                 } else if (ret) {
9642                         warning(
9643                         "filesystem mounted, continuing because of --force");
9644                 }
9645                 /* A block device is mounted in exclusive mode by kernel */
9646                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
9647         }
9648
9649         /* only allow partial opening under repair mode */
9650         if (repair)
9651                 ctree_flags |= OPEN_CTREE_PARTIAL;
9652
9653         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9654                                   chunk_root_bytenr, ctree_flags);
9655         if (!info) {
9656                 error("cannot open file system");
9657                 ret = -EIO;
9658                 err |= !!ret;
9659                 goto err_out;
9660         }
9661
9662         global_info = info;
9663         root = info->fs_root;
9664         uuid_unparse(info->super_copy->fsid, uuidbuf);
9665
9666         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9667
9668         /*
9669          * Check the bare minimum before starting anything else that could rely
9670          * on it, namely the tree roots, any local consistency checks
9671          */
9672         if (!extent_buffer_uptodate(info->tree_root->node) ||
9673             !extent_buffer_uptodate(info->dev_root->node) ||
9674             !extent_buffer_uptodate(info->chunk_root->node)) {
9675                 error("critical roots corrupted, unable to check the filesystem");
9676                 err |= !!ret;
9677                 ret = -EIO;
9678                 goto close_out;
9679         }
9680
9681         if (clear_space_cache) {
9682                 ret = do_clear_free_space_cache(info, clear_space_cache);
9683                 err |= !!ret;
9684                 goto close_out;
9685         }
9686
9687         /*
9688          * repair mode will force us to commit transaction which
9689          * will make us fail to load log tree when mounting.
9690          */
9691         if (repair && btrfs_super_log_root(info->super_copy)) {
9692                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
9693                 if (!ret) {
9694                         ret = 1;
9695                         err |= !!ret;
9696                         goto close_out;
9697                 }
9698                 ret = zero_log_tree(root);
9699                 err |= !!ret;
9700                 if (ret) {
9701                         error("failed to zero log tree: %d", ret);
9702                         goto close_out;
9703                 }
9704         }
9705
9706         if (qgroup_report) {
9707                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9708                        uuidbuf);
9709                 ret = qgroup_verify_all(info);
9710                 err |= !!ret;
9711                 if (ret == 0)
9712                         report_qgroups(1);
9713                 goto close_out;
9714         }
9715         if (subvolid) {
9716                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9717                        subvolid, argv[optind], uuidbuf);
9718                 ret = print_extent_state(info, subvolid);
9719                 err |= !!ret;
9720                 goto close_out;
9721         }
9722
9723         if (init_extent_tree || init_csum_tree) {
9724                 struct btrfs_trans_handle *trans;
9725
9726                 trans = btrfs_start_transaction(info->extent_root, 0);
9727                 if (IS_ERR(trans)) {
9728                         error("error starting transaction");
9729                         ret = PTR_ERR(trans);
9730                         err |= !!ret;
9731                         goto close_out;
9732                 }
9733
9734                 if (init_extent_tree) {
9735                         printf("Creating a new extent tree\n");
9736                         ret = reinit_extent_tree(trans, info);
9737                         err |= !!ret;
9738                         if (ret)
9739                                 goto close_out;
9740                 }
9741
9742                 if (init_csum_tree) {
9743                         printf("Reinitialize checksum tree\n");
9744                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9745                         if (ret) {
9746                                 error("checksum tree initialization failed: %d",
9747                                                 ret);
9748                                 ret = -EIO;
9749                                 err |= !!ret;
9750                                 goto close_out;
9751                         }
9752
9753                         ret = fill_csum_tree(trans, info->csum_root,
9754                                              init_extent_tree);
9755                         err |= !!ret;
9756                         if (ret) {
9757                                 error("checksum tree refilling failed: %d", ret);
9758                                 return -EIO;
9759                         }
9760                 }
9761                 /*
9762                  * Ok now we commit and run the normal fsck, which will add
9763                  * extent entries for all of the items it finds.
9764                  */
9765                 ret = btrfs_commit_transaction(trans, info->extent_root);
9766                 err |= !!ret;
9767                 if (ret)
9768                         goto close_out;
9769         }
9770         if (!extent_buffer_uptodate(info->extent_root->node)) {
9771                 error("critical: extent_root, unable to check the filesystem");
9772                 ret = -EIO;
9773                 err |= !!ret;
9774                 goto close_out;
9775         }
9776         if (!extent_buffer_uptodate(info->csum_root->node)) {
9777                 error("critical: csum_root, unable to check the filesystem");
9778                 ret = -EIO;
9779                 err |= !!ret;
9780                 goto close_out;
9781         }
9782
9783         if (!init_extent_tree) {
9784                 ret = repair_root_items(info);
9785                 if (ret < 0) {
9786                         err = !!ret;
9787                         error("failed to repair root items: %s", strerror(-ret));
9788                         goto close_out;
9789                 }
9790                 if (repair) {
9791                         fprintf(stderr, "Fixed %d roots.\n", ret);
9792                         ret = 0;
9793                 } else if (ret > 0) {
9794                         fprintf(stderr,
9795                                 "Found %d roots with an outdated root item.\n",
9796                                 ret);
9797                         fprintf(stderr,
9798         "Please run a filesystem check with the option --repair to fix them.\n");
9799                         ret = 1;
9800                         err |= ret;
9801                         goto close_out;
9802                 }
9803         }
9804
9805         ret = do_check_chunks_and_extents(info);
9806         err |= !!ret;
9807         if (ret)
9808                 error(
9809                 "errors found in extent allocation tree or chunk allocation");
9810
9811         /* Only re-check super size after we checked and repaired the fs */
9812         err |= !is_super_size_valid(info);
9813
9814         if (!ctx.progress_enabled) {
9815                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
9816                         fprintf(stderr, "checking free space tree\n");
9817                 else
9818                         fprintf(stderr, "checking free space cache\n");
9819         }
9820         ret = check_space_cache(root);
9821         err |= !!ret;
9822         if (ret) {
9823                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
9824                         error("errors found in free space tree");
9825                 else
9826                         error("errors found in free space cache");
9827                 goto out;
9828         }
9829
9830         /*
9831          * We used to have to have these hole extents in between our real
9832          * extents so if we don't have this flag set we need to make sure there
9833          * are no gaps in the file extents for inodes, otherwise we can just
9834          * ignore it when this happens.
9835          */
9836         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
9837         ret = do_check_fs_roots(info, &root_cache);
9838         err |= !!ret;
9839         if (ret) {
9840                 error("errors found in fs roots");
9841                 goto out;
9842         }
9843
9844         fprintf(stderr, "checking csums\n");
9845         ret = check_csums(root);
9846         err |= !!ret;
9847         if (ret) {
9848                 error("errors found in csum tree");
9849                 goto out;
9850         }
9851
9852         fprintf(stderr, "checking root refs\n");
9853         /* For low memory mode, check_fs_roots_v2 handles root refs */
9854         if (check_mode != CHECK_MODE_LOWMEM) {
9855                 ret = check_root_refs(root, &root_cache);
9856                 err |= !!ret;
9857                 if (ret) {
9858                         error("errors found in root refs");
9859                         goto out;
9860                 }
9861         }
9862
9863         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9864                 struct extent_buffer *eb;
9865
9866                 eb = list_first_entry(&root->fs_info->recow_ebs,
9867                                       struct extent_buffer, recow);
9868                 list_del_init(&eb->recow);
9869                 ret = recow_extent_buffer(root, eb);
9870                 err |= !!ret;
9871                 if (ret) {
9872                         error("fails to fix transid errors");
9873                         break;
9874                 }
9875         }
9876
9877         while (!list_empty(&delete_items)) {
9878                 struct bad_item *bad;
9879
9880                 bad = list_first_entry(&delete_items, struct bad_item, list);
9881                 list_del_init(&bad->list);
9882                 if (repair) {
9883                         ret = delete_bad_item(root, bad);
9884                         err |= !!ret;
9885                 }
9886                 free(bad);
9887         }
9888
9889         if (info->quota_enabled) {
9890                 fprintf(stderr, "checking quota groups\n");
9891                 ret = qgroup_verify_all(info);
9892                 err |= !!ret;
9893                 if (ret) {
9894                         error("failed to check quota groups");
9895                         goto out;
9896                 }
9897                 report_qgroups(0);
9898                 ret = repair_qgroups(info, &qgroups_repaired);
9899                 err |= !!ret;
9900                 if (err) {
9901                         error("failed to repair quota groups");
9902                         goto out;
9903                 }
9904                 ret = 0;
9905         }
9906
9907         if (!list_empty(&root->fs_info->recow_ebs)) {
9908                 error("transid errors in file system");
9909                 ret = 1;
9910                 err |= !!ret;
9911         }
9912 out:
9913         printf("found %llu bytes used, ",
9914                (unsigned long long)bytes_used);
9915         if (err)
9916                 printf("error(s) found\n");
9917         else
9918                 printf("no error found\n");
9919         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9920         printf("total tree bytes: %llu\n",
9921                (unsigned long long)total_btree_bytes);
9922         printf("total fs tree bytes: %llu\n",
9923                (unsigned long long)total_fs_tree_bytes);
9924         printf("total extent tree bytes: %llu\n",
9925                (unsigned long long)total_extent_tree_bytes);
9926         printf("btree space waste bytes: %llu\n",
9927                (unsigned long long)btree_space_waste);
9928         printf("file data blocks allocated: %llu\n referenced %llu\n",
9929                 (unsigned long long)data_bytes_allocated,
9930                 (unsigned long long)data_bytes_referenced);
9931
9932         free_qgroup_counts();
9933         free_root_recs_tree(&root_cache);
9934 close_out:
9935         close_ctree(root);
9936 err_out:
9937         if (ctx.progress_enabled)
9938                 task_deinit(ctx.info);
9939
9940         return err;
9941 }