btrfs-progs: check: Move lowmem check code to its own check/lowmem.[ch]
[platform/upstream/btrfs-progs.git] / check / main.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46 #include "check/original.h"
47 #include "check/lowmem.h"
48 #include "check/common.h"
49
/*
 * Phases of the check run that the progress indicator can report.
 * print_status_check() uses these values as indexes into its label table.
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	/* No phase to report; this entry has to stay the last element. */
	TASK_NOTHING,
};
56
57 struct task_ctx {
58         int progress_enabled;
59         enum task_position tp;
60
61         struct task_info *info;
62 };
63
64 u64 bytes_used = 0;
65 u64 total_csum_bytes = 0;
66 u64 total_btree_bytes = 0;
67 u64 total_fs_tree_bytes = 0;
68 u64 total_extent_tree_bytes = 0;
69 u64 btree_space_waste = 0;
70 u64 data_bytes_allocated = 0;
71 u64 data_bytes_referenced = 0;
72 LIST_HEAD(duplicate_extents);
73 LIST_HEAD(delete_items);
74 int no_holes = 0;
75 int init_extent_tree = 0;
76 int check_data_csum = 0;
77 struct btrfs_fs_info *global_info;
78 struct task_ctx ctx = { 0 };
79 struct cache_tree *roots_info_cache = NULL;
80
/* Check implementation selected for this run. */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	/* Result of parse_check_mode() for a string it does not recognise. */
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode currently in effect; starts out as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
89
90 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
91 {
92         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
93         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
94         struct data_backref *back1 = to_data_backref(ext1);
95         struct data_backref *back2 = to_data_backref(ext2);
96
97         WARN_ON(!ext1->is_data);
98         WARN_ON(!ext2->is_data);
99
100         /* parent and root are a union, so this covers both */
101         if (back1->parent > back2->parent)
102                 return 1;
103         if (back1->parent < back2->parent)
104                 return -1;
105
106         /* This is a full backref and the parents match. */
107         if (back1->node.full_backref)
108                 return 0;
109
110         if (back1->owner > back2->owner)
111                 return 1;
112         if (back1->owner < back2->owner)
113                 return -1;
114
115         if (back1->offset > back2->offset)
116                 return 1;
117         if (back1->offset < back2->offset)
118                 return -1;
119
120         if (back1->found_ref && back2->found_ref) {
121                 if (back1->disk_bytenr > back2->disk_bytenr)
122                         return 1;
123                 if (back1->disk_bytenr < back2->disk_bytenr)
124                         return -1;
125
126                 if (back1->bytes > back2->bytes)
127                         return 1;
128                 if (back1->bytes < back2->bytes)
129                         return -1;
130         }
131
132         return 0;
133 }
134
135 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
136 {
137         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
138         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
139         struct tree_backref *back1 = to_tree_backref(ext1);
140         struct tree_backref *back2 = to_tree_backref(ext2);
141
142         WARN_ON(ext1->is_data);
143         WARN_ON(ext2->is_data);
144
145         /* parent and root are a union, so this covers both */
146         if (back1->parent > back2->parent)
147                 return 1;
148         if (back1->parent < back2->parent)
149                 return -1;
150
151         return 0;
152 }
153
154 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
155 {
156         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
157         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
158
159         if (ext1->is_data > ext2->is_data)
160                 return 1;
161
162         if (ext1->is_data < ext2->is_data)
163                 return -1;
164
165         if (ext1->full_backref > ext2->full_backref)
166                 return 1;
167         if (ext1->full_backref < ext2->full_backref)
168                 return -1;
169
170         if (ext1->is_data)
171                 return compare_data_backref(node1, node2);
172         else
173                 return compare_tree_backref(node1, node2);
174 }
175
176
177 static void *print_status_check(void *p)
178 {
179         struct task_ctx *priv = p;
180         const char work_indicator[] = { '.', 'o', 'O', 'o' };
181         uint32_t count = 0;
182         static char *task_position_string[] = {
183                 "checking extents",
184                 "checking free space cache",
185                 "checking fs roots",
186         };
187
188         task_period_start(priv->info, 1000 /* 1s */);
189
190         if (priv->tp == TASK_NOTHING)
191                 return NULL;
192
193         while (1) {
194                 printf("%s [%c]\r", task_position_string[priv->tp],
195                                 work_indicator[count % 4]);
196                 count++;
197                 fflush(stdout);
198                 task_period_wait(priv->info);
199         }
200         return NULL;
201 }
202
/*
 * Emit a newline on stdout and flush it.  @p is not used.
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	putchar('\n');
	fflush(stdout);

	return 0;
}
210
211 static enum btrfs_check_mode parse_check_mode(const char *str)
212 {
213         if (strcmp(str, "lowmem") == 0)
214                 return CHECK_MODE_LOWMEM;
215         if (strcmp(str, "orig") == 0)
216                 return CHECK_MODE_ORIGINAL;
217         if (strcmp(str, "original") == 0)
218                 return CHECK_MODE_ORIGINAL;
219
220         return CHECK_MODE_UNKNOWN;
221 }
222
223 /* Compatible function to allow reuse of old codes */
224 static u64 first_extent_gap(struct rb_root *holes)
225 {
226         struct file_extent_hole *hole;
227
228         if (RB_EMPTY_ROOT(holes))
229                 return (u64)-1;
230
231         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
232         return hole->start;
233 }
234
235 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
236 {
237         struct file_extent_hole *hole1;
238         struct file_extent_hole *hole2;
239
240         hole1 = rb_entry(node1, struct file_extent_hole, node);
241         hole2 = rb_entry(node2, struct file_extent_hole, node);
242
243         if (hole1->start > hole2->start)
244                 return -1;
245         if (hole1->start < hole2->start)
246                 return 1;
247         /* Now hole1->start == hole2->start */
248         if (hole1->len >= hole2->len)
249                 /*
250                  * Hole 1 will be merge center
251                  * Same hole will be merged later
252                  */
253                 return -1;
254         /* Hole 2 will be merge center */
255         return 1;
256 }
257
258 /*
259  * Add a hole to the record
260  *
261  * This will do hole merge for copy_file_extent_holes(),
262  * which will ensure there won't be continuous holes.
263  */
264 static int add_file_extent_hole(struct rb_root *holes,
265                                 u64 start, u64 len)
266 {
267         struct file_extent_hole *hole;
268         struct file_extent_hole *prev = NULL;
269         struct file_extent_hole *next = NULL;
270
271         hole = malloc(sizeof(*hole));
272         if (!hole)
273                 return -ENOMEM;
274         hole->start = start;
275         hole->len = len;
276         /* Since compare will not return 0, no -EEXIST will happen */
277         rb_insert(holes, &hole->node, compare_hole);
278
279         /* simple merge with previous hole */
280         if (rb_prev(&hole->node))
281                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
282                                 node);
283         if (prev && prev->start + prev->len >= hole->start) {
284                 hole->len = hole->start + hole->len - prev->start;
285                 hole->start = prev->start;
286                 rb_erase(&prev->node, holes);
287                 free(prev);
288                 prev = NULL;
289         }
290
291         /* iterate merge with next holes */
292         while (1) {
293                 if (!rb_next(&hole->node))
294                         break;
295                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
296                                         node);
297                 if (hole->start + hole->len >= next->start) {
298                         if (hole->start + hole->len <= next->start + next->len)
299                                 hole->len = next->start + next->len -
300                                             hole->start;
301                         rb_erase(&next->node, holes);
302                         free(next);
303                         next = NULL;
304                 } else
305                         break;
306         }
307         return 0;
308 }
309
310 static int compare_hole_range(struct rb_node *node, void *data)
311 {
312         struct file_extent_hole *hole;
313         u64 start;
314
315         hole = (struct file_extent_hole *)data;
316         start = hole->start;
317
318         hole = rb_entry(node, struct file_extent_hole, node);
319         if (start < hole->start)
320                 return -1;
321         if (start >= hole->start && start < hole->start + hole->len)
322                 return 0;
323         return 1;
324 }
325
326 /*
327  * Delete a hole in the record
328  *
329  * This will do the hole split and is much restrict than add.
330  */
331 static int del_file_extent_hole(struct rb_root *holes,
332                                 u64 start, u64 len)
333 {
334         struct file_extent_hole *hole;
335         struct file_extent_hole tmp;
336         u64 prev_start = 0;
337         u64 prev_len = 0;
338         u64 next_start = 0;
339         u64 next_len = 0;
340         struct rb_node *node;
341         int have_prev = 0;
342         int have_next = 0;
343         int ret = 0;
344
345         tmp.start = start;
346         tmp.len = len;
347         node = rb_search(holes, &tmp, compare_hole_range, NULL);
348         if (!node)
349                 return -EEXIST;
350         hole = rb_entry(node, struct file_extent_hole, node);
351         if (start + len > hole->start + hole->len)
352                 return -EEXIST;
353
354         /*
355          * Now there will be no overlap, delete the hole and re-add the
356          * split(s) if they exists.
357          */
358         if (start > hole->start) {
359                 prev_start = hole->start;
360                 prev_len = start - hole->start;
361                 have_prev = 1;
362         }
363         if (hole->start + hole->len > start + len) {
364                 next_start = start + len;
365                 next_len = hole->start + hole->len - start - len;
366                 have_next = 1;
367         }
368         rb_erase(node, holes);
369         free(hole);
370         if (have_prev) {
371                 ret = add_file_extent_hole(holes, prev_start, prev_len);
372                 if (ret < 0)
373                         return ret;
374         }
375         if (have_next) {
376                 ret = add_file_extent_hole(holes, next_start, next_len);
377                 if (ret < 0)
378                         return ret;
379         }
380         return 0;
381 }
382
383 static int copy_file_extent_holes(struct rb_root *dst,
384                                   struct rb_root *src)
385 {
386         struct file_extent_hole *hole;
387         struct rb_node *node;
388         int ret = 0;
389
390         node = rb_first(src);
391         while (node) {
392                 hole = rb_entry(node, struct file_extent_hole, node);
393                 ret = add_file_extent_hole(dst, hole->start, hole->len);
394                 if (ret)
395                         break;
396                 node = rb_next(node);
397         }
398         return ret;
399 }
400
401 static void free_file_extent_holes(struct rb_root *holes)
402 {
403         struct rb_node *node;
404         struct file_extent_hole *hole;
405
406         node = rb_first(holes);
407         while (node) {
408                 hole = rb_entry(node, struct file_extent_hole, node);
409                 rb_erase(node, holes);
410                 free(hole);
411                 node = rb_first(holes);
412         }
413 }
414
415 static void record_root_in_trans(struct btrfs_trans_handle *trans,
416                                  struct btrfs_root *root)
417 {
418         if (root->last_trans != trans->transid) {
419                 root->track_dirty = 1;
420                 root->last_trans = trans->transid;
421                 root->commit_root = root->node;
422                 extent_buffer_get(root->node);
423         }
424 }
425
426 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
427 {
428         struct device_record *rec1;
429         struct device_record *rec2;
430
431         rec1 = rb_entry(node1, struct device_record, node);
432         rec2 = rb_entry(node2, struct device_record, node);
433         if (rec1->devid > rec2->devid)
434                 return -1;
435         else if (rec1->devid < rec2->devid)
436                 return 1;
437         else
438                 return 0;
439 }
440
441 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
442 {
443         struct inode_record *rec;
444         struct inode_backref *backref;
445         struct inode_backref *orig;
446         struct inode_backref *tmp;
447         struct orphan_data_extent *src_orphan;
448         struct orphan_data_extent *dst_orphan;
449         struct rb_node *rb;
450         size_t size;
451         int ret;
452
453         rec = malloc(sizeof(*rec));
454         if (!rec)
455                 return ERR_PTR(-ENOMEM);
456         memcpy(rec, orig_rec, sizeof(*rec));
457         rec->refs = 1;
458         INIT_LIST_HEAD(&rec->backrefs);
459         INIT_LIST_HEAD(&rec->orphan_extents);
460         rec->holes = RB_ROOT;
461
462         list_for_each_entry(orig, &orig_rec->backrefs, list) {
463                 size = sizeof(*orig) + orig->namelen + 1;
464                 backref = malloc(size);
465                 if (!backref) {
466                         ret = -ENOMEM;
467                         goto cleanup;
468                 }
469                 memcpy(backref, orig, size);
470                 list_add_tail(&backref->list, &rec->backrefs);
471         }
472         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
473                 dst_orphan = malloc(sizeof(*dst_orphan));
474                 if (!dst_orphan) {
475                         ret = -ENOMEM;
476                         goto cleanup;
477                 }
478                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
479                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
480         }
481         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
482         if (ret < 0)
483                 goto cleanup_rb;
484
485         return rec;
486
487 cleanup_rb:
488         rb = rb_first(&rec->holes);
489         while (rb) {
490                 struct file_extent_hole *hole;
491
492                 hole = rb_entry(rb, struct file_extent_hole, node);
493                 rb = rb_next(rb);
494                 free(hole);
495         }
496
497 cleanup:
498         if (!list_empty(&rec->backrefs))
499                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
500                         list_del(&orig->list);
501                         free(orig);
502                 }
503
504         if (!list_empty(&rec->orphan_extents))
505                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
506                         list_del(&orig->list);
507                         free(orig);
508                 }
509
510         free(rec);
511
512         return ERR_PTR(ret);
513 }
514
515 static void print_orphan_data_extents(struct list_head *orphan_extents,
516                                       u64 objectid)
517 {
518         struct orphan_data_extent *orphan;
519
520         if (list_empty(orphan_extents))
521                 return;
522         printf("The following data extent is lost in tree %llu:\n",
523                objectid);
524         list_for_each_entry(orphan, orphan_extents, list) {
525                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
526                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
527                        orphan->disk_len);
528         }
529 }
530
531 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
532 {
533         u64 root_objectid = root->root_key.objectid;
534         int errors = rec->errors;
535
536         if (!errors)
537                 return;
538         /* reloc root errors, we print its corresponding fs root objectid*/
539         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
540                 root_objectid = root->root_key.offset;
541                 fprintf(stderr, "reloc");
542         }
543         fprintf(stderr, "root %llu inode %llu errors %x",
544                 (unsigned long long) root_objectid,
545                 (unsigned long long) rec->ino, rec->errors);
546
547         if (errors & I_ERR_NO_INODE_ITEM)
548                 fprintf(stderr, ", no inode item");
549         if (errors & I_ERR_NO_ORPHAN_ITEM)
550                 fprintf(stderr, ", no orphan item");
551         if (errors & I_ERR_DUP_INODE_ITEM)
552                 fprintf(stderr, ", dup inode item");
553         if (errors & I_ERR_DUP_DIR_INDEX)
554                 fprintf(stderr, ", dup dir index");
555         if (errors & I_ERR_ODD_DIR_ITEM)
556                 fprintf(stderr, ", odd dir item");
557         if (errors & I_ERR_ODD_FILE_EXTENT)
558                 fprintf(stderr, ", odd file extent");
559         if (errors & I_ERR_BAD_FILE_EXTENT)
560                 fprintf(stderr, ", bad file extent");
561         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
562                 fprintf(stderr, ", file extent overlap");
563         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
564                 fprintf(stderr, ", file extent discount");
565         if (errors & I_ERR_DIR_ISIZE_WRONG)
566                 fprintf(stderr, ", dir isize wrong");
567         if (errors & I_ERR_FILE_NBYTES_WRONG)
568                 fprintf(stderr, ", nbytes wrong");
569         if (errors & I_ERR_ODD_CSUM_ITEM)
570                 fprintf(stderr, ", odd csum item");
571         if (errors & I_ERR_SOME_CSUM_MISSING)
572                 fprintf(stderr, ", some csum missing");
573         if (errors & I_ERR_LINK_COUNT_WRONG)
574                 fprintf(stderr, ", link count wrong");
575         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
576                 fprintf(stderr, ", orphan file extent");
577         fprintf(stderr, "\n");
578         /* Print the orphan extents if needed */
579         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
580                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
581
582         /* Print the holes if needed */
583         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
584                 struct file_extent_hole *hole;
585                 struct rb_node *node;
586                 int found = 0;
587
588                 node = rb_first(&rec->holes);
589                 fprintf(stderr, "Found file extent holes:\n");
590                 while (node) {
591                         found = 1;
592                         hole = rb_entry(node, struct file_extent_hole, node);
593                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
594                                 hole->start, hole->len);
595                         node = rb_next(node);
596                 }
597                 if (!found)
598                         fprintf(stderr, "\tstart: 0, len: %llu\n",
599                                 round_up(rec->isize,
600                                          root->fs_info->sectorsize));
601         }
602 }
603
604 static void print_ref_error(int errors)
605 {
606         if (errors & REF_ERR_NO_DIR_ITEM)
607                 fprintf(stderr, ", no dir item");
608         if (errors & REF_ERR_NO_DIR_INDEX)
609                 fprintf(stderr, ", no dir index");
610         if (errors & REF_ERR_NO_INODE_REF)
611                 fprintf(stderr, ", no inode ref");
612         if (errors & REF_ERR_DUP_DIR_ITEM)
613                 fprintf(stderr, ", dup dir item");
614         if (errors & REF_ERR_DUP_DIR_INDEX)
615                 fprintf(stderr, ", dup dir index");
616         if (errors & REF_ERR_DUP_INODE_REF)
617                 fprintf(stderr, ", dup inode ref");
618         if (errors & REF_ERR_INDEX_UNMATCH)
619                 fprintf(stderr, ", index mismatch");
620         if (errors & REF_ERR_FILETYPE_UNMATCH)
621                 fprintf(stderr, ", filetype mismatch");
622         if (errors & REF_ERR_NAME_TOO_LONG)
623                 fprintf(stderr, ", name too long");
624         if (errors & REF_ERR_NO_ROOT_REF)
625                 fprintf(stderr, ", no root ref");
626         if (errors & REF_ERR_NO_ROOT_BACKREF)
627                 fprintf(stderr, ", no root backref");
628         if (errors & REF_ERR_DUP_ROOT_REF)
629                 fprintf(stderr, ", dup root ref");
630         if (errors & REF_ERR_DUP_ROOT_BACKREF)
631                 fprintf(stderr, ", dup root backref");
632         fprintf(stderr, "\n");
633 }
634
635 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
636                                           u64 ino, int mod)
637 {
638         struct ptr_node *node;
639         struct cache_extent *cache;
640         struct inode_record *rec = NULL;
641         int ret;
642
643         cache = lookup_cache_extent(inode_cache, ino, 1);
644         if (cache) {
645                 node = container_of(cache, struct ptr_node, cache);
646                 rec = node->data;
647                 if (mod && rec->refs > 1) {
648                         node->data = clone_inode_rec(rec);
649                         if (IS_ERR(node->data))
650                                 return node->data;
651                         rec->refs--;
652                         rec = node->data;
653                 }
654         } else if (mod) {
655                 rec = calloc(1, sizeof(*rec));
656                 if (!rec)
657                         return ERR_PTR(-ENOMEM);
658                 rec->ino = ino;
659                 rec->extent_start = (u64)-1;
660                 rec->refs = 1;
661                 INIT_LIST_HEAD(&rec->backrefs);
662                 INIT_LIST_HEAD(&rec->orphan_extents);
663                 rec->holes = RB_ROOT;
664
665                 node = malloc(sizeof(*node));
666                 if (!node) {
667                         free(rec);
668                         return ERR_PTR(-ENOMEM);
669                 }
670                 node->cache.start = ino;
671                 node->cache.size = 1;
672                 node->data = rec;
673
674                 if (ino == BTRFS_FREE_INO_OBJECTID)
675                         rec->found_link = 1;
676
677                 ret = insert_cache_extent(inode_cache, &node->cache);
678                 if (ret)
679                         return ERR_PTR(-EEXIST);
680         }
681         return rec;
682 }
683
684 static void free_orphan_data_extents(struct list_head *orphan_extents)
685 {
686         struct orphan_data_extent *orphan;
687
688         while (!list_empty(orphan_extents)) {
689                 orphan = list_entry(orphan_extents->next,
690                                     struct orphan_data_extent, list);
691                 list_del(&orphan->list);
692                 free(orphan);
693         }
694 }
695
696 static void free_inode_rec(struct inode_record *rec)
697 {
698         struct inode_backref *backref;
699
700         if (--rec->refs > 0)
701                 return;
702
703         while (!list_empty(&rec->backrefs)) {
704                 backref = to_inode_backref(rec->backrefs.next);
705                 list_del(&backref->list);
706                 free(backref);
707         }
708         free_orphan_data_extents(&rec->orphan_extents);
709         free_file_extent_holes(&rec->holes);
710         free(rec);
711 }
712
713 static int can_free_inode_rec(struct inode_record *rec)
714 {
715         if (!rec->errors && rec->checked && rec->found_inode_item &&
716             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
717                 return 1;
718         return 0;
719 }
720
721 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
722                                  struct inode_record *rec)
723 {
724         struct cache_extent *cache;
725         struct inode_backref *tmp, *backref;
726         struct ptr_node *node;
727         u8 filetype;
728
729         if (!rec->found_inode_item)
730                 return;
731
732         filetype = imode_to_type(rec->imode);
733         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
734                 if (backref->found_dir_item && backref->found_dir_index) {
735                         if (backref->filetype != filetype)
736                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
737                         if (!backref->errors && backref->found_inode_ref &&
738                             rec->nlink == rec->found_link) {
739                                 list_del(&backref->list);
740                                 free(backref);
741                         }
742                 }
743         }
744
745         if (!rec->checked || rec->merging)
746                 return;
747
748         if (S_ISDIR(rec->imode)) {
749                 if (rec->found_size != rec->isize)
750                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
751                 if (rec->found_file_extent)
752                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
753         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
754                 if (rec->found_dir_item)
755                         rec->errors |= I_ERR_ODD_DIR_ITEM;
756                 if (rec->found_size != rec->nbytes)
757                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
758                 if (rec->nlink > 0 && !no_holes &&
759                     (rec->extent_end < rec->isize ||
760                      first_extent_gap(&rec->holes) < rec->isize))
761                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
762         }
763
764         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
765                 if (rec->found_csum_item && rec->nodatasum)
766                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
767                 if (rec->some_csum_missing && !rec->nodatasum)
768                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
769         }
770
771         BUG_ON(rec->refs != 1);
772         if (can_free_inode_rec(rec)) {
773                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
774                 node = container_of(cache, struct ptr_node, cache);
775                 BUG_ON(node->data != rec);
776                 remove_cache_extent(inode_cache, &node->cache);
777                 free(node);
778                 free_inode_rec(rec);
779         }
780 }
781
782 static int check_orphan_item(struct btrfs_root *root, u64 ino)
783 {
784         struct btrfs_path path;
785         struct btrfs_key key;
786         int ret;
787
788         key.objectid = BTRFS_ORPHAN_OBJECTID;
789         key.type = BTRFS_ORPHAN_ITEM_KEY;
790         key.offset = ino;
791
792         btrfs_init_path(&path);
793         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
794         btrfs_release_path(&path);
795         if (ret > 0)
796                 ret = -ENOENT;
797         return ret;
798 }
799
800 static int process_inode_item(struct extent_buffer *eb,
801                               int slot, struct btrfs_key *key,
802                               struct shared_node *active_node)
803 {
804         struct inode_record *rec;
805         struct btrfs_inode_item *item;
806
807         rec = active_node->current;
808         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
809         if (rec->found_inode_item) {
810                 rec->errors |= I_ERR_DUP_INODE_ITEM;
811                 return 1;
812         }
813         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
814         rec->nlink = btrfs_inode_nlink(eb, item);
815         rec->isize = btrfs_inode_size(eb, item);
816         rec->nbytes = btrfs_inode_nbytes(eb, item);
817         rec->imode = btrfs_inode_mode(eb, item);
818         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
819                 rec->nodatasum = 1;
820         rec->found_inode_item = 1;
821         if (rec->nlink == 0)
822                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
823         maybe_free_inode_rec(&active_node->inode_cache, rec);
824         return 0;
825 }
826
827 static struct inode_backref *get_inode_backref(struct inode_record *rec,
828                                                 const char *name,
829                                                 int namelen, u64 dir)
830 {
831         struct inode_backref *backref;
832
833         list_for_each_entry(backref, &rec->backrefs, list) {
834                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
835                         break;
836                 if (backref->dir != dir || backref->namelen != namelen)
837                         continue;
838                 if (memcmp(name, backref->name, namelen))
839                         continue;
840                 return backref;
841         }
842
843         backref = malloc(sizeof(*backref) + namelen + 1);
844         if (!backref)
845                 return NULL;
846         memset(backref, 0, sizeof(*backref));
847         backref->dir = dir;
848         backref->namelen = namelen;
849         memcpy(backref->name, name, namelen);
850         backref->name[namelen] = '\0';
851         list_add_tail(&backref->list, &rec->backrefs);
852         return backref;
853 }
854
855 static int add_inode_backref(struct cache_tree *inode_cache,
856                              u64 ino, u64 dir, u64 index,
857                              const char *name, int namelen,
858                              u8 filetype, u8 itemtype, int errors)
859 {
860         struct inode_record *rec;
861         struct inode_backref *backref;
862
863         rec = get_inode_rec(inode_cache, ino, 1);
864         BUG_ON(IS_ERR(rec));
865         backref = get_inode_backref(rec, name, namelen, dir);
866         BUG_ON(!backref);
867         if (errors)
868                 backref->errors |= errors;
869         if (itemtype == BTRFS_DIR_INDEX_KEY) {
870                 if (backref->found_dir_index)
871                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
872                 if (backref->found_inode_ref && backref->index != index)
873                         backref->errors |= REF_ERR_INDEX_UNMATCH;
874                 if (backref->found_dir_item && backref->filetype != filetype)
875                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
876
877                 backref->index = index;
878                 backref->filetype = filetype;
879                 backref->found_dir_index = 1;
880         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
881                 rec->found_link++;
882                 if (backref->found_dir_item)
883                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
884                 if (backref->found_dir_index && backref->filetype != filetype)
885                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
886
887                 backref->filetype = filetype;
888                 backref->found_dir_item = 1;
889         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
890                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
891                 if (backref->found_inode_ref)
892                         backref->errors |= REF_ERR_DUP_INODE_REF;
893                 if (backref->found_dir_index && backref->index != index)
894                         backref->errors |= REF_ERR_INDEX_UNMATCH;
895                 else
896                         backref->index = index;
897
898                 backref->ref_type = itemtype;
899                 backref->found_inode_ref = 1;
900         } else {
901                 BUG_ON(1);
902         }
903
904         maybe_free_inode_rec(inode_cache, rec);
905         return 0;
906 }
907
908 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
909                             struct cache_tree *dst_cache)
910 {
911         struct inode_backref *backref;
912         u32 dir_count = 0;
913         int ret = 0;
914
915         dst->merging = 1;
916         list_for_each_entry(backref, &src->backrefs, list) {
917                 if (backref->found_dir_index) {
918                         add_inode_backref(dst_cache, dst->ino, backref->dir,
919                                         backref->index, backref->name,
920                                         backref->namelen, backref->filetype,
921                                         BTRFS_DIR_INDEX_KEY, backref->errors);
922                 }
923                 if (backref->found_dir_item) {
924                         dir_count++;
925                         add_inode_backref(dst_cache, dst->ino,
926                                         backref->dir, 0, backref->name,
927                                         backref->namelen, backref->filetype,
928                                         BTRFS_DIR_ITEM_KEY, backref->errors);
929                 }
930                 if (backref->found_inode_ref) {
931                         add_inode_backref(dst_cache, dst->ino,
932                                         backref->dir, backref->index,
933                                         backref->name, backref->namelen, 0,
934                                         backref->ref_type, backref->errors);
935                 }
936         }
937
938         if (src->found_dir_item)
939                 dst->found_dir_item = 1;
940         if (src->found_file_extent)
941                 dst->found_file_extent = 1;
942         if (src->found_csum_item)
943                 dst->found_csum_item = 1;
944         if (src->some_csum_missing)
945                 dst->some_csum_missing = 1;
946         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
947                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
948                 if (ret < 0)
949                         return ret;
950         }
951
952         BUG_ON(src->found_link < dir_count);
953         dst->found_link += src->found_link - dir_count;
954         dst->found_size += src->found_size;
955         if (src->extent_start != (u64)-1) {
956                 if (dst->extent_start == (u64)-1) {
957                         dst->extent_start = src->extent_start;
958                         dst->extent_end = src->extent_end;
959                 } else {
960                         if (dst->extent_end > src->extent_start)
961                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
962                         else if (dst->extent_end < src->extent_start) {
963                                 ret = add_file_extent_hole(&dst->holes,
964                                         dst->extent_end,
965                                         src->extent_start - dst->extent_end);
966                         }
967                         if (dst->extent_end < src->extent_end)
968                                 dst->extent_end = src->extent_end;
969                 }
970         }
971
972         dst->errors |= src->errors;
973         if (src->found_inode_item) {
974                 if (!dst->found_inode_item) {
975                         dst->nlink = src->nlink;
976                         dst->isize = src->isize;
977                         dst->nbytes = src->nbytes;
978                         dst->imode = src->imode;
979                         dst->nodatasum = src->nodatasum;
980                         dst->found_inode_item = 1;
981                 } else {
982                         dst->errors |= I_ERR_DUP_INODE_ITEM;
983                 }
984         }
985         dst->merging = 0;
986
987         return 0;
988 }
989
990 static int splice_shared_node(struct shared_node *src_node,
991                               struct shared_node *dst_node)
992 {
993         struct cache_extent *cache;
994         struct ptr_node *node, *ins;
995         struct cache_tree *src, *dst;
996         struct inode_record *rec, *conflict;
997         u64 current_ino = 0;
998         int splice = 0;
999         int ret;
1000
1001         if (--src_node->refs == 0)
1002                 splice = 1;
1003         if (src_node->current)
1004                 current_ino = src_node->current->ino;
1005
1006         src = &src_node->root_cache;
1007         dst = &dst_node->root_cache;
1008 again:
1009         cache = search_cache_extent(src, 0);
1010         while (cache) {
1011                 node = container_of(cache, struct ptr_node, cache);
1012                 rec = node->data;
1013                 cache = next_cache_extent(cache);
1014
1015                 if (splice) {
1016                         remove_cache_extent(src, &node->cache);
1017                         ins = node;
1018                 } else {
1019                         ins = malloc(sizeof(*ins));
1020                         BUG_ON(!ins);
1021                         ins->cache.start = node->cache.start;
1022                         ins->cache.size = node->cache.size;
1023                         ins->data = rec;
1024                         rec->refs++;
1025                 }
1026                 ret = insert_cache_extent(dst, &ins->cache);
1027                 if (ret == -EEXIST) {
1028                         conflict = get_inode_rec(dst, rec->ino, 1);
1029                         BUG_ON(IS_ERR(conflict));
1030                         merge_inode_recs(rec, conflict, dst);
1031                         if (rec->checked) {
1032                                 conflict->checked = 1;
1033                                 if (dst_node->current == conflict)
1034                                         dst_node->current = NULL;
1035                         }
1036                         maybe_free_inode_rec(dst, conflict);
1037                         free_inode_rec(rec);
1038                         free(ins);
1039                 } else {
1040                         BUG_ON(ret);
1041                 }
1042         }
1043
1044         if (src == &src_node->root_cache) {
1045                 src = &src_node->inode_cache;
1046                 dst = &dst_node->inode_cache;
1047                 goto again;
1048         }
1049
1050         if (current_ino > 0 && (!dst_node->current ||
1051             current_ino > dst_node->current->ino)) {
1052                 if (dst_node->current) {
1053                         dst_node->current->checked = 1;
1054                         maybe_free_inode_rec(dst, dst_node->current);
1055                 }
1056                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1057                 BUG_ON(IS_ERR(dst_node->current));
1058         }
1059         return 0;
1060 }
1061
1062 static void free_inode_ptr(struct cache_extent *cache)
1063 {
1064         struct ptr_node *node;
1065         struct inode_record *rec;
1066
1067         node = container_of(cache, struct ptr_node, cache);
1068         rec = node->data;
1069         free_inode_rec(rec);
1070         free(node);
1071 }
1072
1073 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1074
1075 static struct shared_node *find_shared_node(struct cache_tree *shared,
1076                                             u64 bytenr)
1077 {
1078         struct cache_extent *cache;
1079         struct shared_node *node;
1080
1081         cache = lookup_cache_extent(shared, bytenr, 1);
1082         if (cache) {
1083                 node = container_of(cache, struct shared_node, cache);
1084                 return node;
1085         }
1086         return NULL;
1087 }
1088
1089 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1090 {
1091         int ret;
1092         struct shared_node *node;
1093
1094         node = calloc(1, sizeof(*node));
1095         if (!node)
1096                 return -ENOMEM;
1097         node->cache.start = bytenr;
1098         node->cache.size = 1;
1099         cache_tree_init(&node->root_cache);
1100         cache_tree_init(&node->inode_cache);
1101         node->refs = refs;
1102
1103         ret = insert_cache_extent(shared, &node->cache);
1104
1105         return ret;
1106 }
1107
1108 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1109                              struct walk_control *wc, int level)
1110 {
1111         struct shared_node *node;
1112         struct shared_node *dest;
1113         int ret;
1114
1115         if (level == wc->active_node)
1116                 return 0;
1117
1118         BUG_ON(wc->active_node <= level);
1119         node = find_shared_node(&wc->shared, bytenr);
1120         if (!node) {
1121                 ret = add_shared_node(&wc->shared, bytenr, refs);
1122                 BUG_ON(ret);
1123                 node = find_shared_node(&wc->shared, bytenr);
1124                 wc->nodes[level] = node;
1125                 wc->active_node = level;
1126                 return 0;
1127         }
1128
1129         if (wc->root_level == wc->active_node &&
1130             btrfs_root_refs(&root->root_item) == 0) {
1131                 if (--node->refs == 0) {
1132                         free_inode_recs_tree(&node->root_cache);
1133                         free_inode_recs_tree(&node->inode_cache);
1134                         remove_cache_extent(&wc->shared, &node->cache);
1135                         free(node);
1136                 }
1137                 return 1;
1138         }
1139
1140         dest = wc->nodes[wc->active_node];
1141         splice_shared_node(node, dest);
1142         if (node->refs == 0) {
1143                 remove_cache_extent(&wc->shared, &node->cache);
1144                 free(node);
1145         }
1146         return 1;
1147 }
1148
1149 static int leave_shared_node(struct btrfs_root *root,
1150                              struct walk_control *wc, int level)
1151 {
1152         struct shared_node *node;
1153         struct shared_node *dest;
1154         int i;
1155
1156         if (level == wc->root_level)
1157                 return 0;
1158
1159         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1160                 if (wc->nodes[i])
1161                         break;
1162         }
1163         BUG_ON(i >= BTRFS_MAX_LEVEL);
1164
1165         node = wc->nodes[wc->active_node];
1166         wc->nodes[wc->active_node] = NULL;
1167         wc->active_node = i;
1168
1169         dest = wc->nodes[wc->active_node];
1170         if (wc->active_node < wc->root_level ||
1171             btrfs_root_refs(&root->root_item) > 0) {
1172                 BUG_ON(node->refs <= 1);
1173                 splice_shared_node(node, dest);
1174         } else {
1175                 BUG_ON(node->refs < 2);
1176                 node->refs--;
1177         }
1178         return 0;
1179 }
1180
1181 /*
1182  * Returns:
1183  * < 0 - on error
1184  * 1   - if the root with id child_root_id is a child of root parent_root_id
1185  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1186  *       has other root(s) as parent(s)
1187  * 2   - if the root child_root_id doesn't have any parent roots
1188  */
1189 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1190                          u64 child_root_id)
1191 {
1192         struct btrfs_path path;
1193         struct btrfs_key key;
1194         struct extent_buffer *leaf;
1195         int has_parent = 0;
1196         int ret;
1197
1198         btrfs_init_path(&path);
1199
1200         key.objectid = parent_root_id;
1201         key.type = BTRFS_ROOT_REF_KEY;
1202         key.offset = child_root_id;
1203         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1204                                 0, 0);
1205         if (ret < 0)
1206                 return ret;
1207         btrfs_release_path(&path);
1208         if (!ret)
1209                 return 1;
1210
1211         key.objectid = child_root_id;
1212         key.type = BTRFS_ROOT_BACKREF_KEY;
1213         key.offset = 0;
1214         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1215                                 0, 0);
1216         if (ret < 0)
1217                 goto out;
1218
1219         while (1) {
1220                 leaf = path.nodes[0];
1221                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1222                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1223                         if (ret)
1224                                 break;
1225                         leaf = path.nodes[0];
1226                 }
1227
1228                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1229                 if (key.objectid != child_root_id ||
1230                     key.type != BTRFS_ROOT_BACKREF_KEY)
1231                         break;
1232
1233                 has_parent = 1;
1234
1235                 if (key.offset == parent_root_id) {
1236                         btrfs_release_path(&path);
1237                         return 1;
1238                 }
1239
1240                 path.slots[0]++;
1241         }
1242 out:
1243         btrfs_release_path(&path);
1244         if (ret < 0)
1245                 return ret;
1246         return has_parent ? 0 : 2;
1247 }
1248
1249 static int process_dir_item(struct extent_buffer *eb,
1250                             int slot, struct btrfs_key *key,
1251                             struct shared_node *active_node)
1252 {
1253         u32 total;
1254         u32 cur = 0;
1255         u32 len;
1256         u32 name_len;
1257         u32 data_len;
1258         int error;
1259         int nritems = 0;
1260         u8 filetype;
1261         struct btrfs_dir_item *di;
1262         struct inode_record *rec;
1263         struct cache_tree *root_cache;
1264         struct cache_tree *inode_cache;
1265         struct btrfs_key location;
1266         char namebuf[BTRFS_NAME_LEN];
1267
1268         root_cache = &active_node->root_cache;
1269         inode_cache = &active_node->inode_cache;
1270         rec = active_node->current;
1271         rec->found_dir_item = 1;
1272
1273         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1274         total = btrfs_item_size_nr(eb, slot);
1275         while (cur < total) {
1276                 nritems++;
1277                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1278                 name_len = btrfs_dir_name_len(eb, di);
1279                 data_len = btrfs_dir_data_len(eb, di);
1280                 filetype = btrfs_dir_type(eb, di);
1281
1282                 rec->found_size += name_len;
1283                 if (cur + sizeof(*di) + name_len > total ||
1284                     name_len > BTRFS_NAME_LEN) {
1285                         error = REF_ERR_NAME_TOO_LONG;
1286
1287                         if (cur + sizeof(*di) > total)
1288                                 break;
1289                         len = min_t(u32, total - cur - sizeof(*di),
1290                                     BTRFS_NAME_LEN);
1291                 } else {
1292                         len = name_len;
1293                         error = 0;
1294                 }
1295
1296                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1297
1298                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1299                     key->offset != btrfs_name_hash(namebuf, len)) {
1300                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1301                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1302                         key->objectid, key->offset, namebuf, len, filetype,
1303                         key->offset, btrfs_name_hash(namebuf, len));
1304                 }
1305
1306                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1307                         add_inode_backref(inode_cache, location.objectid,
1308                                           key->objectid, key->offset, namebuf,
1309                                           len, filetype, key->type, error);
1310                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1311                         add_inode_backref(root_cache, location.objectid,
1312                                           key->objectid, key->offset,
1313                                           namebuf, len, filetype,
1314                                           key->type, error);
1315                 } else {
1316                         fprintf(stderr,
1317                                 "unknown location type %d in DIR_ITEM[%llu %llu]\n",
1318                                 location.type, key->objectid, key->offset);
1319                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1320                                           key->objectid, key->offset, namebuf,
1321                                           len, filetype, key->type, error);
1322                 }
1323
1324                 len = sizeof(*di) + name_len + data_len;
1325                 di = (struct btrfs_dir_item *)((char *)di + len);
1326                 cur += len;
1327         }
1328         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1329                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1330
1331         return 0;
1332 }
1333
1334 static int process_inode_ref(struct extent_buffer *eb,
1335                              int slot, struct btrfs_key *key,
1336                              struct shared_node *active_node)
1337 {
1338         u32 total;
1339         u32 cur = 0;
1340         u32 len;
1341         u32 name_len;
1342         u64 index;
1343         int error;
1344         struct cache_tree *inode_cache;
1345         struct btrfs_inode_ref *ref;
1346         char namebuf[BTRFS_NAME_LEN];
1347
1348         inode_cache = &active_node->inode_cache;
1349
1350         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1351         total = btrfs_item_size_nr(eb, slot);
1352         while (cur < total) {
1353                 name_len = btrfs_inode_ref_name_len(eb, ref);
1354                 index = btrfs_inode_ref_index(eb, ref);
1355
1356                 /* inode_ref + namelen should not cross item boundary */
1357                 if (cur + sizeof(*ref) + name_len > total ||
1358                     name_len > BTRFS_NAME_LEN) {
1359                         if (total < cur + sizeof(*ref))
1360                                 break;
1361
1362                         /* Still try to read out the remaining part */
1363                         len = min_t(u32, total - cur - sizeof(*ref),
1364                                     BTRFS_NAME_LEN);
1365                         error = REF_ERR_NAME_TOO_LONG;
1366                 } else {
1367                         len = name_len;
1368                         error = 0;
1369                 }
1370
1371                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1372                 add_inode_backref(inode_cache, key->objectid, key->offset,
1373                                   index, namebuf, len, 0, key->type, error);
1374
1375                 len = sizeof(*ref) + name_len;
1376                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1377                 cur += len;
1378         }
1379         return 0;
1380 }
1381
1382 static int process_inode_extref(struct extent_buffer *eb,
1383                                 int slot, struct btrfs_key *key,
1384                                 struct shared_node *active_node)
1385 {
1386         u32 total;
1387         u32 cur = 0;
1388         u32 len;
1389         u32 name_len;
1390         u64 index;
1391         u64 parent;
1392         int error;
1393         struct cache_tree *inode_cache;
1394         struct btrfs_inode_extref *extref;
1395         char namebuf[BTRFS_NAME_LEN];
1396
1397         inode_cache = &active_node->inode_cache;
1398
1399         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1400         total = btrfs_item_size_nr(eb, slot);
1401         while (cur < total) {
1402                 name_len = btrfs_inode_extref_name_len(eb, extref);
1403                 index = btrfs_inode_extref_index(eb, extref);
1404                 parent = btrfs_inode_extref_parent(eb, extref);
1405                 if (name_len <= BTRFS_NAME_LEN) {
1406                         len = name_len;
1407                         error = 0;
1408                 } else {
1409                         len = BTRFS_NAME_LEN;
1410                         error = REF_ERR_NAME_TOO_LONG;
1411                 }
1412                 read_extent_buffer(eb, namebuf,
1413                                    (unsigned long)(extref + 1), len);
1414                 add_inode_backref(inode_cache, key->objectid, parent,
1415                                   index, namebuf, len, 0, key->type, error);
1416
1417                 len = sizeof(*extref) + name_len;
1418                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1419                 cur += len;
1420         }
1421         return 0;
1422
1423 }
1424
1425 static int process_file_extent(struct btrfs_root *root,
1426                                 struct extent_buffer *eb,
1427                                 int slot, struct btrfs_key *key,
1428                                 struct shared_node *active_node)
1429 {
1430         struct inode_record *rec;
1431         struct btrfs_file_extent_item *fi;
1432         u64 num_bytes = 0;
1433         u64 disk_bytenr = 0;
1434         u64 extent_offset = 0;
1435         u64 mask = root->fs_info->sectorsize - 1;
1436         int extent_type;
1437         int ret;
1438
1439         rec = active_node->current;
1440         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1441         rec->found_file_extent = 1;
1442
1443         if (rec->extent_start == (u64)-1) {
1444                 rec->extent_start = key->offset;
1445                 rec->extent_end = key->offset;
1446         }
1447
1448         if (rec->extent_end > key->offset)
1449                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1450         else if (rec->extent_end < key->offset) {
1451                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1452                                            key->offset - rec->extent_end);
1453                 if (ret < 0)
1454                         return ret;
1455         }
1456
1457         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1458         extent_type = btrfs_file_extent_type(eb, fi);
1459
1460         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1461                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1462                 if (num_bytes == 0)
1463                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1464                 rec->found_size += num_bytes;
1465                 num_bytes = (num_bytes + mask) & ~mask;
1466         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1467                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1468                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1469                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1470                 extent_offset = btrfs_file_extent_offset(eb, fi);
1471                 if (num_bytes == 0 || (num_bytes & mask))
1472                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1473                 if (num_bytes + extent_offset >
1474                     btrfs_file_extent_ram_bytes(eb, fi))
1475                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1476                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1477                     (btrfs_file_extent_compression(eb, fi) ||
1478                      btrfs_file_extent_encryption(eb, fi) ||
1479                      btrfs_file_extent_other_encoding(eb, fi)))
1480                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1481                 if (disk_bytenr > 0)
1482                         rec->found_size += num_bytes;
1483         } else {
1484                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1485         }
1486         rec->extent_end = key->offset + num_bytes;
1487
1488         /*
1489          * The data reloc tree will copy full extents into its inode and then
1490          * copy the corresponding csums.  Because the extent it copied could be
1491          * a preallocated extent that hasn't been written to yet there may be no
1492          * csums to copy, ergo we won't have csums for our file extent.  This is
1493          * ok so just don't bother checking csums if the inode belongs to the
1494          * data reloc tree.
1495          */
1496         if (disk_bytenr > 0 &&
1497             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1498                 u64 found;
1499                 if (btrfs_file_extent_compression(eb, fi))
1500                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1501                 else
1502                         disk_bytenr += extent_offset;
1503
1504                 ret = count_csum_range(root->fs_info, disk_bytenr, num_bytes,
1505                                        &found);
1506                 if (ret < 0)
1507                         return ret;
1508                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1509                         if (found > 0)
1510                                 rec->found_csum_item = 1;
1511                         if (found < num_bytes)
1512                                 rec->some_csum_missing = 1;
1513                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1514                         if (found > 0)
1515                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1516                 }
1517         }
1518         return 0;
1519 }
1520
1521 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1522                             struct walk_control *wc)
1523 {
1524         struct btrfs_key key;
1525         u32 nritems;
1526         int i;
1527         int ret = 0;
1528         struct cache_tree *inode_cache;
1529         struct shared_node *active_node;
1530
1531         if (wc->root_level == wc->active_node &&
1532             btrfs_root_refs(&root->root_item) == 0)
1533                 return 0;
1534
1535         active_node = wc->nodes[wc->active_node];
1536         inode_cache = &active_node->inode_cache;
1537         nritems = btrfs_header_nritems(eb);
1538         for (i = 0; i < nritems; i++) {
1539                 btrfs_item_key_to_cpu(eb, &key, i);
1540
1541                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1542                         continue;
1543                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1544                         continue;
1545
1546                 if (active_node->current == NULL ||
1547                     active_node->current->ino < key.objectid) {
1548                         if (active_node->current) {
1549                                 active_node->current->checked = 1;
1550                                 maybe_free_inode_rec(inode_cache,
1551                                                      active_node->current);
1552                         }
1553                         active_node->current = get_inode_rec(inode_cache,
1554                                                              key.objectid, 1);
1555                         BUG_ON(IS_ERR(active_node->current));
1556                 }
1557                 switch (key.type) {
1558                 case BTRFS_DIR_ITEM_KEY:
1559                 case BTRFS_DIR_INDEX_KEY:
1560                         ret = process_dir_item(eb, i, &key, active_node);
1561                         break;
1562                 case BTRFS_INODE_REF_KEY:
1563                         ret = process_inode_ref(eb, i, &key, active_node);
1564                         break;
1565                 case BTRFS_INODE_EXTREF_KEY:
1566                         ret = process_inode_extref(eb, i, &key, active_node);
1567                         break;
1568                 case BTRFS_INODE_ITEM_KEY:
1569                         ret = process_inode_item(eb, i, &key, active_node);
1570                         break;
1571                 case BTRFS_EXTENT_DATA_KEY:
1572                         ret = process_file_extent(root, eb, i, &key,
1573                                                   active_node);
1574                         break;
1575                 default:
1576                         break;
1577                 };
1578         }
1579         return ret;
1580 }
1581
/*
 * Walk down the tree from path->nodes[*level] towards a leaf.
 *
 * For every node visited, the child referenced by path->slots[*level] is
 * looked up, read from disk if it is not cached, sanity checked and then
 * installed in @path one level below.  When a leaf is reached it is handed
 * to process_one_leaf() and the walk stops.
 *
 * @nrefs caches one (bytenr, refs) pair per level so that the extent
 * reference count of a block is not looked up again when it was the last
 * block queried at that level.
 *
 * Before returning, path->slots[*level] is set to the item count of the
 * current node, so walk_up_tree() sees that node as fully visited.
 *
 * Returns 0 on success, a negative errno on failure, or the positive value
 * returned by enter_shared_node() for the starting node.
 */
static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
                          struct walk_control *wc, int *level,
                          struct node_refs *nrefs)
{
        enum btrfs_tree_block_status status;
        u64 bytenr;
        u64 ptr_gen;
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct extent_buffer *next;
        struct extent_buffer *cur;
        int ret, err = 0;
        u64 refs;

        WARN_ON(*level < 0);
        WARN_ON(*level >= BTRFS_MAX_LEVEL);

        /*
         * Get the reference count of the starting node, from the cache if it
         * is the block remembered for this level, otherwise from the extent
         * tree (and remember it).
         */
        if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
                refs = nrefs->refs[*level];
                ret = 0;
        } else {
                ret = btrfs_lookup_extent_info(NULL, root,
                                       path->nodes[*level]->start,
                                       *level, 1, &refs, NULL);
                if (ret < 0) {
                        err = ret;
                        goto out;
                }
                nrefs->bytenr[*level] = path->nodes[*level]->start;
                nrefs->refs[*level] = refs;
        }

        /*
         * The starting node has more than one reference.  A positive return
         * from enter_shared_node() ends the walk of this subtree right away
         * (presumably because it was already accounted for -- confirm
         * against enter_shared_node()).
         */
        if (refs > 1) {
                ret = enter_shared_node(root, path->nodes[*level]->start,
                                        refs, wc, *level);
                if (ret > 0) {
                        err = ret;
                        goto out;
                }
        }

        while (*level >= 0) {
                WARN_ON(*level < 0);
                WARN_ON(*level >= BTRFS_MAX_LEVEL);
                cur = path->nodes[*level];

                if (btrfs_header_level(cur) != *level)
                        WARN_ON(1);

                /* All pointers/items of this node have been visited */
                if (path->slots[*level] >= btrfs_header_nritems(cur))
                        break;
                if (*level == 0) {
                        ret = process_one_leaf(root, cur, wc);
                        if (ret < 0)
                                err = ret;
                        break;
                }
                bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
                ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);

                /* Same cached-or-lookup scheme as above, for the child block */
                if (bytenr == nrefs->bytenr[*level - 1]) {
                        refs = nrefs->refs[*level - 1];
                } else {
                        ret = btrfs_lookup_extent_info(NULL, root, bytenr,
                                        *level - 1, 1, &refs, NULL);
                        if (ret < 0) {
                                /*
                                 * A failed lookup is not fatal here: the
                                 * child is simply treated as not shared.
                                 */
                                refs = 0;
                        } else {
                                nrefs->bytenr[*level - 1] = bytenr;
                                nrefs->refs[*level - 1] = refs;
                        }
                }

                /* Shared child: a positive return means skip this pointer */
                if (refs > 1) {
                        ret = enter_shared_node(root, bytenr, refs,
                                                wc, *level - 1);
                        if (ret > 0) {
                                path->slots[*level]++;
                                continue;
                        }
                }

                /* Prefer an up-to-date cached buffer, else read from disk */
                next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
                if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
                        free_extent_buffer(next);
                        reada_walk_down(root, cur, path->slots[*level]);
                        next = read_tree_block(root->fs_info, bytenr, ptr_gen);
                        if (!extent_buffer_uptodate(next)) {
                                struct btrfs_key node_key;

                                /*
                                 * The child could not be read: record a
                                 * corrupt extent for the parent node and give
                                 * up with -EIO.
                                 */
                                btrfs_node_key_to_cpu(path->nodes[*level],
                                                      &node_key,
                                                      path->slots[*level]);
                                btrfs_add_corrupt_extent_record(root->fs_info,
                                                &node_key,
                                                path->nodes[*level]->start,
                                                root->fs_info->nodesize,
                                                *level);
                                err = -EIO;
                                goto out;
                        }
                }

                /* Cross-check the child against its parent pointer */
                ret = check_child_node(cur, path->slots[*level], next);
                if (ret) {
                        free_extent_buffer(next);
                        err = ret;
                        goto out;
                }

                /* Validate the content of the child block itself */
                if (btrfs_is_leaf(next))
                        status = btrfs_check_leaf(root, NULL, next);
                else
                        status = btrfs_check_node(root, NULL, next);
                if (status != BTRFS_TREE_BLOCK_CLEAN) {
                        free_extent_buffer(next);
                        err = -EIO;
                        goto out;
                }

                /*
                 * Step down: replace whatever buffer the path held at the
                 * lower level with the child and start at its first slot.
                 */
                *level = *level - 1;
                free_extent_buffer(path->nodes[*level]);
                path->nodes[*level] = next;
                path->slots[*level] = 0;
        }
out:
        /* Mark the current node as exhausted for the subsequent walk up */
        path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
        return err;
}
1710
1711 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1712                         struct walk_control *wc, int *level)
1713 {
1714         int i;
1715         struct extent_buffer *leaf;
1716
1717         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1718                 leaf = path->nodes[i];
1719                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1720                         path->slots[i]++;
1721                         *level = i;
1722                         return 0;
1723                 } else {
1724                         free_extent_buffer(path->nodes[*level]);
1725                         path->nodes[*level] = NULL;
1726                         BUG_ON(*level > wc->active_node);
1727                         if (*level == wc->active_node)
1728                                 leave_shared_node(root, wc, *level);
1729                         *level = i + 1;
1730                 }
1731         }
1732         return 1;
1733 }
1734
1735 static int check_root_dir(struct inode_record *rec)
1736 {
1737         struct inode_backref *backref;
1738         int ret = -1;
1739
1740         if (!rec->found_inode_item || rec->errors)
1741                 goto out;
1742         if (rec->nlink != 1 || rec->found_link != 0)
1743                 goto out;
1744         if (list_empty(&rec->backrefs))
1745                 goto out;
1746         backref = to_inode_backref(rec->backrefs.next);
1747         if (!backref->found_inode_ref)
1748                 goto out;
1749         if (backref->index != 0 || backref->namelen != 2 ||
1750             memcmp(backref->name, "..", 2))
1751                 goto out;
1752         if (backref->found_dir_index || backref->found_dir_item)
1753                 goto out;
1754         ret = 0;
1755 out:
1756         return ret;
1757 }
1758
1759 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1760                               struct btrfs_root *root, struct btrfs_path *path,
1761                               struct inode_record *rec)
1762 {
1763         struct btrfs_inode_item *ei;
1764         struct btrfs_key key;
1765         int ret;
1766
1767         key.objectid = rec->ino;
1768         key.type = BTRFS_INODE_ITEM_KEY;
1769         key.offset = (u64)-1;
1770
1771         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1772         if (ret < 0)
1773                 goto out;
1774         if (ret) {
1775                 if (!path->slots[0]) {
1776                         ret = -ENOENT;
1777                         goto out;
1778                 }
1779                 path->slots[0]--;
1780                 ret = 0;
1781         }
1782         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1783         if (key.objectid != rec->ino) {
1784                 ret = -ENOENT;
1785                 goto out;
1786         }
1787
1788         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1789                             struct btrfs_inode_item);
1790         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1791         btrfs_mark_buffer_dirty(path->nodes[0]);
1792         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1793         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
1794                root->root_key.objectid);
1795 out:
1796         btrfs_release_path(path);
1797         return ret;
1798 }
1799
1800 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1801                                     struct btrfs_root *root,
1802                                     struct btrfs_path *path,
1803                                     struct inode_record *rec)
1804 {
1805         int ret;
1806
1807         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
1808         btrfs_release_path(path);
1809         if (!ret)
1810                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1811         return ret;
1812 }
1813
1814 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
1815                                struct btrfs_root *root,
1816                                struct btrfs_path *path,
1817                                struct inode_record *rec)
1818 {
1819         struct btrfs_inode_item *ei;
1820         struct btrfs_key key;
1821         int ret = 0;
1822
1823         key.objectid = rec->ino;
1824         key.type = BTRFS_INODE_ITEM_KEY;
1825         key.offset = 0;
1826
1827         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1828         if (ret) {
1829                 if (ret > 0)
1830                         ret = -ENOENT;
1831                 goto out;
1832         }
1833
1834         /* Since ret == 0, no need to check anything */
1835         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1836                             struct btrfs_inode_item);
1837         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
1838         btrfs_mark_buffer_dirty(path->nodes[0]);
1839         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
1840         printf("reset nbytes for ino %llu root %llu\n",
1841                rec->ino, root->root_key.objectid);
1842 out:
1843         btrfs_release_path(path);
1844         return ret;
1845 }
1846
1847 static int add_missing_dir_index(struct btrfs_root *root,
1848                                  struct cache_tree *inode_cache,
1849                                  struct inode_record *rec,
1850                                  struct inode_backref *backref)
1851 {
1852         struct btrfs_path path;
1853         struct btrfs_trans_handle *trans;
1854         struct btrfs_dir_item *dir_item;
1855         struct extent_buffer *leaf;
1856         struct btrfs_key key;
1857         struct btrfs_disk_key disk_key;
1858         struct inode_record *dir_rec;
1859         unsigned long name_ptr;
1860         u32 data_size = sizeof(*dir_item) + backref->namelen;
1861         int ret;
1862
1863         trans = btrfs_start_transaction(root, 1);
1864         if (IS_ERR(trans))
1865                 return PTR_ERR(trans);
1866
1867         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
1868                 (unsigned long long)rec->ino);
1869
1870         btrfs_init_path(&path);
1871         key.objectid = backref->dir;
1872         key.type = BTRFS_DIR_INDEX_KEY;
1873         key.offset = backref->index;
1874         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
1875         BUG_ON(ret);
1876
1877         leaf = path.nodes[0];
1878         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
1879
1880         disk_key.objectid = cpu_to_le64(rec->ino);
1881         disk_key.type = BTRFS_INODE_ITEM_KEY;
1882         disk_key.offset = 0;
1883
1884         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
1885         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
1886         btrfs_set_dir_data_len(leaf, dir_item, 0);
1887         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
1888         name_ptr = (unsigned long)(dir_item + 1);
1889         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
1890         btrfs_mark_buffer_dirty(leaf);
1891         btrfs_release_path(&path);
1892         btrfs_commit_transaction(trans, root);
1893
1894         backref->found_dir_index = 1;
1895         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
1896         BUG_ON(IS_ERR(dir_rec));
1897         if (!dir_rec)
1898                 return 0;
1899         dir_rec->found_size += backref->namelen;
1900         if (dir_rec->found_size == dir_rec->isize &&
1901             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
1902                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1903         if (dir_rec->found_size != dir_rec->isize)
1904                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1905
1906         return 0;
1907 }
1908
1909 static int delete_dir_index(struct btrfs_root *root,
1910                             struct inode_backref *backref)
1911 {
1912         struct btrfs_trans_handle *trans;
1913         struct btrfs_dir_item *di;
1914         struct btrfs_path path;
1915         int ret = 0;
1916
1917         trans = btrfs_start_transaction(root, 1);
1918         if (IS_ERR(trans))
1919                 return PTR_ERR(trans);
1920
1921         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
1922                 (unsigned long long)backref->dir,
1923                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
1924                 (unsigned long long)root->objectid);
1925
1926         btrfs_init_path(&path);
1927         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
1928                                     backref->name, backref->namelen,
1929                                     backref->index, -1);
1930         if (IS_ERR(di)) {
1931                 ret = PTR_ERR(di);
1932                 btrfs_release_path(&path);
1933                 btrfs_commit_transaction(trans, root);
1934                 if (ret == -ENOENT)
1935                         return 0;
1936                 return ret;
1937         }
1938
1939         if (!di)
1940                 ret = btrfs_del_item(trans, root, &path);
1941         else
1942                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
1943         BUG_ON(ret);
1944         btrfs_release_path(&path);
1945         btrfs_commit_transaction(trans, root);
1946         return ret;
1947 }
1948
1949 static int create_inode_item(struct btrfs_root *root,
1950                              struct inode_record *rec, int root_dir)
1951 {
1952         struct btrfs_trans_handle *trans;
1953         u64 nlink = 0;
1954         u32 mode = 0;
1955         u64 size = 0;
1956         int ret;
1957
1958         trans = btrfs_start_transaction(root, 1);
1959         if (IS_ERR(trans)) {
1960                 ret = PTR_ERR(trans);
1961                 return ret;
1962         }
1963
1964         nlink = root_dir ? 1 : rec->found_link;
1965         if (rec->found_dir_item) {
1966                 if (rec->found_file_extent)
1967                         fprintf(stderr, "root %llu inode %llu has both a dir "
1968                                 "item and extents, unsure if it is a dir or a "
1969                                 "regular file so setting it as a directory\n",
1970                                 (unsigned long long)root->objectid,
1971                                 (unsigned long long)rec->ino);
1972                 mode = S_IFDIR | 0755;
1973                 size = rec->found_size;
1974         } else if (!rec->found_dir_item) {
1975                 size = rec->extent_end;
1976                 mode =  S_IFREG | 0755;
1977         }
1978
1979         ret = insert_inode_item(trans, root, rec->ino, size, rec->nbytes,
1980                                   nlink, mode);
1981         btrfs_commit_transaction(trans, root);
1982         return 0;
1983 }
1984
/*
 * Try to repair the inconsistent backrefs (inode ref / dir item / dir index)
 * recorded for inode @rec.
 *
 * @delete selects the kind of repair: when non-zero only bad dir index items
 * are deleted, when zero only missing items are added (inode item, dir
 * index, dir item/index pair).
 *
 * Backrefs that were deleted, or that became fully consistent, are removed
 * from rec->backrefs and freed.
 *
 * Returns the non-zero value of a failing helper, otherwise the number of
 * repairs performed (which may be 0).
 */
static int repair_inode_backrefs(struct btrfs_root *root,
                                 struct inode_record *rec,
                                 struct cache_tree *inode_cache,
                                 int delete)
{
        struct inode_backref *tmp, *backref;
        u64 root_dirid = btrfs_root_dirid(&root->root_item);
        int ret = 0;
        int repaired = 0;

        list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
                /* The root directory lacks its inode item: recreate it */
                if (!delete && rec->ino == root_dirid) {
                        if (!rec->found_inode_item) {
                                ret = create_inode_item(root, rec, 1);
                                if (ret)
                                        break;
                                repaired++;
                        }
                }

                /* Index 0 for root dir's are special, don't mess with it */
                if (rec->ino == root_dirid && backref->index == 0)
                        continue;

                /*
                 * Delete pass: drop a dir index that has no inode ref, or
                 * whose index does not match the one of the inode ref.
                 */
                if (delete &&
                    ((backref->found_dir_index && !backref->found_inode_ref) ||
                     (backref->found_dir_index && backref->found_inode_ref &&
                      (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
                        ret = delete_dir_index(root, backref);
                        if (ret)
                                break;
                        repaired++;
                        list_del(&backref->list);
                        free(backref);
                        continue;
                }

                /* Dir item and inode ref exist, only the dir index is missing */
                if (!delete && !backref->found_dir_index &&
                    backref->found_dir_item && backref->found_inode_ref) {
                        ret = add_missing_dir_index(root, inode_cache, rec,
                                                    backref);
                        if (ret)
                                break;
                        repaired++;
                        /* The backref is complete and error free now, drop it */
                        if (backref->found_dir_item &&
                            backref->found_dir_index) {
                                if (!backref->errors &&
                                    backref->found_inode_ref) {
                                        list_del(&backref->list);
                                        free(backref);
                                        continue;
                                }
                        }
                }

                /* Only the inode ref exists: add the dir item/index pair */
                if (!delete && (!backref->found_dir_index &&
                                !backref->found_dir_item &&
                                backref->found_inode_ref)) {
                        struct btrfs_trans_handle *trans;
                        struct btrfs_key location;

                        ret = check_dir_conflict(root, backref->name,
                                                 backref->namelen,
                                                 backref->dir,
                                                 backref->index);
                        if (ret) {
                                /*
                                 * let nlink fixing routine to handle it,
                                 * which can do it better.
                                 */
                                ret = 0;
                                break;
                        }
                        location.objectid = rec->ino;
                        location.type = BTRFS_INODE_ITEM_KEY;
                        location.offset = 0;

                        trans = btrfs_start_transaction(root, 1);
                        if (IS_ERR(trans)) {
                                ret = PTR_ERR(trans);
                                break;
                        }
                        fprintf(stderr, "adding missing dir index/item pair "
                                "for inode %llu\n",
                                (unsigned long long)rec->ino);
                        ret = btrfs_insert_dir_item(trans, root, backref->name,
                                                    backref->namelen,
                                                    backref->dir, &location,
                                                    imode_to_type(rec->imode),
                                                    backref->index);
                        BUG_ON(ret);
                        btrfs_commit_transaction(trans, root);
                        repaired++;
                }

                /*
                 * The backref is complete and consistent but the inode item
                 * itself is missing: recreate it.
                 */
                if (!delete && (backref->found_inode_ref &&
                                backref->found_dir_index &&
                                backref->found_dir_item &&
                                !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
                                !rec->found_inode_item)) {
                        ret = create_inode_item(root, rec, 0);
                        if (ret)
                                break;
                        repaired++;
                }

        }
        return ret ? ret : repaired;
}
2094
2095 /*
2096  * To determine the file type for nlink/inode_item repair
2097  *
2098  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2099  * Return -ENOENT if file type is not found.
2100  */
2101 static int find_file_type(struct inode_record *rec, u8 *type)
2102 {
2103         struct inode_backref *backref;
2104
2105         /* For inode item recovered case */
2106         if (rec->found_inode_item) {
2107                 *type = imode_to_type(rec->imode);
2108                 return 0;
2109         }
2110
2111         list_for_each_entry(backref, &rec->backrefs, list) {
2112                 if (backref->found_dir_index || backref->found_dir_item) {
2113                         *type = backref->filetype;
2114                         return 0;
2115                 }
2116         }
2117         return -ENOENT;
2118 }
2119
2120 /*
2121  * To determine the file name for nlink repair
2122  *
2123  * Return 0 if file name is found, set name and namelen.
2124  * Return -ENOENT if file name is not found.
2125  */
2126 static int find_file_name(struct inode_record *rec,
2127                           char *name, int *namelen)
2128 {
2129         struct inode_backref *backref;
2130
2131         list_for_each_entry(backref, &rec->backrefs, list) {
2132                 if (backref->found_dir_index || backref->found_dir_item ||
2133                     backref->found_inode_ref) {
2134                         memcpy(name, backref->name, backref->namelen);
2135                         *namelen = backref->namelen;
2136                         return 0;
2137                 }
2138         }
2139         return -ENOENT;
2140 }
2141
2142 /* Reset the nlink of the inode to the correct one */
2143 static int reset_nlink(struct btrfs_trans_handle *trans,
2144                        struct btrfs_root *root,
2145                        struct btrfs_path *path,
2146                        struct inode_record *rec)
2147 {
2148         struct inode_backref *backref;
2149         struct inode_backref *tmp;
2150         struct btrfs_key key;
2151         struct btrfs_inode_item *inode_item;
2152         int ret = 0;
2153
2154         /* We don't believe this either, reset it and iterate backref */
2155         rec->found_link = 0;
2156
2157         /* Remove all backref including the valid ones */
2158         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2159                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2160                                    backref->index, backref->name,
2161                                    backref->namelen, 0);
2162                 if (ret < 0)
2163                         goto out;
2164
2165                 /* remove invalid backref, so it won't be added back */
2166                 if (!(backref->found_dir_index &&
2167                       backref->found_dir_item &&
2168                       backref->found_inode_ref)) {
2169                         list_del(&backref->list);
2170                         free(backref);
2171                 } else {
2172                         rec->found_link++;
2173                 }
2174         }
2175
2176         /* Set nlink to 0 */
2177         key.objectid = rec->ino;
2178         key.type = BTRFS_INODE_ITEM_KEY;
2179         key.offset = 0;
2180         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2181         if (ret < 0)
2182                 goto out;
2183         if (ret > 0) {
2184                 ret = -ENOENT;
2185                 goto out;
2186         }
2187         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2188                                     struct btrfs_inode_item);
2189         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2190         btrfs_mark_buffer_dirty(path->nodes[0]);
2191         btrfs_release_path(path);
2192
2193         /*
2194          * Add back valid inode_ref/dir_item/dir_index,
2195          * add_link() will handle the nlink inc, so new nlink must be correct
2196          */
2197         list_for_each_entry(backref, &rec->backrefs, list) {
2198                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2199                                      backref->name, backref->namelen,
2200                                      backref->filetype, &backref->index, 1, 0);
2201                 if (ret < 0)
2202                         goto out;
2203         }
2204 out:
2205         btrfs_release_path(path);
2206         return ret;
2207 }
2208
2209 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2210                                struct btrfs_root *root,
2211                                struct btrfs_path *path,
2212                                struct inode_record *rec)
2213 {
2214         char namebuf[BTRFS_NAME_LEN] = {0};
2215         u8 type = 0;
2216         int namelen = 0;
2217         int name_recovered = 0;
2218         int type_recovered = 0;
2219         int ret = 0;
2220
2221         /*
2222          * Get file name and type first before these invalid inode ref
2223          * are deleted by remove_all_invalid_backref()
2224          */
2225         name_recovered = !find_file_name(rec, namebuf, &namelen);
2226         type_recovered = !find_file_type(rec, &type);
2227
2228         if (!name_recovered) {
2229                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2230                        rec->ino, rec->ino);
2231                 namelen = count_digits(rec->ino);
2232                 sprintf(namebuf, "%llu", rec->ino);
2233                 name_recovered = 1;
2234         }
2235         if (!type_recovered) {
2236                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2237                        rec->ino);
2238                 type = BTRFS_FT_REG_FILE;
2239                 type_recovered = 1;
2240         }
2241
2242         ret = reset_nlink(trans, root, path, rec);
2243         if (ret < 0) {
2244                 fprintf(stderr,
2245                         "Failed to reset nlink for inode %llu: %s\n",
2246                         rec->ino, strerror(-ret));
2247                 goto out;
2248         }
2249
2250         if (rec->found_link == 0) {
2251                 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
2252                                               namebuf, namelen, type,
2253                                               (u64 *)&rec->found_link);
2254                 if (ret)
2255                         goto out;
2256         }
2257         printf("Fixed the nlink of inode %llu\n", rec->ino);
2258 out:
2259         /*
2260          * Clear the flag anyway, or we will loop forever for the same inode
2261          * as it will not be removed from the bad inode list and the dead loop
2262          * happens.
2263          */
2264         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2265         btrfs_release_path(path);
2266         return ret;
2267 }
2268
2269 /*
2270  * Check if there is any normal(reg or prealloc) file extent for given
2271  * ino.
2272  * This is used to determine the file type when neither its dir_index/item or
2273  * inode_item exists.
2274  *
2275  * This will *NOT* report error, if any error happens, just consider it does
2276  * not have any normal file extent.
2277  */
2278 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2279 {
2280         struct btrfs_path path;
2281         struct btrfs_key key;
2282         struct btrfs_key found_key;
2283         struct btrfs_file_extent_item *fi;
2284         u8 type;
2285         int ret = 0;
2286
2287         btrfs_init_path(&path);
2288         key.objectid = ino;
2289         key.type = BTRFS_EXTENT_DATA_KEY;
2290         key.offset = 0;
2291
2292         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2293         if (ret < 0) {
2294                 ret = 0;
2295                 goto out;
2296         }
2297         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2298                 ret = btrfs_next_leaf(root, &path);
2299                 if (ret) {
2300                         ret = 0;
2301                         goto out;
2302                 }
2303         }
2304         while (1) {
2305                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2306                                       path.slots[0]);
2307                 if (found_key.objectid != ino ||
2308                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2309                         break;
2310                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2311                                     struct btrfs_file_extent_item);
2312                 type = btrfs_file_extent_type(path.nodes[0], fi);
2313                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2314                         ret = 1;
2315                         goto out;
2316                 }
2317         }
2318 out:
2319         btrfs_release_path(&path);
2320         return ret;
2321 }
2322
2323 static u32 btrfs_type_to_imode(u8 type)
2324 {
2325         static u32 imode_by_btrfs_type[] = {
2326                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2327                 [BTRFS_FT_DIR]          = S_IFDIR,
2328                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2329                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2330                 [BTRFS_FT_FIFO]         = S_IFIFO,
2331                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2332                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2333         };
2334
2335         return imode_by_btrfs_type[(type)];
2336 }
2337
2338 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2339                                 struct btrfs_root *root,
2340                                 struct btrfs_path *path,
2341                                 struct inode_record *rec)
2342 {
2343         u8 filetype;
2344         u32 mode = 0700;
2345         int type_recovered = 0;
2346         int ret = 0;
2347
2348         printf("Trying to rebuild inode:%llu\n", rec->ino);
2349
2350         type_recovered = !find_file_type(rec, &filetype);
2351
2352         /*
2353          * Try to determine inode type if type not found.
2354          *
2355          * For found regular file extent, it must be FILE.
2356          * For found dir_item/index, it must be DIR.
2357          *
2358          * For undetermined one, use FILE as fallback.
2359          *
2360          * TODO:
2361          * 1. If found backref(inode_index/item is already handled) to it,
2362          *    it must be DIR.
2363          *    Need new inode-inode ref structure to allow search for that.
2364          */
2365         if (!type_recovered) {
2366                 if (rec->found_file_extent &&
2367                     find_normal_file_extent(root, rec->ino)) {
2368                         type_recovered = 1;
2369                         filetype = BTRFS_FT_REG_FILE;
2370                 } else if (rec->found_dir_item) {
2371                         type_recovered = 1;
2372                         filetype = BTRFS_FT_DIR;
2373                 } else if (!list_empty(&rec->orphan_extents)) {
2374                         type_recovered = 1;
2375                         filetype = BTRFS_FT_REG_FILE;
2376                 } else{
2377                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2378                                rec->ino);
2379                         type_recovered = 1;
2380                         filetype = BTRFS_FT_REG_FILE;
2381                 }
2382         }
2383
2384         ret = btrfs_new_inode(trans, root, rec->ino,
2385                               mode | btrfs_type_to_imode(filetype));
2386         if (ret < 0)
2387                 goto out;
2388
2389         /*
2390          * Here inode rebuild is done, we only rebuild the inode item,
2391          * don't repair the nlink(like move to lost+found).
2392          * That is the job of nlink repair.
2393          *
2394          * We just fill the record and return
2395          */
2396         rec->found_dir_item = 1;
2397         rec->imode = mode | btrfs_type_to_imode(filetype);
2398         rec->nlink = 0;
2399         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2400         /* Ensure the inode_nlinks repair function will be called */
2401         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2402 out:
2403         return ret;
2404 }
2405
2406 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2407                                       struct btrfs_root *root,
2408                                       struct btrfs_path *path,
2409                                       struct inode_record *rec)
2410 {
2411         struct orphan_data_extent *orphan;
2412         struct orphan_data_extent *tmp;
2413         int ret = 0;
2414
2415         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2416                 /*
2417                  * Check for conflicting file extents
2418                  *
2419                  * Here we don't know whether the extents is compressed or not,
2420                  * so we can only assume it not compressed nor data offset,
2421                  * and use its disk_len as extent length.
2422                  */
2423                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2424                                        orphan->offset, orphan->disk_len, 0);
2425                 btrfs_release_path(path);
2426                 if (ret < 0)
2427                         goto out;
2428                 if (!ret) {
2429                         fprintf(stderr,
2430                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2431                                 orphan->disk_bytenr, orphan->disk_len);
2432                         ret = btrfs_free_extent(trans,
2433                                         root->fs_info->extent_root,
2434                                         orphan->disk_bytenr, orphan->disk_len,
2435                                         0, root->objectid, orphan->objectid,
2436                                         orphan->offset);
2437                         if (ret < 0)
2438                                 goto out;
2439                 }
2440                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2441                                 orphan->offset, orphan->disk_bytenr,
2442                                 orphan->disk_len, orphan->disk_len);
2443                 if (ret < 0)
2444                         goto out;
2445
2446                 /* Update file size info */
2447                 rec->found_size += orphan->disk_len;
2448                 if (rec->found_size == rec->nbytes)
2449                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2450
2451                 /* Update the file extent hole info too */
2452                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2453                                            orphan->disk_len);
2454                 if (ret < 0)
2455                         goto out;
2456                 if (RB_EMPTY_ROOT(&rec->holes))
2457                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2458
2459                 list_del(&orphan->list);
2460                 free(orphan);
2461         }
2462         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2463 out:
2464         return ret;
2465 }
2466
2467 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2468                                         struct btrfs_root *root,
2469                                         struct btrfs_path *path,
2470                                         struct inode_record *rec)
2471 {
2472         struct rb_node *node;
2473         struct file_extent_hole *hole;
2474         int found = 0;
2475         int ret = 0;
2476
2477         node = rb_first(&rec->holes);
2478
2479         while (node) {
2480                 found = 1;
2481                 hole = rb_entry(node, struct file_extent_hole, node);
2482                 ret = btrfs_punch_hole(trans, root, rec->ino,
2483                                        hole->start, hole->len);
2484                 if (ret < 0)
2485                         goto out;
2486                 ret = del_file_extent_hole(&rec->holes, hole->start,
2487                                            hole->len);
2488                 if (ret < 0)
2489                         goto out;
2490                 if (RB_EMPTY_ROOT(&rec->holes))
2491                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2492                 node = rb_first(&rec->holes);
2493         }
2494         /* special case for a file losing all its file extent */
2495         if (!found) {
2496                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2497                                        round_up(rec->isize,
2498                                                 root->fs_info->sectorsize));
2499                 if (ret < 0)
2500                         goto out;
2501         }
2502         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2503                rec->ino, root->objectid);
2504 out:
2505         return ret;
2506 }
2507
2508 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2509 {
2510         struct btrfs_trans_handle *trans;
2511         struct btrfs_path path;
2512         int ret = 0;
2513
2514         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2515                              I_ERR_NO_ORPHAN_ITEM |
2516                              I_ERR_LINK_COUNT_WRONG |
2517                              I_ERR_NO_INODE_ITEM |
2518                              I_ERR_FILE_EXTENT_ORPHAN |
2519                              I_ERR_FILE_EXTENT_DISCOUNT|
2520                              I_ERR_FILE_NBYTES_WRONG)))
2521                 return rec->errors;
2522
2523         /*
2524          * For nlink repair, it may create a dir and add link, so
2525          * 2 for parent(256)'s dir_index and dir_item
2526          * 2 for lost+found dir's inode_item and inode_ref
2527          * 1 for the new inode_ref of the file
2528          * 2 for lost+found dir's dir_index and dir_item for the file
2529          */
2530         trans = btrfs_start_transaction(root, 7);
2531         if (IS_ERR(trans))
2532                 return PTR_ERR(trans);
2533
2534         btrfs_init_path(&path);
2535         if (rec->errors & I_ERR_NO_INODE_ITEM)
2536                 ret = repair_inode_no_item(trans, root, &path, rec);
2537         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2538                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2539         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2540                 ret = repair_inode_discount_extent(trans, root, &path, rec);
2541         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2542                 ret = repair_inode_isize(trans, root, &path, rec);
2543         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2544                 ret = repair_inode_orphan_item(trans, root, &path, rec);
2545         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2546                 ret = repair_inode_nlinks(trans, root, &path, rec);
2547         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2548                 ret = repair_inode_nbytes(trans, root, &path, rec);
2549         btrfs_commit_transaction(trans, root);
2550         btrfs_release_path(&path);
2551         return ret;
2552 }
2553
2554 static int check_inode_recs(struct btrfs_root *root,
2555                             struct cache_tree *inode_cache)
2556 {
2557         struct cache_extent *cache;
2558         struct ptr_node *node;
2559         struct inode_record *rec;
2560         struct inode_backref *backref;
2561         int stage = 0;
2562         int ret = 0;
2563         int err = 0;
2564         u64 error = 0;
2565         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2566
2567         if (btrfs_root_refs(&root->root_item) == 0) {
2568                 if (!cache_tree_empty(inode_cache))
2569                         fprintf(stderr, "warning line %d\n", __LINE__);
2570                 return 0;
2571         }
2572
2573         /*
2574          * We need to repair backrefs first because we could change some of the
2575          * errors in the inode recs.
2576          *
2577          * We also need to go through and delete invalid backrefs first and then
2578          * add the correct ones second.  We do this because we may get EEXIST
2579          * when adding back the correct index because we hadn't yet deleted the
2580          * invalid index.
2581          *
2582          * For example, if we were missing a dir index then the directories
2583          * isize would be wrong, so if we fixed the isize to what we thought it
2584          * would be and then fixed the backref we'd still have a invalid fs, so
2585          * we need to add back the dir index and then check to see if the isize
2586          * is still wrong.
2587          */
2588         while (stage < 3) {
2589                 stage++;
2590                 if (stage == 3 && !err)
2591                         break;
2592
2593                 cache = search_cache_extent(inode_cache, 0);
2594                 while (repair && cache) {
2595                         node = container_of(cache, struct ptr_node, cache);
2596                         rec = node->data;
2597                         cache = next_cache_extent(cache);
2598
2599                         /* Need to free everything up and rescan */
2600                         if (stage == 3) {
2601                                 remove_cache_extent(inode_cache, &node->cache);
2602                                 free(node);
2603                                 free_inode_rec(rec);
2604                                 continue;
2605                         }
2606
2607                         if (list_empty(&rec->backrefs))
2608                                 continue;
2609
2610                         ret = repair_inode_backrefs(root, rec, inode_cache,
2611                                                     stage == 1);
2612                         if (ret < 0) {
2613                                 err = ret;
2614                                 stage = 2;
2615                                 break;
2616                         } if (ret > 0) {
2617                                 err = -EAGAIN;
2618                         }
2619                 }
2620         }
2621         if (err)
2622                 return err;
2623
2624         rec = get_inode_rec(inode_cache, root_dirid, 0);
2625         BUG_ON(IS_ERR(rec));
2626         if (rec) {
2627                 ret = check_root_dir(rec);
2628                 if (ret) {
2629                         fprintf(stderr, "root %llu root dir %llu error\n",
2630                                 (unsigned long long)root->root_key.objectid,
2631                                 (unsigned long long)root_dirid);
2632                         print_inode_error(root, rec);
2633                         error++;
2634                 }
2635         } else {
2636                 if (repair) {
2637                         struct btrfs_trans_handle *trans;
2638
2639                         trans = btrfs_start_transaction(root, 1);
2640                         if (IS_ERR(trans)) {
2641                                 err = PTR_ERR(trans);
2642                                 return err;
2643                         }
2644
2645                         fprintf(stderr,
2646                                 "root %llu missing its root dir, recreating\n",
2647                                 (unsigned long long)root->objectid);
2648
2649                         ret = btrfs_make_root_dir(trans, root, root_dirid);
2650                         BUG_ON(ret);
2651
2652                         btrfs_commit_transaction(trans, root);
2653                         return -EAGAIN;
2654                 }
2655
2656                 fprintf(stderr, "root %llu root dir %llu not found\n",
2657                         (unsigned long long)root->root_key.objectid,
2658                         (unsigned long long)root_dirid);
2659         }
2660
2661         while (1) {
2662                 cache = search_cache_extent(inode_cache, 0);
2663                 if (!cache)
2664                         break;
2665                 node = container_of(cache, struct ptr_node, cache);
2666                 rec = node->data;
2667                 remove_cache_extent(inode_cache, &node->cache);
2668                 free(node);
2669                 if (rec->ino == root_dirid ||
2670                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
2671                         free_inode_rec(rec);
2672                         continue;
2673                 }
2674
2675                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2676                         ret = check_orphan_item(root, rec->ino);
2677                         if (ret == 0)
2678                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2679                         if (can_free_inode_rec(rec)) {
2680                                 free_inode_rec(rec);
2681                                 continue;
2682                         }
2683                 }
2684
2685                 if (!rec->found_inode_item)
2686                         rec->errors |= I_ERR_NO_INODE_ITEM;
2687                 if (rec->found_link != rec->nlink)
2688                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2689                 if (repair) {
2690                         ret = try_repair_inode(root, rec);
2691                         if (ret == 0 && can_free_inode_rec(rec)) {
2692                                 free_inode_rec(rec);
2693                                 continue;
2694                         }
2695                         ret = 0;
2696                 }
2697
2698                 if (!(repair && ret == 0))
2699                         error++;
2700                 print_inode_error(root, rec);
2701                 list_for_each_entry(backref, &rec->backrefs, list) {
2702                         if (!backref->found_dir_item)
2703                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
2704                         if (!backref->found_dir_index)
2705                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
2706                         if (!backref->found_inode_ref)
2707                                 backref->errors |= REF_ERR_NO_INODE_REF;
2708                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
2709                                 " namelen %u name %s filetype %d errors %x",
2710                                 (unsigned long long)backref->dir,
2711                                 (unsigned long long)backref->index,
2712                                 backref->namelen, backref->name,
2713                                 backref->filetype, backref->errors);
2714                         print_ref_error(backref->errors);
2715                 }
2716                 free_inode_rec(rec);
2717         }
2718         return (error > 0) ? -1 : 0;
2719 }
2720
2721 static struct root_record *get_root_rec(struct cache_tree *root_cache,
2722                                         u64 objectid)
2723 {
2724         struct cache_extent *cache;
2725         struct root_record *rec = NULL;
2726         int ret;
2727
2728         cache = lookup_cache_extent(root_cache, objectid, 1);
2729         if (cache) {
2730                 rec = container_of(cache, struct root_record, cache);
2731         } else {
2732                 rec = calloc(1, sizeof(*rec));
2733                 if (!rec)
2734                         return ERR_PTR(-ENOMEM);
2735                 rec->objectid = objectid;
2736                 INIT_LIST_HEAD(&rec->backrefs);
2737                 rec->cache.start = objectid;
2738                 rec->cache.size = 1;
2739
2740                 ret = insert_cache_extent(root_cache, &rec->cache);
2741                 if (ret)
2742                         return ERR_PTR(-EEXIST);
2743         }
2744         return rec;
2745 }
2746
2747 static struct root_backref *get_root_backref(struct root_record *rec,
2748                                              u64 ref_root, u64 dir, u64 index,
2749                                              const char *name, int namelen)
2750 {
2751         struct root_backref *backref;
2752
2753         list_for_each_entry(backref, &rec->backrefs, list) {
2754                 if (backref->ref_root != ref_root || backref->dir != dir ||
2755                     backref->namelen != namelen)
2756                         continue;
2757                 if (memcmp(name, backref->name, namelen))
2758                         continue;
2759                 return backref;
2760         }
2761
2762         backref = calloc(1, sizeof(*backref) + namelen + 1);
2763         if (!backref)
2764                 return NULL;
2765         backref->ref_root = ref_root;
2766         backref->dir = dir;
2767         backref->index = index;
2768         backref->namelen = namelen;
2769         memcpy(backref->name, name, namelen);
2770         backref->name[namelen] = '\0';
2771         list_add_tail(&backref->list, &rec->backrefs);
2772         return backref;
2773 }
2774
2775 static void free_root_record(struct cache_extent *cache)
2776 {
2777         struct root_record *rec;
2778         struct root_backref *backref;
2779
2780         rec = container_of(cache, struct root_record, cache);
2781         while (!list_empty(&rec->backrefs)) {
2782                 backref = to_root_backref(rec->backrefs.next);
2783                 list_del(&backref->list);
2784                 free(backref);
2785         }
2786
2787         free(rec);
2788 }
2789
2790 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
2791
2792 static int add_root_backref(struct cache_tree *root_cache,
2793                             u64 root_id, u64 ref_root, u64 dir, u64 index,
2794                             const char *name, int namelen,
2795                             int item_type, int errors)
2796 {
2797         struct root_record *rec;
2798         struct root_backref *backref;
2799
2800         rec = get_root_rec(root_cache, root_id);
2801         BUG_ON(IS_ERR(rec));
2802         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
2803         BUG_ON(!backref);
2804
2805         backref->errors |= errors;
2806
2807         if (item_type != BTRFS_DIR_ITEM_KEY) {
2808                 if (backref->found_dir_index || backref->found_back_ref ||
2809                     backref->found_forward_ref) {
2810                         if (backref->index != index)
2811                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
2812                 } else {
2813                         backref->index = index;
2814                 }
2815         }
2816
2817         if (item_type == BTRFS_DIR_ITEM_KEY) {
2818                 if (backref->found_forward_ref)
2819                         rec->found_ref++;
2820                 backref->found_dir_item = 1;
2821         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
2822                 backref->found_dir_index = 1;
2823         } else if (item_type == BTRFS_ROOT_REF_KEY) {
2824                 if (backref->found_forward_ref)
2825                         backref->errors |= REF_ERR_DUP_ROOT_REF;
2826                 else if (backref->found_dir_item)
2827                         rec->found_ref++;
2828                 backref->found_forward_ref = 1;
2829         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
2830                 if (backref->found_back_ref)
2831                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
2832                 backref->found_back_ref = 1;
2833         } else {
2834                 BUG_ON(1);
2835         }
2836
2837         if (backref->found_forward_ref && backref->found_dir_item)
2838                 backref->reachable = 1;
2839         return 0;
2840 }
2841
2842 static int merge_root_recs(struct btrfs_root *root,
2843                            struct cache_tree *src_cache,
2844                            struct cache_tree *dst_cache)
2845 {
2846         struct cache_extent *cache;
2847         struct ptr_node *node;
2848         struct inode_record *rec;
2849         struct inode_backref *backref;
2850         int ret = 0;
2851
2852         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
2853                 free_inode_recs_tree(src_cache);
2854                 return 0;
2855         }
2856
2857         while (1) {
2858                 cache = search_cache_extent(src_cache, 0);
2859                 if (!cache)
2860                         break;
2861                 node = container_of(cache, struct ptr_node, cache);
2862                 rec = node->data;
2863                 remove_cache_extent(src_cache, &node->cache);
2864                 free(node);
2865
2866                 ret = is_child_root(root, root->objectid, rec->ino);
2867                 if (ret < 0)
2868                         break;
2869                 else if (ret == 0)
2870                         goto skip;
2871
2872                 list_for_each_entry(backref, &rec->backrefs, list) {
2873                         BUG_ON(backref->found_inode_ref);
2874                         if (backref->found_dir_item)
2875                                 add_root_backref(dst_cache, rec->ino,
2876                                         root->root_key.objectid, backref->dir,
2877                                         backref->index, backref->name,
2878                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
2879                                         backref->errors);
2880                         if (backref->found_dir_index)
2881                                 add_root_backref(dst_cache, rec->ino,
2882                                         root->root_key.objectid, backref->dir,
2883                                         backref->index, backref->name,
2884                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
2885                                         backref->errors);
2886                 }
2887 skip:
2888                 free_inode_rec(rec);
2889         }
2890         if (ret < 0)
2891                 return ret;
2892         return 0;
2893 }
2894
2895 static int check_root_refs(struct btrfs_root *root,
2896                            struct cache_tree *root_cache)
2897 {
2898         struct root_record *rec;
2899         struct root_record *ref_root;
2900         struct root_backref *backref;
2901         struct cache_extent *cache;
2902         int loop = 1;
2903         int ret;
2904         int error;
2905         int errors = 0;
2906
2907         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
2908         BUG_ON(IS_ERR(rec));
2909         rec->found_ref = 1;
2910
2911         /* fixme: this can not detect circular references */
2912         while (loop) {
2913                 loop = 0;
2914                 cache = search_cache_extent(root_cache, 0);
2915                 while (1) {
2916                         if (!cache)
2917                                 break;
2918                         rec = container_of(cache, struct root_record, cache);
2919                         cache = next_cache_extent(cache);
2920
2921                         if (rec->found_ref == 0)
2922                                 continue;
2923
2924                         list_for_each_entry(backref, &rec->backrefs, list) {
2925                                 if (!backref->reachable)
2926                                         continue;
2927
2928                                 ref_root = get_root_rec(root_cache,
2929                                                         backref->ref_root);
2930                                 BUG_ON(IS_ERR(ref_root));
2931                                 if (ref_root->found_ref > 0)
2932                                         continue;
2933
2934                                 backref->reachable = 0;
2935                                 rec->found_ref--;
2936                                 if (rec->found_ref == 0)
2937                                         loop = 1;
2938                         }
2939                 }
2940         }
2941
2942         cache = search_cache_extent(root_cache, 0);
2943         while (1) {
2944                 if (!cache)
2945                         break;
2946                 rec = container_of(cache, struct root_record, cache);
2947                 cache = next_cache_extent(cache);
2948
2949                 if (rec->found_ref == 0 &&
2950                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
2951                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
2952                         ret = check_orphan_item(root->fs_info->tree_root,
2953                                                 rec->objectid);
2954                         if (ret == 0)
2955                                 continue;
2956
2957                         /*
2958                          * If we don't have a root item then we likely just have
2959                          * a dir item in a snapshot for this root but no actual
2960                          * ref key or anything so it's meaningless.
2961                          */
2962                         if (!rec->found_root_item)
2963                                 continue;
2964                         errors++;
2965                         fprintf(stderr, "fs tree %llu not referenced\n",
2966                                 (unsigned long long)rec->objectid);
2967                 }
2968
2969                 error = 0;
2970                 if (rec->found_ref > 0 && !rec->found_root_item)
2971                         error = 1;
2972                 list_for_each_entry(backref, &rec->backrefs, list) {
2973                         if (!backref->found_dir_item)
2974                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
2975                         if (!backref->found_dir_index)
2976                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
2977                         if (!backref->found_back_ref)
2978                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
2979                         if (!backref->found_forward_ref)
2980                                 backref->errors |= REF_ERR_NO_ROOT_REF;
2981                         if (backref->reachable && backref->errors)
2982                                 error = 1;
2983                 }
2984                 if (!error)
2985                         continue;
2986
2987                 errors++;
2988                 fprintf(stderr, "fs tree %llu refs %u %s\n",
2989                         (unsigned long long)rec->objectid, rec->found_ref,
2990                          rec->found_root_item ? "" : "not found");
2991
2992                 list_for_each_entry(backref, &rec->backrefs, list) {
2993                         if (!backref->reachable)
2994                                 continue;
2995                         if (!backref->errors && rec->found_root_item)
2996                                 continue;
2997                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
2998                                 " index %llu namelen %u name %s errors %x\n",
2999                                 (unsigned long long)backref->ref_root,
3000                                 (unsigned long long)backref->dir,
3001                                 (unsigned long long)backref->index,
3002                                 backref->namelen, backref->name,
3003                                 backref->errors);
3004                         print_ref_error(backref->errors);
3005                 }
3006         }
3007         return errors > 0 ? 1 : 0;
3008 }
3009
3010 static int process_root_ref(struct extent_buffer *eb, int slot,
3011                             struct btrfs_key *key,
3012                             struct cache_tree *root_cache)
3013 {
3014         u64 dirid;
3015         u64 index;
3016         u32 len;
3017         u32 name_len;
3018         struct btrfs_root_ref *ref;
3019         char namebuf[BTRFS_NAME_LEN];
3020         int error;
3021
3022         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3023
3024         dirid = btrfs_root_ref_dirid(eb, ref);
3025         index = btrfs_root_ref_sequence(eb, ref);
3026         name_len = btrfs_root_ref_name_len(eb, ref);
3027
3028         if (name_len <= BTRFS_NAME_LEN) {
3029                 len = name_len;
3030                 error = 0;
3031         } else {
3032                 len = BTRFS_NAME_LEN;
3033                 error = REF_ERR_NAME_TOO_LONG;
3034         }
3035         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3036
3037         if (key->type == BTRFS_ROOT_REF_KEY) {
3038                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3039                                  index, namebuf, len, key->type, error);
3040         } else {
3041                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3042                                  index, namebuf, len, key->type, error);
3043         }
3044         return 0;
3045 }
3046
3047 static void free_corrupt_block(struct cache_extent *cache)
3048 {
3049         struct btrfs_corrupt_block *corrupt;
3050
3051         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3052         free(corrupt);
3053 }
3054
3055 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3056
3057 /*
3058  * Repair the btree of the given root.
3059  *
3060  * The fix is to remove the node key in corrupt_blocks cache_tree.
3061  * and rebalance the tree.
3062  * After the fix, the btree should be writeable.
3063  */
3064 static int repair_btree(struct btrfs_root *root,
3065                         struct cache_tree *corrupt_blocks)
3066 {
3067         struct btrfs_trans_handle *trans;
3068         struct btrfs_path path;
3069         struct btrfs_corrupt_block *corrupt;
3070         struct cache_extent *cache;
3071         struct btrfs_key key;
3072         u64 offset;
3073         int level;
3074         int ret = 0;
3075
3076         if (cache_tree_empty(corrupt_blocks))
3077                 return 0;
3078
3079         trans = btrfs_start_transaction(root, 1);
3080         if (IS_ERR(trans)) {
3081                 ret = PTR_ERR(trans);
3082                 fprintf(stderr, "Error starting transaction: %s\n",
3083                         strerror(-ret));
3084                 return ret;
3085         }
3086         btrfs_init_path(&path);
3087         cache = first_cache_extent(corrupt_blocks);
3088         while (cache) {
3089                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3090                                        cache);
3091                 level = corrupt->level;
3092                 path.lowest_level = level;
3093                 key.objectid = corrupt->key.objectid;
3094                 key.type = corrupt->key.type;
3095                 key.offset = corrupt->key.offset;
3096
3097                 /*
3098                  * Here we don't want to do any tree balance, since it may
3099                  * cause a balance with corrupted brother leaf/node,
3100                  * so ins_len set to 0 here.
3101                  * Balance will be done after all corrupt node/leaf is deleted.
3102                  */
3103                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3104                 if (ret < 0)
3105                         goto out;
3106                 offset = btrfs_node_blockptr(path.nodes[level],
3107                                              path.slots[level]);
3108
3109                 /* Remove the ptr */
3110                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3111                 if (ret < 0)
3112                         goto out;
3113                 /*
3114                  * Remove the corresponding extent
3115                  * return value is not concerned.
3116                  */
3117                 btrfs_release_path(&path);
3118                 ret = btrfs_free_extent(trans, root, offset,
3119                                 root->fs_info->nodesize, 0,
3120                                 root->root_key.objectid, level - 1, 0);
3121                 cache = next_cache_extent(cache);
3122         }
3123
3124         /* Balance the btree using btrfs_search_slot() */
3125         cache = first_cache_extent(corrupt_blocks);
3126         while (cache) {
3127                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3128                                        cache);
3129                 memcpy(&key, &corrupt->key, sizeof(key));
3130                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3131                 if (ret < 0)
3132                         goto out;
3133                 /* return will always >0 since it won't find the item */
3134                 ret = 0;
3135                 btrfs_release_path(&path);
3136                 cache = next_cache_extent(cache);
3137         }
3138 out:
3139         btrfs_commit_transaction(trans, root);
3140         btrfs_release_path(&path);
3141         return ret;
3142 }
3143
3144 static int check_fs_root(struct btrfs_root *root,
3145                          struct cache_tree *root_cache,
3146                          struct walk_control *wc)
3147 {
3148         int ret = 0;
3149         int err = 0;
3150         int wret;
3151         int level;
3152         struct btrfs_path path;
3153         struct shared_node root_node;
3154         struct root_record *rec;
3155         struct btrfs_root_item *root_item = &root->root_item;
3156         struct cache_tree corrupt_blocks;
3157         struct orphan_data_extent *orphan;
3158         struct orphan_data_extent *tmp;
3159         enum btrfs_tree_block_status status;
3160         struct node_refs nrefs;
3161
3162         /*
3163          * Reuse the corrupt_block cache tree to record corrupted tree block
3164          *
3165          * Unlike the usage in extent tree check, here we do it in a per
3166          * fs/subvol tree base.
3167          */
3168         cache_tree_init(&corrupt_blocks);
3169         root->fs_info->corrupt_blocks = &corrupt_blocks;
3170
3171         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3172                 rec = get_root_rec(root_cache, root->root_key.objectid);
3173                 BUG_ON(IS_ERR(rec));
3174                 if (btrfs_root_refs(root_item) > 0)
3175                         rec->found_root_item = 1;
3176         }
3177
3178         btrfs_init_path(&path);
3179         memset(&root_node, 0, sizeof(root_node));
3180         cache_tree_init(&root_node.root_cache);
3181         cache_tree_init(&root_node.inode_cache);
3182         memset(&nrefs, 0, sizeof(nrefs));
3183
3184         /* Move the orphan extent record to corresponding inode_record */
3185         list_for_each_entry_safe(orphan, tmp,
3186                                  &root->orphan_data_extents, list) {
3187                 struct inode_record *inode;
3188
3189                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3190                                       1);
3191                 BUG_ON(IS_ERR(inode));
3192                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3193                 list_move(&orphan->list, &inode->orphan_extents);
3194         }
3195
3196         level = btrfs_header_level(root->node);
3197         memset(wc->nodes, 0, sizeof(wc->nodes));
3198         wc->nodes[level] = &root_node;
3199         wc->active_node = level;
3200         wc->root_level = level;
3201
3202         /* We may not have checked the root block, lets do that now */
3203         if (btrfs_is_leaf(root->node))
3204                 status = btrfs_check_leaf(root, NULL, root->node);
3205         else
3206                 status = btrfs_check_node(root, NULL, root->node);
3207         if (status != BTRFS_TREE_BLOCK_CLEAN)
3208                 return -EIO;
3209
3210         if (btrfs_root_refs(root_item) > 0 ||
3211             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3212                 path.nodes[level] = root->node;
3213                 extent_buffer_get(root->node);
3214                 path.slots[level] = 0;
3215         } else {
3216                 struct btrfs_key key;
3217                 struct btrfs_disk_key found_key;
3218
3219                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3220                 level = root_item->drop_level;
3221                 path.lowest_level = level;
3222                 if (level > btrfs_header_level(root->node) ||
3223                     level >= BTRFS_MAX_LEVEL) {
3224                         error("ignoring invalid drop level: %u", level);
3225                         goto skip_walking;
3226                 }
3227                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3228                 if (wret < 0)
3229                         goto skip_walking;
3230                 btrfs_node_key(path.nodes[level], &found_key,
3231                                 path.slots[level]);
3232                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3233                                         sizeof(found_key)));
3234         }
3235
3236         while (1) {
3237                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3238                 if (wret < 0)
3239                         ret = wret;
3240                 if (wret != 0)
3241                         break;
3242
3243                 wret = walk_up_tree(root, &path, wc, &level);
3244                 if (wret < 0)
3245                         ret = wret;
3246                 if (wret != 0)
3247                         break;
3248         }
3249 skip_walking:
3250         btrfs_release_path(&path);
3251
3252         if (!cache_tree_empty(&corrupt_blocks)) {
3253                 struct cache_extent *cache;
3254                 struct btrfs_corrupt_block *corrupt;
3255
3256                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3257                        root->root_key.objectid);
3258                 cache = first_cache_extent(&corrupt_blocks);
3259                 while (cache) {
3260                         corrupt = container_of(cache,
3261                                                struct btrfs_corrupt_block,
3262                                                cache);
3263                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3264                                cache->start, corrupt->level,
3265                                corrupt->key.objectid, corrupt->key.type,
3266                                corrupt->key.offset);
3267                         cache = next_cache_extent(cache);
3268                 }
3269                 if (repair) {
3270                         printf("Try to repair the btree for root %llu\n",
3271                                root->root_key.objectid);
3272                         ret = repair_btree(root, &corrupt_blocks);
3273                         if (ret < 0)
3274                                 fprintf(stderr, "Failed to repair btree: %s\n",
3275                                         strerror(-ret));
3276                         if (!ret)
3277                                 printf("Btree for root %llu is fixed\n",
3278                                        root->root_key.objectid);
3279                 }
3280         }
3281
3282         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3283         if (err < 0)
3284                 ret = err;
3285
3286         if (root_node.current) {
3287                 root_node.current->checked = 1;
3288                 maybe_free_inode_rec(&root_node.inode_cache,
3289                                 root_node.current);
3290         }
3291
3292         err = check_inode_recs(root, &root_node.inode_cache);
3293         if (!ret)
3294                 ret = err;
3295
3296         free_corrupt_blocks_tree(&corrupt_blocks);
3297         root->fs_info->corrupt_blocks = NULL;
3298         free_orphan_data_extents(&root->orphan_data_extents);
3299         return ret;
3300 }
3301
3302 static int check_fs_roots(struct btrfs_fs_info *fs_info,
3303                           struct cache_tree *root_cache)
3304 {
3305         struct btrfs_path path;
3306         struct btrfs_key key;
3307         struct walk_control wc;
3308         struct extent_buffer *leaf, *tree_node;
3309         struct btrfs_root *tmp_root;
3310         struct btrfs_root *tree_root = fs_info->tree_root;
3311         int ret;
3312         int err = 0;
3313
3314         if (ctx.progress_enabled) {
3315                 ctx.tp = TASK_FS_ROOTS;
3316                 task_start(ctx.info);
3317         }
3318
3319         /*
3320          * Just in case we made any changes to the extent tree that weren't
3321          * reflected into the free space cache yet.
3322          */
3323         if (repair)
3324                 reset_cached_block_groups(fs_info);
3325         memset(&wc, 0, sizeof(wc));
3326         cache_tree_init(&wc.shared);
3327         btrfs_init_path(&path);
3328
3329 again:
3330         key.offset = 0;
3331         key.objectid = 0;
3332         key.type = BTRFS_ROOT_ITEM_KEY;
3333         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3334         if (ret < 0) {
3335                 err = 1;
3336                 goto out;
3337         }
3338         tree_node = tree_root->node;
3339         while (1) {
3340                 if (tree_node != tree_root->node) {
3341                         free_root_recs_tree(root_cache);
3342                         btrfs_release_path(&path);
3343                         goto again;
3344                 }
3345                 leaf = path.nodes[0];
3346                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3347                         ret = btrfs_next_leaf(tree_root, &path);
3348                         if (ret) {
3349                                 if (ret < 0)
3350                                         err = 1;
3351                                 break;
3352                         }
3353                         leaf = path.nodes[0];
3354                 }
3355                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3356                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3357                     fs_root_objectid(key.objectid)) {
3358                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3359                                 tmp_root = btrfs_read_fs_root_no_cache(
3360                                                 fs_info, &key);
3361                         } else {
3362                                 key.offset = (u64)-1;
3363                                 tmp_root = btrfs_read_fs_root(
3364                                                 fs_info, &key);
3365                         }
3366                         if (IS_ERR(tmp_root)) {
3367                                 err = 1;
3368                                 goto next;
3369                         }
3370                         ret = check_fs_root(tmp_root, root_cache, &wc);
3371                         if (ret == -EAGAIN) {
3372                                 free_root_recs_tree(root_cache);
3373                                 btrfs_release_path(&path);
3374                                 goto again;
3375                         }
3376                         if (ret)
3377                                 err = 1;
3378                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3379                                 btrfs_free_fs_root(tmp_root);
3380                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3381                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3382                         process_root_ref(leaf, path.slots[0], &key,
3383                                          root_cache);
3384                 }
3385 next:
3386                 path.slots[0]++;
3387         }
3388 out:
3389         btrfs_release_path(&path);
3390         if (err)
3391                 free_extent_cache_tree(&wc.shared);
3392         if (!cache_tree_empty(&wc.shared))
3393                 fprintf(stderr, "warning line %d\n", __LINE__);
3394
3395         task_stop(ctx.info);
3396
3397         return err;
3398 }
3399
3400 static struct tree_backref *find_tree_backref(struct extent_record *rec,
3401                                                 u64 parent, u64 root)
3402 {
3403         struct rb_node *node;
3404         struct tree_backref *back = NULL;
3405         struct tree_backref match = {
3406                 .node = {
3407                         .is_data = 0,
3408                 },
3409         };
3410
3411         if (parent) {
3412                 match.parent = parent;
3413                 match.node.full_backref = 1;
3414         } else {
3415                 match.root = root;
3416         }
3417
3418         node = rb_search(&rec->backref_tree, &match.node.node,
3419                          (rb_compare_keys)compare_extent_backref, NULL);
3420         if (node)
3421                 back = to_tree_backref(rb_node_to_extent_backref(node));
3422
3423         return back;
3424 }
3425
3426 static struct data_backref *find_data_backref(struct extent_record *rec,
3427                                                 u64 parent, u64 root,
3428                                                 u64 owner, u64 offset,
3429                                                 int found_ref,
3430                                                 u64 disk_bytenr, u64 bytes)
3431 {
3432         struct rb_node *node;
3433         struct data_backref *back = NULL;
3434         struct data_backref match = {
3435                 .node = {
3436                         .is_data = 1,
3437                 },
3438                 .owner = owner,
3439                 .offset = offset,
3440                 .bytes = bytes,
3441                 .found_ref = found_ref,
3442                 .disk_bytenr = disk_bytenr,
3443         };
3444
3445         if (parent) {
3446                 match.parent = parent;
3447                 match.node.full_backref = 1;
3448         } else {
3449                 match.root = root;
3450         }
3451
3452         node = rb_search(&rec->backref_tree, &match.node.node,
3453                          (rb_compare_keys)compare_extent_backref, NULL);
3454         if (node)
3455                 back = to_data_backref(rb_node_to_extent_backref(node));
3456
3457         return back;
3458 }
3459
3460 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
3461                           struct cache_tree *root_cache)
3462 {
3463         int ret;
3464
3465         if (!ctx.progress_enabled)
3466                 fprintf(stderr, "checking fs roots\n");
3467         if (check_mode == CHECK_MODE_LOWMEM)
3468                 ret = check_fs_roots_v2(fs_info);
3469         else
3470                 ret = check_fs_roots(fs_info, root_cache);
3471
3472         return ret;
3473 }
3474
3475 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3476 {
3477         struct extent_backref *back, *tmp;
3478         struct tree_backref *tback;
3479         struct data_backref *dback;
3480         u64 found = 0;
3481         int err = 0;
3482
3483         rbtree_postorder_for_each_entry_safe(back, tmp,
3484                                              &rec->backref_tree, node) {
3485                 if (!back->found_extent_tree) {
3486                         err = 1;
3487                         if (!print_errs)
3488                                 goto out;
3489                         if (back->is_data) {
3490                                 dback = to_data_backref(back);
3491                                 fprintf(stderr, "Data backref %llu %s %llu"
3492                                         " owner %llu offset %llu num_refs %lu"
3493                                         " not found in extent tree\n",
3494                                         (unsigned long long)rec->start,
3495                                         back->full_backref ?
3496                                         "parent" : "root",
3497                                         back->full_backref ?
3498                                         (unsigned long long)dback->parent:
3499                                         (unsigned long long)dback->root,
3500                                         (unsigned long long)dback->owner,
3501                                         (unsigned long long)dback->offset,
3502                                         (unsigned long)dback->num_refs);
3503                         } else {
3504                                 tback = to_tree_backref(back);
3505                                 fprintf(stderr, "Tree backref %llu parent %llu"
3506                                         " root %llu not found in extent tree\n",
3507                                         (unsigned long long)rec->start,
3508                                         (unsigned long long)tback->parent,
3509                                         (unsigned long long)tback->root);
3510                         }
3511                 }
3512                 if (!back->is_data && !back->found_ref) {
3513                         err = 1;
3514                         if (!print_errs)
3515                                 goto out;
3516                         tback = to_tree_backref(back);
3517                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3518                                 (unsigned long long)rec->start,
3519                                 back->full_backref ? "parent" : "root",
3520                                 back->full_backref ?
3521                                 (unsigned long long)tback->parent :
3522                                 (unsigned long long)tback->root, back);
3523                 }
3524                 if (back->is_data) {
3525                         dback = to_data_backref(back);
3526                         if (dback->found_ref != dback->num_refs) {
3527                                 err = 1;
3528                                 if (!print_errs)
3529                                         goto out;
3530                                 fprintf(stderr, "Incorrect local backref count"
3531                                         " on %llu %s %llu owner %llu"
3532                                         " offset %llu found %u wanted %u back %p\n",
3533                                         (unsigned long long)rec->start,
3534                                         back->full_backref ?
3535                                         "parent" : "root",
3536                                         back->full_backref ?
3537                                         (unsigned long long)dback->parent:
3538                                         (unsigned long long)dback->root,
3539                                         (unsigned long long)dback->owner,
3540                                         (unsigned long long)dback->offset,
3541                                         dback->found_ref, dback->num_refs, back);
3542                         }
3543                         if (dback->disk_bytenr != rec->start) {
3544                                 err = 1;
3545                                 if (!print_errs)
3546                                         goto out;
3547                                 fprintf(stderr, "Backref disk bytenr does not"
3548                                         " match extent record, bytenr=%llu, "
3549                                         "ref bytenr=%llu\n",
3550                                         (unsigned long long)rec->start,
3551                                         (unsigned long long)dback->disk_bytenr);
3552                         }
3553
3554                         if (dback->bytes != rec->nr) {
3555                                 err = 1;
3556                                 if (!print_errs)
3557                                         goto out;
3558                                 fprintf(stderr, "Backref bytes do not match "
3559                                         "extent backref, bytenr=%llu, ref "
3560                                         "bytes=%llu, backref bytes=%llu\n",
3561                                         (unsigned long long)rec->start,
3562                                         (unsigned long long)rec->nr,
3563                                         (unsigned long long)dback->bytes);
3564                         }
3565                 }
3566                 if (!back->is_data) {
3567                         found += 1;
3568                 } else {
3569                         dback = to_data_backref(back);
3570                         found += dback->found_ref;
3571                 }
3572         }
3573         if (found != rec->refs) {
3574                 err = 1;
3575                 if (!print_errs)
3576                         goto out;
3577                 fprintf(stderr, "Incorrect global backref count "
3578                         "on %llu found %llu wanted %llu\n",
3579                         (unsigned long long)rec->start,
3580                         (unsigned long long)found,
3581                         (unsigned long long)rec->refs);
3582         }
3583 out:
3584         return err;
3585 }
3586
/* rb_free_nodes() callback: free the extent_backref that embeds @node. */
static void __free_one_backref(struct rb_node *node)
{
	free(rb_node_to_extent_backref(node));
}
3593
3594 static void free_all_extent_backrefs(struct extent_record *rec)
3595 {
3596         rb_free_nodes(&rec->backref_tree, __free_one_backref);
3597 }
3598
3599 static void free_extent_record_cache(struct cache_tree *extent_cache)
3600 {
3601         struct cache_extent *cache;
3602         struct extent_record *rec;
3603
3604         while (1) {
3605                 cache = first_cache_extent(extent_cache);
3606                 if (!cache)
3607                         break;
3608                 rec = container_of(cache, struct extent_record, cache);
3609                 remove_cache_extent(extent_cache, cache);
3610                 free_all_extent_backrefs(rec);
3611                 free(rec);
3612         }
3613 }
3614
3615 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3616                                  struct extent_record *rec)
3617 {
3618         if (rec->content_checked && rec->owner_ref_checked &&
3619             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3620             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3621             !rec->bad_full_backref && !rec->crossing_stripes &&
3622             !rec->wrong_chunk_type) {
3623                 remove_cache_extent(extent_cache, &rec->cache);
3624                 free_all_extent_backrefs(rec);
3625                 list_del_init(&rec->list);
3626                 free(rec);
3627         }
3628         return 0;
3629 }
3630
3631 static int check_owner_ref(struct btrfs_root *root,
3632                             struct extent_record *rec,
3633                             struct extent_buffer *buf)
3634 {
3635         struct extent_backref *node, *tmp;
3636         struct tree_backref *back;
3637         struct btrfs_root *ref_root;
3638         struct btrfs_key key;
3639         struct btrfs_path path;
3640         struct extent_buffer *parent;
3641         int level;
3642         int found = 0;
3643         int ret;
3644
3645         rbtree_postorder_for_each_entry_safe(node, tmp,
3646                                              &rec->backref_tree, node) {
3647                 if (node->is_data)
3648                         continue;
3649                 if (!node->found_ref)
3650                         continue;
3651                 if (node->full_backref)
3652                         continue;
3653                 back = to_tree_backref(node);
3654                 if (btrfs_header_owner(buf) == back->root)
3655                         return 0;
3656         }
3657         BUG_ON(rec->is_root);
3658
3659         /* try to find the block by search corresponding fs tree */
3660         key.objectid = btrfs_header_owner(buf);
3661         key.type = BTRFS_ROOT_ITEM_KEY;
3662         key.offset = (u64)-1;
3663
3664         ref_root = btrfs_read_fs_root(root->fs_info, &key);
3665         if (IS_ERR(ref_root))
3666                 return 1;
3667
3668         level = btrfs_header_level(buf);
3669         if (level == 0)
3670                 btrfs_item_key_to_cpu(buf, &key, 0);
3671         else
3672                 btrfs_node_key_to_cpu(buf, &key, 0);
3673
3674         btrfs_init_path(&path);
3675         path.lowest_level = level + 1;
3676         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3677         if (ret < 0)
3678                 return 0;
3679
3680         parent = path.nodes[level + 1];
3681         if (parent && buf->start == btrfs_node_blockptr(parent,
3682                                                         path.slots[level + 1]))
3683                 found = 1;
3684
3685         btrfs_release_path(&path);
3686         return found ? 0 : 1;
3687 }
3688
3689 static int is_extent_tree_record(struct extent_record *rec)
3690 {
3691         struct extent_backref *node, *tmp;
3692         struct tree_backref *back;
3693         int is_extent = 0;
3694
3695         rbtree_postorder_for_each_entry_safe(node, tmp,
3696                                              &rec->backref_tree, node) {
3697                 if (node->is_data)
3698                         return 0;
3699                 back = to_tree_backref(node);
3700                 if (node->full_backref)
3701                         return 0;
3702                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3703                         is_extent = 1;
3704         }
3705         return is_extent;
3706 }
3707
3708
3709 static int record_bad_block_io(struct btrfs_fs_info *info,
3710                                struct cache_tree *extent_cache,
3711                                u64 start, u64 len)
3712 {
3713         struct extent_record *rec;
3714         struct cache_extent *cache;
3715         struct btrfs_key key;
3716
3717         cache = lookup_cache_extent(extent_cache, start, len);
3718         if (!cache)
3719                 return 0;
3720
3721         rec = container_of(cache, struct extent_record, cache);
3722         if (!is_extent_tree_record(rec))
3723                 return 0;
3724
3725         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3726         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
3727 }
3728
3729 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3730                        struct extent_buffer *buf, int slot)
3731 {
3732         if (btrfs_header_level(buf)) {
3733                 struct btrfs_key_ptr ptr1, ptr2;
3734
3735                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3736                                    sizeof(struct btrfs_key_ptr));
3737                 read_extent_buffer(buf, &ptr2,
3738                                    btrfs_node_key_ptr_offset(slot + 1),
3739                                    sizeof(struct btrfs_key_ptr));
3740                 write_extent_buffer(buf, &ptr1,
3741                                     btrfs_node_key_ptr_offset(slot + 1),
3742                                     sizeof(struct btrfs_key_ptr));
3743                 write_extent_buffer(buf, &ptr2,
3744                                     btrfs_node_key_ptr_offset(slot),
3745                                     sizeof(struct btrfs_key_ptr));
3746                 if (slot == 0) {
3747                         struct btrfs_disk_key key;
3748                         btrfs_node_key(buf, &key, 0);
3749                         btrfs_fixup_low_keys(root, path, &key,
3750                                              btrfs_header_level(buf) + 1);
3751                 }
3752         } else {
3753                 struct btrfs_item *item1, *item2;
3754                 struct btrfs_key k1, k2;
3755                 char *item1_data, *item2_data;
3756                 u32 item1_offset, item2_offset, item1_size, item2_size;
3757
3758                 item1 = btrfs_item_nr(slot);
3759                 item2 = btrfs_item_nr(slot + 1);
3760                 btrfs_item_key_to_cpu(buf, &k1, slot);
3761                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
3762                 item1_offset = btrfs_item_offset(buf, item1);
3763                 item2_offset = btrfs_item_offset(buf, item2);
3764                 item1_size = btrfs_item_size(buf, item1);
3765                 item2_size = btrfs_item_size(buf, item2);
3766
3767                 item1_data = malloc(item1_size);
3768                 if (!item1_data)
3769                         return -ENOMEM;
3770                 item2_data = malloc(item2_size);
3771                 if (!item2_data) {
3772                         free(item1_data);
3773                         return -ENOMEM;
3774                 }
3775
3776                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
3777                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
3778
3779                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
3780                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
3781                 free(item1_data);
3782                 free(item2_data);
3783
3784                 btrfs_set_item_offset(buf, item1, item2_offset);
3785                 btrfs_set_item_offset(buf, item2, item1_offset);
3786                 btrfs_set_item_size(buf, item1, item2_size);
3787                 btrfs_set_item_size(buf, item2, item1_size);
3788
3789                 path->slots[0] = slot;
3790                 btrfs_set_item_key_unsafe(root, path, &k2);
3791                 path->slots[0] = slot + 1;
3792                 btrfs_set_item_key_unsafe(root, path, &k1);
3793         }
3794         return 0;
3795 }
3796
3797 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
3798 {
3799         struct extent_buffer *buf;
3800         struct btrfs_key k1, k2;
3801         int i;
3802         int level = path->lowest_level;
3803         int ret = -EIO;
3804
3805         buf = path->nodes[level];
3806         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
3807                 if (level) {
3808                         btrfs_node_key_to_cpu(buf, &k1, i);
3809                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
3810                 } else {
3811                         btrfs_item_key_to_cpu(buf, &k1, i);
3812                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
3813                 }
3814                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
3815                         continue;
3816                 ret = swap_values(root, path, buf, i);
3817                 if (ret)
3818                         break;
3819                 btrfs_mark_buffer_dirty(buf);
3820                 i = 0;
3821         }
3822         return ret;
3823 }
3824
3825 static int delete_bogus_item(struct btrfs_root *root,
3826                              struct btrfs_path *path,
3827                              struct extent_buffer *buf, int slot)
3828 {
3829         struct btrfs_key key;
3830         int nritems = btrfs_header_nritems(buf);
3831
3832         btrfs_item_key_to_cpu(buf, &key, slot);
3833
3834         /* These are all the keys we can deal with missing. */
3835         if (key.type != BTRFS_DIR_INDEX_KEY &&
3836             key.type != BTRFS_EXTENT_ITEM_KEY &&
3837             key.type != BTRFS_METADATA_ITEM_KEY &&
3838             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
3839             key.type != BTRFS_EXTENT_DATA_REF_KEY)
3840                 return -1;
3841
3842         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
3843                (unsigned long long)key.objectid, key.type,
3844                (unsigned long long)key.offset, slot, buf->start);
3845         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
3846                               btrfs_item_nr_offset(slot + 1),
3847                               sizeof(struct btrfs_item) *
3848                               (nritems - slot - 1));
3849         btrfs_set_header_nritems(buf, nritems - 1);
3850         if (slot == 0) {
3851                 struct btrfs_disk_key disk_key;
3852
3853                 btrfs_item_key(buf, &disk_key, 0);
3854                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
3855         }
3856         btrfs_mark_buffer_dirty(buf);
3857         return 0;
3858 }
3859
3860 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
3861 {
3862         struct extent_buffer *buf;
3863         int i;
3864         int ret = 0;
3865
3866         /* We should only get this for leaves */
3867         BUG_ON(path->lowest_level);
3868         buf = path->nodes[0];
3869 again:
3870         for (i = 0; i < btrfs_header_nritems(buf); i++) {
3871                 unsigned int shift = 0, offset;
3872
3873                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
3874                     BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
3875                         if (btrfs_item_end_nr(buf, i) >
3876                             BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
3877                                 ret = delete_bogus_item(root, path, buf, i);
3878                                 if (!ret)
3879                                         goto again;
3880                                 fprintf(stderr, "item is off the end of the "
3881                                         "leaf, can't fix\n");
3882                                 ret = -EIO;
3883                                 break;
3884                         }
3885                         shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
3886                                 btrfs_item_end_nr(buf, i);
3887                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
3888                            btrfs_item_offset_nr(buf, i - 1)) {
3889                         if (btrfs_item_end_nr(buf, i) >
3890                             btrfs_item_offset_nr(buf, i - 1)) {
3891                                 ret = delete_bogus_item(root, path, buf, i);
3892                                 if (!ret)
3893                                         goto again;
3894                                 fprintf(stderr, "items overlap, can't fix\n");
3895                                 ret = -EIO;
3896                                 break;
3897                         }
3898                         shift = btrfs_item_offset_nr(buf, i - 1) -
3899                                 btrfs_item_end_nr(buf, i);
3900                 }
3901                 if (!shift)
3902                         continue;
3903
3904                 printf("Shifting item nr %d by %u bytes in block %llu\n",
3905                        i, shift, (unsigned long long)buf->start);
3906                 offset = btrfs_item_offset_nr(buf, i);
3907                 memmove_extent_buffer(buf,
3908                                       btrfs_leaf_data(buf) + offset + shift,
3909                                       btrfs_leaf_data(buf) + offset,
3910                                       btrfs_item_size_nr(buf, i));
3911                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
3912                                       offset + shift);
3913                 btrfs_mark_buffer_dirty(buf);
3914         }
3915
3916         /*
3917          * We may have moved things, in which case we want to exit so we don't
3918          * write those changes out.  Once we have proper abort functionality in
3919          * progs this can be changed to something nicer.
3920          */
3921         BUG_ON(ret);
3922         return ret;
3923 }
3924
3925 /*
3926  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
3927  * then just return -EIO.
3928  */
3929 static int try_to_fix_bad_block(struct btrfs_root *root,
3930                                 struct extent_buffer *buf,
3931                                 enum btrfs_tree_block_status status)
3932 {
3933         struct btrfs_trans_handle *trans;
3934         struct ulist *roots;
3935         struct ulist_node *node;
3936         struct btrfs_root *search_root;
3937         struct btrfs_path path;
3938         struct ulist_iterator iter;
3939         struct btrfs_key root_key, key;
3940         int ret;
3941
3942         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
3943             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
3944                 return -EIO;
3945
3946         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
3947         if (ret)
3948                 return -EIO;
3949
3950         btrfs_init_path(&path);
3951         ULIST_ITER_INIT(&iter);
3952         while ((node = ulist_next(roots, &iter))) {
3953                 root_key.objectid = node->val;
3954                 root_key.type = BTRFS_ROOT_ITEM_KEY;
3955                 root_key.offset = (u64)-1;
3956
3957                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
3958                 if (IS_ERR(root)) {
3959                         ret = -EIO;
3960                         break;
3961                 }
3962
3963
3964                 trans = btrfs_start_transaction(search_root, 0);
3965                 if (IS_ERR(trans)) {
3966                         ret = PTR_ERR(trans);
3967                         break;
3968                 }
3969
3970                 path.lowest_level = btrfs_header_level(buf);
3971                 path.skip_check_block = 1;
3972                 if (path.lowest_level)
3973                         btrfs_node_key_to_cpu(buf, &key, 0);
3974                 else
3975                         btrfs_item_key_to_cpu(buf, &key, 0);
3976                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
3977                 if (ret) {
3978                         ret = -EIO;
3979                         btrfs_commit_transaction(trans, search_root);
3980                         break;
3981                 }
3982                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
3983                         ret = fix_key_order(search_root, &path);
3984                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
3985                         ret = fix_item_offset(search_root, &path);
3986                 if (ret) {
3987                         btrfs_commit_transaction(trans, search_root);
3988                         break;
3989                 }
3990                 btrfs_release_path(&path);
3991                 btrfs_commit_transaction(trans, search_root);
3992         }
3993         ulist_free(roots);
3994         btrfs_release_path(&path);
3995         return ret;
3996 }
3997
3998 static int check_block(struct btrfs_root *root,
3999                        struct cache_tree *extent_cache,
4000                        struct extent_buffer *buf, u64 flags)
4001 {
4002         struct extent_record *rec;
4003         struct cache_extent *cache;
4004         struct btrfs_key key;
4005         enum btrfs_tree_block_status status;
4006         int ret = 0;
4007         int level;
4008
4009         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4010         if (!cache)
4011                 return 1;
4012         rec = container_of(cache, struct extent_record, cache);
4013         rec->generation = btrfs_header_generation(buf);
4014
4015         level = btrfs_header_level(buf);
4016         if (btrfs_header_nritems(buf) > 0) {
4017
4018                 if (level == 0)
4019                         btrfs_item_key_to_cpu(buf, &key, 0);
4020                 else
4021                         btrfs_node_key_to_cpu(buf, &key, 0);
4022
4023                 rec->info_objectid = key.objectid;
4024         }
4025         rec->info_level = level;
4026
4027         if (btrfs_is_leaf(buf))
4028                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4029         else
4030                 status = btrfs_check_node(root, &rec->parent_key, buf);
4031
4032         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4033                 if (repair)
4034                         status = try_to_fix_bad_block(root, buf, status);
4035                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4036                         ret = -EIO;
4037                         fprintf(stderr, "bad block %llu\n",
4038                                 (unsigned long long)buf->start);
4039                 } else {
4040                         /*
4041                          * Signal to callers we need to start the scan over
4042                          * again since we'll have cowed blocks.
4043                          */
4044                         ret = -EAGAIN;
4045                 }
4046         } else {
4047                 rec->content_checked = 1;
4048                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4049                         rec->owner_ref_checked = 1;
4050                 else {
4051                         ret = check_owner_ref(root, rec, buf);
4052                         if (!ret)
4053                                 rec->owner_ref_checked = 1;
4054                 }
4055         }
4056         if (!ret)
4057                 maybe_free_extent_rec(extent_cache, rec);
4058         return ret;
4059 }
4060
#if 0
/*
 * Compiled out.  List based lookup of the tree backref of @rec that matches
 * @parent (full backrefs, when @parent is non-zero) or @root (otherwise).
 * Returns NULL when there is no match.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
                                                u64 parent, u64 root)
{
        struct list_head *pos;

        for (pos = rec->backrefs.next; pos != &rec->backrefs;
             pos = pos->next) {
                struct extent_backref *node = to_extent_backref(pos);
                struct tree_backref *back;

                if (node->is_data)
                        continue;
                back = to_tree_backref(node);
                if (parent > 0) {
                        if (node->full_backref && parent == back->parent)
                                return back;
                } else {
                        if (!node->full_backref && back->root == root)
                                return back;
                }
        }
        return NULL;
}
#endif
4090
4091 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4092                                                 u64 parent, u64 root)
4093 {
4094         struct tree_backref *ref = malloc(sizeof(*ref));
4095
4096         if (!ref)
4097                 return NULL;
4098         memset(&ref->node, 0, sizeof(ref->node));
4099         if (parent > 0) {
4100                 ref->parent = parent;
4101                 ref->node.full_backref = 1;
4102         } else {
4103                 ref->root = root;
4104                 ref->node.full_backref = 0;
4105         }
4106
4107         return ref;
4108 }
4109
#if 0
/*
 * Compiled out.  List based lookup of the data backref of @rec that matches
 * @parent (full backrefs, when @parent is non-zero) or the @root/@owner/
 * @offset triple (otherwise).  When both the caller and the candidate have
 * found_ref set, the candidate is skipped unless @bytes and @disk_bytenr
 * match too.  Returns NULL when there is no match.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
                                                u64 parent, u64 root,
                                                u64 owner, u64 offset,
                                                int found_ref,
                                                u64 disk_bytenr, u64 bytes)
{
        struct list_head *pos;

        for (pos = rec->backrefs.next; pos != &rec->backrefs;
             pos = pos->next) {
                struct extent_backref *node = to_extent_backref(pos);
                struct data_backref *back;

                if (!node->is_data)
                        continue;
                back = to_data_backref(node);

                if (parent > 0) {
                        if (node->full_backref && parent == back->parent)
                                return back;
                        continue;
                }

                if (node->full_backref)
                        continue;
                if (back->root != root || back->owner != owner ||
                    back->offset != offset)
                        continue;
                if (found_ref && node->found_ref &&
                    (back->bytes != bytes ||
                     back->disk_bytenr != disk_bytenr))
                        continue;
                return back;
        }
        return NULL;
}
#endif
4148
4149 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4150                                                 u64 parent, u64 root,
4151                                                 u64 owner, u64 offset,
4152                                                 u64 max_size)
4153 {
4154         struct data_backref *ref = malloc(sizeof(*ref));
4155
4156         if (!ref)
4157                 return NULL;
4158         memset(&ref->node, 0, sizeof(ref->node));
4159         ref->node.is_data = 1;
4160
4161         if (parent > 0) {
4162                 ref->parent = parent;
4163                 ref->owner = 0;
4164                 ref->offset = 0;
4165                 ref->node.full_backref = 1;
4166         } else {
4167                 ref->root = root;
4168                 ref->owner = owner;
4169                 ref->offset = offset;
4170                 ref->node.full_backref = 0;
4171         }
4172         ref->bytes = max_size;
4173         ref->found_ref = 0;
4174         ref->num_refs = 0;
4175         if (max_size > rec->max_size)
4176                 rec->max_size = max_size;
4177         return ref;
4178 }
4179
4180 /* Check if the type of extent matches with its chunk */
4181 static void check_extent_type(struct extent_record *rec)
4182 {
4183         struct btrfs_block_group_cache *bg_cache;
4184
4185         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4186         if (!bg_cache)
4187                 return;
4188
4189         /* data extent, check chunk directly*/
4190         if (!rec->metadata) {
4191                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4192                         rec->wrong_chunk_type = 1;
4193                 return;
4194         }
4195
4196         /* metadata extent, check the obvious case first */
4197         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4198                                  BTRFS_BLOCK_GROUP_METADATA))) {
4199                 rec->wrong_chunk_type = 1;
4200                 return;
4201         }
4202
4203         /*
4204          * Check SYSTEM extent, as it's also marked as metadata, we can only
4205          * make sure it's a SYSTEM extent by its backref
4206          */
4207         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4208                 struct extent_backref *node;
4209                 struct tree_backref *tback;
4210                 u64 bg_type;
4211
4212                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4213                 if (node->is_data) {
4214                         /* tree block shouldn't have data backref */
4215                         rec->wrong_chunk_type = 1;
4216                         return;
4217                 }
4218                 tback = container_of(node, struct tree_backref, node);
4219
4220                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4221                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4222                 else
4223                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4224                 if (!(bg_cache->flags & bg_type))
4225                         rec->wrong_chunk_type = 1;
4226         }
4227 }
4228
4229 /*
4230  * Allocate a new extent record, fill default values from @tmpl and insert int
4231  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4232  * the cache, otherwise it fails.
4233  */
4234 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4235                 struct extent_record *tmpl)
4236 {
4237         struct extent_record *rec;
4238         int ret = 0;
4239
4240         BUG_ON(tmpl->max_size == 0);
4241         rec = malloc(sizeof(*rec));
4242         if (!rec)
4243                 return -ENOMEM;
4244         rec->start = tmpl->start;
4245         rec->max_size = tmpl->max_size;
4246         rec->nr = max(tmpl->nr, tmpl->max_size);
4247         rec->found_rec = tmpl->found_rec;
4248         rec->content_checked = tmpl->content_checked;
4249         rec->owner_ref_checked = tmpl->owner_ref_checked;
4250         rec->num_duplicates = 0;
4251         rec->metadata = tmpl->metadata;
4252         rec->flag_block_full_backref = FLAG_UNSET;
4253         rec->bad_full_backref = 0;
4254         rec->crossing_stripes = 0;
4255         rec->wrong_chunk_type = 0;
4256         rec->is_root = tmpl->is_root;
4257         rec->refs = tmpl->refs;
4258         rec->extent_item_refs = tmpl->extent_item_refs;
4259         rec->parent_generation = tmpl->parent_generation;
4260         INIT_LIST_HEAD(&rec->backrefs);
4261         INIT_LIST_HEAD(&rec->dups);
4262         INIT_LIST_HEAD(&rec->list);
4263         rec->backref_tree = RB_ROOT;
4264         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4265         rec->cache.start = tmpl->start;
4266         rec->cache.size = tmpl->nr;
4267         ret = insert_cache_extent(extent_cache, &rec->cache);
4268         if (ret) {
4269                 free(rec);
4270                 return ret;
4271         }
4272         bytes_used += rec->nr;
4273
4274         if (tmpl->metadata)
4275                 rec->crossing_stripes = check_crossing_stripes(global_info,
4276                                 rec->start, global_info->nodesize);
4277         check_extent_type(rec);
4278         return ret;
4279 }
4280
4281 /*
4282  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4283  * some are hints:
4284  * - refs              - if found, increase refs
4285  * - is_root           - if found, set
4286  * - content_checked   - if found, set
4287  * - owner_ref_checked - if found, set
4288  *
4289  * If not found, create a new one, initialize and insert.
4290  */
4291 static int add_extent_rec(struct cache_tree *extent_cache,
4292                 struct extent_record *tmpl)
4293 {
4294         struct extent_record *rec;
4295         struct cache_extent *cache;
4296         int ret = 0;
4297         int dup = 0;
4298
4299         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4300         if (cache) {
4301                 rec = container_of(cache, struct extent_record, cache);
4302                 if (tmpl->refs)
4303                         rec->refs++;
4304                 if (rec->nr == 1)
4305                         rec->nr = max(tmpl->nr, tmpl->max_size);
4306
4307                 /*
4308                  * We need to make sure to reset nr to whatever the extent
4309                  * record says was the real size, this way we can compare it to
4310                  * the backrefs.
4311                  */
4312                 if (tmpl->found_rec) {
4313                         if (tmpl->start != rec->start || rec->found_rec) {
4314                                 struct extent_record *tmp;
4315
4316                                 dup = 1;
4317                                 if (list_empty(&rec->list))
4318                                         list_add_tail(&rec->list,
4319                                                       &duplicate_extents);
4320
4321                                 /*
4322                                  * We have to do this song and dance in case we
4323                                  * find an extent record that falls inside of
4324                                  * our current extent record but does not have
4325                                  * the same objectid.
4326                                  */
4327                                 tmp = malloc(sizeof(*tmp));
4328                                 if (!tmp)
4329                                         return -ENOMEM;
4330                                 tmp->start = tmpl->start;
4331                                 tmp->max_size = tmpl->max_size;
4332                                 tmp->nr = tmpl->nr;
4333                                 tmp->found_rec = 1;
4334                                 tmp->metadata = tmpl->metadata;
4335                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4336                                 INIT_LIST_HEAD(&tmp->list);
4337                                 list_add_tail(&tmp->list, &rec->dups);
4338                                 rec->num_duplicates++;
4339                         } else {
4340                                 rec->nr = tmpl->nr;
4341                                 rec->found_rec = 1;
4342                         }
4343                 }
4344
4345                 if (tmpl->extent_item_refs && !dup) {
4346                         if (rec->extent_item_refs) {
4347                                 fprintf(stderr, "block %llu rec "
4348                                         "extent_item_refs %llu, passed %llu\n",
4349                                         (unsigned long long)tmpl->start,
4350                                         (unsigned long long)
4351                                                         rec->extent_item_refs,
4352                                         (unsigned long long)tmpl->extent_item_refs);
4353                         }
4354                         rec->extent_item_refs = tmpl->extent_item_refs;
4355                 }
4356                 if (tmpl->is_root)
4357                         rec->is_root = 1;
4358                 if (tmpl->content_checked)
4359                         rec->content_checked = 1;
4360                 if (tmpl->owner_ref_checked)
4361                         rec->owner_ref_checked = 1;
4362                 memcpy(&rec->parent_key, &tmpl->parent_key,
4363                                 sizeof(tmpl->parent_key));
4364                 if (tmpl->parent_generation)
4365                         rec->parent_generation = tmpl->parent_generation;
4366                 if (rec->max_size < tmpl->max_size)
4367                         rec->max_size = tmpl->max_size;
4368
4369                 /*
4370                  * A metadata extent can't cross stripe_len boundary, otherwise
4371                  * kernel scrub won't be able to handle it.
4372                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4373                  * it.
4374                  */
4375                 if (tmpl->metadata)
4376                         rec->crossing_stripes = check_crossing_stripes(
4377                                         global_info, rec->start,
4378                                         global_info->nodesize);
4379                 check_extent_type(rec);
4380                 maybe_free_extent_rec(extent_cache, rec);
4381                 return ret;
4382         }
4383
4384         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4385
4386         return ret;
4387 }
4388
4389 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4390                             u64 parent, u64 root, int found_ref)
4391 {
4392         struct extent_record *rec;
4393         struct tree_backref *back;
4394         struct cache_extent *cache;
4395         int ret;
4396         bool insert = false;
4397
4398         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4399         if (!cache) {
4400                 struct extent_record tmpl;
4401
4402                 memset(&tmpl, 0, sizeof(tmpl));
4403                 tmpl.start = bytenr;
4404                 tmpl.nr = 1;
4405                 tmpl.metadata = 1;
4406                 tmpl.max_size = 1;
4407
4408                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4409                 if (ret)
4410                         return ret;
4411
4412                 /* really a bug in cache_extent implement now */
4413                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4414                 if (!cache)
4415                         return -ENOENT;
4416         }
4417
4418         rec = container_of(cache, struct extent_record, cache);
4419         if (rec->start != bytenr) {
4420                 /*
4421                  * Several cause, from unaligned bytenr to over lapping extents
4422                  */
4423                 return -EEXIST;
4424         }
4425
4426         back = find_tree_backref(rec, parent, root);
4427         if (!back) {
4428                 back = alloc_tree_backref(rec, parent, root);
4429                 if (!back)
4430                         return -ENOMEM;
4431                 insert = true;
4432         }
4433
4434         if (found_ref) {
4435                 if (back->node.found_ref) {
4436                         fprintf(stderr, "Extent back ref already exists "
4437                                 "for %llu parent %llu root %llu \n",
4438                                 (unsigned long long)bytenr,
4439                                 (unsigned long long)parent,
4440                                 (unsigned long long)root);
4441                 }
4442                 back->node.found_ref = 1;
4443         } else {
4444                 if (back->node.found_extent_tree) {
4445                         fprintf(stderr, "Extent back ref already exists "
4446                                 "for %llu parent %llu root %llu \n",
4447                                 (unsigned long long)bytenr,
4448                                 (unsigned long long)parent,
4449                                 (unsigned long long)root);
4450                 }
4451                 back->node.found_extent_tree = 1;
4452         }
4453         if (insert)
4454                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
4455                         compare_extent_backref));
4456         check_extent_type(rec);
4457         maybe_free_extent_rec(extent_cache, rec);
4458         return 0;
4459 }
4460
4461 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4462                             u64 parent, u64 root, u64 owner, u64 offset,
4463                             u32 num_refs, int found_ref, u64 max_size)
4464 {
4465         struct extent_record *rec;
4466         struct data_backref *back;
4467         struct cache_extent *cache;
4468         int ret;
4469         bool insert = false;
4470
4471         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4472         if (!cache) {
4473                 struct extent_record tmpl;
4474
4475                 memset(&tmpl, 0, sizeof(tmpl));
4476                 tmpl.start = bytenr;
4477                 tmpl.nr = 1;
4478                 tmpl.max_size = max_size;
4479
4480                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4481                 if (ret)
4482                         return ret;
4483
4484                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4485                 if (!cache)
4486                         abort();
4487         }
4488
4489         rec = container_of(cache, struct extent_record, cache);
4490         if (rec->max_size < max_size)
4491                 rec->max_size = max_size;
4492
4493         /*
4494          * If found_ref is set then max_size is the real size and must match the
4495          * existing refs.  So if we have already found a ref then we need to
4496          * make sure that this ref matches the existing one, otherwise we need
4497          * to add a new backref so we can notice that the backrefs don't match
4498          * and we need to figure out who is telling the truth.  This is to
4499          * account for that awful fsync bug I introduced where we'd end up with
4500          * a btrfs_file_extent_item that would have its length include multiple
4501          * prealloc extents or point inside of a prealloc extent.
4502          */
4503         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4504                                  bytenr, max_size);
4505         if (!back) {
4506                 back = alloc_data_backref(rec, parent, root, owner, offset,
4507                                           max_size);
4508                 BUG_ON(!back);
4509                 insert = true;
4510         }
4511
4512         if (found_ref) {
4513                 BUG_ON(num_refs != 1);
4514                 if (back->node.found_ref)
4515                         BUG_ON(back->bytes != max_size);
4516                 back->node.found_ref = 1;
4517                 back->found_ref += 1;
4518                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
4519                         back->bytes = max_size;
4520                         back->disk_bytenr = bytenr;
4521
4522                         /* Need to reinsert if not already in the tree */
4523                         if (!insert) {
4524                                 rb_erase(&back->node.node, &rec->backref_tree);
4525                                 insert = true;
4526                         }
4527                 }
4528                 rec->refs += 1;
4529                 rec->content_checked = 1;
4530                 rec->owner_ref_checked = 1;
4531         } else {
4532                 if (back->node.found_extent_tree) {
4533                         fprintf(stderr, "Extent back ref already exists "
4534                                 "for %llu parent %llu root %llu "
4535                                 "owner %llu offset %llu num_refs %lu\n",
4536                                 (unsigned long long)bytenr,
4537                                 (unsigned long long)parent,
4538                                 (unsigned long long)root,
4539                                 (unsigned long long)owner,
4540                                 (unsigned long long)offset,
4541                                 (unsigned long)num_refs);
4542                 }
4543                 back->num_refs = num_refs;
4544                 back->node.found_extent_tree = 1;
4545         }
4546         if (insert)
4547                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
4548                         compare_extent_backref));
4549
4550         maybe_free_extent_rec(extent_cache, rec);
4551         return 0;
4552 }
4553
4554 static int add_pending(struct cache_tree *pending,
4555                        struct cache_tree *seen, u64 bytenr, u32 size)
4556 {
4557         int ret;
4558         ret = add_cache_extent(seen, bytenr, size);
4559         if (ret)
4560                 return ret;
4561         add_cache_extent(pending, bytenr, size);
4562         return 0;
4563 }
4564
4565 static int pick_next_pending(struct cache_tree *pending,
4566                         struct cache_tree *reada,
4567                         struct cache_tree *nodes,
4568                         u64 last, struct block_info *bits, int bits_nr,
4569                         int *reada_bits)
4570 {
4571         unsigned long node_start = last;
4572         struct cache_extent *cache;
4573         int ret;
4574
4575         cache = search_cache_extent(reada, 0);
4576         if (cache) {
4577                 bits[0].start = cache->start;
4578                 bits[0].size = cache->size;
4579                 *reada_bits = 1;
4580                 return 1;
4581         }
4582         *reada_bits = 0;
4583         if (node_start > 32768)
4584                 node_start -= 32768;
4585
4586         cache = search_cache_extent(nodes, node_start);
4587         if (!cache)
4588                 cache = search_cache_extent(nodes, 0);
4589
4590         if (!cache) {
4591                  cache = search_cache_extent(pending, 0);
4592                  if (!cache)
4593                          return 0;
4594                  ret = 0;
4595                  do {
4596                          bits[ret].start = cache->start;
4597                          bits[ret].size = cache->size;
4598                          cache = next_cache_extent(cache);
4599                          ret++;
4600                  } while (cache && ret < bits_nr);
4601                  return ret;
4602         }
4603
4604         ret = 0;
4605         do {
4606                 bits[ret].start = cache->start;
4607                 bits[ret].size = cache->size;
4608                 cache = next_cache_extent(cache);
4609                 ret++;
4610         } while (cache && ret < bits_nr);
4611
4612         if (bits_nr - ret > 8) {
4613                 u64 lookup = bits[0].start + bits[0].size;
4614                 struct cache_extent *next;
4615                 next = search_cache_extent(pending, lookup);
4616                 while(next) {
4617                         if (next->start - lookup > 32768)
4618                                 break;
4619                         bits[ret].start = next->start;
4620                         bits[ret].size = next->size;
4621                         lookup = next->start + next->size;
4622                         ret++;
4623                         if (ret == bits_nr)
4624                                 break;
4625                         next = next_cache_extent(next);
4626                         if (!next)
4627                                 break;
4628                 }
4629         }
4630         return ret;
4631 }
4632
4633 static void free_chunk_record(struct cache_extent *cache)
4634 {
4635         struct chunk_record *rec;
4636
4637         rec = container_of(cache, struct chunk_record, cache);
4638         list_del_init(&rec->list);
4639         list_del_init(&rec->dextents);
4640         free(rec);
4641 }
4642
/*
 * Release every entry of @chunk_cache, passing each cache extent to
 * free_chunk_record().
 */
void free_chunk_cache_tree(struct cache_tree *chunk_cache)
{
        cache_tree_free_extents(chunk_cache, free_chunk_record);
}
4647
4648 static void free_device_record(struct rb_node *node)
4649 {
4650         struct device_record *rec;
4651
4652         rec = container_of(node, struct device_record, node);
4653         free(rec);
4654 }
4655
/*
 * Expand FREE_RB_BASED_TREE for the device cache with free_device_record()
 * as the per-node callback.
 * NOTE(review): the macro presumably generates the tree teardown function
 * (likely free_device_cache_tree()) -- confirm against its definition.
 */
FREE_RB_BASED_TREE(device_cache, free_device_record);
4657
4658 int insert_block_group_record(struct block_group_tree *tree,
4659                               struct block_group_record *bg_rec)
4660 {
4661         int ret;
4662
4663         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4664         if (ret)
4665                 return ret;
4666
4667         list_add_tail(&bg_rec->list, &tree->block_groups);
4668         return 0;
4669 }
4670
4671 static void free_block_group_record(struct cache_extent *cache)
4672 {
4673         struct block_group_record *rec;
4674
4675         rec = container_of(cache, struct block_group_record, cache);
4676         list_del_init(&rec->list);
4677         free(rec);
4678 }
4679
/*
 * Release every record stored in @tree's cache tree, passing each cache
 * extent to free_block_group_record().
 */
void free_block_group_tree(struct block_group_tree *tree)
{
        cache_tree_free_extents(&tree->tree, free_block_group_record);
}
4684
4685 int insert_device_extent_record(struct device_extent_tree *tree,
4686                                 struct device_extent_record *de_rec)
4687 {
4688         int ret;
4689
4690         /*
4691          * Device extent is a bit different from the other extents, because
4692          * the extents which belong to the different devices may have the
4693          * same start and size, so we need use the special extent cache
4694          * search/insert functions.
4695          */
4696         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4697         if (ret)
4698                 return ret;
4699
4700         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4701         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4702         return 0;
4703 }
4704
4705 static void free_device_extent_record(struct cache_extent *cache)
4706 {
4707         struct device_extent_record *rec;
4708
4709         rec = container_of(cache, struct device_extent_record, cache);
4710         if (!list_empty(&rec->chunk_list))
4711                 list_del_init(&rec->chunk_list);
4712         if (!list_empty(&rec->device_list))
4713                 list_del_init(&rec->device_list);
4714         free(rec);
4715 }
4716
/*
 * Release every record stored in @tree's cache tree, passing each cache
 * extent to free_device_extent_record().
 */
void free_device_extent_tree(struct device_extent_tree *tree)
{
        cache_tree_free_extents(&tree->tree, free_device_extent_record);
}
4721
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref in
 * @extent_cache.
 *
 * Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID are recorded as
 * tree backrefs, all others as data backrefs carrying the v0 ref count.
 * Returns whatever the backref helper returned.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_key key;
        struct btrfs_extent_ref_v0 *ref0;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                        key.offset, 0, 0, 0,
                                        btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset,
                                0, 0);
}
#endif
4742
4743 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4744                                             struct btrfs_key *key,
4745                                             int slot)
4746 {
4747         struct btrfs_chunk *ptr;
4748         struct chunk_record *rec;
4749         int num_stripes, i;
4750
4751         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4752         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4753
4754         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4755         if (!rec) {
4756                 fprintf(stderr, "memory allocation failed\n");
4757                 exit(-1);
4758         }
4759
4760         INIT_LIST_HEAD(&rec->list);
4761         INIT_LIST_HEAD(&rec->dextents);
4762         rec->bg_rec = NULL;
4763
4764         rec->cache.start = key->offset;
4765         rec->cache.size = btrfs_chunk_length(leaf, ptr);
4766
4767         rec->generation = btrfs_header_generation(leaf);
4768
4769         rec->objectid = key->objectid;
4770         rec->type = key->type;
4771         rec->offset = key->offset;
4772
4773         rec->length = rec->cache.size;
4774         rec->owner = btrfs_chunk_owner(leaf, ptr);
4775         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4776         rec->type_flags = btrfs_chunk_type(leaf, ptr);
4777         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4778         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4779         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4780         rec->num_stripes = num_stripes;
4781         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4782
4783         for (i = 0; i < rec->num_stripes; ++i) {
4784                 rec->stripes[i].devid =
4785                         btrfs_stripe_devid_nr(leaf, ptr, i);
4786                 rec->stripes[i].offset =
4787                         btrfs_stripe_offset_nr(leaf, ptr, i);
4788                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4789                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
4790                                 BTRFS_UUID_SIZE);
4791         }
4792
4793         return rec;
4794 }
4795
4796 static int process_chunk_item(struct cache_tree *chunk_cache,
4797                               struct btrfs_key *key, struct extent_buffer *eb,
4798                               int slot)
4799 {
4800         struct chunk_record *rec;
4801         struct btrfs_chunk *chunk;
4802         int ret = 0;
4803
4804         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
4805         /*
4806          * Do extra check for this chunk item,
4807          *
4808          * It's still possible one can craft a leaf with CHUNK_ITEM, with
4809          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
4810          * and owner<->key_type check.
4811          */
4812         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
4813                                       key->offset);
4814         if (ret < 0) {
4815                 error("chunk(%llu, %llu) is not valid, ignore it",
4816                       key->offset, btrfs_chunk_length(eb, chunk));
4817                 return 0;
4818         }
4819         rec = btrfs_new_chunk_record(eb, key, slot);
4820         ret = insert_cache_extent(chunk_cache, &rec->cache);
4821         if (ret) {
4822                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
4823                         rec->offset, rec->length);
4824                 free(rec);
4825         }
4826
4827         return ret;
4828 }
4829
4830 static int process_device_item(struct rb_root *dev_cache,
4831                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
4832 {
4833         struct btrfs_dev_item *ptr;
4834         struct device_record *rec;
4835         int ret = 0;
4836
4837         ptr = btrfs_item_ptr(eb,
4838                 slot, struct btrfs_dev_item);
4839
4840         rec = malloc(sizeof(*rec));
4841         if (!rec) {
4842                 fprintf(stderr, "memory allocation failed\n");
4843                 return -ENOMEM;
4844         }
4845
4846         rec->devid = key->offset;
4847         rec->generation = btrfs_header_generation(eb);
4848
4849         rec->objectid = key->objectid;
4850         rec->type = key->type;
4851         rec->offset = key->offset;
4852
4853         rec->devid = btrfs_device_id(eb, ptr);
4854         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
4855         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
4856
4857         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
4858         if (ret) {
4859                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
4860                 free(rec);
4861         }
4862
4863         return ret;
4864 }
4865
4866 struct block_group_record *
4867 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
4868                              int slot)
4869 {
4870         struct btrfs_block_group_item *ptr;
4871         struct block_group_record *rec;
4872
4873         rec = calloc(1, sizeof(*rec));
4874         if (!rec) {
4875                 fprintf(stderr, "memory allocation failed\n");
4876                 exit(-1);
4877         }
4878
4879         rec->cache.start = key->objectid;
4880         rec->cache.size = key->offset;
4881
4882         rec->generation = btrfs_header_generation(leaf);
4883
4884         rec->objectid = key->objectid;
4885         rec->type = key->type;
4886         rec->offset = key->offset;
4887
4888         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
4889         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
4890
4891         INIT_LIST_HEAD(&rec->list);
4892
4893         return rec;
4894 }
4895
4896 static int process_block_group_item(struct block_group_tree *block_group_cache,
4897                                     struct btrfs_key *key,
4898                                     struct extent_buffer *eb, int slot)
4899 {
4900         struct block_group_record *rec;
4901         int ret = 0;
4902
4903         rec = btrfs_new_block_group_record(eb, key, slot);
4904         ret = insert_block_group_record(block_group_cache, rec);
4905         if (ret) {
4906                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
4907                         rec->objectid, rec->offset);
4908                 free(rec);
4909         }
4910
4911         return ret;
4912 }
4913
4914 struct device_extent_record *
4915 btrfs_new_device_extent_record(struct extent_buffer *leaf,
4916                                struct btrfs_key *key, int slot)
4917 {
4918         struct device_extent_record *rec;
4919         struct btrfs_dev_extent *ptr;
4920
4921         rec = calloc(1, sizeof(*rec));
4922         if (!rec) {
4923                 fprintf(stderr, "memory allocation failed\n");
4924                 exit(-1);
4925         }
4926
4927         rec->cache.objectid = key->objectid;
4928         rec->cache.start = key->offset;
4929
4930         rec->generation = btrfs_header_generation(leaf);
4931
4932         rec->objectid = key->objectid;
4933         rec->type = key->type;
4934         rec->offset = key->offset;
4935
4936         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
4937         rec->chunk_objecteid =
4938                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
4939         rec->chunk_offset =
4940                 btrfs_dev_extent_chunk_offset(leaf, ptr);
4941         rec->length = btrfs_dev_extent_length(leaf, ptr);
4942         rec->cache.size = rec->length;
4943
4944         INIT_LIST_HEAD(&rec->chunk_list);
4945         INIT_LIST_HEAD(&rec->device_list);
4946
4947         return rec;
4948 }
4949
4950 static int
4951 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
4952                            struct btrfs_key *key, struct extent_buffer *eb,
4953                            int slot)
4954 {
4955         struct device_extent_record *rec;
4956         int ret;
4957
4958         rec = btrfs_new_device_extent_record(eb, key, slot);
4959         ret = insert_device_extent_record(dev_extent_cache, rec);
4960         if (ret) {
4961                 fprintf(stderr,
4962                         "Device extent[%llu, %llu, %llu] existed.\n",
4963                         rec->objectid, rec->offset, rec->length);
4964                 free(rec);
4965         }
4966
4967         return ret;
4968 }
4969
/*
 * Record the EXTENT_ITEM / METADATA_ITEM at @slot of @eb in @extent_cache,
 * together with all of its inline backrefs.
 *
 * Returns -EIO when the extent start is not sector aligned, when a metadata
 * extent's length differs from nodesize, or when a data extent's length is
 * not sector aligned.  For items smaller than struct btrfs_extent_item (v0
 * format) the result of add_extent_rec() is returned.  In every other case
 * 0 is returned: failures of add_tree_backref() are only reported, and the
 * results of add_extent_rec() and add_data_backref() in the main path are
 * not checked.
 */
static int process_extent_item(struct btrfs_root *root,
                               struct cache_tree *extent_cache,
                               struct extent_buffer *eb, int slot)
{
        struct btrfs_extent_item *ei;
        struct btrfs_extent_inline_ref *iref;
        struct btrfs_extent_data_ref *dref;
        struct btrfs_shared_data_ref *sref;
        struct btrfs_key key;
        struct extent_record tmpl;
        unsigned long end;
        unsigned long ptr;
        int ret;
        int type;
        u32 item_size = btrfs_item_size_nr(eb, slot);
        u64 refs = 0;
        u64 offset;
        u64 num_bytes;
        int metadata = 0;

        btrfs_item_key_to_cpu(eb, &key, slot);

        /*
         * A METADATA_ITEM key does not carry the extent length, so nodesize
         * is used; for other keys the length is the key offset.
         */
        if (key.type == BTRFS_METADATA_ITEM_KEY) {
                metadata = 1;
                num_bytes = root->fs_info->nodesize;
        } else {
                num_bytes = key.offset;
        }

        if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
                error("ignoring invalid extent, bytenr %llu is not aligned to %u",
                      key.objectid, root->fs_info->sectorsize);
                return -EIO;
        }
        /* Item too small for the current format: v0 item or corruption. */
        if (item_size < sizeof(*ei)) {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
                struct btrfs_extent_item_v0 *ei0;
                if (item_size != sizeof(*ei0)) {
                        error(
        "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
                                key.objectid, key.type, key.offset,
                                btrfs_header_bytenr(eb), slot);
                        BUG();
                }
                ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
                refs = btrfs_extent_refs_v0(eb, ei0);
#else
                BUG();
#endif
                /* v0 items have no inline refs; only the record is added. */
                memset(&tmpl, 0, sizeof(tmpl));
                tmpl.start = key.objectid;
                tmpl.nr = num_bytes;
                tmpl.extent_item_refs = refs;
                tmpl.metadata = metadata;
                tmpl.found_rec = 1;
                tmpl.max_size = num_bytes;

                return add_extent_rec(extent_cache, &tmpl);
        }

        ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
        refs = btrfs_extent_refs(eb, ei);
        /* The on-disk flag overrides the guess made from the key type. */
        if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
                metadata = 1;
        else
                metadata = 0;
        if (metadata && num_bytes != root->fs_info->nodesize) {
                error("ignore invalid metadata extent, length %llu does not equal to %u",
                      num_bytes, root->fs_info->nodesize);
                return -EIO;
        }
        if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
                error("ignore invalid data extent, length %llu is not aligned to %u",
                      num_bytes, root->fs_info->sectorsize);
                return -EIO;
        }

        memset(&tmpl, 0, sizeof(tmpl));
        tmpl.start = key.objectid;
        tmpl.nr = num_bytes;
        tmpl.extent_item_refs = refs;
        tmpl.metadata = metadata;
        tmpl.found_rec = 1;
        tmpl.max_size = num_bytes;
        /* NOTE(review): the result of add_extent_rec() is ignored here. */
        add_extent_rec(extent_cache, &tmpl);

        /*
         * Inline refs start right after the extent item; tree blocks keyed
         * by EXTENT_ITEM additionally carry a btrfs_tree_block_info first.
         */
        ptr = (unsigned long)(ei + 1);
        if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
            key.type == BTRFS_EXTENT_ITEM_KEY)
                ptr += sizeof(struct btrfs_tree_block_info);

        end = (unsigned long)ei + item_size;
        /* Walk the inline refs, advancing by the size of each ref type. */
        while (ptr < end) {
                iref = (struct btrfs_extent_inline_ref *)ptr;
                type = btrfs_extent_inline_ref_type(eb, iref);
                offset = btrfs_extent_inline_ref_offset(eb, iref);
                switch (type) {
                case BTRFS_TREE_BLOCK_REF_KEY:
                        ret = add_tree_backref(extent_cache, key.objectid,
                                        0, offset, 0);
                        if (ret < 0)
                                error(
                        "add_tree_backref failed (extent items tree block): %s",
                                      strerror(-ret));
                        break;
                case BTRFS_SHARED_BLOCK_REF_KEY:
                        ret = add_tree_backref(extent_cache, key.objectid,
                                        offset, 0, 0);
                        if (ret < 0)
                                error(
                        "add_tree_backref failed (extent items shared block): %s",
                                      strerror(-ret));
                        break;
                case BTRFS_EXTENT_DATA_REF_KEY:
                        /* The data ref overlays the inline ref's offset. */
                        dref = (struct btrfs_extent_data_ref *)(&iref->offset);
                        add_data_backref(extent_cache, key.objectid, 0,
                                        btrfs_extent_data_ref_root(eb, dref),
                                        btrfs_extent_data_ref_objectid(eb,
                                                                       dref),
                                        btrfs_extent_data_ref_offset(eb, dref),
                                        btrfs_extent_data_ref_count(eb, dref),
                                        0, num_bytes);
                        break;
                case BTRFS_SHARED_DATA_REF_KEY:
                        /* The shared data ref follows the inline ref. */
                        sref = (struct btrfs_shared_data_ref *)(iref + 1);
                        add_data_backref(extent_cache, key.objectid, offset,
                                        0, 0, 0,
                                        btrfs_shared_data_ref_count(eb, sref),
                                        0, num_bytes);
                        break;
                default:
                        /* Unknown type: its size is unknown, stop walking. */
                        fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
                                key.objectid, key.type, num_bytes);
                        goto out;
                }
                ptr += btrfs_extent_inline_ref_size(type);
        }
        /* The last ref must end exactly at the end of the item. */
        WARN_ON(ptr > end);
out:
        return 0;
}
5111
/*
 * Check that the free space cache of @cache has an entry matching the range
 * [@offset, @offset + @bytes) once super block stripes are carved out of it.
 *
 * For every super block mirror the stripes mapped into this block group are
 * looked up with btrfs_rmap_block() and removed from the range: a stripe
 * overlapping the front or the back trims the range, a stripe in the middle
 * splits it (the left part is checked recursively, the right part is kept).
 * If a stripe covers the whole range, 0 is returned straight away.
 *
 * The remaining range must then match a free space entry exactly; that
 * entry is unlinked and freed.
 *
 * Returns 0 on success, -EINVAL if no entry is found or its offset/size
 * differ, or the error returned by btrfs_rmap_block().
 */
static int check_cache_range(struct btrfs_root *root,
                             struct btrfs_block_group_cache *cache,
                             u64 offset, u64 bytes)
{
        struct btrfs_free_space *entry;
        u64 *logical;
        u64 bytenr;
        int stripe_len;
        int i, nr, ret;

        for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
                bytenr = btrfs_sb_offset(i);
                ret = btrfs_rmap_block(root->fs_info,
                                       cache->key.objectid, bytenr, 0,
                                       &logical, &nr, &stripe_len);
                if (ret)
                        return ret;

                while (nr--) {
                        /* Stripe ends before or starts after the range. */
                        if (logical[nr] + stripe_len <= offset)
                                continue;
                        if (offset + bytes <= logical[nr])
                                continue;
                        if (logical[nr] == offset) {
                                /* Stripe starts exactly at the range start. */
                                if (stripe_len >= bytes) {
                                        free(logical);
                                        return 0;
                                }
                                bytes -= stripe_len;
                                offset += stripe_len;
                        } else if (logical[nr] < offset) {
                                /* Stripe overlaps the front of the range. */
                                if (logical[nr] + stripe_len >=
                                    offset + bytes) {
                                        free(logical);
                                        return 0;
                                }
                                bytes = (offset + bytes) -
                                        (logical[nr] + stripe_len);
                                offset = logical[nr] + stripe_len;
                        } else {
                                /*
                                 * Could be tricky, the super may land in the
                                 * middle of the area we're checking.  First
                                 * check the easiest case, it's at the end.
                                 */
                                if (logical[nr] + stripe_len >=
                                    bytes + offset) {
                                        bytes = logical[nr] - offset;
                                        continue;
                                }

                                /* Check the left side */
                                ret = check_cache_range(root, cache,
                                                        offset,
                                                        logical[nr] - offset);
                                if (ret) {
                                        free(logical);
                                        return ret;
                                }

                                /* Now we continue with the right side */
                                bytes = (offset + bytes) -
                                        (logical[nr] + stripe_len);
                                offset = logical[nr] + stripe_len;
                        }
                }

                free(logical);
        }

        /* What is left of the range must match one cached entry exactly. */
        entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
        if (!entry) {
                fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
                        offset, offset+bytes);
                return -EINVAL;
        }

        if (entry->offset != offset) {
                fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
                        entry->offset);
                return -EINVAL;
        }

        if (entry->bytes != bytes) {
                fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
                        bytes, entry->bytes, offset);
                return -EINVAL;
        }

        /* Consume the entry; the caller later checks that none remain. */
        unlink_free_space(cache->free_space_ctl, entry);
        free(entry);
        return 0;
}
5205
5206 static int verify_space_cache(struct btrfs_root *root,
5207                               struct btrfs_block_group_cache *cache)
5208 {
5209         struct btrfs_path path;
5210         struct extent_buffer *leaf;
5211         struct btrfs_key key;
5212         u64 last;
5213         int ret = 0;
5214
5215         root = root->fs_info->extent_root;
5216
5217         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5218
5219         btrfs_init_path(&path);
5220         key.objectid = last;
5221         key.offset = 0;
5222         key.type = BTRFS_EXTENT_ITEM_KEY;
5223         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5224         if (ret < 0)
5225                 goto out;
5226         ret = 0;
5227         while (1) {
5228                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
5229                         ret = btrfs_next_leaf(root, &path);
5230                         if (ret < 0)
5231                                 goto out;
5232                         if (ret > 0) {
5233                                 ret = 0;
5234                                 break;
5235                         }
5236                 }
5237                 leaf = path.nodes[0];
5238                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
5239                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5240                         break;
5241                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5242                     key.type != BTRFS_METADATA_ITEM_KEY) {
5243                         path.slots[0]++;
5244                         continue;
5245                 }
5246
5247                 if (last == key.objectid) {
5248                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5249                                 last = key.objectid + key.offset;
5250                         else
5251                                 last = key.objectid + root->fs_info->nodesize;
5252                         path.slots[0]++;
5253                         continue;
5254                 }
5255
5256                 ret = check_cache_range(root, cache, last,
5257                                         key.objectid - last);
5258                 if (ret)
5259                         break;
5260                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5261                         last = key.objectid + key.offset;
5262                 else
5263                         last = key.objectid + root->fs_info->nodesize;
5264                 path.slots[0]++;
5265         }
5266
5267         if (last < cache->key.objectid + cache->key.offset)
5268                 ret = check_cache_range(root, cache, last,
5269                                         cache->key.objectid +
5270                                         cache->key.offset - last);
5271
5272 out:
5273         btrfs_release_path(&path);
5274
5275         if (!ret &&
5276             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5277                 fprintf(stderr, "There are still entries left in the space "
5278                         "cache\n");
5279                 ret = -EINVAL;
5280         }
5281
5282         return ret;
5283 }
5284
5285 static int check_space_cache(struct btrfs_root *root)
5286 {
5287         struct btrfs_block_group_cache *cache;
5288         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5289         int ret;
5290         int error = 0;
5291
5292         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5293             btrfs_super_generation(root->fs_info->super_copy) !=
5294             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5295                 printf("cache and super generation don't match, space cache "
5296                        "will be invalidated\n");
5297                 return 0;
5298         }
5299
5300         if (ctx.progress_enabled) {
5301                 ctx.tp = TASK_FREE_SPACE;
5302                 task_start(ctx.info);
5303         }
5304
5305         while (1) {
5306                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5307                 if (!cache)
5308                         break;
5309
5310                 start = cache->key.objectid + cache->key.offset;
5311                 if (!cache->free_space_ctl) {
5312                         if (btrfs_init_free_space_ctl(cache,
5313                                                 root->fs_info->sectorsize)) {
5314                                 ret = -ENOMEM;
5315                                 break;
5316                         }
5317                 } else {
5318                         btrfs_remove_free_space_cache(cache);
5319                 }
5320
5321                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
5322                         ret = exclude_super_stripes(root, cache);
5323                         if (ret) {
5324                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5325                                         strerror(-ret));
5326                                 error++;
5327                                 continue;
5328                         }
5329                         ret = load_free_space_tree(root->fs_info, cache);
5330                         free_excluded_extents(root, cache);
5331                         if (ret < 0) {
5332                                 fprintf(stderr, "could not load free space tree: %s\n",
5333                                         strerror(-ret));
5334                                 error++;
5335                                 continue;
5336                         }
5337                         error += ret;
5338                 } else {
5339                         ret = load_free_space_cache(root->fs_info, cache);
5340                         if (!ret)
5341                                 continue;
5342                 }
5343
5344                 ret = verify_space_cache(root, cache);
5345                 if (ret) {
5346                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5347                                 cache->key.objectid);
5348                         error++;
5349                 }
5350         }
5351
5352         task_stop(ctx.info);
5353
5354         return error ? -EINVAL : 0;
5355 }
5356
5357 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5358                         u64 num_bytes, unsigned long leaf_offset,
5359                         struct extent_buffer *eb) {
5360
5361         struct btrfs_fs_info *fs_info = root->fs_info;
5362         u64 offset = 0;
5363         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
5364         char *data;
5365         unsigned long csum_offset;
5366         u32 csum;
5367         u32 csum_expected;
5368         u64 read_len;
5369         u64 data_checked = 0;
5370         u64 tmp;
5371         int ret = 0;
5372         int mirror;
5373         int num_copies;
5374
5375         if (num_bytes % fs_info->sectorsize)
5376                 return -EINVAL;
5377
5378         data = malloc(num_bytes);
5379         if (!data)
5380                 return -ENOMEM;
5381
5382         while (offset < num_bytes) {
5383                 mirror = 0;
5384 again:
5385                 read_len = num_bytes - offset;
5386                 /* read as much space once a time */
5387                 ret = read_extent_data(fs_info, data + offset,
5388                                 bytenr + offset, &read_len, mirror);
5389                 if (ret)
5390                         goto out;
5391                 data_checked = 0;
5392                 /* verify every 4k data's checksum */
5393                 while (data_checked < read_len) {
5394                         csum = ~(u32)0;
5395                         tmp = offset + data_checked;
5396
5397                         csum = btrfs_csum_data((char *)data + tmp,
5398                                                csum, fs_info->sectorsize);
5399                         btrfs_csum_final(csum, (u8 *)&csum);
5400
5401                         csum_offset = leaf_offset +
5402                                  tmp / fs_info->sectorsize * csum_size;
5403                         read_extent_buffer(eb, (char *)&csum_expected,
5404                                            csum_offset, csum_size);
5405                         /* try another mirror */
5406                         if (csum != csum_expected) {
5407                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5408                                                 mirror, bytenr + tmp,
5409                                                 csum, csum_expected);
5410                                 num_copies = btrfs_num_copies(root->fs_info,
5411                                                 bytenr, num_bytes);
5412                                 if (mirror < num_copies - 1) {
5413                                         mirror += 1;
5414                                         goto again;
5415                                 }
5416                         }
5417                         data_checked += fs_info->sectorsize;
5418                 }
5419                 offset += read_len;
5420         }
5421 out:
5422         free(data);
5423         return ret;
5424 }
5425
5426 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5427                                u64 num_bytes)
5428 {
5429         struct btrfs_path path;
5430         struct extent_buffer *leaf;
5431         struct btrfs_key key;
5432         int ret;
5433
5434         btrfs_init_path(&path);
5435         key.objectid = bytenr;
5436         key.type = BTRFS_EXTENT_ITEM_KEY;
5437         key.offset = (u64)-1;
5438
5439 again:
5440         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
5441                                 0, 0);
5442         if (ret < 0) {
5443                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5444                 btrfs_release_path(&path);
5445                 return ret;
5446         } else if (ret) {
5447                 if (path.slots[0] > 0) {
5448                         path.slots[0]--;
5449                 } else {
5450                         ret = btrfs_prev_leaf(root, &path);
5451                         if (ret < 0) {
5452                                 goto out;
5453                         } else if (ret > 0) {
5454                                 ret = 0;
5455                                 goto out;
5456                         }
5457                 }
5458         }
5459
5460         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
5461
5462         /*
5463          * Block group items come before extent items if they have the same
5464          * bytenr, so walk back one more just in case.  Dear future traveller,
5465          * first congrats on mastering time travel.  Now if it's not too much
5466          * trouble could you go back to 2006 and tell Chris to make the
5467          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5468          * EXTENT_ITEM_KEY please?
5469          */
5470         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5471                 if (path.slots[0] > 0) {
5472                         path.slots[0]--;
5473                 } else {
5474                         ret = btrfs_prev_leaf(root, &path);
5475                         if (ret < 0) {
5476                                 goto out;
5477                         } else if (ret > 0) {
5478                                 ret = 0;
5479                                 goto out;
5480                         }
5481                 }
5482                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
5483         }
5484
5485         while (num_bytes) {
5486                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
5487                         ret = btrfs_next_leaf(root, &path);
5488                         if (ret < 0) {
5489                                 fprintf(stderr, "Error going to next leaf "
5490                                         "%d\n", ret);
5491                                 btrfs_release_path(&path);
5492                                 return ret;
5493                         } else if (ret) {
5494                                 break;
5495                         }
5496                 }
5497                 leaf = path.nodes[0];
5498                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
5499                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5500                         path.slots[0]++;
5501                         continue;
5502                 }
5503                 if (key.objectid + key.offset < bytenr) {
5504                         path.slots[0]++;
5505                         continue;
5506                 }
5507                 if (key.objectid > bytenr + num_bytes)
5508                         break;
5509
5510                 if (key.objectid == bytenr) {
5511                         if (key.offset >= num_bytes) {
5512                                 num_bytes = 0;
5513                                 break;
5514                         }
5515                         num_bytes -= key.offset;
5516                         bytenr += key.offset;
5517                 } else if (key.objectid < bytenr) {
5518                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5519                                 num_bytes = 0;
5520                                 break;
5521                         }
5522                         num_bytes = (bytenr + num_bytes) -
5523                                 (key.objectid + key.offset);
5524                         bytenr = key.objectid + key.offset;
5525                 } else {
5526                         if (key.objectid + key.offset < bytenr + num_bytes) {
5527                                 u64 new_start = key.objectid + key.offset;
5528                                 u64 new_bytes = bytenr + num_bytes - new_start;
5529
5530                                 /*
5531                                  * Weird case, the extent is in the middle of
5532                                  * our range, we'll have to search one side
5533                                  * and then the other.  Not sure if this happens
5534                                  * in real life, but no harm in coding it up
5535                                  * anyway just in case.
5536                                  */
5537                                 btrfs_release_path(&path);
5538                                 ret = check_extent_exists(root, new_start,
5539                                                           new_bytes);
5540                                 if (ret) {
5541                                         fprintf(stderr, "Right section didn't "
5542                                                 "have a record\n");
5543                                         break;
5544                                 }
5545                                 num_bytes = key.objectid - bytenr;
5546                                 goto again;
5547                         }
5548                         num_bytes = key.objectid - bytenr;
5549                 }
5550                 path.slots[0]++;
5551         }
5552         ret = 0;
5553
5554 out:
5555         if (num_bytes && !ret) {
5556                 fprintf(stderr, "There are no extents for csum range "
5557                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5558                 ret = 1;
5559         }
5560
5561         btrfs_release_path(&path);
5562         return ret;
5563 }
5564
5565 static int check_csums(struct btrfs_root *root)
5566 {
5567         struct btrfs_path path;
5568         struct extent_buffer *leaf;
5569         struct btrfs_key key;
5570         u64 offset = 0, num_bytes = 0;
5571         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5572         int errors = 0;
5573         int ret;
5574         u64 data_len;
5575         unsigned long leaf_offset;
5576
5577         root = root->fs_info->csum_root;
5578         if (!extent_buffer_uptodate(root->node)) {
5579                 fprintf(stderr, "No valid csum tree found\n");
5580                 return -ENOENT;
5581         }
5582
5583         btrfs_init_path(&path);
5584         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5585         key.type = BTRFS_EXTENT_CSUM_KEY;
5586         key.offset = 0;
5587         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5588         if (ret < 0) {
5589                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5590                 btrfs_release_path(&path);
5591                 return ret;
5592         }
5593
5594         if (ret > 0 && path.slots[0])
5595                 path.slots[0]--;
5596         ret = 0;
5597
5598         while (1) {
5599                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
5600                         ret = btrfs_next_leaf(root, &path);
5601                         if (ret < 0) {
5602                                 fprintf(stderr, "Error going to next leaf "
5603                                         "%d\n", ret);
5604                                 break;
5605                         }
5606                         if (ret)
5607                                 break;
5608                 }
5609                 leaf = path.nodes[0];
5610
5611                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
5612                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5613                         path.slots[0]++;
5614                         continue;
5615                 }
5616
5617                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
5618                               csum_size) * root->fs_info->sectorsize;
5619                 if (!check_data_csum)
5620                         goto skip_csum_check;
5621                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
5622                 ret = check_extent_csums(root, key.offset, data_len,
5623                                          leaf_offset, leaf);
5624                 if (ret)
5625                         break;
5626 skip_csum_check:
5627                 if (!num_bytes) {
5628                         offset = key.offset;
5629                 } else if (key.offset != offset + num_bytes) {
5630                         ret = check_extent_exists(root, offset, num_bytes);
5631                         if (ret) {
5632                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5633                                         "there is no extent record\n",
5634                                         offset, offset+num_bytes);
5635                                 errors++;
5636                         }
5637                         offset = key.offset;
5638                         num_bytes = 0;
5639                 }
5640                 num_bytes += data_len;
5641                 path.slots[0]++;
5642         }
5643
5644         btrfs_release_path(&path);
5645         return errors;
5646 }
5647
5648 static int is_dropped_key(struct btrfs_key *key,
5649                           struct btrfs_key *drop_key) {
5650         if (key->objectid < drop_key->objectid)
5651                 return 1;
5652         else if (key->objectid == drop_key->objectid) {
5653                 if (key->type < drop_key->type)
5654                         return 1;
5655                 else if (key->type == drop_key->type) {
5656                         if (key->offset < drop_key->offset)
5657                                 return 1;
5658                 }
5659         }
5660         return 0;
5661 }
5662
5663 /*
5664  * Here are the rules for FULL_BACKREF.
5665  *
5666  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5667  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5668  *      FULL_BACKREF set.
5669  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
5670  *    if it happened after the relocation occurred since we'll have dropped the
5671  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5672  *    have no real way to know for sure.
5673  *
5674  * We process the blocks one root at a time, and we start from the lowest root
5675  * objectid and go to the highest.  So we can just lookup the owner backref for
5676  * the record and if we don't find it then we know it doesn't exist and we have
5677  * a FULL BACKREF.
5678  *
5679  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5680  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5681  * be set or not and then we can check later once we've gathered all the refs.
5682  */
5683 static int calc_extent_flag(struct cache_tree *extent_cache,
5684                            struct extent_buffer *buf,
5685                            struct root_item_record *ri,
5686                            u64 *flags)
5687 {
5688         struct extent_record *rec;
5689         struct cache_extent *cache;
5690         struct tree_backref *tback;
5691         u64 owner = 0;
5692
5693         cache = lookup_cache_extent(extent_cache, buf->start, 1);
5694         /* we have added this extent before */
5695         if (!cache)
5696                 return -ENOENT;
5697
5698         rec = container_of(cache, struct extent_record, cache);
5699
5700         /*
5701          * Except file/reloc tree, we can not have
5702          * FULL BACKREF MODE
5703          */
5704         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5705                 goto normal;
5706         /*
5707          * root node
5708          */
5709         if (buf->start == ri->bytenr)
5710                 goto normal;
5711
5712         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5713                 goto full_backref;
5714
5715         owner = btrfs_header_owner(buf);
5716         if (owner == ri->objectid)
5717                 goto normal;
5718
5719         tback = find_tree_backref(rec, 0, owner);
5720         if (!tback)
5721                 goto full_backref;
5722 normal:
5723         *flags = 0;
5724         if (rec->flag_block_full_backref != FLAG_UNSET &&
5725             rec->flag_block_full_backref != 0)
5726                 rec->bad_full_backref = 1;
5727         return 0;
5728 full_backref:
5729         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5730         if (rec->flag_block_full_backref != FLAG_UNSET &&
5731             rec->flag_block_full_backref != 1)
5732                 rec->bad_full_backref = 1;
5733         return 0;
5734 }
5735
5736 static void report_mismatch_key_root(u8 key_type, u64 rootid)
5737 {
5738         fprintf(stderr, "Invalid key type(");
5739         print_key_type(stderr, 0, key_type);
5740         fprintf(stderr, ") found in root(");
5741         print_objectid(stderr, rootid, 0);
5742         fprintf(stderr, ")\n");
5743 }
5744
5745 /*
5746  * Check if the key is valid with its extent buffer.
5747  *
5748  * This is a early check in case invalid key exists in a extent buffer
5749  * This is not comprehensive yet, but should prevent wrong key/item passed
5750  * further
5751  */
5752 static int check_type_with_root(u64 rootid, u8 key_type)
5753 {
5754         switch (key_type) {
5755         /* Only valid in chunk tree */
5756         case BTRFS_DEV_ITEM_KEY:
5757         case BTRFS_CHUNK_ITEM_KEY:
5758                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
5759                         goto err;
5760                 break;
5761         /* valid in csum and log tree */
5762         case BTRFS_CSUM_TREE_OBJECTID:
5763                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
5764                       is_fstree(rootid)))
5765                         goto err;
5766                 break;
5767         case BTRFS_EXTENT_ITEM_KEY:
5768         case BTRFS_METADATA_ITEM_KEY:
5769         case BTRFS_BLOCK_GROUP_ITEM_KEY:
5770                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
5771                         goto err;
5772                 break;
5773         case BTRFS_ROOT_ITEM_KEY:
5774                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
5775                         goto err;
5776                 break;
5777         case BTRFS_DEV_EXTENT_KEY:
5778                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
5779                         goto err;
5780                 break;
5781         }
5782         return 0;
5783 err:
5784         report_mismatch_key_root(key_type, rootid);
5785         return -EINVAL;
5786 }
5787
5788 static int run_next_block(struct btrfs_root *root,
5789                           struct block_info *bits,
5790                           int bits_nr,
5791                           u64 *last,
5792                           struct cache_tree *pending,
5793                           struct cache_tree *seen,
5794                           struct cache_tree *reada,
5795                           struct cache_tree *nodes,
5796                           struct cache_tree *extent_cache,
5797                           struct cache_tree *chunk_cache,
5798                           struct rb_root *dev_cache,
5799                           struct block_group_tree *block_group_cache,
5800                           struct device_extent_tree *dev_extent_cache,
5801                           struct root_item_record *ri)
5802 {
5803         struct btrfs_fs_info *fs_info = root->fs_info;
5804         struct extent_buffer *buf;
5805         struct extent_record *rec = NULL;
5806         u64 bytenr;
5807         u32 size;
5808         u64 parent;
5809         u64 owner;
5810         u64 flags;
5811         u64 ptr;
5812         u64 gen = 0;
5813         int ret = 0;
5814         int i;
5815         int nritems;
5816         struct btrfs_key key;
5817         struct cache_extent *cache;
5818         int reada_bits;
5819
5820         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5821                                     bits_nr, &reada_bits);
5822         if (nritems == 0)
5823                 return 1;
5824
5825         if (!reada_bits) {
5826                 for(i = 0; i < nritems; i++) {
5827                         ret = add_cache_extent(reada, bits[i].start,
5828                                                bits[i].size);
5829                         if (ret == -EEXIST)
5830                                 continue;
5831
5832                         /* fixme, get the parent transid */
5833                         readahead_tree_block(fs_info, bits[i].start, 0);
5834                 }
5835         }
5836         *last = bits[0].start;
5837         bytenr = bits[0].start;
5838         size = bits[0].size;
5839
5840         cache = lookup_cache_extent(pending, bytenr, size);
5841         if (cache) {
5842                 remove_cache_extent(pending, cache);
5843                 free(cache);
5844         }
5845         cache = lookup_cache_extent(reada, bytenr, size);
5846         if (cache) {
5847                 remove_cache_extent(reada, cache);
5848                 free(cache);
5849         }
5850         cache = lookup_cache_extent(nodes, bytenr, size);
5851         if (cache) {
5852                 remove_cache_extent(nodes, cache);
5853                 free(cache);
5854         }
5855         cache = lookup_cache_extent(extent_cache, bytenr, size);
5856         if (cache) {
5857                 rec = container_of(cache, struct extent_record, cache);
5858                 gen = rec->parent_generation;
5859         }
5860
5861         /* fixme, get the real parent transid */
5862         buf = read_tree_block(root->fs_info, bytenr, gen);
5863         if (!extent_buffer_uptodate(buf)) {
5864                 record_bad_block_io(root->fs_info,
5865                                     extent_cache, bytenr, size);
5866                 goto out;
5867         }
5868
5869         nritems = btrfs_header_nritems(buf);
5870
5871         flags = 0;
5872         if (!init_extent_tree) {
5873                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5874                                        btrfs_header_level(buf), 1, NULL,
5875                                        &flags);
5876                 if (ret < 0) {
5877                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
5878                         if (ret < 0) {
5879                                 fprintf(stderr, "Couldn't calc extent flags\n");
5880                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5881                         }
5882                 }
5883         } else {
5884                 flags = 0;
5885                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
5886                 if (ret < 0) {
5887                         fprintf(stderr, "Couldn't calc extent flags\n");
5888                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5889                 }
5890         }
5891
5892         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5893                 if (ri != NULL &&
5894                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5895                     ri->objectid == btrfs_header_owner(buf)) {
5896                         /*
5897                          * Ok we got to this block from it's original owner and
5898                          * we have FULL_BACKREF set.  Relocation can leave
5899                          * converted blocks over so this is altogether possible,
5900                          * however it's not possible if the generation > the
5901                          * last snapshot, so check for this case.
5902                          */
5903                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5904                             btrfs_header_generation(buf) > ri->last_snapshot) {
5905                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5906                                 rec->bad_full_backref = 1;
5907                         }
5908                 }
5909         } else {
5910                 if (ri != NULL &&
5911                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
5912                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
5913                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5914                         rec->bad_full_backref = 1;
5915                 }
5916         }
5917
5918         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5919                 rec->flag_block_full_backref = 1;
5920                 parent = bytenr;
5921                 owner = 0;
5922         } else {
5923                 rec->flag_block_full_backref = 0;
5924                 parent = 0;
5925                 owner = btrfs_header_owner(buf);
5926         }
5927
5928         ret = check_block(root, extent_cache, buf, flags);
5929         if (ret)
5930                 goto out;
5931
5932         if (btrfs_is_leaf(buf)) {
5933                 btree_space_waste += btrfs_leaf_free_space(root, buf);
5934                 for (i = 0; i < nritems; i++) {
5935                         struct btrfs_file_extent_item *fi;
5936                         btrfs_item_key_to_cpu(buf, &key, i);
5937                         /*
5938                          * Check key type against the leaf owner.
5939                          * Could filter quite a lot of early error if
5940                          * owner is correct
5941                          */
5942                         if (check_type_with_root(btrfs_header_owner(buf),
5943                                                  key.type)) {
5944                                 fprintf(stderr, "ignoring invalid key\n");
5945                                 continue;
5946                         }
5947                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
5948                                 process_extent_item(root, extent_cache, buf,
5949                                                     i);
5950                                 continue;
5951                         }
5952                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5953                                 process_extent_item(root, extent_cache, buf,
5954                                                     i);
5955                                 continue;
5956                         }
5957                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
5958                                 total_csum_bytes +=
5959                                         btrfs_item_size_nr(buf, i);
5960                                 continue;
5961                         }
5962                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
5963                                 process_chunk_item(chunk_cache, &key, buf, i);
5964                                 continue;
5965                         }
5966                         if (key.type == BTRFS_DEV_ITEM_KEY) {
5967                                 process_device_item(dev_cache, &key, buf, i);
5968                                 continue;
5969                         }
5970                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
5971                                 process_block_group_item(block_group_cache,
5972                                         &key, buf, i);
5973                                 continue;
5974                         }
5975                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
5976                                 process_device_extent_item(dev_extent_cache,
5977                                         &key, buf, i);
5978                                 continue;
5979
5980                         }
5981                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
5982 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5983                                 process_extent_ref_v0(extent_cache, buf, i);
5984 #else
5985                                 BUG();
5986 #endif
5987                                 continue;
5988                         }
5989
5990                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
5991                                 ret = add_tree_backref(extent_cache,
5992                                                 key.objectid, 0, key.offset, 0);
5993                                 if (ret < 0)
5994                                         error(
5995                                 "add_tree_backref failed (leaf tree block): %s",
5996                                               strerror(-ret));
5997                                 continue;
5998                         }
5999                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6000                                 ret = add_tree_backref(extent_cache,
6001                                                 key.objectid, key.offset, 0, 0);
6002                                 if (ret < 0)
6003                                         error(
6004                                 "add_tree_backref failed (leaf shared block): %s",
6005                                               strerror(-ret));
6006                                 continue;
6007                         }
6008                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6009                                 struct btrfs_extent_data_ref *ref;
6010                                 ref = btrfs_item_ptr(buf, i,
6011                                                 struct btrfs_extent_data_ref);
6012                                 add_data_backref(extent_cache,
6013                                         key.objectid, 0,
6014                                         btrfs_extent_data_ref_root(buf, ref),
6015                                         btrfs_extent_data_ref_objectid(buf,
6016                                                                        ref),
6017                                         btrfs_extent_data_ref_offset(buf, ref),
6018                                         btrfs_extent_data_ref_count(buf, ref),
6019                                         0, root->fs_info->sectorsize);
6020                                 continue;
6021                         }
6022                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6023                                 struct btrfs_shared_data_ref *ref;
6024                                 ref = btrfs_item_ptr(buf, i,
6025                                                 struct btrfs_shared_data_ref);
6026                                 add_data_backref(extent_cache,
6027                                         key.objectid, key.offset, 0, 0, 0,
6028                                         btrfs_shared_data_ref_count(buf, ref),
6029                                         0, root->fs_info->sectorsize);
6030                                 continue;
6031                         }
6032                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6033                                 struct bad_item *bad;
6034
6035                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6036                                         continue;
6037                                 if (!owner)
6038                                         continue;
6039                                 bad = malloc(sizeof(struct bad_item));
6040                                 if (!bad)
6041                                         continue;
6042                                 INIT_LIST_HEAD(&bad->list);
6043                                 memcpy(&bad->key, &key,
6044                                        sizeof(struct btrfs_key));
6045                                 bad->root_id = owner;
6046                                 list_add_tail(&bad->list, &delete_items);
6047                                 continue;
6048                         }
6049                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6050                                 continue;
6051                         fi = btrfs_item_ptr(buf, i,
6052                                             struct btrfs_file_extent_item);
6053                         if (btrfs_file_extent_type(buf, fi) ==
6054                             BTRFS_FILE_EXTENT_INLINE)
6055                                 continue;
6056                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6057                                 continue;
6058
6059                         data_bytes_allocated +=
6060                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6061                         if (data_bytes_allocated < root->fs_info->sectorsize) {
6062                                 abort();
6063                         }
6064                         data_bytes_referenced +=
6065                                 btrfs_file_extent_num_bytes(buf, fi);
6066                         add_data_backref(extent_cache,
6067                                 btrfs_file_extent_disk_bytenr(buf, fi),
6068                                 parent, owner, key.objectid, key.offset -
6069                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6070                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6071                 }
6072         } else {
6073                 int level;
6074                 struct btrfs_key first_key;
6075
6076                 first_key.objectid = 0;
6077
6078                 if (nritems > 0)
6079                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6080                 level = btrfs_header_level(buf);
6081                 for (i = 0; i < nritems; i++) {
6082                         struct extent_record tmpl;
6083
6084                         ptr = btrfs_node_blockptr(buf, i);
6085                         size = root->fs_info->nodesize;
6086                         btrfs_node_key_to_cpu(buf, &key, i);
6087                         if (ri != NULL) {
6088                                 if ((level == ri->drop_level)
6089                                     && is_dropped_key(&key, &ri->drop_key)) {
6090                                         continue;
6091                                 }
6092                         }
6093
6094                         memset(&tmpl, 0, sizeof(tmpl));
6095                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6096                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6097                         tmpl.start = ptr;
6098                         tmpl.nr = size;
6099                         tmpl.refs = 1;
6100                         tmpl.metadata = 1;
6101                         tmpl.max_size = size;
6102                         ret = add_extent_rec(extent_cache, &tmpl);
6103                         if (ret < 0)
6104                                 goto out;
6105
6106                         ret = add_tree_backref(extent_cache, ptr, parent,
6107                                         owner, 1);
6108                         if (ret < 0) {
6109                                 error(
6110                                 "add_tree_backref failed (non-leaf block): %s",
6111                                       strerror(-ret));
6112                                 continue;
6113                         }
6114
6115                         if (level > 1) {
6116                                 add_pending(nodes, seen, ptr, size);
6117                         } else {
6118                                 add_pending(pending, seen, ptr, size);
6119                         }
6120                 }
6121                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
6122                                       nritems) * sizeof(struct btrfs_key_ptr);
6123         }
6124         total_btree_bytes += buf->len;
6125         if (fs_root_objectid(btrfs_header_owner(buf)))
6126                 total_fs_tree_bytes += buf->len;
6127         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6128                 total_extent_tree_bytes += buf->len;
6129 out:
6130         free_extent_buffer(buf);
6131         return ret;
6132 }
6133
6134 static int add_root_to_pending(struct extent_buffer *buf,
6135                                struct cache_tree *extent_cache,
6136                                struct cache_tree *pending,
6137                                struct cache_tree *seen,
6138                                struct cache_tree *nodes,
6139                                u64 objectid)
6140 {
6141         struct extent_record tmpl;
6142         int ret;
6143
6144         if (btrfs_header_level(buf) > 0)
6145                 add_pending(nodes, seen, buf->start, buf->len);
6146         else
6147                 add_pending(pending, seen, buf->start, buf->len);
6148
6149         memset(&tmpl, 0, sizeof(tmpl));
6150         tmpl.start = buf->start;
6151         tmpl.nr = buf->len;
6152         tmpl.is_root = 1;
6153         tmpl.refs = 1;
6154         tmpl.metadata = 1;
6155         tmpl.max_size = buf->len;
6156         add_extent_rec(extent_cache, &tmpl);
6157
6158         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6159             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6160                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6161                                 0, 1);
6162         else
6163                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6164                                 1);
6165         return ret;
6166 }
6167
6168 /* as we fix the tree, we might be deleting blocks that
6169  * we're tracking for repair.  This hook makes sure we
6170  * remove any backrefs for blocks as we are fixing them.
6171  */
6172 static int free_extent_hook(struct btrfs_trans_handle *trans,
6173                             struct btrfs_root *root,
6174                             u64 bytenr, u64 num_bytes, u64 parent,
6175                             u64 root_objectid, u64 owner, u64 offset,
6176                             int refs_to_drop)
6177 {
6178         struct extent_record *rec;
6179         struct cache_extent *cache;
6180         int is_data;
6181         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6182
6183         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6184         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6185         if (!cache)
6186                 return 0;
6187
6188         rec = container_of(cache, struct extent_record, cache);
6189         if (is_data) {
6190                 struct data_backref *back;
6191                 back = find_data_backref(rec, parent, root_objectid, owner,
6192                                          offset, 1, bytenr, num_bytes);
6193                 if (!back)
6194                         goto out;
6195                 if (back->node.found_ref) {
6196                         back->found_ref -= refs_to_drop;
6197                         if (rec->refs)
6198                                 rec->refs -= refs_to_drop;
6199                 }
6200                 if (back->node.found_extent_tree) {
6201                         back->num_refs -= refs_to_drop;
6202                         if (rec->extent_item_refs)
6203                                 rec->extent_item_refs -= refs_to_drop;
6204                 }
6205                 if (back->found_ref == 0)
6206                         back->node.found_ref = 0;
6207                 if (back->num_refs == 0)
6208                         back->node.found_extent_tree = 0;
6209
6210                 if (!back->node.found_extent_tree && back->node.found_ref) {
6211                         rb_erase(&back->node.node, &rec->backref_tree);
6212                         free(back);
6213                 }
6214         } else {
6215                 struct tree_backref *back;
6216                 back = find_tree_backref(rec, parent, root_objectid);
6217                 if (!back)
6218                         goto out;
6219                 if (back->node.found_ref) {
6220                         if (rec->refs)
6221                                 rec->refs--;
6222                         back->node.found_ref = 0;
6223                 }
6224                 if (back->node.found_extent_tree) {
6225                         if (rec->extent_item_refs)
6226                                 rec->extent_item_refs--;
6227                         back->node.found_extent_tree = 0;
6228                 }
6229                 if (!back->node.found_extent_tree && back->node.found_ref) {
6230                         rb_erase(&back->node.node, &rec->backref_tree);
6231                         free(back);
6232                 }
6233         }
6234         maybe_free_extent_rec(extent_cache, rec);
6235 out:
6236         return 0;
6237 }
6238
6239 static int delete_extent_records(struct btrfs_trans_handle *trans,
6240                                  struct btrfs_root *root,
6241                                  struct btrfs_path *path,
6242                                  u64 bytenr)
6243 {
6244         struct btrfs_key key;
6245         struct btrfs_key found_key;
6246         struct extent_buffer *leaf;
6247         int ret;
6248         int slot;
6249
6250
6251         key.objectid = bytenr;
6252         key.type = (u8)-1;
6253         key.offset = (u64)-1;
6254
6255         while(1) {
6256                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6257                                         &key, path, 0, 1);
6258                 if (ret < 0)
6259                         break;
6260
6261                 if (ret > 0) {
6262                         ret = 0;
6263                         if (path->slots[0] == 0)
6264                                 break;
6265                         path->slots[0]--;
6266                 }
6267                 ret = 0;
6268
6269                 leaf = path->nodes[0];
6270                 slot = path->slots[0];
6271
6272                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6273                 if (found_key.objectid != bytenr)
6274                         break;
6275
6276                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6277                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6278                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6279                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6280                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6281                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6282                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6283                         btrfs_release_path(path);
6284                         if (found_key.type == 0) {
6285                                 if (found_key.offset == 0)
6286                                         break;
6287                                 key.offset = found_key.offset - 1;
6288                                 key.type = found_key.type;
6289                         }
6290                         key.type = found_key.type - 1;
6291                         key.offset = (u64)-1;
6292                         continue;
6293                 }
6294
6295                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6296                         found_key.objectid, found_key.type, found_key.offset);
6297
6298                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6299                 if (ret)
6300                         break;
6301                 btrfs_release_path(path);
6302
6303                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6304                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6305                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6306                                 found_key.offset : root->fs_info->nodesize;
6307
6308                         ret = btrfs_update_block_group(root, bytenr,
6309                                                        bytes, 0, 0);
6310                         if (ret)
6311                                 break;
6312                 }
6313         }
6314
6315         btrfs_release_path(path);
6316         return ret;
6317 }
6318
6319 /*
6320  * for a single backref, this will allocate a new extent
6321  * and add the backref to it.
6322  */
6323 static int record_extent(struct btrfs_trans_handle *trans,
6324                          struct btrfs_fs_info *info,
6325                          struct btrfs_path *path,
6326                          struct extent_record *rec,
6327                          struct extent_backref *back,
6328                          int allocated, u64 flags)
6329 {
6330         int ret = 0;
6331         struct btrfs_root *extent_root = info->extent_root;
6332         struct extent_buffer *leaf;
6333         struct btrfs_key ins_key;
6334         struct btrfs_extent_item *ei;
6335         struct data_backref *dback;
6336         struct btrfs_tree_block_info *bi;
6337
6338         if (!back->is_data)
6339                 rec->max_size = max_t(u64, rec->max_size,
6340                                     info->nodesize);
6341
6342         if (!allocated) {
6343                 u32 item_size = sizeof(*ei);
6344
6345                 if (!back->is_data)
6346                         item_size += sizeof(*bi);
6347
6348                 ins_key.objectid = rec->start;
6349                 ins_key.offset = rec->max_size;
6350                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6351
6352                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6353                                         &ins_key, item_size);
6354                 if (ret)
6355                         goto fail;
6356
6357                 leaf = path->nodes[0];
6358                 ei = btrfs_item_ptr(leaf, path->slots[0],
6359                                     struct btrfs_extent_item);
6360
6361                 btrfs_set_extent_refs(leaf, ei, 0);
6362                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6363
6364                 if (back->is_data) {
6365                         btrfs_set_extent_flags(leaf, ei,
6366                                                BTRFS_EXTENT_FLAG_DATA);
6367                 } else {
6368                         struct btrfs_disk_key copy_key;;
6369
6370                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6371                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6372                                              sizeof(*bi));
6373
6374                         btrfs_set_disk_key_objectid(&copy_key,
6375                                                     rec->info_objectid);
6376                         btrfs_set_disk_key_type(&copy_key, 0);
6377                         btrfs_set_disk_key_offset(&copy_key, 0);
6378
6379                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6380                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6381
6382                         btrfs_set_extent_flags(leaf, ei,
6383                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6384                 }
6385
6386                 btrfs_mark_buffer_dirty(leaf);
6387                 ret = btrfs_update_block_group(extent_root, rec->start,
6388                                                rec->max_size, 1, 0);
6389                 if (ret)
6390                         goto fail;
6391                 btrfs_release_path(path);
6392         }
6393
6394         if (back->is_data) {
6395                 u64 parent;
6396                 int i;
6397
6398                 dback = to_data_backref(back);
6399                 if (back->full_backref)
6400                         parent = dback->parent;
6401                 else
6402                         parent = 0;
6403
6404                 for (i = 0; i < dback->found_ref; i++) {
6405                         /* if parent != 0, we're doing a full backref
6406                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6407                          * just makes the backref allocator create a data
6408                          * backref
6409                          */
6410                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6411                                                    rec->start, rec->max_size,
6412                                                    parent,
6413                                                    dback->root,
6414                                                    parent ?
6415                                                    BTRFS_FIRST_FREE_OBJECTID :
6416                                                    dback->owner,
6417                                                    dback->offset);
6418                         if (ret)
6419                                 break;
6420                 }
6421                 fprintf(stderr, "adding new data backref"
6422                                 " on %llu %s %llu owner %llu"
6423                                 " offset %llu found %d\n",
6424                                 (unsigned long long)rec->start,
6425                                 back->full_backref ?
6426                                 "parent" : "root",
6427                                 back->full_backref ?
6428                                 (unsigned long long)parent :
6429                                 (unsigned long long)dback->root,
6430                                 (unsigned long long)dback->owner,
6431                                 (unsigned long long)dback->offset,
6432                                 dback->found_ref);
6433         } else {
6434                 u64 parent;
6435                 struct tree_backref *tback;
6436
6437                 tback = to_tree_backref(back);
6438                 if (back->full_backref)
6439                         parent = tback->parent;
6440                 else
6441                         parent = 0;
6442
6443                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6444                                            rec->start, rec->max_size,
6445                                            parent, tback->root, 0, 0);
6446                 fprintf(stderr, "adding new tree backref on "
6447                         "start %llu len %llu parent %llu root %llu\n",
6448                         rec->start, rec->max_size, parent, tback->root);
6449         }
6450 fail:
6451         btrfs_release_path(path);
6452         return ret;
6453 }
6454
6455 static struct extent_entry *find_entry(struct list_head *entries,
6456                                        u64 bytenr, u64 bytes)
6457 {
6458         struct extent_entry *entry = NULL;
6459
6460         list_for_each_entry(entry, entries, list) {
6461                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6462                         return entry;
6463         }
6464
6465         return NULL;
6466 }
6467
6468 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6469 {
6470         struct extent_entry *entry, *best = NULL, *prev = NULL;
6471
6472         list_for_each_entry(entry, entries, list) {
6473                 /*
6474                  * If there are as many broken entries as entries then we know
6475                  * not to trust this particular entry.
6476                  */
6477                 if (entry->broken == entry->count)
6478                         continue;
6479
6480                 /*
6481                  * Special case, when there are only two entries and 'best' is
6482                  * the first one
6483                  */
6484                 if (!prev) {
6485                         best = entry;
6486                         prev = entry;
6487                         continue;
6488                 }
6489
6490                 /*
6491                  * If our current entry == best then we can't be sure our best
6492                  * is really the best, so we need to keep searching.
6493                  */
6494                 if (best && best->count == entry->count) {
6495                         prev = entry;
6496                         best = NULL;
6497                         continue;
6498                 }
6499
6500                 /* Prev == entry, not good enough, have to keep searching */
6501                 if (!prev->broken && prev->count == entry->count)
6502                         continue;
6503
6504                 if (!best)
6505                         best = (prev->count > entry->count) ? prev : entry;
6506                 else if (best->count < entry->count)
6507                         best = entry;
6508                 prev = entry;
6509         }
6510
6511         return best;
6512 }
6513
6514 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6515                       struct data_backref *dback, struct extent_entry *entry)
6516 {
6517         struct btrfs_trans_handle *trans;
6518         struct btrfs_root *root;
6519         struct btrfs_file_extent_item *fi;
6520         struct extent_buffer *leaf;
6521         struct btrfs_key key;
6522         u64 bytenr, bytes;
6523         int ret, err;
6524
6525         key.objectid = dback->root;
6526         key.type = BTRFS_ROOT_ITEM_KEY;
6527         key.offset = (u64)-1;
6528         root = btrfs_read_fs_root(info, &key);
6529         if (IS_ERR(root)) {
6530                 fprintf(stderr, "Couldn't find root for our ref\n");
6531                 return -EINVAL;
6532         }
6533
6534         /*
6535          * The backref points to the original offset of the extent if it was
6536          * split, so we need to search down to the offset we have and then walk
6537          * forward until we find the backref we're looking for.
6538          */
6539         key.objectid = dback->owner;
6540         key.type = BTRFS_EXTENT_DATA_KEY;
6541         key.offset = dback->offset;
6542         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6543         if (ret < 0) {
6544                 fprintf(stderr, "Error looking up ref %d\n", ret);
6545                 return ret;
6546         }
6547
6548         while (1) {
6549                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6550                         ret = btrfs_next_leaf(root, path);
6551                         if (ret) {
6552                                 fprintf(stderr, "Couldn't find our ref, next\n");
6553                                 return -EINVAL;
6554                         }
6555                 }
6556                 leaf = path->nodes[0];
6557                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6558                 if (key.objectid != dback->owner ||
6559                     key.type != BTRFS_EXTENT_DATA_KEY) {
6560                         fprintf(stderr, "Couldn't find our ref, search\n");
6561                         return -EINVAL;
6562                 }
6563                 fi = btrfs_item_ptr(leaf, path->slots[0],
6564                                     struct btrfs_file_extent_item);
6565                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6566                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6567
6568                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6569                         break;
6570                 path->slots[0]++;
6571         }
6572
6573         btrfs_release_path(path);
6574
6575         trans = btrfs_start_transaction(root, 1);
6576         if (IS_ERR(trans))
6577                 return PTR_ERR(trans);
6578
6579         /*
6580          * Ok we have the key of the file extent we want to fix, now we can cow
6581          * down to the thing and fix it.
6582          */
6583         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6584         if (ret < 0) {
6585                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6586                         key.objectid, key.type, key.offset, ret);
6587                 goto out;
6588         }
6589         if (ret > 0) {
6590                 fprintf(stderr, "Well that's odd, we just found this key "
6591                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6592                         key.offset);
6593                 ret = -EINVAL;
6594                 goto out;
6595         }
6596         leaf = path->nodes[0];
6597         fi = btrfs_item_ptr(leaf, path->slots[0],
6598                             struct btrfs_file_extent_item);
6599
6600         if (btrfs_file_extent_compression(leaf, fi) &&
6601             dback->disk_bytenr != entry->bytenr) {
6602                 fprintf(stderr, "Ref doesn't match the record start and is "
6603                         "compressed, please take a btrfs-image of this file "
6604                         "system and send it to a btrfs developer so they can "
6605                         "complete this functionality for bytenr %Lu\n",
6606                         dback->disk_bytenr);
6607                 ret = -EINVAL;
6608                 goto out;
6609         }
6610
6611         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6612                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6613         } else if (dback->disk_bytenr > entry->bytenr) {
6614                 u64 off_diff, offset;
6615
6616                 off_diff = dback->disk_bytenr - entry->bytenr;
6617                 offset = btrfs_file_extent_offset(leaf, fi);
6618                 if (dback->disk_bytenr + offset +
6619                     btrfs_file_extent_num_bytes(leaf, fi) >
6620                     entry->bytenr + entry->bytes) {
6621                         fprintf(stderr, "Ref is past the entry end, please "
6622                                 "take a btrfs-image of this file system and "
6623                                 "send it to a btrfs developer, ref %Lu\n",
6624                                 dback->disk_bytenr);
6625                         ret = -EINVAL;
6626                         goto out;
6627                 }
6628                 offset += off_diff;
6629                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6630                 btrfs_set_file_extent_offset(leaf, fi, offset);
6631         } else if (dback->disk_bytenr < entry->bytenr) {
6632                 u64 offset;
6633
6634                 offset = btrfs_file_extent_offset(leaf, fi);
6635                 if (dback->disk_bytenr + offset < entry->bytenr) {
6636                         fprintf(stderr, "Ref is before the entry start, please"
6637                                 " take a btrfs-image of this file system and "
6638                                 "send it to a btrfs developer, ref %Lu\n",
6639                                 dback->disk_bytenr);
6640                         ret = -EINVAL;
6641                         goto out;
6642                 }
6643
6644                 offset += dback->disk_bytenr;
6645                 offset -= entry->bytenr;
6646                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6647                 btrfs_set_file_extent_offset(leaf, fi, offset);
6648         }
6649
6650         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6651
6652         /*
6653          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6654          * only do this if we aren't using compression, otherwise it's a
6655          * trickier case.
6656          */
6657         if (!btrfs_file_extent_compression(leaf, fi))
6658                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6659         else
6660                 printf("ram bytes may be wrong?\n");
6661         btrfs_mark_buffer_dirty(leaf);
6662 out:
6663         err = btrfs_commit_transaction(trans, root);
6664         btrfs_release_path(path);
6665         return ret ? ret : err;
6666 }
6667
6668 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6669                            struct extent_record *rec)
6670 {
6671         struct extent_backref *back, *tmp;
6672         struct data_backref *dback;
6673         struct extent_entry *entry, *best = NULL;
6674         LIST_HEAD(entries);
6675         int nr_entries = 0;
6676         int broken_entries = 0;
6677         int ret = 0;
6678         short mismatch = 0;
6679
6680         /*
6681          * Metadata is easy and the backrefs should always agree on bytenr and
6682          * size, if not we've got bigger issues.
6683          */
6684         if (rec->metadata)
6685                 return 0;
6686
6687         rbtree_postorder_for_each_entry_safe(back, tmp,
6688                                              &rec->backref_tree, node) {
6689                 if (back->full_backref || !back->is_data)
6690                         continue;
6691
6692                 dback = to_data_backref(back);
6693
6694                 /*
6695                  * We only pay attention to backrefs that we found a real
6696                  * backref for.
6697                  */
6698                 if (dback->found_ref == 0)
6699                         continue;
6700
6701                 /*
6702                  * For now we only catch when the bytes don't match, not the
6703                  * bytenr.  We can easily do this at the same time, but I want
6704                  * to have a fs image to test on before we just add repair
6705                  * functionality willy-nilly so we know we won't screw up the
6706                  * repair.
6707                  */
6708
6709                 entry = find_entry(&entries, dback->disk_bytenr,
6710                                    dback->bytes);
6711                 if (!entry) {
6712                         entry = malloc(sizeof(struct extent_entry));
6713                         if (!entry) {
6714                                 ret = -ENOMEM;
6715                                 goto out;
6716                         }
6717                         memset(entry, 0, sizeof(*entry));
6718                         entry->bytenr = dback->disk_bytenr;
6719                         entry->bytes = dback->bytes;
6720                         list_add_tail(&entry->list, &entries);
6721                         nr_entries++;
6722                 }
6723
6724                 /*
6725                  * If we only have on entry we may think the entries agree when
6726                  * in reality they don't so we have to do some extra checking.
6727                  */
6728                 if (dback->disk_bytenr != rec->start ||
6729                     dback->bytes != rec->nr || back->broken)
6730                         mismatch = 1;
6731
6732                 if (back->broken) {
6733                         entry->broken++;
6734                         broken_entries++;
6735                 }
6736
6737                 entry->count++;
6738         }
6739
6740         /* Yay all the backrefs agree, carry on good sir */
6741         if (nr_entries <= 1 && !mismatch)
6742                 goto out;
6743
6744         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6745                 "%Lu\n", rec->start);
6746
6747         /*
6748          * First we want to see if the backrefs can agree amongst themselves who
6749          * is right, so figure out which one of the entries has the highest
6750          * count.
6751          */
6752         best = find_most_right_entry(&entries);
6753
6754         /*
6755          * Ok so we may have an even split between what the backrefs think, so
6756          * this is where we use the extent ref to see what it thinks.
6757          */
6758         if (!best) {
6759                 entry = find_entry(&entries, rec->start, rec->nr);
6760                 if (!entry && (!broken_entries || !rec->found_rec)) {
6761                         fprintf(stderr, "Backrefs don't agree with each other "
6762                                 "and extent record doesn't agree with anybody,"
6763                                 " so we can't fix bytenr %Lu bytes %Lu\n",
6764                                 rec->start, rec->nr);
6765                         ret = -EINVAL;
6766                         goto out;
6767                 } else if (!entry) {
6768                         /*
6769                          * Ok our backrefs were broken, we'll assume this is the
6770                          * correct value and add an entry for this range.
6771                          */
6772                         entry = malloc(sizeof(struct extent_entry));
6773                         if (!entry) {
6774                                 ret = -ENOMEM;
6775                                 goto out;
6776                         }
6777                         memset(entry, 0, sizeof(*entry));
6778                         entry->bytenr = rec->start;
6779                         entry->bytes = rec->nr;
6780                         list_add_tail(&entry->list, &entries);
6781                         nr_entries++;
6782                 }
6783                 entry->count++;
6784                 best = find_most_right_entry(&entries);
6785                 if (!best) {
6786                         fprintf(stderr, "Backrefs and extent record evenly "
6787                                 "split on who is right, this is going to "
6788                                 "require user input to fix bytenr %Lu bytes "
6789                                 "%Lu\n", rec->start, rec->nr);
6790                         ret = -EINVAL;
6791                         goto out;
6792                 }
6793         }
6794
6795         /*
6796          * I don't think this can happen currently as we'll abort() if we catch
6797          * this case higher up, but in case somebody removes that we still can't
6798          * deal with it properly here yet, so just bail out of that's the case.
6799          */
6800         if (best->bytenr != rec->start) {
6801                 fprintf(stderr, "Extent start and backref starts don't match, "
6802                         "please use btrfs-image on this file system and send "
6803                         "it to a btrfs developer so they can make fsck fix "
6804                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
6805                         rec->start, rec->nr);
6806                 ret = -EINVAL;
6807                 goto out;
6808         }
6809
6810         /*
6811          * Ok great we all agreed on an extent record, let's go find the real
6812          * references and fix up the ones that don't match.
6813          */
6814         rbtree_postorder_for_each_entry_safe(back, tmp,
6815                                              &rec->backref_tree, node) {
6816                 if (back->full_backref || !back->is_data)
6817                         continue;
6818
6819                 dback = to_data_backref(back);
6820
6821                 /*
6822                  * Still ignoring backrefs that don't have a real ref attached
6823                  * to them.
6824                  */
6825                 if (dback->found_ref == 0)
6826                         continue;
6827
6828                 if (dback->bytes == best->bytes &&
6829                     dback->disk_bytenr == best->bytenr)
6830                         continue;
6831
6832                 ret = repair_ref(info, path, dback, best);
6833                 if (ret)
6834                         goto out;
6835         }
6836
6837         /*
6838          * Ok we messed with the actual refs, which means we need to drop our
6839          * entire cache and go back and rescan.  I know this is a huge pain and
6840          * adds a lot of extra work, but it's the only way to be safe.  Once all
6841          * the backrefs agree we may not need to do anything to the extent
6842          * record itself.
6843          */
6844         ret = -EAGAIN;
6845 out:
6846         while (!list_empty(&entries)) {
6847                 entry = list_entry(entries.next, struct extent_entry, list);
6848                 list_del_init(&entry->list);
6849                 free(entry);
6850         }
6851         return ret;
6852 }
6853
6854 static int process_duplicates(struct cache_tree *extent_cache,
6855                               struct extent_record *rec)
6856 {
6857         struct extent_record *good, *tmp;
6858         struct cache_extent *cache;
6859         int ret;
6860
6861         /*
6862          * If we found a extent record for this extent then return, or if we
6863          * have more than one duplicate we are likely going to need to delete
6864          * something.
6865          */
6866         if (rec->found_rec || rec->num_duplicates > 1)
6867                 return 0;
6868
6869         /* Shouldn't happen but just in case */
6870         BUG_ON(!rec->num_duplicates);
6871
6872         /*
6873          * So this happens if we end up with a backref that doesn't match the
6874          * actual extent entry.  So either the backref is bad or the extent
6875          * entry is bad.  Either way we want to have the extent_record actually
6876          * reflect what we found in the extent_tree, so we need to take the
6877          * duplicate out and use that as the extent_record since the only way we
6878          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6879          */
6880         remove_cache_extent(extent_cache, &rec->cache);
6881
6882         good = to_extent_record(rec->dups.next);
6883         list_del_init(&good->list);
6884         INIT_LIST_HEAD(&good->backrefs);
6885         INIT_LIST_HEAD(&good->dups);
6886         good->cache.start = good->start;
6887         good->cache.size = good->nr;
6888         good->content_checked = 0;
6889         good->owner_ref_checked = 0;
6890         good->num_duplicates = 0;
6891         good->refs = rec->refs;
6892         list_splice_init(&rec->backrefs, &good->backrefs);
6893         while (1) {
6894                 cache = lookup_cache_extent(extent_cache, good->start,
6895                                             good->nr);
6896                 if (!cache)
6897                         break;
6898                 tmp = container_of(cache, struct extent_record, cache);
6899
6900                 /*
6901                  * If we find another overlapping extent and it's found_rec is
6902                  * set then it's a duplicate and we need to try and delete
6903                  * something.
6904                  */
6905                 if (tmp->found_rec || tmp->num_duplicates > 0) {
6906                         if (list_empty(&good->list))
6907                                 list_add_tail(&good->list,
6908                                               &duplicate_extents);
6909                         good->num_duplicates += tmp->num_duplicates + 1;
6910                         list_splice_init(&tmp->dups, &good->dups);
6911                         list_del_init(&tmp->list);
6912                         list_add_tail(&tmp->list, &good->dups);
6913                         remove_cache_extent(extent_cache, &tmp->cache);
6914                         continue;
6915                 }
6916
6917                 /*
6918                  * Ok we have another non extent item backed extent rec, so lets
6919                  * just add it to this extent and carry on like we did above.
6920                  */
6921                 good->refs += tmp->refs;
6922                 list_splice_init(&tmp->backrefs, &good->backrefs);
6923                 remove_cache_extent(extent_cache, &tmp->cache);
6924                 free(tmp);
6925         }
6926         ret = insert_cache_extent(extent_cache, &good->cache);
6927         BUG_ON(ret);
6928         free(rec);
6929         return good->num_duplicates ? 0 : 1;
6930 }
6931
6932 static int delete_duplicate_records(struct btrfs_root *root,
6933                                     struct extent_record *rec)
6934 {
6935         struct btrfs_trans_handle *trans;
6936         LIST_HEAD(delete_list);
6937         struct btrfs_path path;
6938         struct extent_record *tmp, *good, *n;
6939         int nr_del = 0;
6940         int ret = 0, err;
6941         struct btrfs_key key;
6942
6943         btrfs_init_path(&path);
6944
6945         good = rec;
6946         /* Find the record that covers all of the duplicates. */
6947         list_for_each_entry(tmp, &rec->dups, list) {
6948                 if (good->start < tmp->start)
6949                         continue;
6950                 if (good->nr > tmp->nr)
6951                         continue;
6952
6953                 if (tmp->start + tmp->nr < good->start + good->nr) {
6954                         fprintf(stderr, "Ok we have overlapping extents that "
6955                                 "aren't completely covered by each other, this "
6956                                 "is going to require more careful thought.  "
6957                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
6958                                 tmp->start, tmp->nr, good->start, good->nr);
6959                         abort();
6960                 }
6961                 good = tmp;
6962         }
6963
6964         if (good != rec)
6965                 list_add_tail(&rec->list, &delete_list);
6966
6967         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
6968                 if (tmp == good)
6969                         continue;
6970                 list_move_tail(&tmp->list, &delete_list);
6971         }
6972
6973         root = root->fs_info->extent_root;
6974         trans = btrfs_start_transaction(root, 1);
6975         if (IS_ERR(trans)) {
6976                 ret = PTR_ERR(trans);
6977                 goto out;
6978         }
6979
6980         list_for_each_entry(tmp, &delete_list, list) {
6981                 if (tmp->found_rec == 0)
6982                         continue;
6983                 key.objectid = tmp->start;
6984                 key.type = BTRFS_EXTENT_ITEM_KEY;
6985                 key.offset = tmp->nr;
6986
6987                 /* Shouldn't happen but just in case */
6988                 if (tmp->metadata) {
6989                         fprintf(stderr, "Well this shouldn't happen, extent "
6990                                 "record overlaps but is metadata? "
6991                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
6992                         abort();
6993                 }
6994
6995                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
6996                 if (ret) {
6997                         if (ret > 0)
6998                                 ret = -EINVAL;
6999                         break;
7000                 }
7001                 ret = btrfs_del_item(trans, root, &path);
7002                 if (ret)
7003                         break;
7004                 btrfs_release_path(&path);
7005                 nr_del++;
7006         }
7007         err = btrfs_commit_transaction(trans, root);
7008         if (err && !ret)
7009                 ret = err;
7010 out:
7011         while (!list_empty(&delete_list)) {
7012                 tmp = to_extent_record(delete_list.next);
7013                 list_del_init(&tmp->list);
7014                 if (tmp == rec)
7015                         continue;
7016                 free(tmp);
7017         }
7018
7019         while (!list_empty(&rec->dups)) {
7020                 tmp = to_extent_record(rec->dups.next);
7021                 list_del_init(&tmp->list);
7022                 free(tmp);
7023         }
7024
7025         btrfs_release_path(&path);
7026
7027         if (!ret && !nr_del)
7028                 rec->num_duplicates = 0;
7029
7030         return ret ? ret : nr_del;
7031 }
7032
7033 static int find_possible_backrefs(struct btrfs_fs_info *info,
7034                                   struct btrfs_path *path,
7035                                   struct cache_tree *extent_cache,
7036                                   struct extent_record *rec)
7037 {
7038         struct btrfs_root *root;
7039         struct extent_backref *back, *tmp;
7040         struct data_backref *dback;
7041         struct cache_extent *cache;
7042         struct btrfs_file_extent_item *fi;
7043         struct btrfs_key key;
7044         u64 bytenr, bytes;
7045         int ret;
7046
7047         rbtree_postorder_for_each_entry_safe(back, tmp,
7048                                              &rec->backref_tree, node) {
7049                 /* Don't care about full backrefs (poor unloved backrefs) */
7050                 if (back->full_backref || !back->is_data)
7051                         continue;
7052
7053                 dback = to_data_backref(back);
7054
7055                 /* We found this one, we don't need to do a lookup */
7056                 if (dback->found_ref)
7057                         continue;
7058
7059                 key.objectid = dback->root;
7060                 key.type = BTRFS_ROOT_ITEM_KEY;
7061                 key.offset = (u64)-1;
7062
7063                 root = btrfs_read_fs_root(info, &key);
7064
7065                 /* No root, definitely a bad ref, skip */
7066                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7067                         continue;
7068                 /* Other err, exit */
7069                 if (IS_ERR(root))
7070                         return PTR_ERR(root);
7071
7072                 key.objectid = dback->owner;
7073                 key.type = BTRFS_EXTENT_DATA_KEY;
7074                 key.offset = dback->offset;
7075                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7076                 if (ret) {
7077                         btrfs_release_path(path);
7078                         if (ret < 0)
7079                                 return ret;
7080                         /* Didn't find it, we can carry on */
7081                         ret = 0;
7082                         continue;
7083                 }
7084
7085                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7086                                     struct btrfs_file_extent_item);
7087                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7088                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7089                 btrfs_release_path(path);
7090                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7091                 if (cache) {
7092                         struct extent_record *tmp;
7093                         tmp = container_of(cache, struct extent_record, cache);
7094
7095                         /*
7096                          * If we found an extent record for the bytenr for this
7097                          * particular backref then we can't add it to our
7098                          * current extent record.  We only want to add backrefs
7099                          * that don't have a corresponding extent item in the
7100                          * extent tree since they likely belong to this record
7101                          * and we need to fix it if it doesn't match bytenrs.
7102                          */
7103                         if  (tmp->found_rec)
7104                                 continue;
7105                 }
7106
7107                 dback->found_ref += 1;
7108                 dback->disk_bytenr = bytenr;
7109                 dback->bytes = bytes;
7110
7111                 /*
7112                  * Set this so the verify backref code knows not to trust the
7113                  * values in this backref.
7114                  */
7115                 back->broken = 1;
7116         }
7117
7118         return 0;
7119 }
7120
7121 /*
7122  * Record orphan data ref into corresponding root.
7123  *
7124  * Return 0 if the extent item contains data ref and recorded.
7125  * Return 1 if the extent item contains no useful data ref
7126  *   On that case, it may contains only shared_dataref or metadata backref
7127  *   or the file extent exists(this should be handled by the extent bytenr
7128  *   recovery routine)
7129  * Return <0 if something goes wrong.
7130  */
7131 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7132                                       struct extent_record *rec)
7133 {
7134         struct btrfs_key key;
7135         struct btrfs_root *dest_root;
7136         struct extent_backref *back, *tmp;
7137         struct data_backref *dback;
7138         struct orphan_data_extent *orphan;
7139         struct btrfs_path path;
7140         int recorded_data_ref = 0;
7141         int ret = 0;
7142
7143         if (rec->metadata)
7144                 return 1;
7145         btrfs_init_path(&path);
7146         rbtree_postorder_for_each_entry_safe(back, tmp,
7147                                              &rec->backref_tree, node) {
7148                 if (back->full_backref || !back->is_data ||
7149                     !back->found_extent_tree)
7150                         continue;
7151                 dback = to_data_backref(back);
7152                 if (dback->found_ref)
7153                         continue;
7154                 key.objectid = dback->root;
7155                 key.type = BTRFS_ROOT_ITEM_KEY;
7156                 key.offset = (u64)-1;
7157
7158                 dest_root = btrfs_read_fs_root(fs_info, &key);
7159
7160                 /* For non-exist root we just skip it */
7161                 if (IS_ERR(dest_root) || !dest_root)
7162                         continue;
7163
7164                 key.objectid = dback->owner;
7165                 key.type = BTRFS_EXTENT_DATA_KEY;
7166                 key.offset = dback->offset;
7167
7168                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
7169                 btrfs_release_path(&path);
7170                 /*
7171                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7172                  * we need to record it for inode/file extent rebuild.
7173                  * For ret > 0, we record it only for file extent rebuild.
7174                  * For ret == 0, the file extent exists but only bytenr
7175                  * mismatch, let the original bytenr fix routine to handle,
7176                  * don't record it.
7177                  */
7178                 if (ret == 0)
7179                         continue;
7180                 ret = 0;
7181                 orphan = malloc(sizeof(*orphan));
7182                 if (!orphan) {
7183                         ret = -ENOMEM;
7184                         goto out;
7185                 }
7186                 INIT_LIST_HEAD(&orphan->list);
7187                 orphan->root = dback->root;
7188                 orphan->objectid = dback->owner;
7189                 orphan->offset = dback->offset;
7190                 orphan->disk_bytenr = rec->cache.start;
7191                 orphan->disk_len = rec->cache.size;
7192                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7193                 recorded_data_ref = 1;
7194         }
7195 out:
7196         btrfs_release_path(&path);
7197         if (!ret)
7198                 return !recorded_data_ref;
7199         else
7200                 return ret;
7201 }
7202
7203 /*
7204  * when an incorrect extent item is found, this will delete
7205  * all of the existing entries for it and recreate them
7206  * based on what the tree scan found.
7207  */
7208 static int fixup_extent_refs(struct btrfs_fs_info *info,
7209                              struct cache_tree *extent_cache,
7210                              struct extent_record *rec)
7211 {
7212         struct btrfs_trans_handle *trans = NULL;
7213         int ret;
7214         struct btrfs_path path;
7215         struct cache_extent *cache;
7216         struct extent_backref *back, *tmp;
7217         int allocated = 0;
7218         u64 flags = 0;
7219
7220         if (rec->flag_block_full_backref)
7221                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7222
7223         btrfs_init_path(&path);
7224         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7225                 /*
7226                  * Sometimes the backrefs themselves are so broken they don't
7227                  * get attached to any meaningful rec, so first go back and
7228                  * check any of our backrefs that we couldn't find and throw
7229                  * them into the list if we find the backref so that
7230                  * verify_backrefs can figure out what to do.
7231                  */
7232                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
7233                 if (ret < 0)
7234                         goto out;
7235         }
7236
7237         /* step one, make sure all of the backrefs agree */
7238         ret = verify_backrefs(info, &path, rec);
7239         if (ret < 0)
7240                 goto out;
7241
7242         trans = btrfs_start_transaction(info->extent_root, 1);
7243         if (IS_ERR(trans)) {
7244                 ret = PTR_ERR(trans);
7245                 goto out;
7246         }
7247
7248         /* step two, delete all the existing records */
7249         ret = delete_extent_records(trans, info->extent_root, &path,
7250                                     rec->start);
7251
7252         if (ret < 0)
7253                 goto out;
7254
7255         /* was this block corrupt?  If so, don't add references to it */
7256         cache = lookup_cache_extent(info->corrupt_blocks,
7257                                     rec->start, rec->max_size);
7258         if (cache) {
7259                 ret = 0;
7260                 goto out;
7261         }
7262
7263         /* step three, recreate all the refs we did find */
7264         rbtree_postorder_for_each_entry_safe(back, tmp,
7265                                              &rec->backref_tree, node) {
7266                 /*
7267                  * if we didn't find any references, don't create a
7268                  * new extent record
7269                  */
7270                 if (!back->found_ref)
7271                         continue;
7272
7273                 rec->bad_full_backref = 0;
7274                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
7275                 allocated = 1;
7276
7277                 if (ret)
7278                         goto out;
7279         }
7280 out:
7281         if (trans) {
7282                 int err = btrfs_commit_transaction(trans, info->extent_root);
7283                 if (!ret)
7284                         ret = err;
7285         }
7286
7287         if (!ret)
7288                 fprintf(stderr, "Repaired extent references for %llu\n",
7289                                 (unsigned long long)rec->start);
7290
7291         btrfs_release_path(&path);
7292         return ret;
7293 }
7294
7295 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7296                               struct extent_record *rec)
7297 {
7298         struct btrfs_trans_handle *trans;
7299         struct btrfs_root *root = fs_info->extent_root;
7300         struct btrfs_path path;
7301         struct btrfs_extent_item *ei;
7302         struct btrfs_key key;
7303         u64 flags;
7304         int ret = 0;
7305
7306         key.objectid = rec->start;
7307         if (rec->metadata) {
7308                 key.type = BTRFS_METADATA_ITEM_KEY;
7309                 key.offset = rec->info_level;
7310         } else {
7311                 key.type = BTRFS_EXTENT_ITEM_KEY;
7312                 key.offset = rec->max_size;
7313         }
7314
7315         trans = btrfs_start_transaction(root, 0);
7316         if (IS_ERR(trans))
7317                 return PTR_ERR(trans);
7318
7319         btrfs_init_path(&path);
7320         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
7321         if (ret < 0) {
7322                 btrfs_release_path(&path);
7323                 btrfs_commit_transaction(trans, root);
7324                 return ret;
7325         } else if (ret) {
7326                 fprintf(stderr, "Didn't find extent for %llu\n",
7327                         (unsigned long long)rec->start);
7328                 btrfs_release_path(&path);
7329                 btrfs_commit_transaction(trans, root);
7330                 return -ENOENT;
7331         }
7332
7333         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
7334                             struct btrfs_extent_item);
7335         flags = btrfs_extent_flags(path.nodes[0], ei);
7336         if (rec->flag_block_full_backref) {
7337                 fprintf(stderr, "setting full backref on %llu\n",
7338                         (unsigned long long)key.objectid);
7339                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7340         } else {
7341                 fprintf(stderr, "clearing full backref on %llu\n",
7342                         (unsigned long long)key.objectid);
7343                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7344         }
7345         btrfs_set_extent_flags(path.nodes[0], ei, flags);
7346         btrfs_mark_buffer_dirty(path.nodes[0]);
7347         btrfs_release_path(&path);
7348         ret = btrfs_commit_transaction(trans, root);
7349         if (!ret)
7350                 fprintf(stderr, "Repaired extent flags for %llu\n",
7351                                 (unsigned long long)rec->start);
7352
7353         return ret;
7354 }
7355
7356 /* right now we only prune from the extent allocation tree */
7357 static int prune_one_block(struct btrfs_trans_handle *trans,
7358                            struct btrfs_fs_info *info,
7359                            struct btrfs_corrupt_block *corrupt)
7360 {
7361         int ret;
7362         struct btrfs_path path;
7363         struct extent_buffer *eb;
7364         u64 found;
7365         int slot;
7366         int nritems;
7367         int level = corrupt->level + 1;
7368
7369         btrfs_init_path(&path);
7370 again:
7371         /* we want to stop at the parent to our busted block */
7372         path.lowest_level = level;
7373
7374         ret = btrfs_search_slot(trans, info->extent_root,
7375                                 &corrupt->key, &path, -1, 1);
7376
7377         if (ret < 0)
7378                 goto out;
7379
7380         eb = path.nodes[level];
7381         if (!eb) {
7382                 ret = -ENOENT;
7383                 goto out;
7384         }
7385
7386         /*
7387          * hopefully the search gave us the block we want to prune,
7388          * lets try that first
7389          */
7390         slot = path.slots[level];
7391         found =  btrfs_node_blockptr(eb, slot);
7392         if (found == corrupt->cache.start)
7393                 goto del_ptr;
7394
7395         nritems = btrfs_header_nritems(eb);
7396
7397         /* the search failed, lets scan this node and hope we find it */
7398         for (slot = 0; slot < nritems; slot++) {
7399                 found =  btrfs_node_blockptr(eb, slot);
7400                 if (found == corrupt->cache.start)
7401                         goto del_ptr;
7402         }
7403         /*
7404          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7405          * to this block
7406          */
7407         if (eb == info->extent_root->node) {
7408                 ret = -ENOENT;
7409                 goto out;
7410         } else {
7411                 level++;
7412                 btrfs_release_path(&path);
7413                 goto again;
7414         }
7415
7416 del_ptr:
7417         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7418         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
7419
7420 out:
7421         btrfs_release_path(&path);
7422         return ret;
7423 }
7424
7425 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7426 {
7427         struct btrfs_trans_handle *trans = NULL;
7428         struct cache_extent *cache;
7429         struct btrfs_corrupt_block *corrupt;
7430
7431         while (1) {
7432                 cache = search_cache_extent(info->corrupt_blocks, 0);
7433                 if (!cache)
7434                         break;
7435                 if (!trans) {
7436                         trans = btrfs_start_transaction(info->extent_root, 1);
7437                         if (IS_ERR(trans))
7438                                 return PTR_ERR(trans);
7439                 }
7440                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7441                 prune_one_block(trans, info, corrupt);
7442                 remove_cache_extent(info->corrupt_blocks, cache);
7443         }
7444         if (trans)
7445                 return btrfs_commit_transaction(trans, info->extent_root);
7446         return 0;
7447 }
7448
7449 static int check_extent_refs(struct btrfs_root *root,
7450                              struct cache_tree *extent_cache)
7451 {
7452         struct extent_record *rec;
7453         struct cache_extent *cache;
7454         int ret = 0;
7455         int had_dups = 0;
7456         int err = 0;
7457
7458         if (repair) {
7459                 /*
7460                  * if we're doing a repair, we have to make sure
7461                  * we don't allocate from the problem extents.
7462                  * In the worst case, this will be all the
7463                  * extents in the FS
7464                  */
7465                 cache = search_cache_extent(extent_cache, 0);
7466                 while(cache) {
7467                         rec = container_of(cache, struct extent_record, cache);
7468                         set_extent_dirty(root->fs_info->excluded_extents,
7469                                          rec->start,
7470                                          rec->start + rec->max_size - 1);
7471                         cache = next_cache_extent(cache);
7472                 }
7473
7474                 /* pin down all the corrupted blocks too */
7475                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7476                 while(cache) {
7477                         set_extent_dirty(root->fs_info->excluded_extents,
7478                                          cache->start,
7479                                          cache->start + cache->size - 1);
7480                         cache = next_cache_extent(cache);
7481                 }
7482                 prune_corrupt_blocks(root->fs_info);
7483                 reset_cached_block_groups(root->fs_info);
7484         }
7485
7486         reset_cached_block_groups(root->fs_info);
7487
7488         /*
7489          * We need to delete any duplicate entries we find first otherwise we
7490          * could mess up the extent tree when we have backrefs that actually
7491          * belong to a different extent item and not the weird duplicate one.
7492          */
7493         while (repair && !list_empty(&duplicate_extents)) {
7494                 rec = to_extent_record(duplicate_extents.next);
7495                 list_del_init(&rec->list);
7496
                /*
                 * Sometimes we can find a backref before we find an actual
                 * extent, so we need to process it a little bit to see if there
                 * truly are multiple EXTENT_ITEM_KEYs for the same range, or
                 * if this is a backref screwup.  If we need to delete stuff
                 * process_duplicates() will return 0, otherwise it will return
                 * 1 and we skip this record and move on to the next one.
                 */
7504                 if (process_duplicates(extent_cache, rec))
7505                         continue;
7506                 ret = delete_duplicate_records(root, rec);
7507                 if (ret < 0)
7508                         return ret;
                /*
                 * delete_duplicate_records() returns the number of entries
                 * deleted, so if it's greater than 0 then we know we actually
                 * deleted something; remember that so we return -EAGAIN once
                 * all the duplicates have been processed.
                 */
7514                 if (ret)
7515                         had_dups = 1;
7516         }
7517
7518         if (had_dups)
7519                 return -EAGAIN;
7520
7521         while(1) {
7522                 int cur_err = 0;
7523                 int fix = 0;
7524
7525                 cache = search_cache_extent(extent_cache, 0);
7526                 if (!cache)
7527                         break;
7528                 rec = container_of(cache, struct extent_record, cache);
7529                 if (rec->num_duplicates) {
7530                         fprintf(stderr, "extent item %llu has multiple extent "
7531                                 "items\n", (unsigned long long)rec->start);
7532                         cur_err = 1;
7533                 }
7534
7535                 if (rec->refs != rec->extent_item_refs) {
7536                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7537                                 (unsigned long long)rec->start,
7538                                 (unsigned long long)rec->nr);
7539                         fprintf(stderr, "extent item %llu, found %llu\n",
7540                                 (unsigned long long)rec->extent_item_refs,
7541                                 (unsigned long long)rec->refs);
7542                         ret = record_orphan_data_extents(root->fs_info, rec);
7543                         if (ret < 0)
7544                                 goto repair_abort;
7545                         fix = ret;
7546                         cur_err = 1;
7547                 }
7548                 if (all_backpointers_checked(rec, 1)) {
7549                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7550                                 (unsigned long long)rec->start,
7551                                 (unsigned long long)rec->nr);
7552                         fix = 1;
7553                         cur_err = 1;
7554                 }
7555                 if (!rec->owner_ref_checked) {
7556                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7557                                 (unsigned long long)rec->start,
7558                                 (unsigned long long)rec->nr);
7559                         fix = 1;
7560                         cur_err = 1;
7561                 }
7562
7563                 if (repair && fix) {
7564                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
7565                         if (ret)
7566                                 goto repair_abort;
7567                 }
7568
7569
7570                 if (rec->bad_full_backref) {
7571                         fprintf(stderr, "bad full backref, on [%llu]\n",
7572                                 (unsigned long long)rec->start);
7573                         if (repair) {
7574                                 ret = fixup_extent_flags(root->fs_info, rec);
7575                                 if (ret)
7576                                         goto repair_abort;
7577                                 fix = 1;
7578                         }
7579                         cur_err = 1;
7580                 }
                /*
                 * Although this is not an extent ref problem, we reuse this
                 * routine for error reporting.
                 * There is no repair function for it yet.
                 */
7586                 if (rec->crossing_stripes) {
7587                         fprintf(stderr,
7588                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7589                                 rec->start, rec->start + rec->max_size);
7590                         cur_err = 1;
7591                 }
7592
7593                 if (rec->wrong_chunk_type) {
7594                         fprintf(stderr,
7595                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7596                                 rec->start, rec->start + rec->max_size);
7597                         cur_err = 1;
7598                 }
7599
7600                 err = cur_err;
7601                 remove_cache_extent(extent_cache, cache);
7602                 free_all_extent_backrefs(rec);
7603                 if (!init_extent_tree && repair && (!cur_err || fix))
7604                         clear_extent_dirty(root->fs_info->excluded_extents,
7605                                            rec->start,
7606                                            rec->start + rec->max_size - 1);
7607                 free(rec);
7608         }
7609 repair_abort:
7610         if (repair) {
7611                 if (ret && ret != -EAGAIN) {
7612                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7613                         exit(1);
7614                 } else if (!ret) {
7615                         struct btrfs_trans_handle *trans;
7616
7617                         root = root->fs_info->extent_root;
7618                         trans = btrfs_start_transaction(root, 1);
7619                         if (IS_ERR(trans)) {
7620                                 ret = PTR_ERR(trans);
7621                                 goto repair_abort;
7622                         }
7623
7624                         ret = btrfs_fix_block_accounting(trans, root);
7625                         if (ret)
7626                                 goto repair_abort;
7627                         ret = btrfs_commit_transaction(trans, root);
7628                         if (ret)
7629                                 goto repair_abort;
7630                 }
7631                 return ret;
7632         }
7633
7634         if (err)
7635                 err = -EIO;
7636         return err;
7637 }
7638
7639 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7640 {
7641         u64 stripe_size;
7642
7643         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7644                 stripe_size = length;
7645                 stripe_size /= num_stripes;
7646         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7647                 stripe_size = length * 2;
7648                 stripe_size /= num_stripes;
7649         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7650                 stripe_size = length;
7651                 stripe_size /= (num_stripes - 1);
7652         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7653                 stripe_size = length;
7654                 stripe_size /= (num_stripes - 2);
7655         } else {
7656                 stripe_size = length;
7657         }
7658         return stripe_size;
7659 }
7660
7661 /*
7662  * Check the chunk with its block group/dev list ref:
7663  * Return 0 if all refs seems valid.
7664  * Return 1 if part of refs seems valid, need later check for rebuild ref
7665  * like missing block group and needs to search extent tree to rebuild them.
7666  * Return -1 if essential refs are missing and unable to rebuild.
7667  */
7668 static int check_chunk_refs(struct chunk_record *chunk_rec,
7669                             struct block_group_tree *block_group_cache,
7670                             struct device_extent_tree *dev_extent_cache,
7671                             int silent)
7672 {
7673         struct cache_extent *block_group_item;
7674         struct block_group_record *block_group_rec;
7675         struct cache_extent *dev_extent_item;
7676         struct device_extent_record *dev_extent_rec;
7677         u64 devid;
7678         u64 offset;
7679         u64 length;
7680         int metadump_v2 = 0;
7681         int i;
7682         int ret = 0;
7683
7684         block_group_item = lookup_cache_extent(&block_group_cache->tree,
7685                                                chunk_rec->offset,
7686                                                chunk_rec->length);
7687         if (block_group_item) {
7688                 block_group_rec = container_of(block_group_item,
7689                                                struct block_group_record,
7690                                                cache);
7691                 if (chunk_rec->length != block_group_rec->offset ||
7692                     chunk_rec->offset != block_group_rec->objectid ||
7693                     (!metadump_v2 &&
7694                      chunk_rec->type_flags != block_group_rec->flags)) {
7695                         if (!silent)
7696                                 fprintf(stderr,
7697                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7698                                         chunk_rec->objectid,
7699                                         chunk_rec->type,
7700                                         chunk_rec->offset,
7701                                         chunk_rec->length,
7702                                         chunk_rec->offset,
7703                                         chunk_rec->type_flags,
7704                                         block_group_rec->objectid,
7705                                         block_group_rec->type,
7706                                         block_group_rec->offset,
7707                                         block_group_rec->offset,
7708                                         block_group_rec->objectid,
7709                                         block_group_rec->flags);
7710                         ret = -1;
7711                 } else {
7712                         list_del_init(&block_group_rec->list);
7713                         chunk_rec->bg_rec = block_group_rec;
7714                 }
7715         } else {
7716                 if (!silent)
7717                         fprintf(stderr,
7718                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7719                                 chunk_rec->objectid,
7720                                 chunk_rec->type,
7721                                 chunk_rec->offset,
7722                                 chunk_rec->length,
7723                                 chunk_rec->offset,
7724                                 chunk_rec->type_flags);
7725                 ret = 1;
7726         }
7727
7728         if (metadump_v2)
7729                 return ret;
7730
7731         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7732                                     chunk_rec->num_stripes);
7733         for (i = 0; i < chunk_rec->num_stripes; ++i) {
7734                 devid = chunk_rec->stripes[i].devid;
7735                 offset = chunk_rec->stripes[i].offset;
7736                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7737                                                        devid, offset, length);
7738                 if (dev_extent_item) {
7739                         dev_extent_rec = container_of(dev_extent_item,
7740                                                 struct device_extent_record,
7741                                                 cache);
7742                         if (dev_extent_rec->objectid != devid ||
7743                             dev_extent_rec->offset != offset ||
7744                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
7745                             dev_extent_rec->length != length) {
7746                                 if (!silent)
7747                                         fprintf(stderr,
7748                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7749                                                 chunk_rec->objectid,
7750                                                 chunk_rec->type,
7751                                                 chunk_rec->offset,
7752                                                 chunk_rec->stripes[i].devid,
7753                                                 chunk_rec->stripes[i].offset,
7754                                                 dev_extent_rec->objectid,
7755                                                 dev_extent_rec->offset,
7756                                                 dev_extent_rec->length);
7757                                 ret = -1;
7758                         } else {
7759                                 list_move(&dev_extent_rec->chunk_list,
7760                                           &chunk_rec->dextents);
7761                         }
7762                 } else {
7763                         if (!silent)
7764                                 fprintf(stderr,
7765                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7766                                         chunk_rec->objectid,
7767                                         chunk_rec->type,
7768                                         chunk_rec->offset,
7769                                         chunk_rec->stripes[i].devid,
7770                                         chunk_rec->stripes[i].offset);
7771                         ret = -1;
7772                 }
7773         }
7774         return ret;
7775 }
7776
7777 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7778 int check_chunks(struct cache_tree *chunk_cache,
7779                  struct block_group_tree *block_group_cache,
7780                  struct device_extent_tree *dev_extent_cache,
7781                  struct list_head *good, struct list_head *bad,
7782                  struct list_head *rebuild, int silent)
7783 {
7784         struct cache_extent *chunk_item;
7785         struct chunk_record *chunk_rec;
7786         struct block_group_record *bg_rec;
7787         struct device_extent_record *dext_rec;
7788         int err;
7789         int ret = 0;
7790
7791         chunk_item = first_cache_extent(chunk_cache);
7792         while (chunk_item) {
7793                 chunk_rec = container_of(chunk_item, struct chunk_record,
7794                                          cache);
7795                 err = check_chunk_refs(chunk_rec, block_group_cache,
7796                                        dev_extent_cache, silent);
7797                 if (err < 0)
7798                         ret = err;
7799                 if (err == 0 && good)
7800                         list_add_tail(&chunk_rec->list, good);
7801                 if (err > 0 && rebuild)
7802                         list_add_tail(&chunk_rec->list, rebuild);
7803                 if (err < 0 && bad)
7804                         list_add_tail(&chunk_rec->list, bad);
7805                 chunk_item = next_cache_extent(chunk_item);
7806         }
7807
7808         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7809                 if (!silent)
7810                         fprintf(stderr,
7811                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7812                                 bg_rec->objectid,
7813                                 bg_rec->offset,
7814                                 bg_rec->flags);
7815                 if (!ret)
7816                         ret = 1;
7817         }
7818
7819         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7820                             chunk_list) {
7821                 if (!silent)
7822                         fprintf(stderr,
7823                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7824                                 dext_rec->objectid,
7825                                 dext_rec->offset,
7826                                 dext_rec->length);
7827                 if (!ret)
7828                         ret = 1;
7829         }
7830         return ret;
7831 }
7832
7833
7834 static int check_device_used(struct device_record *dev_rec,
7835                              struct device_extent_tree *dext_cache)
7836 {
7837         struct cache_extent *cache;
7838         struct device_extent_record *dev_extent_rec;
7839         u64 total_byte = 0;
7840
7841         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7842         while (cache) {
7843                 dev_extent_rec = container_of(cache,
7844                                               struct device_extent_record,
7845                                               cache);
7846                 if (dev_extent_rec->objectid != dev_rec->devid)
7847                         break;
7848
7849                 list_del_init(&dev_extent_rec->device_list);
7850                 total_byte += dev_extent_rec->length;
7851                 cache = next_cache_extent(cache);
7852         }
7853
7854         if (total_byte != dev_rec->byte_used) {
7855                 fprintf(stderr,
7856                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7857                         total_byte, dev_rec->byte_used, dev_rec->objectid,
7858                         dev_rec->type, dev_rec->offset);
7859                 return -1;
7860         } else {
7861                 return 0;
7862         }
7863 }
7864
7865 /*
7866  * Unlike device size alignment check above, some super total_bytes check
7867  * failure can lead to mount failure for newer kernel.
7868  *
7869  * So this function will return the error for a fatal super total_bytes problem.
7870  */
7871 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
7872 {
7873         struct btrfs_device *dev;
7874         struct list_head *dev_list = &fs_info->fs_devices->devices;
7875         u64 total_bytes = 0;
7876         u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
7877
7878         list_for_each_entry(dev, dev_list, dev_list)
7879                 total_bytes += dev->total_bytes;
7880
7881         /* Important check, which can cause unmountable fs */
7882         if (super_bytes < total_bytes) {
7883                 error("super total bytes %llu smaller than real device(s) size %llu",
7884                         super_bytes, total_bytes);
7885                 error("mounting this fs may fail for newer kernels");
7886                 error("this can be fixed by 'btrfs rescue fix-device-size'");
7887                 return false;
7888         }
7889
7890         /*
7891          * Optional check, just to make everything aligned and match with each
7892          * other.
7893          *
7894          * For a btrfs-image restored fs, we don't need to check it anyway.
7895          */
7896         if (btrfs_super_flags(fs_info->super_copy) &
7897             (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
7898                 return true;
7899         if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
7900             !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
7901             super_bytes != total_bytes) {
7902                 warning("minor unaligned/mismatch device size detected");
7903                 warning(
7904                 "recommended to use 'btrfs rescue fix-device-size' to fix it");
7905         }
7906         return true;
7907 }
7908
7909 /* check btrfs_dev_item -> btrfs_dev_extent */
7910 static int check_devices(struct rb_root *dev_cache,
7911                          struct device_extent_tree *dev_extent_cache)
7912 {
7913         struct rb_node *dev_node;
7914         struct device_record *dev_rec;
7915         struct device_extent_record *dext_rec;
7916         int err;
7917         int ret = 0;
7918
7919         dev_node = rb_first(dev_cache);
7920         while (dev_node) {
7921                 dev_rec = container_of(dev_node, struct device_record, node);
7922                 err = check_device_used(dev_rec, dev_extent_cache);
7923                 if (err)
7924                         ret = err;
7925
7926                 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
7927                                          global_info->sectorsize);
7928                 dev_node = rb_next(dev_node);
7929         }
7930         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7931                             device_list) {
7932                 fprintf(stderr,
7933                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7934                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
7935                 if (!ret)
7936                         ret = 1;
7937         }
7938         return ret;
7939 }
7940
7941 static int add_root_item_to_list(struct list_head *head,
7942                                   u64 objectid, u64 bytenr, u64 last_snapshot,
7943                                   u8 level, u8 drop_level,
7944                                   struct btrfs_key *drop_key)
7945 {
7946
7947         struct root_item_record *ri_rec;
7948         ri_rec = malloc(sizeof(*ri_rec));
7949         if (!ri_rec)
7950                 return -ENOMEM;
7951         ri_rec->bytenr = bytenr;
7952         ri_rec->objectid = objectid;
7953         ri_rec->level = level;
7954         ri_rec->drop_level = drop_level;
7955         ri_rec->last_snapshot = last_snapshot;
7956         if (drop_key)
7957                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
7958         list_add_tail(&ri_rec->list, head);
7959
7960         return 0;
7961 }
7962
7963 static void free_root_item_list(struct list_head *list)
7964 {
7965         struct root_item_record *ri_rec;
7966
7967         while (!list_empty(list)) {
7968                 ri_rec = list_first_entry(list, struct root_item_record,
7969                                           list);
7970                 list_del_init(&ri_rec->list);
7971                 free(ri_rec);
7972         }
7973 }
7974
7975 static int deal_root_from_list(struct list_head *list,
7976                                struct btrfs_root *root,
7977                                struct block_info *bits,
7978                                int bits_nr,
7979                                struct cache_tree *pending,
7980                                struct cache_tree *seen,
7981                                struct cache_tree *reada,
7982                                struct cache_tree *nodes,
7983                                struct cache_tree *extent_cache,
7984                                struct cache_tree *chunk_cache,
7985                                struct rb_root *dev_cache,
7986                                struct block_group_tree *block_group_cache,
7987                                struct device_extent_tree *dev_extent_cache)
7988 {
7989         int ret = 0;
7990         u64 last;
7991
7992         while (!list_empty(list)) {
7993                 struct root_item_record *rec;
7994                 struct extent_buffer *buf;
7995                 rec = list_entry(list->next,
7996                                  struct root_item_record, list);
7997                 last = 0;
7998                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
7999                 if (!extent_buffer_uptodate(buf)) {
8000                         free_extent_buffer(buf);
8001                         ret = -EIO;
8002                         break;
8003                 }
8004                 ret = add_root_to_pending(buf, extent_cache, pending,
8005                                     seen, nodes, rec->objectid);
8006                 if (ret < 0)
8007                         break;
8008                 /*
8009                  * To rebuild extent tree, we need deal with snapshot
8010                  * one by one, otherwise we deal with node firstly which
8011                  * can maximize readahead.
8012                  */
8013                 while (1) {
8014                         ret = run_next_block(root, bits, bits_nr, &last,
8015                                              pending, seen, reada, nodes,
8016                                              extent_cache, chunk_cache,
8017                                              dev_cache, block_group_cache,
8018                                              dev_extent_cache, rec);
8019                         if (ret != 0)
8020                                 break;
8021                 }
8022                 free_extent_buffer(buf);
8023                 list_del(&rec->list);
8024                 free(rec);
8025                 if (ret < 0)
8026                         break;
8027         }
8028         while (ret >= 0) {
8029                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8030                                      reada, nodes, extent_cache, chunk_cache,
8031                                      dev_cache, block_group_cache,
8032                                      dev_extent_cache, NULL);
8033                 if (ret != 0) {
8034                         if (ret > 0)
8035                                 ret = 0;
8036                         break;
8037                 }
8038         }
8039         return ret;
8040 }
8041
8042 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
8043 {
8044         struct rb_root dev_cache;
8045         struct cache_tree chunk_cache;
8046         struct block_group_tree block_group_cache;
8047         struct device_extent_tree dev_extent_cache;
8048         struct cache_tree extent_cache;
8049         struct cache_tree seen;
8050         struct cache_tree pending;
8051         struct cache_tree reada;
8052         struct cache_tree nodes;
8053         struct extent_io_tree excluded_extents;
8054         struct cache_tree corrupt_blocks;
8055         struct btrfs_path path;
8056         struct btrfs_key key;
8057         struct btrfs_key found_key;
8058         int ret, err = 0;
8059         struct block_info *bits;
8060         int bits_nr;
8061         struct extent_buffer *leaf;
8062         int slot;
8063         struct btrfs_root_item ri;
8064         struct list_head dropping_trees;
8065         struct list_head normal_trees;
8066         struct btrfs_root *root1;
8067         struct btrfs_root *root;
8068         u64 objectid;
8069         u8 level;
8070
8071         root = fs_info->fs_root;
8072         dev_cache = RB_ROOT;
8073         cache_tree_init(&chunk_cache);
8074         block_group_tree_init(&block_group_cache);
8075         device_extent_tree_init(&dev_extent_cache);
8076
8077         cache_tree_init(&extent_cache);
8078         cache_tree_init(&seen);
8079         cache_tree_init(&pending);
8080         cache_tree_init(&nodes);
8081         cache_tree_init(&reada);
8082         cache_tree_init(&corrupt_blocks);
8083         extent_io_tree_init(&excluded_extents);
8084         INIT_LIST_HEAD(&dropping_trees);
8085         INIT_LIST_HEAD(&normal_trees);
8086
8087         if (repair) {
8088                 fs_info->excluded_extents = &excluded_extents;
8089                 fs_info->fsck_extent_cache = &extent_cache;
8090                 fs_info->free_extent_hook = free_extent_hook;
8091                 fs_info->corrupt_blocks = &corrupt_blocks;
8092         }
8093
8094         bits_nr = 1024;
8095         bits = malloc(bits_nr * sizeof(struct block_info));
8096         if (!bits) {
8097                 perror("malloc");
8098                 exit(1);
8099         }
8100
8101         if (ctx.progress_enabled) {
8102                 ctx.tp = TASK_EXTENTS;
8103                 task_start(ctx.info);
8104         }
8105
8106 again:
8107         root1 = fs_info->tree_root;
8108         level = btrfs_header_level(root1->node);
8109         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8110                                     root1->node->start, 0, level, 0, NULL);
8111         if (ret < 0)
8112                 goto out;
8113         root1 = fs_info->chunk_root;
8114         level = btrfs_header_level(root1->node);
8115         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8116                                     root1->node->start, 0, level, 0, NULL);
8117         if (ret < 0)
8118                 goto out;
8119         btrfs_init_path(&path);
8120         key.offset = 0;
8121         key.objectid = 0;
8122         key.type = BTRFS_ROOT_ITEM_KEY;
8123         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
8124         if (ret < 0)
8125                 goto out;
8126         while(1) {
8127                 leaf = path.nodes[0];
8128                 slot = path.slots[0];
8129                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8130                         ret = btrfs_next_leaf(root, &path);
8131                         if (ret != 0)
8132                                 break;
8133                         leaf = path.nodes[0];
8134                         slot = path.slots[0];
8135                 }
8136                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8137                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8138                         unsigned long offset;
8139                         u64 last_snapshot;
8140
8141                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8142                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8143                         last_snapshot = btrfs_root_last_snapshot(&ri);
8144                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8145                                 level = btrfs_root_level(&ri);
8146                                 ret = add_root_item_to_list(&normal_trees,
8147                                                 found_key.objectid,
8148                                                 btrfs_root_bytenr(&ri),
8149                                                 last_snapshot, level,
8150                                                 0, NULL);
8151                                 if (ret < 0)
8152                                         goto out;
8153                         } else {
8154                                 level = btrfs_root_level(&ri);
8155                                 objectid = found_key.objectid;
8156                                 btrfs_disk_key_to_cpu(&found_key,
8157                                                       &ri.drop_progress);
8158                                 ret = add_root_item_to_list(&dropping_trees,
8159                                                 objectid,
8160                                                 btrfs_root_bytenr(&ri),
8161                                                 last_snapshot, level,
8162                                                 ri.drop_level, &found_key);
8163                                 if (ret < 0)
8164                                         goto out;
8165                         }
8166                 }
8167                 path.slots[0]++;
8168         }
8169         btrfs_release_path(&path);
8170
8171         /*
8172          * check_block can return -EAGAIN if it fixes something, please keep
8173          * this in mind when dealing with return values from these functions, if
8174          * we get -EAGAIN we want to fall through and restart the loop.
8175          */
8176         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8177                                   &seen, &reada, &nodes, &extent_cache,
8178                                   &chunk_cache, &dev_cache, &block_group_cache,
8179                                   &dev_extent_cache);
8180         if (ret < 0) {
8181                 if (ret == -EAGAIN)
8182                         goto loop;
8183                 goto out;
8184         }
8185         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8186                                   &pending, &seen, &reada, &nodes,
8187                                   &extent_cache, &chunk_cache, &dev_cache,
8188                                   &block_group_cache, &dev_extent_cache);
8189         if (ret < 0) {
8190                 if (ret == -EAGAIN)
8191                         goto loop;
8192                 goto out;
8193         }
8194
8195         ret = check_chunks(&chunk_cache, &block_group_cache,
8196                            &dev_extent_cache, NULL, NULL, NULL, 0);
8197         if (ret) {
8198                 if (ret == -EAGAIN)
8199                         goto loop;
8200                 err = ret;
8201         }
8202
8203         ret = check_extent_refs(root, &extent_cache);
8204         if (ret < 0) {
8205                 if (ret == -EAGAIN)
8206                         goto loop;
8207                 goto out;
8208         }
8209
8210         ret = check_devices(&dev_cache, &dev_extent_cache);
8211         if (ret && err)
8212                 ret = err;
8213
8214 out:
8215         task_stop(ctx.info);
8216         if (repair) {
8217                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
8218                 extent_io_tree_cleanup(&excluded_extents);
8219                 fs_info->fsck_extent_cache = NULL;
8220                 fs_info->free_extent_hook = NULL;
8221                 fs_info->corrupt_blocks = NULL;
8222                 fs_info->excluded_extents = NULL;
8223         }
8224         free(bits);
8225         free_chunk_cache_tree(&chunk_cache);
8226         free_device_cache_tree(&dev_cache);
8227         free_block_group_tree(&block_group_cache);
8228         free_device_extent_tree(&dev_extent_cache);
8229         free_extent_cache_tree(&seen);
8230         free_extent_cache_tree(&pending);
8231         free_extent_cache_tree(&reada);
8232         free_extent_cache_tree(&nodes);
8233         free_root_item_list(&normal_trees);
8234         free_root_item_list(&dropping_trees);
8235         return ret;
8236 loop:
8237         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
8238         free_extent_cache_tree(&seen);
8239         free_extent_cache_tree(&pending);
8240         free_extent_cache_tree(&reada);
8241         free_extent_cache_tree(&nodes);
8242         free_chunk_cache_tree(&chunk_cache);
8243         free_block_group_tree(&block_group_cache);
8244         free_device_cache_tree(&dev_cache);
8245         free_device_extent_tree(&dev_extent_cache);
8246         free_extent_record_cache(&extent_cache);
8247         free_root_item_list(&normal_trees);
8248         free_root_item_list(&dropping_trees);
8249         extent_io_tree_cleanup(&excluded_extents);
8250         goto again;
8251 }
8252
8253 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
8254 {
8255         int ret;
8256
8257         if (!ctx.progress_enabled)
8258                 fprintf(stderr, "checking extents\n");
8259         if (check_mode == CHECK_MODE_LOWMEM)
8260                 ret = check_chunks_and_extents_v2(fs_info);
8261         else
8262                 ret = check_chunks_and_extents(fs_info);
8263
8264         /* Also repair device size related problems */
8265         if (repair && !ret) {
8266                 ret = btrfs_fix_device_and_super_size(fs_info);
8267                 if (ret > 0)
8268                         ret = 0;
8269         }
8270         return ret;
8271 }
8272
8273 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8274                            struct btrfs_root *root, int overwrite)
8275 {
8276         struct extent_buffer *c;
8277         struct extent_buffer *old = root->node;
8278         int level;
8279         int ret;
8280         struct btrfs_disk_key disk_key = {0,0,0};
8281
8282         level = 0;
8283
8284         if (overwrite) {
8285                 c = old;
8286                 extent_buffer_get(c);
8287                 goto init;
8288         }
8289         c = btrfs_alloc_free_block(trans, root,
8290                                    root->fs_info->nodesize,
8291                                    root->root_key.objectid,
8292                                    &disk_key, level, 0, 0);
8293         if (IS_ERR(c)) {
8294                 c = old;
8295                 extent_buffer_get(c);
8296                 overwrite = 1;
8297         }
8298 init:
8299         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8300         btrfs_set_header_level(c, level);
8301         btrfs_set_header_bytenr(c, c->start);
8302         btrfs_set_header_generation(c, trans->transid);
8303         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8304         btrfs_set_header_owner(c, root->root_key.objectid);
8305
8306         write_extent_buffer(c, root->fs_info->fsid,
8307                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8308
8309         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8310                             btrfs_header_chunk_tree_uuid(c),
8311                             BTRFS_UUID_SIZE);
8312
8313         btrfs_mark_buffer_dirty(c);
8314         /*
8315          * this case can happen in the following case:
8316          *
8317          * 1.overwrite previous root.
8318          *
8319          * 2.reinit reloc data root, this is because we skip pin
8320          * down reloc data tree before which means we can allocate
8321          * same block bytenr here.
8322          */
8323         if (old->start == c->start) {
8324                 btrfs_set_root_generation(&root->root_item,
8325                                           trans->transid);
8326                 root->root_item.level = btrfs_header_level(root->node);
8327                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8328                                         &root->root_key, &root->root_item);
8329                 if (ret) {
8330                         free_extent_buffer(c);
8331                         return ret;
8332                 }
8333         }
8334         free_extent_buffer(old);
8335         root->node = c;
8336         add_root_to_dirty_list(root);
8337         return 0;
8338 }
8339
8340 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8341                                 struct extent_buffer *eb, int tree_root)
8342 {
8343         struct extent_buffer *tmp;
8344         struct btrfs_root_item *ri;
8345         struct btrfs_key key;
8346         u64 bytenr;
8347         int level = btrfs_header_level(eb);
8348         int nritems;
8349         int ret;
8350         int i;
8351
8352         /*
8353          * If we have pinned this block before, don't pin it again.
8354          * This can not only avoid forever loop with broken filesystem
8355          * but also give us some speedups.
8356          */
8357         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8358                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8359                 return 0;
8360
8361         btrfs_pin_extent(fs_info, eb->start, eb->len);
8362
8363         nritems = btrfs_header_nritems(eb);
8364         for (i = 0; i < nritems; i++) {
8365                 if (level == 0) {
8366                         btrfs_item_key_to_cpu(eb, &key, i);
8367                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8368                                 continue;
8369                         /* Skip the extent root and reloc roots */
8370                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8371                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8372                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8373                                 continue;
8374                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8375                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8376
8377                         /*
8378                          * If at any point we start needing the real root we
8379                          * will have to build a stump root for the root we are
8380                          * in, but for now this doesn't actually use the root so
8381                          * just pass in extent_root.
8382                          */
8383                         tmp = read_tree_block(fs_info, bytenr, 0);
8384                         if (!extent_buffer_uptodate(tmp)) {
8385                                 fprintf(stderr, "Error reading root block\n");
8386                                 return -EIO;
8387                         }
8388                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8389                         free_extent_buffer(tmp);
8390                         if (ret)
8391                                 return ret;
8392                 } else {
8393                         bytenr = btrfs_node_blockptr(eb, i);
8394
8395                         /* If we aren't the tree root don't read the block */
8396                         if (level == 1 && !tree_root) {
8397                                 btrfs_pin_extent(fs_info, bytenr,
8398                                                 fs_info->nodesize);
8399                                 continue;
8400                         }
8401
8402                         tmp = read_tree_block(fs_info, bytenr, 0);
8403                         if (!extent_buffer_uptodate(tmp)) {
8404                                 fprintf(stderr, "Error reading tree block\n");
8405                                 return -EIO;
8406                         }
8407                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8408                         free_extent_buffer(tmp);
8409                         if (ret)
8410                                 return ret;
8411                 }
8412         }
8413
8414         return 0;
8415 }
8416
8417 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8418 {
8419         int ret;
8420
8421         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8422         if (ret)
8423                 return ret;
8424
8425         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8426 }
8427
8428 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8429 {
8430         struct btrfs_block_group_cache *cache;
8431         struct btrfs_path path;
8432         struct extent_buffer *leaf;
8433         struct btrfs_chunk *chunk;
8434         struct btrfs_key key;
8435         int ret;
8436         u64 start;
8437
8438         btrfs_init_path(&path);
8439         key.objectid = 0;
8440         key.type = BTRFS_CHUNK_ITEM_KEY;
8441         key.offset = 0;
8442         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
8443         if (ret < 0) {
8444                 btrfs_release_path(&path);
8445                 return ret;
8446         }
8447
8448         /*
8449          * We do this in case the block groups were screwed up and had alloc
8450          * bits that aren't actually set on the chunks.  This happens with
8451          * restored images every time and could happen in real life I guess.
8452          */
8453         fs_info->avail_data_alloc_bits = 0;
8454         fs_info->avail_metadata_alloc_bits = 0;
8455         fs_info->avail_system_alloc_bits = 0;
8456
8457         /* First we need to create the in-memory block groups */
8458         while (1) {
8459                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8460                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
8461                         if (ret < 0) {
8462                                 btrfs_release_path(&path);
8463                                 return ret;
8464                         }
8465                         if (ret) {
8466                                 ret = 0;
8467                                 break;
8468                         }
8469                 }
8470                 leaf = path.nodes[0];
8471                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8472                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8473                         path.slots[0]++;
8474                         continue;
8475                 }
8476
8477                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
8478                 btrfs_add_block_group(fs_info, 0,
8479                                       btrfs_chunk_type(leaf, chunk), key.offset,
8480                                       btrfs_chunk_length(leaf, chunk));
8481                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8482                                  key.offset + btrfs_chunk_length(leaf, chunk));
8483                 path.slots[0]++;
8484         }
8485         start = 0;
8486         while (1) {
8487                 cache = btrfs_lookup_first_block_group(fs_info, start);
8488                 if (!cache)
8489                         break;
8490                 cache->cached = 1;
8491                 start = cache->key.objectid + cache->key.offset;
8492         }
8493
8494         btrfs_release_path(&path);
8495         return 0;
8496 }
8497
8498 static int reset_balance(struct btrfs_trans_handle *trans,
8499                          struct btrfs_fs_info *fs_info)
8500 {
8501         struct btrfs_root *root = fs_info->tree_root;
8502         struct btrfs_path path;
8503         struct extent_buffer *leaf;
8504         struct btrfs_key key;
8505         int del_slot, del_nr = 0;
8506         int ret;
8507         int found = 0;
8508
8509         btrfs_init_path(&path);
8510         key.objectid = BTRFS_BALANCE_OBJECTID;
8511         key.type = BTRFS_BALANCE_ITEM_KEY;
8512         key.offset = 0;
8513         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8514         if (ret) {
8515                 if (ret > 0)
8516                         ret = 0;
8517                 if (!ret)
8518                         goto reinit_data_reloc;
8519                 else
8520                         goto out;
8521         }
8522
8523         ret = btrfs_del_item(trans, root, &path);
8524         if (ret)
8525                 goto out;
8526         btrfs_release_path(&path);
8527
8528         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8529         key.type = BTRFS_ROOT_ITEM_KEY;
8530         key.offset = 0;
8531         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8532         if (ret < 0)
8533                 goto out;
8534         while (1) {
8535                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8536                         if (!found)
8537                                 break;
8538
8539                         if (del_nr) {
8540                                 ret = btrfs_del_items(trans, root, &path,
8541                                                       del_slot, del_nr);
8542                                 del_nr = 0;
8543                                 if (ret)
8544                                         goto out;
8545                         }
8546                         key.offset++;
8547                         btrfs_release_path(&path);
8548
8549                         found = 0;
8550                         ret = btrfs_search_slot(trans, root, &key, &path,
8551                                                 -1, 1);
8552                         if (ret < 0)
8553                                 goto out;
8554                         continue;
8555                 }
8556                 found = 1;
8557                 leaf = path.nodes[0];
8558                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8559                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8560                         break;
8561                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8562                         path.slots[0]++;
8563                         continue;
8564                 }
8565                 if (!del_nr) {
8566                         del_slot = path.slots[0];
8567                         del_nr = 1;
8568                 } else {
8569                         del_nr++;
8570                 }
8571                 path.slots[0]++;
8572         }
8573
8574         if (del_nr) {
8575                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
8576                 if (ret)
8577                         goto out;
8578         }
8579         btrfs_release_path(&path);
8580
8581 reinit_data_reloc:
8582         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8583         key.type = BTRFS_ROOT_ITEM_KEY;
8584         key.offset = (u64)-1;
8585         root = btrfs_read_fs_root(fs_info, &key);
8586         if (IS_ERR(root)) {
8587                 fprintf(stderr, "Error reading data reloc tree\n");
8588                 ret = PTR_ERR(root);
8589                 goto out;
8590         }
8591         record_root_in_trans(trans, root);
8592         ret = btrfs_fsck_reinit_root(trans, root, 0);
8593         if (ret)
8594                 goto out;
8595         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8596 out:
8597         btrfs_release_path(&path);
8598         return ret;
8599 }
8600
8601 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8602                               struct btrfs_fs_info *fs_info)
8603 {
8604         u64 start = 0;
8605         int ret;
8606
8607         /*
8608          * The only reason we don't do this is because right now we're just
8609          * walking the trees we find and pinning down their bytes, we don't look
8610          * at any of the leaves.  In order to do mixed groups we'd have to check
8611          * the leaves of any fs roots and pin down the bytes for any file
8612          * extents we find.  Not hard but why do it if we don't have to?
8613          */
8614         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
8615                 fprintf(stderr, "We don't support re-initing the extent tree "
8616                         "for mixed block groups yet, please notify a btrfs "
8617                         "developer you want to do this so they can add this "
8618                         "functionality.\n");
8619                 return -EINVAL;
8620         }
8621
8622         /*
8623          * first we need to walk all of the trees except the extent tree and pin
8624          * down the bytes that are in use so we don't overwrite any existing
8625          * metadata.
8626          */
8627         ret = pin_metadata_blocks(fs_info);
8628         if (ret) {
8629                 fprintf(stderr, "error pinning down used bytes\n");
8630                 return ret;
8631         }
8632
8633         /*
8634          * Need to drop all the block groups since we're going to recreate all
8635          * of them again.
8636          */
8637         btrfs_free_block_groups(fs_info);
8638         ret = reset_block_groups(fs_info);
8639         if (ret) {
8640                 fprintf(stderr, "error resetting the block groups\n");
8641                 return ret;
8642         }
8643
8644         /* Ok we can allocate now, reinit the extent root */
8645         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8646         if (ret) {
8647                 fprintf(stderr, "extent root initialization failed\n");
8648                 /*
8649                  * When the transaction code is updated we should end the
8650                  * transaction, but for now progs only knows about commit so
8651                  * just return an error.
8652                  */
8653                 return ret;
8654         }
8655
8656         /*
8657          * Now we have all the in-memory block groups setup so we can make
8658          * allocations properly, and the metadata we care about is safe since we
8659          * pinned all of it above.
8660          */
8661         while (1) {
8662                 struct btrfs_block_group_cache *cache;
8663
8664                 cache = btrfs_lookup_first_block_group(fs_info, start);
8665                 if (!cache)
8666                         break;
8667                 start = cache->key.objectid + cache->key.offset;
8668                 ret = btrfs_insert_item(trans, fs_info->extent_root,
8669                                         &cache->key, &cache->item,
8670                                         sizeof(cache->item));
8671                 if (ret) {
8672                         fprintf(stderr, "Error adding block group\n");
8673                         return ret;
8674                 }
8675                 btrfs_extent_post_op(trans, fs_info->extent_root);
8676         }
8677
8678         ret = reset_balance(trans, fs_info);
8679         if (ret)
8680                 fprintf(stderr, "error resetting the pending balance\n");
8681
8682         return ret;
8683 }
8684
8685 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8686 {
8687         struct btrfs_path path;
8688         struct btrfs_trans_handle *trans;
8689         struct btrfs_key key;
8690         int ret;
8691
8692         printf("Recowing metadata block %llu\n", eb->start);
8693         key.objectid = btrfs_header_owner(eb);
8694         key.type = BTRFS_ROOT_ITEM_KEY;
8695         key.offset = (u64)-1;
8696
8697         root = btrfs_read_fs_root(root->fs_info, &key);
8698         if (IS_ERR(root)) {
8699                 fprintf(stderr, "Couldn't find owner root %llu\n",
8700                         key.objectid);
8701                 return PTR_ERR(root);
8702         }
8703
8704         trans = btrfs_start_transaction(root, 1);
8705         if (IS_ERR(trans))
8706                 return PTR_ERR(trans);
8707
8708         btrfs_init_path(&path);
8709         path.lowest_level = btrfs_header_level(eb);
8710         if (path.lowest_level)
8711                 btrfs_node_key_to_cpu(eb, &key, 0);
8712         else
8713                 btrfs_item_key_to_cpu(eb, &key, 0);
8714
8715         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
8716         btrfs_commit_transaction(trans, root);
8717         btrfs_release_path(&path);
8718         return ret;
8719 }
8720
8721 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8722 {
8723         struct btrfs_path path;
8724         struct btrfs_trans_handle *trans;
8725         struct btrfs_key key;
8726         int ret;
8727
8728         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8729                bad->key.type, bad->key.offset);
8730         key.objectid = bad->root_id;
8731         key.type = BTRFS_ROOT_ITEM_KEY;
8732         key.offset = (u64)-1;
8733
8734         root = btrfs_read_fs_root(root->fs_info, &key);
8735         if (IS_ERR(root)) {
8736                 fprintf(stderr, "Couldn't find owner root %llu\n",
8737                         key.objectid);
8738                 return PTR_ERR(root);
8739         }
8740
8741         trans = btrfs_start_transaction(root, 1);
8742         if (IS_ERR(trans))
8743                 return PTR_ERR(trans);
8744
8745         btrfs_init_path(&path);
8746         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
8747         if (ret) {
8748                 if (ret > 0)
8749                         ret = 0;
8750                 goto out;
8751         }
8752         ret = btrfs_del_item(trans, root, &path);
8753 out:
8754         btrfs_commit_transaction(trans, root);
8755         btrfs_release_path(&path);
8756         return ret;
8757 }
8758
8759 static int zero_log_tree(struct btrfs_root *root)
8760 {
8761         struct btrfs_trans_handle *trans;
8762         int ret;
8763
8764         trans = btrfs_start_transaction(root, 1);
8765         if (IS_ERR(trans)) {
8766                 ret = PTR_ERR(trans);
8767                 return ret;
8768         }
8769         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8770         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8771         ret = btrfs_commit_transaction(trans, root);
8772         return ret;
8773 }
8774
8775 static int populate_csum(struct btrfs_trans_handle *trans,
8776                          struct btrfs_root *csum_root, char *buf, u64 start,
8777                          u64 len)
8778 {
8779         struct btrfs_fs_info *fs_info = csum_root->fs_info;
8780         u64 offset = 0;
8781         u64 sectorsize;
8782         int ret = 0;
8783
8784         while (offset < len) {
8785                 sectorsize = fs_info->sectorsize;
8786                 ret = read_extent_data(fs_info, buf, start + offset,
8787                                        &sectorsize, 0);
8788                 if (ret)
8789                         break;
8790                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8791                                             start + offset, buf, sectorsize);
8792                 if (ret)
8793                         break;
8794                 offset += sectorsize;
8795         }
8796         return ret;
8797 }
8798
8799 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8800                                       struct btrfs_root *csum_root,
8801                                       struct btrfs_root *cur_root)
8802 {
8803         struct btrfs_path path;
8804         struct btrfs_key key;
8805         struct extent_buffer *node;
8806         struct btrfs_file_extent_item *fi;
8807         char *buf = NULL;
8808         u64 start = 0;
8809         u64 len = 0;
8810         int slot = 0;
8811         int ret = 0;
8812
8813         buf = malloc(cur_root->fs_info->sectorsize);
8814         if (!buf)
8815                 return -ENOMEM;
8816
8817         btrfs_init_path(&path);
8818         key.objectid = 0;
8819         key.offset = 0;
8820         key.type = 0;
8821         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
8822         if (ret < 0)
8823                 goto out;
8824         /* Iterate all regular file extents and fill its csum */
8825         while (1) {
8826                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8827
8828                 if (key.type != BTRFS_EXTENT_DATA_KEY)
8829                         goto next;
8830                 node = path.nodes[0];
8831                 slot = path.slots[0];
8832                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8833                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8834                         goto next;
8835                 start = btrfs_file_extent_disk_bytenr(node, fi);
8836                 len = btrfs_file_extent_disk_num_bytes(node, fi);
8837
8838                 ret = populate_csum(trans, csum_root, buf, start, len);
8839                 if (ret == -EEXIST)
8840                         ret = 0;
8841                 if (ret < 0)
8842                         goto out;
8843 next:
8844                 /*
8845                  * TODO: if next leaf is corrupted, jump to nearest next valid
8846                  * leaf.
8847                  */
8848                 ret = btrfs_next_item(cur_root, &path);
8849                 if (ret < 0)
8850                         goto out;
8851                 if (ret > 0) {
8852                         ret = 0;
8853                         goto out;
8854                 }
8855         }
8856
8857 out:
8858         btrfs_release_path(&path);
8859         free(buf);
8860         return ret;
8861 }
8862
8863 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8864                                   struct btrfs_root *csum_root)
8865 {
8866         struct btrfs_fs_info *fs_info = csum_root->fs_info;
8867         struct btrfs_path path;
8868         struct btrfs_root *tree_root = fs_info->tree_root;
8869         struct btrfs_root *cur_root;
8870         struct extent_buffer *node;
8871         struct btrfs_key key;
8872         int slot = 0;
8873         int ret = 0;
8874
8875         btrfs_init_path(&path);
8876         key.objectid = BTRFS_FS_TREE_OBJECTID;
8877         key.offset = 0;
8878         key.type = BTRFS_ROOT_ITEM_KEY;
8879         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
8880         if (ret < 0)
8881                 goto out;
8882         if (ret > 0) {
8883                 ret = -ENOENT;
8884                 goto out;
8885         }
8886
8887         while (1) {
8888                 node = path.nodes[0];
8889                 slot = path.slots[0];
8890                 btrfs_item_key_to_cpu(node, &key, slot);
8891                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8892                         goto out;
8893                 if (key.type != BTRFS_ROOT_ITEM_KEY)
8894                         goto next;
8895                 if (!is_fstree(key.objectid))
8896                         goto next;
8897                 key.offset = (u64)-1;
8898
8899                 cur_root = btrfs_read_fs_root(fs_info, &key);
8900                 if (IS_ERR(cur_root) || !cur_root) {
8901                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8902                                 key.objectid);
8903                         goto out;
8904                 }
8905                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8906                                 cur_root);
8907                 if (ret < 0)
8908                         goto out;
8909 next:
8910                 ret = btrfs_next_item(tree_root, &path);
8911                 if (ret > 0) {
8912                         ret = 0;
8913                         goto out;
8914                 }
8915                 if (ret < 0)
8916                         goto out;
8917         }
8918
8919 out:
8920         btrfs_release_path(&path);
8921         return ret;
8922 }
8923
8924 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
8925                                       struct btrfs_root *csum_root)
8926 {
8927         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
8928         struct btrfs_path path;
8929         struct btrfs_extent_item *ei;
8930         struct extent_buffer *leaf;
8931         char *buf;
8932         struct btrfs_key key;
8933         int ret;
8934
8935         btrfs_init_path(&path);
8936         key.objectid = 0;
8937         key.type = BTRFS_EXTENT_ITEM_KEY;
8938         key.offset = 0;
8939         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8940         if (ret < 0) {
8941                 btrfs_release_path(&path);
8942                 return ret;
8943         }
8944
8945         buf = malloc(csum_root->fs_info->sectorsize);
8946         if (!buf) {
8947                 btrfs_release_path(&path);
8948                 return -ENOMEM;
8949         }
8950
8951         while (1) {
8952                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8953                         ret = btrfs_next_leaf(extent_root, &path);
8954                         if (ret < 0)
8955                                 break;
8956                         if (ret) {
8957                                 ret = 0;
8958                                 break;
8959                         }
8960                 }
8961                 leaf = path.nodes[0];
8962
8963                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8964                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8965                         path.slots[0]++;
8966                         continue;
8967                 }
8968
8969                 ei = btrfs_item_ptr(leaf, path.slots[0],
8970                                     struct btrfs_extent_item);
8971                 if (!(btrfs_extent_flags(leaf, ei) &
8972                       BTRFS_EXTENT_FLAG_DATA)) {
8973                         path.slots[0]++;
8974                         continue;
8975                 }
8976
8977                 ret = populate_csum(trans, csum_root, buf, key.objectid,
8978                                     key.offset);
8979                 if (ret)
8980                         break;
8981                 path.slots[0]++;
8982         }
8983
8984         btrfs_release_path(&path);
8985         free(buf);
8986         return ret;
8987 }
8988
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * An extent tree re-init wipes out all extent info, so in that situation the
 * extent tree cannot be used as the source of data extents.  A non-zero
 * search_fs_tree selects the fs/subvolume trees as the source; zero selects
 * the extent tree.
 *
 * Returns whatever the selected helper returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ?
                fill_csum_tree_from_fs(trans, csum_root) :
                fill_csum_tree_from_extent(trans, csum_root);
}
9005
9006 static void free_roots_info_cache(void)
9007 {
9008         if (!roots_info_cache)
9009                 return;
9010
9011         while (!cache_tree_empty(roots_info_cache)) {
9012                 struct cache_extent *entry;
9013                 struct root_item_info *rii;
9014
9015                 entry = first_cache_extent(roots_info_cache);
9016                 if (!entry)
9017                         break;
9018                 remove_cache_extent(roots_info_cache, entry);
9019                 rii = container_of(entry, struct root_item_info, cache_extent);
9020                 free(rii);
9021         }
9022
9023         free(roots_info_cache);
9024         roots_info_cache = NULL;
9025 }
9026
9027 static int build_roots_info_cache(struct btrfs_fs_info *info)
9028 {
9029         int ret = 0;
9030         struct btrfs_key key;
9031         struct extent_buffer *leaf;
9032         struct btrfs_path path;
9033
9034         if (!roots_info_cache) {
9035                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9036                 if (!roots_info_cache)
9037                         return -ENOMEM;
9038                 cache_tree_init(roots_info_cache);
9039         }
9040
9041         btrfs_init_path(&path);
9042         key.objectid = 0;
9043         key.type = BTRFS_EXTENT_ITEM_KEY;
9044         key.offset = 0;
9045         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
9046         if (ret < 0)
9047                 goto out;
9048         leaf = path.nodes[0];
9049
9050         while (1) {
9051                 struct btrfs_key found_key;
9052                 struct btrfs_extent_item *ei;
9053                 struct btrfs_extent_inline_ref *iref;
9054                 int slot = path.slots[0];
9055                 int type;
9056                 u64 flags;
9057                 u64 root_id;
9058                 u8 level;
9059                 struct cache_extent *entry;
9060                 struct root_item_info *rii;
9061
9062                 if (slot >= btrfs_header_nritems(leaf)) {
9063                         ret = btrfs_next_leaf(info->extent_root, &path);
9064                         if (ret < 0) {
9065                                 break;
9066                         } else if (ret) {
9067                                 ret = 0;
9068                                 break;
9069                         }
9070                         leaf = path.nodes[0];
9071                         slot = path.slots[0];
9072                 }
9073
9074                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9075
9076                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9077                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9078                         goto next;
9079
9080                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9081                 flags = btrfs_extent_flags(leaf, ei);
9082
9083                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9084                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9085                         goto next;
9086
9087                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9088                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9089                         level = found_key.offset;
9090                 } else {
9091                         struct btrfs_tree_block_info *binfo;
9092
9093                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
9094                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
9095                         level = btrfs_tree_block_level(leaf, binfo);
9096                 }
9097
9098                 /*
9099                  * For a root extent, it must be of the following type and the
9100                  * first (and only one) iref in the item.
9101                  */
9102                 type = btrfs_extent_inline_ref_type(leaf, iref);
9103                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9104                         goto next;
9105
9106                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9107                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9108                 if (!entry) {
9109                         rii = malloc(sizeof(struct root_item_info));
9110                         if (!rii) {
9111                                 ret = -ENOMEM;
9112                                 goto out;
9113                         }
9114                         rii->cache_extent.start = root_id;
9115                         rii->cache_extent.size = 1;
9116                         rii->level = (u8)-1;
9117                         entry = &rii->cache_extent;
9118                         ret = insert_cache_extent(roots_info_cache, entry);
9119                         ASSERT(ret == 0);
9120                 } else {
9121                         rii = container_of(entry, struct root_item_info,
9122                                            cache_extent);
9123                 }
9124
9125                 ASSERT(rii->cache_extent.start == root_id);
9126                 ASSERT(rii->cache_extent.size == 1);
9127
9128                 if (level > rii->level || rii->level == (u8)-1) {
9129                         rii->level = level;
9130                         rii->bytenr = found_key.objectid;
9131                         rii->gen = btrfs_extent_generation(leaf, ei);
9132                         rii->node_count = 1;
9133                 } else if (level == rii->level) {
9134                         rii->node_count++;
9135                 }
9136 next:
9137                 path.slots[0]++;
9138         }
9139
9140 out:
9141         btrfs_release_path(&path);
9142
9143         return ret;
9144 }
9145
9146 static int maybe_repair_root_item(struct btrfs_path *path,
9147                                   const struct btrfs_key *root_key,
9148                                   const int read_only_mode)
9149 {
9150         const u64 root_id = root_key->objectid;
9151         struct cache_extent *entry;
9152         struct root_item_info *rii;
9153         struct btrfs_root_item ri;
9154         unsigned long offset;
9155
9156         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9157         if (!entry) {
9158                 fprintf(stderr,
9159                         "Error: could not find extent items for root %llu\n",
9160                         root_key->objectid);
9161                 return -ENOENT;
9162         }
9163
9164         rii = container_of(entry, struct root_item_info, cache_extent);
9165         ASSERT(rii->cache_extent.start == root_id);
9166         ASSERT(rii->cache_extent.size == 1);
9167
9168         if (rii->node_count != 1) {
9169                 fprintf(stderr,
9170                         "Error: could not find btree root extent for root %llu\n",
9171                         root_id);
9172                 return -ENOENT;
9173         }
9174
9175         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9176         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9177
9178         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9179             btrfs_root_level(&ri) != rii->level ||
9180             btrfs_root_generation(&ri) != rii->gen) {
9181
9182                 /*
9183                  * If we're in repair mode but our caller told us to not update
9184                  * the root item, i.e. just check if it needs to be updated, don't
9185                  * print this message, since the caller will call us again shortly
9186                  * for the same root item without read only mode (the caller will
9187                  * open a transaction first).
9188                  */
9189                 if (!(read_only_mode && repair))
9190                         fprintf(stderr,
9191                                 "%sroot item for root %llu,"
9192                                 " current bytenr %llu, current gen %llu, current level %u,"
9193                                 " new bytenr %llu, new gen %llu, new level %u\n",
9194                                 (read_only_mode ? "" : "fixing "),
9195                                 root_id,
9196                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9197                                 btrfs_root_level(&ri),
9198                                 rii->bytenr, rii->gen, rii->level);
9199
9200                 if (btrfs_root_generation(&ri) > rii->gen) {
9201                         fprintf(stderr,
9202                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9203                                 root_id, btrfs_root_generation(&ri), rii->gen);
9204                         return -EINVAL;
9205                 }
9206
9207                 if (!read_only_mode) {
9208                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9209                         btrfs_set_root_level(&ri, rii->level);
9210                         btrfs_set_root_generation(&ri, rii->gen);
9211                         write_extent_buffer(path->nodes[0], &ri,
9212                                             offset, sizeof(ri));
9213                 }
9214
9215                 return 1;
9216         }
9217
9218         return 0;
9219 }
9220
9221 /*
9222  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9223  * caused read-only snapshots to be corrupted if they were created at a moment
9224  * when the source subvolume/snapshot had orphan items. The issue was that the
9225  * on-disk root items became incorrect, referring to the pre orphan cleanup root
9226  * node instead of the post orphan cleanup root node.
9227  * So this function, and its callees, just detects and fixes those cases. Even
9228  * though the regression was for read-only snapshots, this function applies to
9229  * any snapshot/subvolume root.
9230  * This must be run before any other repair code - not doing it so, makes other
9231  * repair code delete or modify backrefs in the extent tree for example, which
9232  * will result in an inconsistent fs after repairing the root items.
9233  */
9234 static int repair_root_items(struct btrfs_fs_info *info)
9235 {
9236         struct btrfs_path path;
9237         struct btrfs_key key;
9238         struct extent_buffer *leaf;
9239         struct btrfs_trans_handle *trans = NULL;
9240         int ret = 0;
9241         int bad_roots = 0;
9242         int need_trans = 0;
9243
9244         btrfs_init_path(&path);
9245
9246         ret = build_roots_info_cache(info);
9247         if (ret)
9248                 goto out;
9249
9250         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9251         key.type = BTRFS_ROOT_ITEM_KEY;
9252         key.offset = 0;
9253
9254 again:
9255         /*
9256          * Avoid opening and committing transactions if a leaf doesn't have
9257          * any root items that need to be fixed, so that we avoid rotating
9258          * backup roots unnecessarily.
9259          */
9260         if (need_trans) {
9261                 trans = btrfs_start_transaction(info->tree_root, 1);
9262                 if (IS_ERR(trans)) {
9263                         ret = PTR_ERR(trans);
9264                         goto out;
9265                 }
9266         }
9267
9268         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
9269                                 0, trans ? 1 : 0);
9270         if (ret < 0)
9271                 goto out;
9272         leaf = path.nodes[0];
9273
9274         while (1) {
9275                 struct btrfs_key found_key;
9276
9277                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
9278                         int no_more_keys = find_next_key(&path, &key);
9279
9280                         btrfs_release_path(&path);
9281                         if (trans) {
9282                                 ret = btrfs_commit_transaction(trans,
9283                                                                info->tree_root);
9284                                 trans = NULL;
9285                                 if (ret < 0)
9286                                         goto out;
9287                         }
9288                         need_trans = 0;
9289                         if (no_more_keys)
9290                                 break;
9291                         goto again;
9292                 }
9293
9294                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9295
9296                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9297                         goto next;
9298                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9299                         goto next;
9300
9301                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
9302                 if (ret < 0)
9303                         goto out;
9304                 if (ret) {
9305                         if (!trans && repair) {
9306                                 need_trans = 1;
9307                                 key = found_key;
9308                                 btrfs_release_path(&path);
9309                                 goto again;
9310                         }
9311                         bad_roots++;
9312                 }
9313 next:
9314                 path.slots[0]++;
9315         }
9316         ret = 0;
9317 out:
9318         free_roots_info_cache();
9319         btrfs_release_path(&path);
9320         if (trans)
9321                 btrfs_commit_transaction(trans, info->tree_root);
9322         if (ret < 0)
9323                 return ret;
9324
9325         return bad_roots;
9326 }
9327
9328 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
9329 {
9330         struct btrfs_trans_handle *trans;
9331         struct btrfs_block_group_cache *bg_cache;
9332         u64 current = 0;
9333         int ret = 0;
9334
9335         /* Clear all free space cache inodes and its extent data */
9336         while (1) {
9337                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
9338                 if (!bg_cache)
9339                         break;
9340                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
9341                 if (ret < 0)
9342                         return ret;
9343                 current = bg_cache->key.objectid + bg_cache->key.offset;
9344         }
9345
9346         /* Don't forget to set cache_generation to -1 */
9347         trans = btrfs_start_transaction(fs_info->tree_root, 0);
9348         if (IS_ERR(trans)) {
9349                 error("failed to update super block cache generation");
9350                 return PTR_ERR(trans);
9351         }
9352         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
9353         btrfs_commit_transaction(trans, fs_info->tree_root);
9354
9355         return ret;
9356 }
9357
9358 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
9359                 int clear_version)
9360 {
9361         int ret = 0;
9362
9363         if (clear_version == 1) {
9364                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
9365                         error(
9366                 "free space cache v2 detected, use --clear-space-cache v2");
9367                         ret = 1;
9368                         goto close_out;
9369                 }
9370                 printf("Clearing free space cache\n");
9371                 ret = clear_free_space_cache(fs_info);
9372                 if (ret) {
9373                         error("failed to clear free space cache");
9374                         ret = 1;
9375                 } else {
9376                         printf("Free space cache cleared\n");
9377                 }
9378         } else if (clear_version == 2) {
9379                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
9380                         printf("no free space cache v2 to clear\n");
9381                         ret = 0;
9382                         goto close_out;
9383                 }
9384                 printf("Clear free space cache v2\n");
9385                 ret = btrfs_clear_free_space_tree(fs_info);
9386                 if (ret) {
9387                         error("failed to clear free space cache v2: %d", ret);
9388                         ret = 1;
9389                 } else {
9390                         printf("free space cache v2 cleared\n");
9391                 }
9392         }
9393 close_out:
9394         return ret;
9395 }
9396
/*
 * Help text for "btrfs check": a NULL-terminated array of lines that
 * cmd_check() hands to usage().
 */
const char *const cmd_check_usage[] = {
        /* Synopsis and description */
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        /* Options */
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--force                     skip mount checks, repair is not possible",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
9428
9429 int cmd_check(int argc, char **argv)
9430 {
9431         struct cache_tree root_cache;
9432         struct btrfs_root *root;
9433         struct btrfs_fs_info *info;
9434         u64 bytenr = 0;
9435         u64 subvolid = 0;
9436         u64 tree_root_bytenr = 0;
9437         u64 chunk_root_bytenr = 0;
9438         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9439         int ret = 0;
9440         int err = 0;
9441         u64 num;
9442         int init_csum_tree = 0;
9443         int readonly = 0;
9444         int clear_space_cache = 0;
9445         int qgroup_report = 0;
9446         int qgroups_repaired = 0;
9447         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
9448         int force = 0;
9449
9450         while(1) {
9451                 int c;
9452                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
9453                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
9454                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
9455                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
9456                         GETOPT_VAL_FORCE };
9457                 static const struct option long_options[] = {
9458                         { "super", required_argument, NULL, 's' },
9459                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
9460                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
9461                         { "init-csum-tree", no_argument, NULL,
9462                                 GETOPT_VAL_INIT_CSUM },
9463                         { "init-extent-tree", no_argument, NULL,
9464                                 GETOPT_VAL_INIT_EXTENT },
9465                         { "check-data-csum", no_argument, NULL,
9466                                 GETOPT_VAL_CHECK_CSUM },
9467                         { "backup", no_argument, NULL, 'b' },
9468                         { "subvol-extents", required_argument, NULL, 'E' },
9469                         { "qgroup-report", no_argument, NULL, 'Q' },
9470                         { "tree-root", required_argument, NULL, 'r' },
9471                         { "chunk-root", required_argument, NULL,
9472                                 GETOPT_VAL_CHUNK_TREE },
9473                         { "progress", no_argument, NULL, 'p' },
9474                         { "mode", required_argument, NULL,
9475                                 GETOPT_VAL_MODE },
9476                         { "clear-space-cache", required_argument, NULL,
9477                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
9478                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
9479                         { NULL, 0, NULL, 0}
9480                 };
9481
9482                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
9483                 if (c < 0)
9484                         break;
9485                 switch(c) {
9486                         case 'a': /* ignored */ break;
9487                         case 'b':
9488                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9489                                 break;
9490                         case 's':
9491                                 num = arg_strtou64(optarg);
9492                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9493                                         error(
9494                                         "super mirror should be less than %d",
9495                                                 BTRFS_SUPER_MIRROR_MAX);
9496                                         exit(1);
9497                                 }
9498                                 bytenr = btrfs_sb_offset(((int)num));
9499                                 printf("using SB copy %llu, bytenr %llu\n", num,
9500                                        (unsigned long long)bytenr);
9501                                 break;
9502                         case 'Q':
9503                                 qgroup_report = 1;
9504                                 break;
9505                         case 'E':
9506                                 subvolid = arg_strtou64(optarg);
9507                                 break;
9508                         case 'r':
9509                                 tree_root_bytenr = arg_strtou64(optarg);
9510                                 break;
9511                         case GETOPT_VAL_CHUNK_TREE:
9512                                 chunk_root_bytenr = arg_strtou64(optarg);
9513                                 break;
9514                         case 'p':
9515                                 ctx.progress_enabled = true;
9516                                 break;
9517                         case '?':
9518                         case 'h':
9519                                 usage(cmd_check_usage);
9520                         case GETOPT_VAL_REPAIR:
9521                                 printf("enabling repair mode\n");
9522                                 repair = 1;
9523                                 ctree_flags |= OPEN_CTREE_WRITES;
9524                                 break;
9525                         case GETOPT_VAL_READONLY:
9526                                 readonly = 1;
9527                                 break;
9528                         case GETOPT_VAL_INIT_CSUM:
9529                                 printf("Creating a new CRC tree\n");
9530                                 init_csum_tree = 1;
9531                                 repair = 1;
9532                                 ctree_flags |= OPEN_CTREE_WRITES;
9533                                 break;
9534                         case GETOPT_VAL_INIT_EXTENT:
9535                                 init_extent_tree = 1;
9536                                 ctree_flags |= (OPEN_CTREE_WRITES |
9537                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9538                                 repair = 1;
9539                                 break;
9540                         case GETOPT_VAL_CHECK_CSUM:
9541                                 check_data_csum = 1;
9542                                 break;
9543                         case GETOPT_VAL_MODE:
9544                                 check_mode = parse_check_mode(optarg);
9545                                 if (check_mode == CHECK_MODE_UNKNOWN) {
9546                                         error("unknown mode: %s", optarg);
9547                                         exit(1);
9548                                 }
9549                                 break;
9550                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
9551                                 if (strcmp(optarg, "v1") == 0) {
9552                                         clear_space_cache = 1;
9553                                 } else if (strcmp(optarg, "v2") == 0) {
9554                                         clear_space_cache = 2;
9555                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
9556                                 } else {
9557                                         error(
9558                 "invalid argument to --clear-space-cache, must be v1 or v2");
9559                                         exit(1);
9560                                 }
9561                                 ctree_flags |= OPEN_CTREE_WRITES;
9562                                 break;
9563                         case GETOPT_VAL_FORCE:
9564                                 force = 1;
9565                                 break;
9566                 }
9567         }
9568
9569         if (check_argc_exact(argc - optind, 1))
9570                 usage(cmd_check_usage);
9571
9572         if (ctx.progress_enabled) {
9573                 ctx.tp = TASK_NOTHING;
9574                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9575         }
9576
9577         /* This check is the only reason for --readonly to exist */
9578         if (readonly && repair) {
9579                 error("repair options are not compatible with --readonly");
9580                 exit(1);
9581         }
9582
9583         /*
9584          * experimental and dangerous
9585          */
9586         if (repair && check_mode == CHECK_MODE_LOWMEM)
9587                 warning("low-memory mode repair support is only partial");
9588
9589         radix_tree_init();
9590         cache_tree_init(&root_cache);
9591
9592         ret = check_mounted(argv[optind]);
9593         if (!force) {
9594                 if (ret < 0) {
9595                         error("could not check mount status: %s",
9596                                         strerror(-ret));
9597                         err |= !!ret;
9598                         goto err_out;
9599                 } else if (ret) {
9600                         error(
9601 "%s is currently mounted, use --force if you really intend to check the filesystem",
9602                                 argv[optind]);
9603                         ret = -EBUSY;
9604                         err |= !!ret;
9605                         goto err_out;
9606                 }
9607         } else {
9608                 if (repair) {
9609                         error("repair and --force is not yet supported");
9610                         ret = 1;
9611                         err |= !!ret;
9612                         goto err_out;
9613                 }
9614                 if (ret < 0) {
9615                         warning(
9616 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
9617                                 argv[optind]);
9618                 } else if (ret) {
9619                         warning(
9620                         "filesystem mounted, continuing because of --force");
9621                 }
9622                 /* A block device is mounted in exclusive mode by kernel */
9623                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
9624         }
9625
9626         /* only allow partial opening under repair mode */
9627         if (repair)
9628                 ctree_flags |= OPEN_CTREE_PARTIAL;
9629
9630         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9631                                   chunk_root_bytenr, ctree_flags);
9632         if (!info) {
9633                 error("cannot open file system");
9634                 ret = -EIO;
9635                 err |= !!ret;
9636                 goto err_out;
9637         }
9638
9639         global_info = info;
9640         root = info->fs_root;
9641         uuid_unparse(info->super_copy->fsid, uuidbuf);
9642
9643         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9644
9645         /*
9646          * Check the bare minimum before starting anything else that could rely
9647          * on it, namely the tree roots, any local consistency checks
9648          */
9649         if (!extent_buffer_uptodate(info->tree_root->node) ||
9650             !extent_buffer_uptodate(info->dev_root->node) ||
9651             !extent_buffer_uptodate(info->chunk_root->node)) {
9652                 error("critical roots corrupted, unable to check the filesystem");
9653                 err |= !!ret;
9654                 ret = -EIO;
9655                 goto close_out;
9656         }
9657
9658         if (clear_space_cache) {
9659                 ret = do_clear_free_space_cache(info, clear_space_cache);
9660                 err |= !!ret;
9661                 goto close_out;
9662         }
9663
9664         /*
9665          * repair mode will force us to commit transaction which
9666          * will make us fail to load log tree when mounting.
9667          */
9668         if (repair && btrfs_super_log_root(info->super_copy)) {
9669                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
9670                 if (!ret) {
9671                         ret = 1;
9672                         err |= !!ret;
9673                         goto close_out;
9674                 }
9675                 ret = zero_log_tree(root);
9676                 err |= !!ret;
9677                 if (ret) {
9678                         error("failed to zero log tree: %d", ret);
9679                         goto close_out;
9680                 }
9681         }
9682
9683         if (qgroup_report) {
9684                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9685                        uuidbuf);
9686                 ret = qgroup_verify_all(info);
9687                 err |= !!ret;
9688                 if (ret == 0)
9689                         report_qgroups(1);
9690                 goto close_out;
9691         }
9692         if (subvolid) {
9693                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9694                        subvolid, argv[optind], uuidbuf);
9695                 ret = print_extent_state(info, subvolid);
9696                 err |= !!ret;
9697                 goto close_out;
9698         }
9699
9700         if (init_extent_tree || init_csum_tree) {
9701                 struct btrfs_trans_handle *trans;
9702
9703                 trans = btrfs_start_transaction(info->extent_root, 0);
9704                 if (IS_ERR(trans)) {
9705                         error("error starting transaction");
9706                         ret = PTR_ERR(trans);
9707                         err |= !!ret;
9708                         goto close_out;
9709                 }
9710
9711                 if (init_extent_tree) {
9712                         printf("Creating a new extent tree\n");
9713                         ret = reinit_extent_tree(trans, info);
9714                         err |= !!ret;
9715                         if (ret)
9716                                 goto close_out;
9717                 }
9718
9719                 if (init_csum_tree) {
9720                         printf("Reinitialize checksum tree\n");
9721                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9722                         if (ret) {
9723                                 error("checksum tree initialization failed: %d",
9724                                                 ret);
9725                                 ret = -EIO;
9726                                 err |= !!ret;
9727                                 goto close_out;
9728                         }
9729
9730                         ret = fill_csum_tree(trans, info->csum_root,
9731                                              init_extent_tree);
9732                         err |= !!ret;
9733                         if (ret) {
9734                                 error("checksum tree refilling failed: %d", ret);
9735                                 return -EIO;
9736                         }
9737                 }
9738                 /*
9739                  * Ok now we commit and run the normal fsck, which will add
9740                  * extent entries for all of the items it finds.
9741                  */
9742                 ret = btrfs_commit_transaction(trans, info->extent_root);
9743                 err |= !!ret;
9744                 if (ret)
9745                         goto close_out;
9746         }
9747         if (!extent_buffer_uptodate(info->extent_root->node)) {
9748                 error("critical: extent_root, unable to check the filesystem");
9749                 ret = -EIO;
9750                 err |= !!ret;
9751                 goto close_out;
9752         }
9753         if (!extent_buffer_uptodate(info->csum_root->node)) {
9754                 error("critical: csum_root, unable to check the filesystem");
9755                 ret = -EIO;
9756                 err |= !!ret;
9757                 goto close_out;
9758         }
9759
9760         if (!init_extent_tree) {
9761                 ret = repair_root_items(info);
9762                 if (ret < 0) {
9763                         err = !!ret;
9764                         error("failed to repair root items: %s", strerror(-ret));
9765                         goto close_out;
9766                 }
9767                 if (repair) {
9768                         fprintf(stderr, "Fixed %d roots.\n", ret);
9769                         ret = 0;
9770                 } else if (ret > 0) {
9771                         fprintf(stderr,
9772                                 "Found %d roots with an outdated root item.\n",
9773                                 ret);
9774                         fprintf(stderr,
9775         "Please run a filesystem check with the option --repair to fix them.\n");
9776                         ret = 1;
9777                         err |= ret;
9778                         goto close_out;
9779                 }
9780         }
9781
9782         ret = do_check_chunks_and_extents(info);
9783         err |= !!ret;
9784         if (ret)
9785                 error(
9786                 "errors found in extent allocation tree or chunk allocation");
9787
9788         /* Only re-check super size after we checked and repaired the fs */
9789         err |= !is_super_size_valid(info);
9790
9791         if (!ctx.progress_enabled) {
9792                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
9793                         fprintf(stderr, "checking free space tree\n");
9794                 else
9795                         fprintf(stderr, "checking free space cache\n");
9796         }
9797         ret = check_space_cache(root);
9798         err |= !!ret;
9799         if (ret) {
9800                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
9801                         error("errors found in free space tree");
9802                 else
9803                         error("errors found in free space cache");
9804                 goto out;
9805         }
9806
9807         /*
9808          * We used to have to have these hole extents in between our real
9809          * extents so if we don't have this flag set we need to make sure there
9810          * are no gaps in the file extents for inodes, otherwise we can just
9811          * ignore it when this happens.
9812          */
9813         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
9814         ret = do_check_fs_roots(info, &root_cache);
9815         err |= !!ret;
9816         if (ret) {
9817                 error("errors found in fs roots");
9818                 goto out;
9819         }
9820
9821         fprintf(stderr, "checking csums\n");
9822         ret = check_csums(root);
9823         err |= !!ret;
9824         if (ret) {
9825                 error("errors found in csum tree");
9826                 goto out;
9827         }
9828
9829         fprintf(stderr, "checking root refs\n");
9830         /* For low memory mode, check_fs_roots_v2 handles root refs */
9831         if (check_mode != CHECK_MODE_LOWMEM) {
9832                 ret = check_root_refs(root, &root_cache);
9833                 err |= !!ret;
9834                 if (ret) {
9835                         error("errors found in root refs");
9836                         goto out;
9837                 }
9838         }
9839
9840         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9841                 struct extent_buffer *eb;
9842
9843                 eb = list_first_entry(&root->fs_info->recow_ebs,
9844                                       struct extent_buffer, recow);
9845                 list_del_init(&eb->recow);
9846                 ret = recow_extent_buffer(root, eb);
9847                 err |= !!ret;
9848                 if (ret) {
9849                         error("fails to fix transid errors");
9850                         break;
9851                 }
9852         }
9853
9854         while (!list_empty(&delete_items)) {
9855                 struct bad_item *bad;
9856
9857                 bad = list_first_entry(&delete_items, struct bad_item, list);
9858                 list_del_init(&bad->list);
9859                 if (repair) {
9860                         ret = delete_bad_item(root, bad);
9861                         err |= !!ret;
9862                 }
9863                 free(bad);
9864         }
9865
9866         if (info->quota_enabled) {
9867                 fprintf(stderr, "checking quota groups\n");
9868                 ret = qgroup_verify_all(info);
9869                 err |= !!ret;
9870                 if (ret) {
9871                         error("failed to check quota groups");
9872                         goto out;
9873                 }
9874                 report_qgroups(0);
9875                 ret = repair_qgroups(info, &qgroups_repaired);
9876                 err |= !!ret;
9877                 if (err) {
9878                         error("failed to repair quota groups");
9879                         goto out;
9880                 }
9881                 ret = 0;
9882         }
9883
9884         if (!list_empty(&root->fs_info->recow_ebs)) {
9885                 error("transid errors in file system");
9886                 ret = 1;
9887                 err |= !!ret;
9888         }
9889 out:
9890         printf("found %llu bytes used, ",
9891                (unsigned long long)bytes_used);
9892         if (err)
9893                 printf("error(s) found\n");
9894         else
9895                 printf("no error found\n");
9896         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9897         printf("total tree bytes: %llu\n",
9898                (unsigned long long)total_btree_bytes);
9899         printf("total fs tree bytes: %llu\n",
9900                (unsigned long long)total_fs_tree_bytes);
9901         printf("total extent tree bytes: %llu\n",
9902                (unsigned long long)total_extent_tree_bytes);
9903         printf("btree space waste bytes: %llu\n",
9904                (unsigned long long)btree_space_waste);
9905         printf("file data blocks allocated: %llu\n referenced %llu\n",
9906                 (unsigned long long)data_bytes_allocated,
9907                 (unsigned long long)data_bytes_referenced);
9908
9909         free_qgroup_counts();
9910         free_root_recs_tree(&root_cache);
9911 close_out:
9912         close_ctree(root);
9913 err_out:
9914         if (ctx.progress_enabled)
9915                 task_deinit(ctx.info);
9916
9917         return err;
9918 }