btrfs-progs: check: rename files after moving code
[platform/upstream/btrfs-progs.git] / check / main.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46 #include "check/mode-common.h"
47 #include "check/mode-original.h"
48 #include "check/mode-lowmem.h"
49
/*
 * Phases that the progress indicator can report.
 *
 * print_status_check() uses the value as an index into its message table,
 * so every phase that has a message must be listed before TASK_NOTHING.
 */
enum task_position {
	TASK_EXTENTS = 0,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	/* Must stay the last element; print_status_check() prints nothing for it. */
	TASK_NOTHING,
};
56
57 struct task_ctx {
58         int progress_enabled;
59         enum task_position tp;
60
61         struct task_info *info;
62 };
63
64 u64 bytes_used = 0;
65 u64 total_csum_bytes = 0;
66 u64 total_btree_bytes = 0;
67 u64 total_fs_tree_bytes = 0;
68 u64 total_extent_tree_bytes = 0;
69 u64 btree_space_waste = 0;
70 u64 data_bytes_allocated = 0;
71 u64 data_bytes_referenced = 0;
72 LIST_HEAD(duplicate_extents);
73 LIST_HEAD(delete_items);
74 int no_holes = 0;
75 int init_extent_tree = 0;
76 int check_data_csum = 0;
77 struct btrfs_fs_info *global_info;
78 struct task_ctx ctx = { 0 };
79 struct cache_tree *roots_info_cache = NULL;
80
/* Check implementation to run; parse_check_mode() maps names to these. */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	/* Returned by parse_check_mode() for a name it does not recognise. */
	CHECK_MODE_UNKNOWN,
	/* Alias for the mode used when none is chosen; not a distinct value. */
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Currently selected mode; starts out as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
89
90 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
91 {
92         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
93         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
94         struct data_backref *back1 = to_data_backref(ext1);
95         struct data_backref *back2 = to_data_backref(ext2);
96
97         WARN_ON(!ext1->is_data);
98         WARN_ON(!ext2->is_data);
99
100         /* parent and root are a union, so this covers both */
101         if (back1->parent > back2->parent)
102                 return 1;
103         if (back1->parent < back2->parent)
104                 return -1;
105
106         /* This is a full backref and the parents match. */
107         if (back1->node.full_backref)
108                 return 0;
109
110         if (back1->owner > back2->owner)
111                 return 1;
112         if (back1->owner < back2->owner)
113                 return -1;
114
115         if (back1->offset > back2->offset)
116                 return 1;
117         if (back1->offset < back2->offset)
118                 return -1;
119
120         if (back1->found_ref && back2->found_ref) {
121                 if (back1->disk_bytenr > back2->disk_bytenr)
122                         return 1;
123                 if (back1->disk_bytenr < back2->disk_bytenr)
124                         return -1;
125
126                 if (back1->bytes > back2->bytes)
127                         return 1;
128                 if (back1->bytes < back2->bytes)
129                         return -1;
130         }
131
132         return 0;
133 }
134
135 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
136 {
137         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
138         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
139         struct tree_backref *back1 = to_tree_backref(ext1);
140         struct tree_backref *back2 = to_tree_backref(ext2);
141
142         WARN_ON(ext1->is_data);
143         WARN_ON(ext2->is_data);
144
145         /* parent and root are a union, so this covers both */
146         if (back1->parent > back2->parent)
147                 return 1;
148         if (back1->parent < back2->parent)
149                 return -1;
150
151         return 0;
152 }
153
154 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
155 {
156         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
157         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
158
159         if (ext1->is_data > ext2->is_data)
160                 return 1;
161
162         if (ext1->is_data < ext2->is_data)
163                 return -1;
164
165         if (ext1->full_backref > ext2->full_backref)
166                 return 1;
167         if (ext1->full_backref < ext2->full_backref)
168                 return -1;
169
170         if (ext1->is_data)
171                 return compare_data_backref(node1, node2);
172         else
173                 return compare_tree_backref(node1, node2);
174 }
175
176
177 static void *print_status_check(void *p)
178 {
179         struct task_ctx *priv = p;
180         const char work_indicator[] = { '.', 'o', 'O', 'o' };
181         uint32_t count = 0;
182         static char *task_position_string[] = {
183                 "checking extents",
184                 "checking free space cache",
185                 "checking fs roots",
186         };
187
188         task_period_start(priv->info, 1000 /* 1s */);
189
190         if (priv->tp == TASK_NOTHING)
191                 return NULL;
192
193         while (1) {
194                 printf("%s [%c]\r", task_position_string[priv->tp],
195                                 work_indicator[count % 4]);
196                 count++;
197                 fflush(stdout);
198                 task_period_wait(priv->info);
199         }
200         return NULL;
201 }
202
/*
 * Finish the progress line: emit a newline on stdout and flush it.
 *
 * @p is unused. Always returns 0.
 */
static int print_status_return(void *p)
{
	(void)p;

	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
210
211 static enum btrfs_check_mode parse_check_mode(const char *str)
212 {
213         if (strcmp(str, "lowmem") == 0)
214                 return CHECK_MODE_LOWMEM;
215         if (strcmp(str, "orig") == 0)
216                 return CHECK_MODE_ORIGINAL;
217         if (strcmp(str, "original") == 0)
218                 return CHECK_MODE_ORIGINAL;
219
220         return CHECK_MODE_UNKNOWN;
221 }
222
223 /* Compatible function to allow reuse of old codes */
224 static u64 first_extent_gap(struct rb_root *holes)
225 {
226         struct file_extent_hole *hole;
227
228         if (RB_EMPTY_ROOT(holes))
229                 return (u64)-1;
230
231         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
232         return hole->start;
233 }
234
235 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
236 {
237         struct file_extent_hole *hole1;
238         struct file_extent_hole *hole2;
239
240         hole1 = rb_entry(node1, struct file_extent_hole, node);
241         hole2 = rb_entry(node2, struct file_extent_hole, node);
242
243         if (hole1->start > hole2->start)
244                 return -1;
245         if (hole1->start < hole2->start)
246                 return 1;
247         /* Now hole1->start == hole2->start */
248         if (hole1->len >= hole2->len)
249                 /*
250                  * Hole 1 will be merge center
251                  * Same hole will be merged later
252                  */
253                 return -1;
254         /* Hole 2 will be merge center */
255         return 1;
256 }
257
258 /*
259  * Add a hole to the record
260  *
261  * This will do hole merge for copy_file_extent_holes(),
262  * which will ensure there won't be continuous holes.
263  */
264 static int add_file_extent_hole(struct rb_root *holes,
265                                 u64 start, u64 len)
266 {
267         struct file_extent_hole *hole;
268         struct file_extent_hole *prev = NULL;
269         struct file_extent_hole *next = NULL;
270
271         hole = malloc(sizeof(*hole));
272         if (!hole)
273                 return -ENOMEM;
274         hole->start = start;
275         hole->len = len;
276         /* Since compare will not return 0, no -EEXIST will happen */
277         rb_insert(holes, &hole->node, compare_hole);
278
279         /* simple merge with previous hole */
280         if (rb_prev(&hole->node))
281                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
282                                 node);
283         if (prev && prev->start + prev->len >= hole->start) {
284                 hole->len = hole->start + hole->len - prev->start;
285                 hole->start = prev->start;
286                 rb_erase(&prev->node, holes);
287                 free(prev);
288                 prev = NULL;
289         }
290
291         /* iterate merge with next holes */
292         while (1) {
293                 if (!rb_next(&hole->node))
294                         break;
295                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
296                                         node);
297                 if (hole->start + hole->len >= next->start) {
298                         if (hole->start + hole->len <= next->start + next->len)
299                                 hole->len = next->start + next->len -
300                                             hole->start;
301                         rb_erase(&next->node, holes);
302                         free(next);
303                         next = NULL;
304                 } else
305                         break;
306         }
307         return 0;
308 }
309
310 static int compare_hole_range(struct rb_node *node, void *data)
311 {
312         struct file_extent_hole *hole;
313         u64 start;
314
315         hole = (struct file_extent_hole *)data;
316         start = hole->start;
317
318         hole = rb_entry(node, struct file_extent_hole, node);
319         if (start < hole->start)
320                 return -1;
321         if (start >= hole->start && start < hole->start + hole->len)
322                 return 0;
323         return 1;
324 }
325
326 /*
327  * Delete a hole in the record
328  *
329  * This will do the hole split and is much restrict than add.
330  */
331 static int del_file_extent_hole(struct rb_root *holes,
332                                 u64 start, u64 len)
333 {
334         struct file_extent_hole *hole;
335         struct file_extent_hole tmp;
336         u64 prev_start = 0;
337         u64 prev_len = 0;
338         u64 next_start = 0;
339         u64 next_len = 0;
340         struct rb_node *node;
341         int have_prev = 0;
342         int have_next = 0;
343         int ret = 0;
344
345         tmp.start = start;
346         tmp.len = len;
347         node = rb_search(holes, &tmp, compare_hole_range, NULL);
348         if (!node)
349                 return -EEXIST;
350         hole = rb_entry(node, struct file_extent_hole, node);
351         if (start + len > hole->start + hole->len)
352                 return -EEXIST;
353
354         /*
355          * Now there will be no overlap, delete the hole and re-add the
356          * split(s) if they exists.
357          */
358         if (start > hole->start) {
359                 prev_start = hole->start;
360                 prev_len = start - hole->start;
361                 have_prev = 1;
362         }
363         if (hole->start + hole->len > start + len) {
364                 next_start = start + len;
365                 next_len = hole->start + hole->len - start - len;
366                 have_next = 1;
367         }
368         rb_erase(node, holes);
369         free(hole);
370         if (have_prev) {
371                 ret = add_file_extent_hole(holes, prev_start, prev_len);
372                 if (ret < 0)
373                         return ret;
374         }
375         if (have_next) {
376                 ret = add_file_extent_hole(holes, next_start, next_len);
377                 if (ret < 0)
378                         return ret;
379         }
380         return 0;
381 }
382
383 static int copy_file_extent_holes(struct rb_root *dst,
384                                   struct rb_root *src)
385 {
386         struct file_extent_hole *hole;
387         struct rb_node *node;
388         int ret = 0;
389
390         node = rb_first(src);
391         while (node) {
392                 hole = rb_entry(node, struct file_extent_hole, node);
393                 ret = add_file_extent_hole(dst, hole->start, hole->len);
394                 if (ret)
395                         break;
396                 node = rb_next(node);
397         }
398         return ret;
399 }
400
401 static void free_file_extent_holes(struct rb_root *holes)
402 {
403         struct rb_node *node;
404         struct file_extent_hole *hole;
405
406         node = rb_first(holes);
407         while (node) {
408                 hole = rb_entry(node, struct file_extent_hole, node);
409                 rb_erase(node, holes);
410                 free(hole);
411                 node = rb_first(holes);
412         }
413 }
414
415 static void record_root_in_trans(struct btrfs_trans_handle *trans,
416                                  struct btrfs_root *root)
417 {
418         if (root->last_trans != trans->transid) {
419                 root->track_dirty = 1;
420                 root->last_trans = trans->transid;
421                 root->commit_root = root->node;
422                 extent_buffer_get(root->node);
423         }
424 }
425
426 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
427 {
428         struct device_record *rec1;
429         struct device_record *rec2;
430
431         rec1 = rb_entry(node1, struct device_record, node);
432         rec2 = rb_entry(node2, struct device_record, node);
433         if (rec1->devid > rec2->devid)
434                 return -1;
435         else if (rec1->devid < rec2->devid)
436                 return 1;
437         else
438                 return 0;
439 }
440
441 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
442 {
443         struct inode_record *rec;
444         struct inode_backref *backref;
445         struct inode_backref *orig;
446         struct inode_backref *tmp;
447         struct orphan_data_extent *src_orphan;
448         struct orphan_data_extent *dst_orphan;
449         struct rb_node *rb;
450         size_t size;
451         int ret;
452
453         rec = malloc(sizeof(*rec));
454         if (!rec)
455                 return ERR_PTR(-ENOMEM);
456         memcpy(rec, orig_rec, sizeof(*rec));
457         rec->refs = 1;
458         INIT_LIST_HEAD(&rec->backrefs);
459         INIT_LIST_HEAD(&rec->orphan_extents);
460         rec->holes = RB_ROOT;
461
462         list_for_each_entry(orig, &orig_rec->backrefs, list) {
463                 size = sizeof(*orig) + orig->namelen + 1;
464                 backref = malloc(size);
465                 if (!backref) {
466                         ret = -ENOMEM;
467                         goto cleanup;
468                 }
469                 memcpy(backref, orig, size);
470                 list_add_tail(&backref->list, &rec->backrefs);
471         }
472         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
473                 dst_orphan = malloc(sizeof(*dst_orphan));
474                 if (!dst_orphan) {
475                         ret = -ENOMEM;
476                         goto cleanup;
477                 }
478                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
479                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
480         }
481         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
482         if (ret < 0)
483                 goto cleanup_rb;
484
485         return rec;
486
487 cleanup_rb:
488         rb = rb_first(&rec->holes);
489         while (rb) {
490                 struct file_extent_hole *hole;
491
492                 hole = rb_entry(rb, struct file_extent_hole, node);
493                 rb = rb_next(rb);
494                 free(hole);
495         }
496
497 cleanup:
498         if (!list_empty(&rec->backrefs))
499                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
500                         list_del(&orig->list);
501                         free(orig);
502                 }
503
504         if (!list_empty(&rec->orphan_extents))
505                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
506                         list_del(&orig->list);
507                         free(orig);
508                 }
509
510         free(rec);
511
512         return ERR_PTR(ret);
513 }
514
515 static void print_orphan_data_extents(struct list_head *orphan_extents,
516                                       u64 objectid)
517 {
518         struct orphan_data_extent *orphan;
519
520         if (list_empty(orphan_extents))
521                 return;
522         printf("The following data extent is lost in tree %llu:\n",
523                objectid);
524         list_for_each_entry(orphan, orphan_extents, list) {
525                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
526                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
527                        orphan->disk_len);
528         }
529 }
530
531 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
532 {
533         u64 root_objectid = root->root_key.objectid;
534         int errors = rec->errors;
535
536         if (!errors)
537                 return;
538         /* reloc root errors, we print its corresponding fs root objectid*/
539         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
540                 root_objectid = root->root_key.offset;
541                 fprintf(stderr, "reloc");
542         }
543         fprintf(stderr, "root %llu inode %llu errors %x",
544                 (unsigned long long) root_objectid,
545                 (unsigned long long) rec->ino, rec->errors);
546
547         if (errors & I_ERR_NO_INODE_ITEM)
548                 fprintf(stderr, ", no inode item");
549         if (errors & I_ERR_NO_ORPHAN_ITEM)
550                 fprintf(stderr, ", no orphan item");
551         if (errors & I_ERR_DUP_INODE_ITEM)
552                 fprintf(stderr, ", dup inode item");
553         if (errors & I_ERR_DUP_DIR_INDEX)
554                 fprintf(stderr, ", dup dir index");
555         if (errors & I_ERR_ODD_DIR_ITEM)
556                 fprintf(stderr, ", odd dir item");
557         if (errors & I_ERR_ODD_FILE_EXTENT)
558                 fprintf(stderr, ", odd file extent");
559         if (errors & I_ERR_BAD_FILE_EXTENT)
560                 fprintf(stderr, ", bad file extent");
561         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
562                 fprintf(stderr, ", file extent overlap");
563         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
564                 fprintf(stderr, ", file extent discount");
565         if (errors & I_ERR_DIR_ISIZE_WRONG)
566                 fprintf(stderr, ", dir isize wrong");
567         if (errors & I_ERR_FILE_NBYTES_WRONG)
568                 fprintf(stderr, ", nbytes wrong");
569         if (errors & I_ERR_ODD_CSUM_ITEM)
570                 fprintf(stderr, ", odd csum item");
571         if (errors & I_ERR_SOME_CSUM_MISSING)
572                 fprintf(stderr, ", some csum missing");
573         if (errors & I_ERR_LINK_COUNT_WRONG)
574                 fprintf(stderr, ", link count wrong");
575         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
576                 fprintf(stderr, ", orphan file extent");
577         fprintf(stderr, "\n");
578         /* Print the orphan extents if needed */
579         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
580                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
581
582         /* Print the holes if needed */
583         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
584                 struct file_extent_hole *hole;
585                 struct rb_node *node;
586                 int found = 0;
587
588                 node = rb_first(&rec->holes);
589                 fprintf(stderr, "Found file extent holes:\n");
590                 while (node) {
591                         found = 1;
592                         hole = rb_entry(node, struct file_extent_hole, node);
593                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
594                                 hole->start, hole->len);
595                         node = rb_next(node);
596                 }
597                 if (!found)
598                         fprintf(stderr, "\tstart: 0, len: %llu\n",
599                                 round_up(rec->isize,
600                                          root->fs_info->sectorsize));
601         }
602 }
603
604 static void print_ref_error(int errors)
605 {
606         if (errors & REF_ERR_NO_DIR_ITEM)
607                 fprintf(stderr, ", no dir item");
608         if (errors & REF_ERR_NO_DIR_INDEX)
609                 fprintf(stderr, ", no dir index");
610         if (errors & REF_ERR_NO_INODE_REF)
611                 fprintf(stderr, ", no inode ref");
612         if (errors & REF_ERR_DUP_DIR_ITEM)
613                 fprintf(stderr, ", dup dir item");
614         if (errors & REF_ERR_DUP_DIR_INDEX)
615                 fprintf(stderr, ", dup dir index");
616         if (errors & REF_ERR_DUP_INODE_REF)
617                 fprintf(stderr, ", dup inode ref");
618         if (errors & REF_ERR_INDEX_UNMATCH)
619                 fprintf(stderr, ", index mismatch");
620         if (errors & REF_ERR_FILETYPE_UNMATCH)
621                 fprintf(stderr, ", filetype mismatch");
622         if (errors & REF_ERR_NAME_TOO_LONG)
623                 fprintf(stderr, ", name too long");
624         if (errors & REF_ERR_NO_ROOT_REF)
625                 fprintf(stderr, ", no root ref");
626         if (errors & REF_ERR_NO_ROOT_BACKREF)
627                 fprintf(stderr, ", no root backref");
628         if (errors & REF_ERR_DUP_ROOT_REF)
629                 fprintf(stderr, ", dup root ref");
630         if (errors & REF_ERR_DUP_ROOT_BACKREF)
631                 fprintf(stderr, ", dup root backref");
632         fprintf(stderr, "\n");
633 }
634
635 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
636                                           u64 ino, int mod)
637 {
638         struct ptr_node *node;
639         struct cache_extent *cache;
640         struct inode_record *rec = NULL;
641         int ret;
642
643         cache = lookup_cache_extent(inode_cache, ino, 1);
644         if (cache) {
645                 node = container_of(cache, struct ptr_node, cache);
646                 rec = node->data;
647                 if (mod && rec->refs > 1) {
648                         node->data = clone_inode_rec(rec);
649                         if (IS_ERR(node->data))
650                                 return node->data;
651                         rec->refs--;
652                         rec = node->data;
653                 }
654         } else if (mod) {
655                 rec = calloc(1, sizeof(*rec));
656                 if (!rec)
657                         return ERR_PTR(-ENOMEM);
658                 rec->ino = ino;
659                 rec->extent_start = (u64)-1;
660                 rec->refs = 1;
661                 INIT_LIST_HEAD(&rec->backrefs);
662                 INIT_LIST_HEAD(&rec->orphan_extents);
663                 rec->holes = RB_ROOT;
664
665                 node = malloc(sizeof(*node));
666                 if (!node) {
667                         free(rec);
668                         return ERR_PTR(-ENOMEM);
669                 }
670                 node->cache.start = ino;
671                 node->cache.size = 1;
672                 node->data = rec;
673
674                 if (ino == BTRFS_FREE_INO_OBJECTID)
675                         rec->found_link = 1;
676
677                 ret = insert_cache_extent(inode_cache, &node->cache);
678                 if (ret)
679                         return ERR_PTR(-EEXIST);
680         }
681         return rec;
682 }
683
684 static void free_orphan_data_extents(struct list_head *orphan_extents)
685 {
686         struct orphan_data_extent *orphan;
687
688         while (!list_empty(orphan_extents)) {
689                 orphan = list_entry(orphan_extents->next,
690                                     struct orphan_data_extent, list);
691                 list_del(&orphan->list);
692                 free(orphan);
693         }
694 }
695
696 static void free_inode_rec(struct inode_record *rec)
697 {
698         struct inode_backref *backref;
699
700         if (--rec->refs > 0)
701                 return;
702
703         while (!list_empty(&rec->backrefs)) {
704                 backref = to_inode_backref(rec->backrefs.next);
705                 list_del(&backref->list);
706                 free(backref);
707         }
708         free_orphan_data_extents(&rec->orphan_extents);
709         free_file_extent_holes(&rec->holes);
710         free(rec);
711 }
712
713 static int can_free_inode_rec(struct inode_record *rec)
714 {
715         if (!rec->errors && rec->checked && rec->found_inode_item &&
716             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
717                 return 1;
718         return 0;
719 }
720
721 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
722                                  struct inode_record *rec)
723 {
724         struct cache_extent *cache;
725         struct inode_backref *tmp, *backref;
726         struct ptr_node *node;
727         u8 filetype;
728
729         if (!rec->found_inode_item)
730                 return;
731
732         filetype = imode_to_type(rec->imode);
733         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
734                 if (backref->found_dir_item && backref->found_dir_index) {
735                         if (backref->filetype != filetype)
736                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
737                         if (!backref->errors && backref->found_inode_ref &&
738                             rec->nlink == rec->found_link) {
739                                 list_del(&backref->list);
740                                 free(backref);
741                         }
742                 }
743         }
744
745         if (!rec->checked || rec->merging)
746                 return;
747
748         if (S_ISDIR(rec->imode)) {
749                 if (rec->found_size != rec->isize)
750                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
751                 if (rec->found_file_extent)
752                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
753         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
754                 if (rec->found_dir_item)
755                         rec->errors |= I_ERR_ODD_DIR_ITEM;
756                 if (rec->found_size != rec->nbytes)
757                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
758                 if (rec->nlink > 0 && !no_holes &&
759                     (rec->extent_end < rec->isize ||
760                      first_extent_gap(&rec->holes) < rec->isize))
761                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
762         }
763
764         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
765                 if (rec->found_csum_item && rec->nodatasum)
766                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
767                 if (rec->some_csum_missing && !rec->nodatasum)
768                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
769         }
770
771         BUG_ON(rec->refs != 1);
772         if (can_free_inode_rec(rec)) {
773                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
774                 node = container_of(cache, struct ptr_node, cache);
775                 BUG_ON(node->data != rec);
776                 remove_cache_extent(inode_cache, &node->cache);
777                 free(node);
778                 free_inode_rec(rec);
779         }
780 }
781
782 static int check_orphan_item(struct btrfs_root *root, u64 ino)
783 {
784         struct btrfs_path path;
785         struct btrfs_key key;
786         int ret;
787
788         key.objectid = BTRFS_ORPHAN_OBJECTID;
789         key.type = BTRFS_ORPHAN_ITEM_KEY;
790         key.offset = ino;
791
792         btrfs_init_path(&path);
793         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
794         btrfs_release_path(&path);
795         if (ret > 0)
796                 ret = -ENOENT;
797         return ret;
798 }
799
800 static int process_inode_item(struct extent_buffer *eb,
801                               int slot, struct btrfs_key *key,
802                               struct shared_node *active_node)
803 {
804         struct inode_record *rec;
805         struct btrfs_inode_item *item;
806
807         rec = active_node->current;
808         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
809         if (rec->found_inode_item) {
810                 rec->errors |= I_ERR_DUP_INODE_ITEM;
811                 return 1;
812         }
813         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
814         rec->nlink = btrfs_inode_nlink(eb, item);
815         rec->isize = btrfs_inode_size(eb, item);
816         rec->nbytes = btrfs_inode_nbytes(eb, item);
817         rec->imode = btrfs_inode_mode(eb, item);
818         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
819                 rec->nodatasum = 1;
820         rec->found_inode_item = 1;
821         if (rec->nlink == 0)
822                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
823         maybe_free_inode_rec(&active_node->inode_cache, rec);
824         return 0;
825 }
826
827 static struct inode_backref *get_inode_backref(struct inode_record *rec,
828                                                 const char *name,
829                                                 int namelen, u64 dir)
830 {
831         struct inode_backref *backref;
832
833         list_for_each_entry(backref, &rec->backrefs, list) {
834                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
835                         break;
836                 if (backref->dir != dir || backref->namelen != namelen)
837                         continue;
838                 if (memcmp(name, backref->name, namelen))
839                         continue;
840                 return backref;
841         }
842
843         backref = malloc(sizeof(*backref) + namelen + 1);
844         if (!backref)
845                 return NULL;
846         memset(backref, 0, sizeof(*backref));
847         backref->dir = dir;
848         backref->namelen = namelen;
849         memcpy(backref->name, name, namelen);
850         backref->name[namelen] = '\0';
851         list_add_tail(&backref->list, &rec->backrefs);
852         return backref;
853 }
854
855 static int add_inode_backref(struct cache_tree *inode_cache,
856                              u64 ino, u64 dir, u64 index,
857                              const char *name, int namelen,
858                              u8 filetype, u8 itemtype, int errors)
859 {
860         struct inode_record *rec;
861         struct inode_backref *backref;
862
863         rec = get_inode_rec(inode_cache, ino, 1);
864         BUG_ON(IS_ERR(rec));
865         backref = get_inode_backref(rec, name, namelen, dir);
866         BUG_ON(!backref);
867         if (errors)
868                 backref->errors |= errors;
869         if (itemtype == BTRFS_DIR_INDEX_KEY) {
870                 if (backref->found_dir_index)
871                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
872                 if (backref->found_inode_ref && backref->index != index)
873                         backref->errors |= REF_ERR_INDEX_UNMATCH;
874                 if (backref->found_dir_item && backref->filetype != filetype)
875                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
876
877                 backref->index = index;
878                 backref->filetype = filetype;
879                 backref->found_dir_index = 1;
880         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
881                 rec->found_link++;
882                 if (backref->found_dir_item)
883                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
884                 if (backref->found_dir_index && backref->filetype != filetype)
885                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
886
887                 backref->filetype = filetype;
888                 backref->found_dir_item = 1;
889         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
890                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
891                 if (backref->found_inode_ref)
892                         backref->errors |= REF_ERR_DUP_INODE_REF;
893                 if (backref->found_dir_index && backref->index != index)
894                         backref->errors |= REF_ERR_INDEX_UNMATCH;
895                 else
896                         backref->index = index;
897
898                 backref->ref_type = itemtype;
899                 backref->found_inode_ref = 1;
900         } else {
901                 BUG_ON(1);
902         }
903
904         maybe_free_inode_rec(inode_cache, rec);
905         return 0;
906 }
907
908 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
909                             struct cache_tree *dst_cache)
910 {
911         struct inode_backref *backref;
912         u32 dir_count = 0;
913         int ret = 0;
914
915         dst->merging = 1;
916         list_for_each_entry(backref, &src->backrefs, list) {
917                 if (backref->found_dir_index) {
918                         add_inode_backref(dst_cache, dst->ino, backref->dir,
919                                         backref->index, backref->name,
920                                         backref->namelen, backref->filetype,
921                                         BTRFS_DIR_INDEX_KEY, backref->errors);
922                 }
923                 if (backref->found_dir_item) {
924                         dir_count++;
925                         add_inode_backref(dst_cache, dst->ino,
926                                         backref->dir, 0, backref->name,
927                                         backref->namelen, backref->filetype,
928                                         BTRFS_DIR_ITEM_KEY, backref->errors);
929                 }
930                 if (backref->found_inode_ref) {
931                         add_inode_backref(dst_cache, dst->ino,
932                                         backref->dir, backref->index,
933                                         backref->name, backref->namelen, 0,
934                                         backref->ref_type, backref->errors);
935                 }
936         }
937
938         if (src->found_dir_item)
939                 dst->found_dir_item = 1;
940         if (src->found_file_extent)
941                 dst->found_file_extent = 1;
942         if (src->found_csum_item)
943                 dst->found_csum_item = 1;
944         if (src->some_csum_missing)
945                 dst->some_csum_missing = 1;
946         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
947                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
948                 if (ret < 0)
949                         return ret;
950         }
951
952         BUG_ON(src->found_link < dir_count);
953         dst->found_link += src->found_link - dir_count;
954         dst->found_size += src->found_size;
955         if (src->extent_start != (u64)-1) {
956                 if (dst->extent_start == (u64)-1) {
957                         dst->extent_start = src->extent_start;
958                         dst->extent_end = src->extent_end;
959                 } else {
960                         if (dst->extent_end > src->extent_start)
961                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
962                         else if (dst->extent_end < src->extent_start) {
963                                 ret = add_file_extent_hole(&dst->holes,
964                                         dst->extent_end,
965                                         src->extent_start - dst->extent_end);
966                         }
967                         if (dst->extent_end < src->extent_end)
968                                 dst->extent_end = src->extent_end;
969                 }
970         }
971
972         dst->errors |= src->errors;
973         if (src->found_inode_item) {
974                 if (!dst->found_inode_item) {
975                         dst->nlink = src->nlink;
976                         dst->isize = src->isize;
977                         dst->nbytes = src->nbytes;
978                         dst->imode = src->imode;
979                         dst->nodatasum = src->nodatasum;
980                         dst->found_inode_item = 1;
981                 } else {
982                         dst->errors |= I_ERR_DUP_INODE_ITEM;
983                 }
984         }
985         dst->merging = 0;
986
987         return 0;
988 }
989
990 static int splice_shared_node(struct shared_node *src_node,
991                               struct shared_node *dst_node)
992 {
993         struct cache_extent *cache;
994         struct ptr_node *node, *ins;
995         struct cache_tree *src, *dst;
996         struct inode_record *rec, *conflict;
997         u64 current_ino = 0;
998         int splice = 0;
999         int ret;
1000
1001         if (--src_node->refs == 0)
1002                 splice = 1;
1003         if (src_node->current)
1004                 current_ino = src_node->current->ino;
1005
1006         src = &src_node->root_cache;
1007         dst = &dst_node->root_cache;
1008 again:
1009         cache = search_cache_extent(src, 0);
1010         while (cache) {
1011                 node = container_of(cache, struct ptr_node, cache);
1012                 rec = node->data;
1013                 cache = next_cache_extent(cache);
1014
1015                 if (splice) {
1016                         remove_cache_extent(src, &node->cache);
1017                         ins = node;
1018                 } else {
1019                         ins = malloc(sizeof(*ins));
1020                         BUG_ON(!ins);
1021                         ins->cache.start = node->cache.start;
1022                         ins->cache.size = node->cache.size;
1023                         ins->data = rec;
1024                         rec->refs++;
1025                 }
1026                 ret = insert_cache_extent(dst, &ins->cache);
1027                 if (ret == -EEXIST) {
1028                         conflict = get_inode_rec(dst, rec->ino, 1);
1029                         BUG_ON(IS_ERR(conflict));
1030                         merge_inode_recs(rec, conflict, dst);
1031                         if (rec->checked) {
1032                                 conflict->checked = 1;
1033                                 if (dst_node->current == conflict)
1034                                         dst_node->current = NULL;
1035                         }
1036                         maybe_free_inode_rec(dst, conflict);
1037                         free_inode_rec(rec);
1038                         free(ins);
1039                 } else {
1040                         BUG_ON(ret);
1041                 }
1042         }
1043
1044         if (src == &src_node->root_cache) {
1045                 src = &src_node->inode_cache;
1046                 dst = &dst_node->inode_cache;
1047                 goto again;
1048         }
1049
1050         if (current_ino > 0 && (!dst_node->current ||
1051             current_ino > dst_node->current->ino)) {
1052                 if (dst_node->current) {
1053                         dst_node->current->checked = 1;
1054                         maybe_free_inode_rec(dst, dst_node->current);
1055                 }
1056                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1057                 BUG_ON(IS_ERR(dst_node->current));
1058         }
1059         return 0;
1060 }
1061
1062 static void free_inode_ptr(struct cache_extent *cache)
1063 {
1064         struct ptr_node *node;
1065         struct inode_record *rec;
1066
1067         node = container_of(cache, struct ptr_node, cache);
1068         rec = node->data;
1069         free_inode_rec(rec);
1070         free(node);
1071 }
1072
1073 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1074
1075 static struct shared_node *find_shared_node(struct cache_tree *shared,
1076                                             u64 bytenr)
1077 {
1078         struct cache_extent *cache;
1079         struct shared_node *node;
1080
1081         cache = lookup_cache_extent(shared, bytenr, 1);
1082         if (cache) {
1083                 node = container_of(cache, struct shared_node, cache);
1084                 return node;
1085         }
1086         return NULL;
1087 }
1088
1089 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1090 {
1091         int ret;
1092         struct shared_node *node;
1093
1094         node = calloc(1, sizeof(*node));
1095         if (!node)
1096                 return -ENOMEM;
1097         node->cache.start = bytenr;
1098         node->cache.size = 1;
1099         cache_tree_init(&node->root_cache);
1100         cache_tree_init(&node->inode_cache);
1101         node->refs = refs;
1102
1103         ret = insert_cache_extent(shared, &node->cache);
1104
1105         return ret;
1106 }
1107
1108 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1109                              struct walk_control *wc, int level)
1110 {
1111         struct shared_node *node;
1112         struct shared_node *dest;
1113         int ret;
1114
1115         if (level == wc->active_node)
1116                 return 0;
1117
1118         BUG_ON(wc->active_node <= level);
1119         node = find_shared_node(&wc->shared, bytenr);
1120         if (!node) {
1121                 ret = add_shared_node(&wc->shared, bytenr, refs);
1122                 BUG_ON(ret);
1123                 node = find_shared_node(&wc->shared, bytenr);
1124                 wc->nodes[level] = node;
1125                 wc->active_node = level;
1126                 return 0;
1127         }
1128
1129         if (wc->root_level == wc->active_node &&
1130             btrfs_root_refs(&root->root_item) == 0) {
1131                 if (--node->refs == 0) {
1132                         free_inode_recs_tree(&node->root_cache);
1133                         free_inode_recs_tree(&node->inode_cache);
1134                         remove_cache_extent(&wc->shared, &node->cache);
1135                         free(node);
1136                 }
1137                 return 1;
1138         }
1139
1140         dest = wc->nodes[wc->active_node];
1141         splice_shared_node(node, dest);
1142         if (node->refs == 0) {
1143                 remove_cache_extent(&wc->shared, &node->cache);
1144                 free(node);
1145         }
1146         return 1;
1147 }
1148
1149 static int leave_shared_node(struct btrfs_root *root,
1150                              struct walk_control *wc, int level)
1151 {
1152         struct shared_node *node;
1153         struct shared_node *dest;
1154         int i;
1155
1156         if (level == wc->root_level)
1157                 return 0;
1158
1159         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1160                 if (wc->nodes[i])
1161                         break;
1162         }
1163         BUG_ON(i >= BTRFS_MAX_LEVEL);
1164
1165         node = wc->nodes[wc->active_node];
1166         wc->nodes[wc->active_node] = NULL;
1167         wc->active_node = i;
1168
1169         dest = wc->nodes[wc->active_node];
1170         if (wc->active_node < wc->root_level ||
1171             btrfs_root_refs(&root->root_item) > 0) {
1172                 BUG_ON(node->refs <= 1);
1173                 splice_shared_node(node, dest);
1174         } else {
1175                 BUG_ON(node->refs < 2);
1176                 node->refs--;
1177         }
1178         return 0;
1179 }
1180
1181 /*
1182  * Returns:
1183  * < 0 - on error
1184  * 1   - if the root with id child_root_id is a child of root parent_root_id
1185  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1186  *       has other root(s) as parent(s)
1187  * 2   - if the root child_root_id doesn't have any parent roots
1188  */
1189 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1190                          u64 child_root_id)
1191 {
1192         struct btrfs_path path;
1193         struct btrfs_key key;
1194         struct extent_buffer *leaf;
1195         int has_parent = 0;
1196         int ret;
1197
1198         btrfs_init_path(&path);
1199
1200         key.objectid = parent_root_id;
1201         key.type = BTRFS_ROOT_REF_KEY;
1202         key.offset = child_root_id;
1203         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1204                                 0, 0);
1205         if (ret < 0)
1206                 return ret;
1207         btrfs_release_path(&path);
1208         if (!ret)
1209                 return 1;
1210
1211         key.objectid = child_root_id;
1212         key.type = BTRFS_ROOT_BACKREF_KEY;
1213         key.offset = 0;
1214         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1215                                 0, 0);
1216         if (ret < 0)
1217                 goto out;
1218
1219         while (1) {
1220                 leaf = path.nodes[0];
1221                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1222                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1223                         if (ret)
1224                                 break;
1225                         leaf = path.nodes[0];
1226                 }
1227
1228                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1229                 if (key.objectid != child_root_id ||
1230                     key.type != BTRFS_ROOT_BACKREF_KEY)
1231                         break;
1232
1233                 has_parent = 1;
1234
1235                 if (key.offset == parent_root_id) {
1236                         btrfs_release_path(&path);
1237                         return 1;
1238                 }
1239
1240                 path.slots[0]++;
1241         }
1242 out:
1243         btrfs_release_path(&path);
1244         if (ret < 0)
1245                 return ret;
1246         return has_parent ? 0 : 2;
1247 }
1248
1249 static int process_dir_item(struct extent_buffer *eb,
1250                             int slot, struct btrfs_key *key,
1251                             struct shared_node *active_node)
1252 {
1253         u32 total;
1254         u32 cur = 0;
1255         u32 len;
1256         u32 name_len;
1257         u32 data_len;
1258         int error;
1259         int nritems = 0;
1260         u8 filetype;
1261         struct btrfs_dir_item *di;
1262         struct inode_record *rec;
1263         struct cache_tree *root_cache;
1264         struct cache_tree *inode_cache;
1265         struct btrfs_key location;
1266         char namebuf[BTRFS_NAME_LEN];
1267
1268         root_cache = &active_node->root_cache;
1269         inode_cache = &active_node->inode_cache;
1270         rec = active_node->current;
1271         rec->found_dir_item = 1;
1272
1273         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1274         total = btrfs_item_size_nr(eb, slot);
1275         while (cur < total) {
1276                 nritems++;
1277                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1278                 name_len = btrfs_dir_name_len(eb, di);
1279                 data_len = btrfs_dir_data_len(eb, di);
1280                 filetype = btrfs_dir_type(eb, di);
1281
1282                 rec->found_size += name_len;
1283                 if (cur + sizeof(*di) + name_len > total ||
1284                     name_len > BTRFS_NAME_LEN) {
1285                         error = REF_ERR_NAME_TOO_LONG;
1286
1287                         if (cur + sizeof(*di) > total)
1288                                 break;
1289                         len = min_t(u32, total - cur - sizeof(*di),
1290                                     BTRFS_NAME_LEN);
1291                 } else {
1292                         len = name_len;
1293                         error = 0;
1294                 }
1295
1296                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1297
1298                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1299                     key->offset != btrfs_name_hash(namebuf, len)) {
1300                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1301                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1302                         key->objectid, key->offset, namebuf, len, filetype,
1303                         key->offset, btrfs_name_hash(namebuf, len));
1304                 }
1305
1306                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1307                         add_inode_backref(inode_cache, location.objectid,
1308                                           key->objectid, key->offset, namebuf,
1309                                           len, filetype, key->type, error);
1310                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1311                         add_inode_backref(root_cache, location.objectid,
1312                                           key->objectid, key->offset,
1313                                           namebuf, len, filetype,
1314                                           key->type, error);
1315                 } else {
1316                         fprintf(stderr,
1317                                 "unknown location type %d in DIR_ITEM[%llu %llu]\n",
1318                                 location.type, key->objectid, key->offset);
1319                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1320                                           key->objectid, key->offset, namebuf,
1321                                           len, filetype, key->type, error);
1322                 }
1323
1324                 len = sizeof(*di) + name_len + data_len;
1325                 di = (struct btrfs_dir_item *)((char *)di + len);
1326                 cur += len;
1327         }
1328         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1329                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1330
1331         return 0;
1332 }
1333
1334 static int process_inode_ref(struct extent_buffer *eb,
1335                              int slot, struct btrfs_key *key,
1336                              struct shared_node *active_node)
1337 {
1338         u32 total;
1339         u32 cur = 0;
1340         u32 len;
1341         u32 name_len;
1342         u64 index;
1343         int error;
1344         struct cache_tree *inode_cache;
1345         struct btrfs_inode_ref *ref;
1346         char namebuf[BTRFS_NAME_LEN];
1347
1348         inode_cache = &active_node->inode_cache;
1349
1350         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1351         total = btrfs_item_size_nr(eb, slot);
1352         while (cur < total) {
1353                 name_len = btrfs_inode_ref_name_len(eb, ref);
1354                 index = btrfs_inode_ref_index(eb, ref);
1355
1356                 /* inode_ref + namelen should not cross item boundary */
1357                 if (cur + sizeof(*ref) + name_len > total ||
1358                     name_len > BTRFS_NAME_LEN) {
1359                         if (total < cur + sizeof(*ref))
1360                                 break;
1361
1362                         /* Still try to read out the remaining part */
1363                         len = min_t(u32, total - cur - sizeof(*ref),
1364                                     BTRFS_NAME_LEN);
1365                         error = REF_ERR_NAME_TOO_LONG;
1366                 } else {
1367                         len = name_len;
1368                         error = 0;
1369                 }
1370
1371                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1372                 add_inode_backref(inode_cache, key->objectid, key->offset,
1373                                   index, namebuf, len, 0, key->type, error);
1374
1375                 len = sizeof(*ref) + name_len;
1376                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1377                 cur += len;
1378         }
1379         return 0;
1380 }
1381
1382 static int process_inode_extref(struct extent_buffer *eb,
1383                                 int slot, struct btrfs_key *key,
1384                                 struct shared_node *active_node)
1385 {
1386         u32 total;
1387         u32 cur = 0;
1388         u32 len;
1389         u32 name_len;
1390         u64 index;
1391         u64 parent;
1392         int error;
1393         struct cache_tree *inode_cache;
1394         struct btrfs_inode_extref *extref;
1395         char namebuf[BTRFS_NAME_LEN];
1396
1397         inode_cache = &active_node->inode_cache;
1398
1399         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1400         total = btrfs_item_size_nr(eb, slot);
1401         while (cur < total) {
1402                 name_len = btrfs_inode_extref_name_len(eb, extref);
1403                 index = btrfs_inode_extref_index(eb, extref);
1404                 parent = btrfs_inode_extref_parent(eb, extref);
1405                 if (name_len <= BTRFS_NAME_LEN) {
1406                         len = name_len;
1407                         error = 0;
1408                 } else {
1409                         len = BTRFS_NAME_LEN;
1410                         error = REF_ERR_NAME_TOO_LONG;
1411                 }
1412                 read_extent_buffer(eb, namebuf,
1413                                    (unsigned long)(extref + 1), len);
1414                 add_inode_backref(inode_cache, key->objectid, parent,
1415                                   index, namebuf, len, 0, key->type, error);
1416
1417                 len = sizeof(*extref) + name_len;
1418                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1419                 cur += len;
1420         }
1421         return 0;
1422
1423 }
1424
1425 static int process_file_extent(struct btrfs_root *root,
1426                                 struct extent_buffer *eb,
1427                                 int slot, struct btrfs_key *key,
1428                                 struct shared_node *active_node)
1429 {
1430         struct inode_record *rec;
1431         struct btrfs_file_extent_item *fi;
1432         u64 num_bytes = 0;
1433         u64 disk_bytenr = 0;
1434         u64 extent_offset = 0;
1435         u64 mask = root->fs_info->sectorsize - 1;
1436         int extent_type;
1437         int ret;
1438
1439         rec = active_node->current;
1440         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1441         rec->found_file_extent = 1;
1442
1443         if (rec->extent_start == (u64)-1) {
1444                 rec->extent_start = key->offset;
1445                 rec->extent_end = key->offset;
1446         }
1447
1448         if (rec->extent_end > key->offset)
1449                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1450         else if (rec->extent_end < key->offset) {
1451                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1452                                            key->offset - rec->extent_end);
1453                 if (ret < 0)
1454                         return ret;
1455         }
1456
1457         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1458         extent_type = btrfs_file_extent_type(eb, fi);
1459
1460         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1461                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1462                 if (num_bytes == 0)
1463                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1464                 rec->found_size += num_bytes;
1465                 num_bytes = (num_bytes + mask) & ~mask;
1466         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1467                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1468                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1469                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1470                 extent_offset = btrfs_file_extent_offset(eb, fi);
1471                 if (num_bytes == 0 || (num_bytes & mask))
1472                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1473                 if (num_bytes + extent_offset >
1474                     btrfs_file_extent_ram_bytes(eb, fi))
1475                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1476                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1477                     (btrfs_file_extent_compression(eb, fi) ||
1478                      btrfs_file_extent_encryption(eb, fi) ||
1479                      btrfs_file_extent_other_encoding(eb, fi)))
1480                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1481                 if (disk_bytenr > 0)
1482                         rec->found_size += num_bytes;
1483         } else {
1484                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1485         }
1486         rec->extent_end = key->offset + num_bytes;
1487
1488         /*
1489          * The data reloc tree will copy full extents into its inode and then
1490          * copy the corresponding csums.  Because the extent it copied could be
1491          * a preallocated extent that hasn't been written to yet there may be no
1492          * csums to copy, ergo we won't have csums for our file extent.  This is
1493          * ok so just don't bother checking csums if the inode belongs to the
1494          * data reloc tree.
1495          */
1496         if (disk_bytenr > 0 &&
1497             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1498                 u64 found;
1499                 if (btrfs_file_extent_compression(eb, fi))
1500                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1501                 else
1502                         disk_bytenr += extent_offset;
1503
1504                 ret = count_csum_range(root->fs_info, disk_bytenr, num_bytes,
1505                                        &found);
1506                 if (ret < 0)
1507                         return ret;
1508                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1509                         if (found > 0)
1510                                 rec->found_csum_item = 1;
1511                         if (found < num_bytes)
1512                                 rec->some_csum_missing = 1;
1513                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1514                         if (found > 0)
1515                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1516                 }
1517         }
1518         return 0;
1519 }
1520
1521 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1522                             struct walk_control *wc)
1523 {
1524         struct btrfs_key key;
1525         u32 nritems;
1526         int i;
1527         int ret = 0;
1528         struct cache_tree *inode_cache;
1529         struct shared_node *active_node;
1530
1531         if (wc->root_level == wc->active_node &&
1532             btrfs_root_refs(&root->root_item) == 0)
1533                 return 0;
1534
1535         active_node = wc->nodes[wc->active_node];
1536         inode_cache = &active_node->inode_cache;
1537         nritems = btrfs_header_nritems(eb);
1538         for (i = 0; i < nritems; i++) {
1539                 btrfs_item_key_to_cpu(eb, &key, i);
1540
1541                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1542                         continue;
1543                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1544                         continue;
1545
1546                 if (active_node->current == NULL ||
1547                     active_node->current->ino < key.objectid) {
1548                         if (active_node->current) {
1549                                 active_node->current->checked = 1;
1550                                 maybe_free_inode_rec(inode_cache,
1551                                                      active_node->current);
1552                         }
1553                         active_node->current = get_inode_rec(inode_cache,
1554                                                              key.objectid, 1);
1555                         BUG_ON(IS_ERR(active_node->current));
1556                 }
1557                 switch (key.type) {
1558                 case BTRFS_DIR_ITEM_KEY:
1559                 case BTRFS_DIR_INDEX_KEY:
1560                         ret = process_dir_item(eb, i, &key, active_node);
1561                         break;
1562                 case BTRFS_INODE_REF_KEY:
1563                         ret = process_inode_ref(eb, i, &key, active_node);
1564                         break;
1565                 case BTRFS_INODE_EXTREF_KEY:
1566                         ret = process_inode_extref(eb, i, &key, active_node);
1567                         break;
1568                 case BTRFS_INODE_ITEM_KEY:
1569                         ret = process_inode_item(eb, i, &key, active_node);
1570                         break;
1571                 case BTRFS_EXTENT_DATA_KEY:
1572                         ret = process_file_extent(root, eb, i, &key,
1573                                                   active_node);
1574                         break;
1575                 default:
1576                         break;
1577                 };
1578         }
1579         return ret;
1580 }
1581
1582 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1583                           struct walk_control *wc, int *level,
1584                           struct node_refs *nrefs)
1585 {
1586         enum btrfs_tree_block_status status;
1587         u64 bytenr;
1588         u64 ptr_gen;
1589         struct btrfs_fs_info *fs_info = root->fs_info;
1590         struct extent_buffer *next;
1591         struct extent_buffer *cur;
1592         int ret, err = 0;
1593         u64 refs;
1594
1595         WARN_ON(*level < 0);
1596         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1597
1598         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1599                 refs = nrefs->refs[*level];
1600                 ret = 0;
1601         } else {
1602                 ret = btrfs_lookup_extent_info(NULL, root,
1603                                        path->nodes[*level]->start,
1604                                        *level, 1, &refs, NULL);
1605                 if (ret < 0) {
1606                         err = ret;
1607                         goto out;
1608                 }
1609                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1610                 nrefs->refs[*level] = refs;
1611         }
1612
1613         if (refs > 1) {
1614                 ret = enter_shared_node(root, path->nodes[*level]->start,
1615                                         refs, wc, *level);
1616                 if (ret > 0) {
1617                         err = ret;
1618                         goto out;
1619                 }
1620         }
1621
1622         while (*level >= 0) {
1623                 WARN_ON(*level < 0);
1624                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1625                 cur = path->nodes[*level];
1626
1627                 if (btrfs_header_level(cur) != *level)
1628                         WARN_ON(1);
1629
1630                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1631                         break;
1632                 if (*level == 0) {
1633                         ret = process_one_leaf(root, cur, wc);
1634                         if (ret < 0)
1635                                 err = ret;
1636                         break;
1637                 }
1638                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1639                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1640
1641                 if (bytenr == nrefs->bytenr[*level - 1]) {
1642                         refs = nrefs->refs[*level - 1];
1643                 } else {
1644                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1645                                         *level - 1, 1, &refs, NULL);
1646                         if (ret < 0) {
1647                                 refs = 0;
1648                         } else {
1649                                 nrefs->bytenr[*level - 1] = bytenr;
1650                                 nrefs->refs[*level - 1] = refs;
1651                         }
1652                 }
1653
1654                 if (refs > 1) {
1655                         ret = enter_shared_node(root, bytenr, refs,
1656                                                 wc, *level - 1);
1657                         if (ret > 0) {
1658                                 path->slots[*level]++;
1659                                 continue;
1660                         }
1661                 }
1662
1663                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
1664                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1665                         free_extent_buffer(next);
1666                         reada_walk_down(root, cur, path->slots[*level]);
1667                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
1668                         if (!extent_buffer_uptodate(next)) {
1669                                 struct btrfs_key node_key;
1670
1671                                 btrfs_node_key_to_cpu(path->nodes[*level],
1672                                                       &node_key,
1673                                                       path->slots[*level]);
1674                                 btrfs_add_corrupt_extent_record(root->fs_info,
1675                                                 &node_key,
1676                                                 path->nodes[*level]->start,
1677                                                 root->fs_info->nodesize,
1678                                                 *level);
1679                                 err = -EIO;
1680                                 goto out;
1681                         }
1682                 }
1683
1684                 ret = check_child_node(cur, path->slots[*level], next);
1685                 if (ret) {
1686                         free_extent_buffer(next);
1687                         err = ret;
1688                         goto out;
1689                 }
1690
1691                 if (btrfs_is_leaf(next))
1692                         status = btrfs_check_leaf(root, NULL, next);
1693                 else
1694                         status = btrfs_check_node(root, NULL, next);
1695                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1696                         free_extent_buffer(next);
1697                         err = -EIO;
1698                         goto out;
1699                 }
1700
1701                 *level = *level - 1;
1702                 free_extent_buffer(path->nodes[*level]);
1703                 path->nodes[*level] = next;
1704                 path->slots[*level] = 0;
1705         }
1706 out:
1707         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1708         return err;
1709 }
1710
1711 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1712                         struct walk_control *wc, int *level)
1713 {
1714         int i;
1715         struct extent_buffer *leaf;
1716
1717         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1718                 leaf = path->nodes[i];
1719                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1720                         path->slots[i]++;
1721                         *level = i;
1722                         return 0;
1723                 }
1724                 free_extent_buffer(path->nodes[*level]);
1725                 path->nodes[*level] = NULL;
1726                 BUG_ON(*level > wc->active_node);
1727                 if (*level == wc->active_node)
1728                         leave_shared_node(root, wc, *level);
1729                 *level = i + 1;
1730         }
1731         return 1;
1732 }
1733
1734 static int check_root_dir(struct inode_record *rec)
1735 {
1736         struct inode_backref *backref;
1737         int ret = -1;
1738
1739         if (!rec->found_inode_item || rec->errors)
1740                 goto out;
1741         if (rec->nlink != 1 || rec->found_link != 0)
1742                 goto out;
1743         if (list_empty(&rec->backrefs))
1744                 goto out;
1745         backref = to_inode_backref(rec->backrefs.next);
1746         if (!backref->found_inode_ref)
1747                 goto out;
1748         if (backref->index != 0 || backref->namelen != 2 ||
1749             memcmp(backref->name, "..", 2))
1750                 goto out;
1751         if (backref->found_dir_index || backref->found_dir_item)
1752                 goto out;
1753         ret = 0;
1754 out:
1755         return ret;
1756 }
1757
1758 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1759                               struct btrfs_root *root, struct btrfs_path *path,
1760                               struct inode_record *rec)
1761 {
1762         struct btrfs_inode_item *ei;
1763         struct btrfs_key key;
1764         int ret;
1765
1766         key.objectid = rec->ino;
1767         key.type = BTRFS_INODE_ITEM_KEY;
1768         key.offset = (u64)-1;
1769
1770         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1771         if (ret < 0)
1772                 goto out;
1773         if (ret) {
1774                 if (!path->slots[0]) {
1775                         ret = -ENOENT;
1776                         goto out;
1777                 }
1778                 path->slots[0]--;
1779                 ret = 0;
1780         }
1781         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1782         if (key.objectid != rec->ino) {
1783                 ret = -ENOENT;
1784                 goto out;
1785         }
1786
1787         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1788                             struct btrfs_inode_item);
1789         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1790         btrfs_mark_buffer_dirty(path->nodes[0]);
1791         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1792         printf("reset isize for dir %llu root %llu\n", rec->ino,
1793                root->root_key.objectid);
1794 out:
1795         btrfs_release_path(path);
1796         return ret;
1797 }
1798
1799 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1800                                     struct btrfs_root *root,
1801                                     struct btrfs_path *path,
1802                                     struct inode_record *rec)
1803 {
1804         int ret;
1805
1806         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
1807         btrfs_release_path(path);
1808         if (!ret)
1809                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1810         return ret;
1811 }
1812
1813 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
1814                                struct btrfs_root *root,
1815                                struct btrfs_path *path,
1816                                struct inode_record *rec)
1817 {
1818         struct btrfs_inode_item *ei;
1819         struct btrfs_key key;
1820         int ret = 0;
1821
1822         key.objectid = rec->ino;
1823         key.type = BTRFS_INODE_ITEM_KEY;
1824         key.offset = 0;
1825
1826         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1827         if (ret) {
1828                 if (ret > 0)
1829                         ret = -ENOENT;
1830                 goto out;
1831         }
1832
1833         /* Since ret == 0, no need to check anything */
1834         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1835                             struct btrfs_inode_item);
1836         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
1837         btrfs_mark_buffer_dirty(path->nodes[0]);
1838         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
1839         printf("reset nbytes for ino %llu root %llu\n",
1840                rec->ino, root->root_key.objectid);
1841 out:
1842         btrfs_release_path(path);
1843         return ret;
1844 }
1845
1846 static int add_missing_dir_index(struct btrfs_root *root,
1847                                  struct cache_tree *inode_cache,
1848                                  struct inode_record *rec,
1849                                  struct inode_backref *backref)
1850 {
1851         struct btrfs_path path;
1852         struct btrfs_trans_handle *trans;
1853         struct btrfs_dir_item *dir_item;
1854         struct extent_buffer *leaf;
1855         struct btrfs_key key;
1856         struct btrfs_disk_key disk_key;
1857         struct inode_record *dir_rec;
1858         unsigned long name_ptr;
1859         u32 data_size = sizeof(*dir_item) + backref->namelen;
1860         int ret;
1861
1862         trans = btrfs_start_transaction(root, 1);
1863         if (IS_ERR(trans))
1864                 return PTR_ERR(trans);
1865
1866         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
1867                 (unsigned long long)rec->ino);
1868
1869         btrfs_init_path(&path);
1870         key.objectid = backref->dir;
1871         key.type = BTRFS_DIR_INDEX_KEY;
1872         key.offset = backref->index;
1873         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
1874         BUG_ON(ret);
1875
1876         leaf = path.nodes[0];
1877         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
1878
1879         disk_key.objectid = cpu_to_le64(rec->ino);
1880         disk_key.type = BTRFS_INODE_ITEM_KEY;
1881         disk_key.offset = 0;
1882
1883         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
1884         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
1885         btrfs_set_dir_data_len(leaf, dir_item, 0);
1886         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
1887         name_ptr = (unsigned long)(dir_item + 1);
1888         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
1889         btrfs_mark_buffer_dirty(leaf);
1890         btrfs_release_path(&path);
1891         btrfs_commit_transaction(trans, root);
1892
1893         backref->found_dir_index = 1;
1894         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
1895         BUG_ON(IS_ERR(dir_rec));
1896         if (!dir_rec)
1897                 return 0;
1898         dir_rec->found_size += backref->namelen;
1899         if (dir_rec->found_size == dir_rec->isize &&
1900             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
1901                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1902         if (dir_rec->found_size != dir_rec->isize)
1903                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1904
1905         return 0;
1906 }
1907
1908 static int delete_dir_index(struct btrfs_root *root,
1909                             struct inode_backref *backref)
1910 {
1911         struct btrfs_trans_handle *trans;
1912         struct btrfs_dir_item *di;
1913         struct btrfs_path path;
1914         int ret = 0;
1915
1916         trans = btrfs_start_transaction(root, 1);
1917         if (IS_ERR(trans))
1918                 return PTR_ERR(trans);
1919
1920         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
1921                 (unsigned long long)backref->dir,
1922                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
1923                 (unsigned long long)root->objectid);
1924
1925         btrfs_init_path(&path);
1926         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
1927                                     backref->name, backref->namelen,
1928                                     backref->index, -1);
1929         if (IS_ERR(di)) {
1930                 ret = PTR_ERR(di);
1931                 btrfs_release_path(&path);
1932                 btrfs_commit_transaction(trans, root);
1933                 if (ret == -ENOENT)
1934                         return 0;
1935                 return ret;
1936         }
1937
1938         if (!di)
1939                 ret = btrfs_del_item(trans, root, &path);
1940         else
1941                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
1942         BUG_ON(ret);
1943         btrfs_release_path(&path);
1944         btrfs_commit_transaction(trans, root);
1945         return ret;
1946 }
1947
1948 static int create_inode_item(struct btrfs_root *root,
1949                              struct inode_record *rec, int root_dir)
1950 {
1951         struct btrfs_trans_handle *trans;
1952         u64 nlink = 0;
1953         u32 mode = 0;
1954         u64 size = 0;
1955         int ret;
1956
1957         trans = btrfs_start_transaction(root, 1);
1958         if (IS_ERR(trans)) {
1959                 ret = PTR_ERR(trans);
1960                 return ret;
1961         }
1962
1963         nlink = root_dir ? 1 : rec->found_link;
1964         if (rec->found_dir_item) {
1965                 if (rec->found_file_extent)
1966                         fprintf(stderr, "root %llu inode %llu has both a dir "
1967                                 "item and extents, unsure if it is a dir or a "
1968                                 "regular file so setting it as a directory\n",
1969                                 (unsigned long long)root->objectid,
1970                                 (unsigned long long)rec->ino);
1971                 mode = S_IFDIR | 0755;
1972                 size = rec->found_size;
1973         } else if (!rec->found_dir_item) {
1974                 size = rec->extent_end;
1975                 mode =  S_IFREG | 0755;
1976         }
1977
1978         ret = insert_inode_item(trans, root, rec->ino, size, rec->nbytes,
1979                                   nlink, mode);
1980         btrfs_commit_transaction(trans, root);
1981         return 0;
1982 }
1983
1984 static int repair_inode_backrefs(struct btrfs_root *root,
1985                                  struct inode_record *rec,
1986                                  struct cache_tree *inode_cache,
1987                                  int delete)
1988 {
1989         struct inode_backref *tmp, *backref;
1990         u64 root_dirid = btrfs_root_dirid(&root->root_item);
1991         int ret = 0;
1992         int repaired = 0;
1993
1994         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1995                 if (!delete && rec->ino == root_dirid) {
1996                         if (!rec->found_inode_item) {
1997                                 ret = create_inode_item(root, rec, 1);
1998                                 if (ret)
1999                                         break;
2000                                 repaired++;
2001                         }
2002                 }
2003
2004                 /* Index 0 for root dir's are special, don't mess with it */
2005                 if (rec->ino == root_dirid && backref->index == 0)
2006                         continue;
2007
2008                 if (delete &&
2009                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2010                      (backref->found_dir_index && backref->found_inode_ref &&
2011                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2012                         ret = delete_dir_index(root, backref);
2013                         if (ret)
2014                                 break;
2015                         repaired++;
2016                         list_del(&backref->list);
2017                         free(backref);
2018                         continue;
2019                 }
2020
2021                 if (!delete && !backref->found_dir_index &&
2022                     backref->found_dir_item && backref->found_inode_ref) {
2023                         ret = add_missing_dir_index(root, inode_cache, rec,
2024                                                     backref);
2025                         if (ret)
2026                                 break;
2027                         repaired++;
2028                         if (backref->found_dir_item &&
2029                             backref->found_dir_index) {
2030                                 if (!backref->errors &&
2031                                     backref->found_inode_ref) {
2032                                         list_del(&backref->list);
2033                                         free(backref);
2034                                         continue;
2035                                 }
2036                         }
2037                 }
2038
2039                 if (!delete && (!backref->found_dir_index &&
2040                                 !backref->found_dir_item &&
2041                                 backref->found_inode_ref)) {
2042                         struct btrfs_trans_handle *trans;
2043                         struct btrfs_key location;
2044
2045                         ret = check_dir_conflict(root, backref->name,
2046                                                  backref->namelen,
2047                                                  backref->dir,
2048                                                  backref->index);
2049                         if (ret) {
2050                                 /*
2051                                  * let nlink fixing routine to handle it,
2052                                  * which can do it better.
2053                                  */
2054                                 ret = 0;
2055                                 break;
2056                         }
2057                         location.objectid = rec->ino;
2058                         location.type = BTRFS_INODE_ITEM_KEY;
2059                         location.offset = 0;
2060
2061                         trans = btrfs_start_transaction(root, 1);
2062                         if (IS_ERR(trans)) {
2063                                 ret = PTR_ERR(trans);
2064                                 break;
2065                         }
2066                         fprintf(stderr, "adding missing dir index/item pair "
2067                                 "for inode %llu\n",
2068                                 (unsigned long long)rec->ino);
2069                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2070                                                     backref->namelen,
2071                                                     backref->dir, &location,
2072                                                     imode_to_type(rec->imode),
2073                                                     backref->index);
2074                         BUG_ON(ret);
2075                         btrfs_commit_transaction(trans, root);
2076                         repaired++;
2077                 }
2078
2079                 if (!delete && (backref->found_inode_ref &&
2080                                 backref->found_dir_index &&
2081                                 backref->found_dir_item &&
2082                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2083                                 !rec->found_inode_item)) {
2084                         ret = create_inode_item(root, rec, 0);
2085                         if (ret)
2086                                 break;
2087                         repaired++;
2088                 }
2089
2090         }
2091         return ret ? ret : repaired;
2092 }
2093
2094 /*
2095  * To determine the file type for nlink/inode_item repair
2096  *
2097  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2098  * Return -ENOENT if file type is not found.
2099  */
2100 static int find_file_type(struct inode_record *rec, u8 *type)
2101 {
2102         struct inode_backref *backref;
2103
2104         /* For inode item recovered case */
2105         if (rec->found_inode_item) {
2106                 *type = imode_to_type(rec->imode);
2107                 return 0;
2108         }
2109
2110         list_for_each_entry(backref, &rec->backrefs, list) {
2111                 if (backref->found_dir_index || backref->found_dir_item) {
2112                         *type = backref->filetype;
2113                         return 0;
2114                 }
2115         }
2116         return -ENOENT;
2117 }
2118
2119 /*
2120  * To determine the file name for nlink repair
2121  *
2122  * Return 0 if file name is found, set name and namelen.
2123  * Return -ENOENT if file name is not found.
2124  */
2125 static int find_file_name(struct inode_record *rec,
2126                           char *name, int *namelen)
2127 {
2128         struct inode_backref *backref;
2129
2130         list_for_each_entry(backref, &rec->backrefs, list) {
2131                 if (backref->found_dir_index || backref->found_dir_item ||
2132                     backref->found_inode_ref) {
2133                         memcpy(name, backref->name, backref->namelen);
2134                         *namelen = backref->namelen;
2135                         return 0;
2136                 }
2137         }
2138         return -ENOENT;
2139 }
2140
2141 /* Reset the nlink of the inode to the correct one */
2142 static int reset_nlink(struct btrfs_trans_handle *trans,
2143                        struct btrfs_root *root,
2144                        struct btrfs_path *path,
2145                        struct inode_record *rec)
2146 {
2147         struct inode_backref *backref;
2148         struct inode_backref *tmp;
2149         struct btrfs_key key;
2150         struct btrfs_inode_item *inode_item;
2151         int ret = 0;
2152
2153         /* We don't believe this either, reset it and iterate backref */
2154         rec->found_link = 0;
2155
2156         /* Remove all backref including the valid ones */
2157         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2158                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2159                                    backref->index, backref->name,
2160                                    backref->namelen, 0);
2161                 if (ret < 0)
2162                         goto out;
2163
2164                 /* remove invalid backref, so it won't be added back */
2165                 if (!(backref->found_dir_index &&
2166                       backref->found_dir_item &&
2167                       backref->found_inode_ref)) {
2168                         list_del(&backref->list);
2169                         free(backref);
2170                 } else {
2171                         rec->found_link++;
2172                 }
2173         }
2174
2175         /* Set nlink to 0 */
2176         key.objectid = rec->ino;
2177         key.type = BTRFS_INODE_ITEM_KEY;
2178         key.offset = 0;
2179         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2180         if (ret < 0)
2181                 goto out;
2182         if (ret > 0) {
2183                 ret = -ENOENT;
2184                 goto out;
2185         }
2186         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2187                                     struct btrfs_inode_item);
2188         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2189         btrfs_mark_buffer_dirty(path->nodes[0]);
2190         btrfs_release_path(path);
2191
2192         /*
2193          * Add back valid inode_ref/dir_item/dir_index,
2194          * add_link() will handle the nlink inc, so new nlink must be correct
2195          */
2196         list_for_each_entry(backref, &rec->backrefs, list) {
2197                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2198                                      backref->name, backref->namelen,
2199                                      backref->filetype, &backref->index, 1, 0);
2200                 if (ret < 0)
2201                         goto out;
2202         }
2203 out:
2204         btrfs_release_path(path);
2205         return ret;
2206 }
2207
2208 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2209                                struct btrfs_root *root,
2210                                struct btrfs_path *path,
2211                                struct inode_record *rec)
2212 {
2213         char namebuf[BTRFS_NAME_LEN] = {0};
2214         u8 type = 0;
2215         int namelen = 0;
2216         int name_recovered = 0;
2217         int type_recovered = 0;
2218         int ret = 0;
2219
2220         /*
2221          * Get file name and type first before these invalid inode ref
2222          * are deleted by remove_all_invalid_backref()
2223          */
2224         name_recovered = !find_file_name(rec, namebuf, &namelen);
2225         type_recovered = !find_file_type(rec, &type);
2226
2227         if (!name_recovered) {
2228                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2229                        rec->ino, rec->ino);
2230                 namelen = count_digits(rec->ino);
2231                 sprintf(namebuf, "%llu", rec->ino);
2232                 name_recovered = 1;
2233         }
2234         if (!type_recovered) {
2235                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2236                        rec->ino);
2237                 type = BTRFS_FT_REG_FILE;
2238                 type_recovered = 1;
2239         }
2240
2241         ret = reset_nlink(trans, root, path, rec);
2242         if (ret < 0) {
2243                 fprintf(stderr,
2244                         "Failed to reset nlink for inode %llu: %s\n",
2245                         rec->ino, strerror(-ret));
2246                 goto out;
2247         }
2248
2249         if (rec->found_link == 0) {
2250                 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
2251                                               namebuf, namelen, type,
2252                                               (u64 *)&rec->found_link);
2253                 if (ret)
2254                         goto out;
2255         }
2256         printf("Fixed the nlink of inode %llu\n", rec->ino);
2257 out:
2258         /*
2259          * Clear the flag anyway, or we will loop forever for the same inode
2260          * as it will not be removed from the bad inode list and the dead loop
2261          * happens.
2262          */
2263         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2264         btrfs_release_path(path);
2265         return ret;
2266 }
2267
2268 /*
2269  * Check if there is any normal(reg or prealloc) file extent for given
2270  * ino.
2271  * This is used to determine the file type when neither its dir_index/item or
2272  * inode_item exists.
2273  *
2274  * This will *NOT* report error, if any error happens, just consider it does
2275  * not have any normal file extent.
2276  */
2277 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2278 {
2279         struct btrfs_path path;
2280         struct btrfs_key key;
2281         struct btrfs_key found_key;
2282         struct btrfs_file_extent_item *fi;
2283         u8 type;
2284         int ret = 0;
2285
2286         btrfs_init_path(&path);
2287         key.objectid = ino;
2288         key.type = BTRFS_EXTENT_DATA_KEY;
2289         key.offset = 0;
2290
2291         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2292         if (ret < 0) {
2293                 ret = 0;
2294                 goto out;
2295         }
2296         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2297                 ret = btrfs_next_leaf(root, &path);
2298                 if (ret) {
2299                         ret = 0;
2300                         goto out;
2301                 }
2302         }
2303         while (1) {
2304                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2305                                       path.slots[0]);
2306                 if (found_key.objectid != ino ||
2307                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2308                         break;
2309                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2310                                     struct btrfs_file_extent_item);
2311                 type = btrfs_file_extent_type(path.nodes[0], fi);
2312                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2313                         ret = 1;
2314                         goto out;
2315                 }
2316         }
2317 out:
2318         btrfs_release_path(&path);
2319         return ret;
2320 }
2321
2322 static u32 btrfs_type_to_imode(u8 type)
2323 {
2324         static u32 imode_by_btrfs_type[] = {
2325                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2326                 [BTRFS_FT_DIR]          = S_IFDIR,
2327                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2328                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2329                 [BTRFS_FT_FIFO]         = S_IFIFO,
2330                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2331                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2332         };
2333
2334         return imode_by_btrfs_type[(type)];
2335 }
2336
2337 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2338                                 struct btrfs_root *root,
2339                                 struct btrfs_path *path,
2340                                 struct inode_record *rec)
2341 {
2342         u8 filetype;
2343         u32 mode = 0700;
2344         int type_recovered = 0;
2345         int ret = 0;
2346
2347         printf("Trying to rebuild inode:%llu\n", rec->ino);
2348
2349         type_recovered = !find_file_type(rec, &filetype);
2350
2351         /*
2352          * Try to determine inode type if type not found.
2353          *
2354          * For found regular file extent, it must be FILE.
2355          * For found dir_item/index, it must be DIR.
2356          *
2357          * For undetermined one, use FILE as fallback.
2358          *
2359          * TODO:
2360          * 1. If found backref(inode_index/item is already handled) to it,
2361          *    it must be DIR.
2362          *    Need new inode-inode ref structure to allow search for that.
2363          */
2364         if (!type_recovered) {
2365                 if (rec->found_file_extent &&
2366                     find_normal_file_extent(root, rec->ino)) {
2367                         type_recovered = 1;
2368                         filetype = BTRFS_FT_REG_FILE;
2369                 } else if (rec->found_dir_item) {
2370                         type_recovered = 1;
2371                         filetype = BTRFS_FT_DIR;
2372                 } else if (!list_empty(&rec->orphan_extents)) {
2373                         type_recovered = 1;
2374                         filetype = BTRFS_FT_REG_FILE;
2375                 } else{
2376                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2377                                rec->ino);
2378                         type_recovered = 1;
2379                         filetype = BTRFS_FT_REG_FILE;
2380                 }
2381         }
2382
2383         ret = btrfs_new_inode(trans, root, rec->ino,
2384                               mode | btrfs_type_to_imode(filetype));
2385         if (ret < 0)
2386                 goto out;
2387
2388         /*
2389          * Here inode rebuild is done, we only rebuild the inode item,
2390          * don't repair the nlink(like move to lost+found).
2391          * That is the job of nlink repair.
2392          *
2393          * We just fill the record and return
2394          */
2395         rec->found_dir_item = 1;
2396         rec->imode = mode | btrfs_type_to_imode(filetype);
2397         rec->nlink = 0;
2398         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2399         /* Ensure the inode_nlinks repair function will be called */
2400         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2401 out:
2402         return ret;
2403 }
2404
2405 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2406                                       struct btrfs_root *root,
2407                                       struct btrfs_path *path,
2408                                       struct inode_record *rec)
2409 {
2410         struct orphan_data_extent *orphan;
2411         struct orphan_data_extent *tmp;
2412         int ret = 0;
2413
2414         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2415                 /*
2416                  * Check for conflicting file extents
2417                  *
2418                  * Here we don't know whether the extents is compressed or not,
2419                  * so we can only assume it not compressed nor data offset,
2420                  * and use its disk_len as extent length.
2421                  */
2422                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2423                                        orphan->offset, orphan->disk_len, 0);
2424                 btrfs_release_path(path);
2425                 if (ret < 0)
2426                         goto out;
2427                 if (!ret) {
2428                         fprintf(stderr,
2429                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2430                                 orphan->disk_bytenr, orphan->disk_len);
2431                         ret = btrfs_free_extent(trans,
2432                                         root->fs_info->extent_root,
2433                                         orphan->disk_bytenr, orphan->disk_len,
2434                                         0, root->objectid, orphan->objectid,
2435                                         orphan->offset);
2436                         if (ret < 0)
2437                                 goto out;
2438                 }
2439                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2440                                 orphan->offset, orphan->disk_bytenr,
2441                                 orphan->disk_len, orphan->disk_len);
2442                 if (ret < 0)
2443                         goto out;
2444
2445                 /* Update file size info */
2446                 rec->found_size += orphan->disk_len;
2447                 if (rec->found_size == rec->nbytes)
2448                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2449
2450                 /* Update the file extent hole info too */
2451                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2452                                            orphan->disk_len);
2453                 if (ret < 0)
2454                         goto out;
2455                 if (RB_EMPTY_ROOT(&rec->holes))
2456                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2457
2458                 list_del(&orphan->list);
2459                 free(orphan);
2460         }
2461         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2462 out:
2463         return ret;
2464 }
2465
2466 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2467                                         struct btrfs_root *root,
2468                                         struct btrfs_path *path,
2469                                         struct inode_record *rec)
2470 {
2471         struct rb_node *node;
2472         struct file_extent_hole *hole;
2473         int found = 0;
2474         int ret = 0;
2475
2476         node = rb_first(&rec->holes);
2477
2478         while (node) {
2479                 found = 1;
2480                 hole = rb_entry(node, struct file_extent_hole, node);
2481                 ret = btrfs_punch_hole(trans, root, rec->ino,
2482                                        hole->start, hole->len);
2483                 if (ret < 0)
2484                         goto out;
2485                 ret = del_file_extent_hole(&rec->holes, hole->start,
2486                                            hole->len);
2487                 if (ret < 0)
2488                         goto out;
2489                 if (RB_EMPTY_ROOT(&rec->holes))
2490                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2491                 node = rb_first(&rec->holes);
2492         }
2493         /* special case for a file losing all its file extent */
2494         if (!found) {
2495                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2496                                        round_up(rec->isize,
2497                                                 root->fs_info->sectorsize));
2498                 if (ret < 0)
2499                         goto out;
2500         }
2501         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2502                rec->ino, root->objectid);
2503 out:
2504         return ret;
2505 }
2506
2507 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2508 {
2509         struct btrfs_trans_handle *trans;
2510         struct btrfs_path path;
2511         int ret = 0;
2512
2513         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2514                              I_ERR_NO_ORPHAN_ITEM |
2515                              I_ERR_LINK_COUNT_WRONG |
2516                              I_ERR_NO_INODE_ITEM |
2517                              I_ERR_FILE_EXTENT_ORPHAN |
2518                              I_ERR_FILE_EXTENT_DISCOUNT|
2519                              I_ERR_FILE_NBYTES_WRONG)))
2520                 return rec->errors;
2521
2522         /*
2523          * For nlink repair, it may create a dir and add link, so
2524          * 2 for parent(256)'s dir_index and dir_item
2525          * 2 for lost+found dir's inode_item and inode_ref
2526          * 1 for the new inode_ref of the file
2527          * 2 for lost+found dir's dir_index and dir_item for the file
2528          */
2529         trans = btrfs_start_transaction(root, 7);
2530         if (IS_ERR(trans))
2531                 return PTR_ERR(trans);
2532
2533         btrfs_init_path(&path);
2534         if (rec->errors & I_ERR_NO_INODE_ITEM)
2535                 ret = repair_inode_no_item(trans, root, &path, rec);
2536         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2537                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2538         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2539                 ret = repair_inode_discount_extent(trans, root, &path, rec);
2540         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2541                 ret = repair_inode_isize(trans, root, &path, rec);
2542         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2543                 ret = repair_inode_orphan_item(trans, root, &path, rec);
2544         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2545                 ret = repair_inode_nlinks(trans, root, &path, rec);
2546         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2547                 ret = repair_inode_nbytes(trans, root, &path, rec);
2548         btrfs_commit_transaction(trans, root);
2549         btrfs_release_path(&path);
2550         return ret;
2551 }
2552
2553 static int check_inode_recs(struct btrfs_root *root,
2554                             struct cache_tree *inode_cache)
2555 {
2556         struct cache_extent *cache;
2557         struct ptr_node *node;
2558         struct inode_record *rec;
2559         struct inode_backref *backref;
2560         int stage = 0;
2561         int ret = 0;
2562         int err = 0;
2563         u64 error = 0;
2564         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2565
2566         if (btrfs_root_refs(&root->root_item) == 0) {
2567                 if (!cache_tree_empty(inode_cache))
2568                         fprintf(stderr, "warning line %d\n", __LINE__);
2569                 return 0;
2570         }
2571
2572         /*
2573          * We need to repair backrefs first because we could change some of the
2574          * errors in the inode recs.
2575          *
2576          * We also need to go through and delete invalid backrefs first and then
2577          * add the correct ones second.  We do this because we may get EEXIST
2578          * when adding back the correct index because we hadn't yet deleted the
2579          * invalid index.
2580          *
2581          * For example, if we were missing a dir index then the directories
2582          * isize would be wrong, so if we fixed the isize to what we thought it
2583          * would be and then fixed the backref we'd still have a invalid fs, so
2584          * we need to add back the dir index and then check to see if the isize
2585          * is still wrong.
2586          */
2587         while (stage < 3) {
2588                 stage++;
2589                 if (stage == 3 && !err)
2590                         break;
2591
2592                 cache = search_cache_extent(inode_cache, 0);
2593                 while (repair && cache) {
2594                         node = container_of(cache, struct ptr_node, cache);
2595                         rec = node->data;
2596                         cache = next_cache_extent(cache);
2597
2598                         /* Need to free everything up and rescan */
2599                         if (stage == 3) {
2600                                 remove_cache_extent(inode_cache, &node->cache);
2601                                 free(node);
2602                                 free_inode_rec(rec);
2603                                 continue;
2604                         }
2605
2606                         if (list_empty(&rec->backrefs))
2607                                 continue;
2608
2609                         ret = repair_inode_backrefs(root, rec, inode_cache,
2610                                                     stage == 1);
2611                         if (ret < 0) {
2612                                 err = ret;
2613                                 stage = 2;
2614                                 break;
2615                         } if (ret > 0) {
2616                                 err = -EAGAIN;
2617                         }
2618                 }
2619         }
2620         if (err)
2621                 return err;
2622
2623         rec = get_inode_rec(inode_cache, root_dirid, 0);
2624         BUG_ON(IS_ERR(rec));
2625         if (rec) {
2626                 ret = check_root_dir(rec);
2627                 if (ret) {
2628                         fprintf(stderr, "root %llu root dir %llu error\n",
2629                                 (unsigned long long)root->root_key.objectid,
2630                                 (unsigned long long)root_dirid);
2631                         print_inode_error(root, rec);
2632                         error++;
2633                 }
2634         } else {
2635                 if (repair) {
2636                         struct btrfs_trans_handle *trans;
2637
2638                         trans = btrfs_start_transaction(root, 1);
2639                         if (IS_ERR(trans)) {
2640                                 err = PTR_ERR(trans);
2641                                 return err;
2642                         }
2643
2644                         fprintf(stderr,
2645                                 "root %llu missing its root dir, recreating\n",
2646                                 (unsigned long long)root->objectid);
2647
2648                         ret = btrfs_make_root_dir(trans, root, root_dirid);
2649                         BUG_ON(ret);
2650
2651                         btrfs_commit_transaction(trans, root);
2652                         return -EAGAIN;
2653                 }
2654
2655                 fprintf(stderr, "root %llu root dir %llu not found\n",
2656                         (unsigned long long)root->root_key.objectid,
2657                         (unsigned long long)root_dirid);
2658         }
2659
2660         while (1) {
2661                 cache = search_cache_extent(inode_cache, 0);
2662                 if (!cache)
2663                         break;
2664                 node = container_of(cache, struct ptr_node, cache);
2665                 rec = node->data;
2666                 remove_cache_extent(inode_cache, &node->cache);
2667                 free(node);
2668                 if (rec->ino == root_dirid ||
2669                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
2670                         free_inode_rec(rec);
2671                         continue;
2672                 }
2673
2674                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2675                         ret = check_orphan_item(root, rec->ino);
2676                         if (ret == 0)
2677                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2678                         if (can_free_inode_rec(rec)) {
2679                                 free_inode_rec(rec);
2680                                 continue;
2681                         }
2682                 }
2683
2684                 if (!rec->found_inode_item)
2685                         rec->errors |= I_ERR_NO_INODE_ITEM;
2686                 if (rec->found_link != rec->nlink)
2687                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2688                 if (repair) {
2689                         ret = try_repair_inode(root, rec);
2690                         if (ret == 0 && can_free_inode_rec(rec)) {
2691                                 free_inode_rec(rec);
2692                                 continue;
2693                         }
2694                         ret = 0;
2695                 }
2696
2697                 if (!(repair && ret == 0))
2698                         error++;
2699                 print_inode_error(root, rec);
2700                 list_for_each_entry(backref, &rec->backrefs, list) {
2701                         if (!backref->found_dir_item)
2702                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
2703                         if (!backref->found_dir_index)
2704                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
2705                         if (!backref->found_inode_ref)
2706                                 backref->errors |= REF_ERR_NO_INODE_REF;
2707                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
2708                                 " namelen %u name %s filetype %d errors %x",
2709                                 (unsigned long long)backref->dir,
2710                                 (unsigned long long)backref->index,
2711                                 backref->namelen, backref->name,
2712                                 backref->filetype, backref->errors);
2713                         print_ref_error(backref->errors);
2714                 }
2715                 free_inode_rec(rec);
2716         }
2717         return (error > 0) ? -1 : 0;
2718 }
2719
2720 static struct root_record *get_root_rec(struct cache_tree *root_cache,
2721                                         u64 objectid)
2722 {
2723         struct cache_extent *cache;
2724         struct root_record *rec = NULL;
2725         int ret;
2726
2727         cache = lookup_cache_extent(root_cache, objectid, 1);
2728         if (cache) {
2729                 rec = container_of(cache, struct root_record, cache);
2730         } else {
2731                 rec = calloc(1, sizeof(*rec));
2732                 if (!rec)
2733                         return ERR_PTR(-ENOMEM);
2734                 rec->objectid = objectid;
2735                 INIT_LIST_HEAD(&rec->backrefs);
2736                 rec->cache.start = objectid;
2737                 rec->cache.size = 1;
2738
2739                 ret = insert_cache_extent(root_cache, &rec->cache);
2740                 if (ret)
2741                         return ERR_PTR(-EEXIST);
2742         }
2743         return rec;
2744 }
2745
2746 static struct root_backref *get_root_backref(struct root_record *rec,
2747                                              u64 ref_root, u64 dir, u64 index,
2748                                              const char *name, int namelen)
2749 {
2750         struct root_backref *backref;
2751
2752         list_for_each_entry(backref, &rec->backrefs, list) {
2753                 if (backref->ref_root != ref_root || backref->dir != dir ||
2754                     backref->namelen != namelen)
2755                         continue;
2756                 if (memcmp(name, backref->name, namelen))
2757                         continue;
2758                 return backref;
2759         }
2760
2761         backref = calloc(1, sizeof(*backref) + namelen + 1);
2762         if (!backref)
2763                 return NULL;
2764         backref->ref_root = ref_root;
2765         backref->dir = dir;
2766         backref->index = index;
2767         backref->namelen = namelen;
2768         memcpy(backref->name, name, namelen);
2769         backref->name[namelen] = '\0';
2770         list_add_tail(&backref->list, &rec->backrefs);
2771         return backref;
2772 }
2773
2774 static void free_root_record(struct cache_extent *cache)
2775 {
2776         struct root_record *rec;
2777         struct root_backref *backref;
2778
2779         rec = container_of(cache, struct root_record, cache);
2780         while (!list_empty(&rec->backrefs)) {
2781                 backref = to_root_backref(rec->backrefs.next);
2782                 list_del(&backref->list);
2783                 free(backref);
2784         }
2785
2786         free(rec);
2787 }
2788
2789 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
2790
2791 static int add_root_backref(struct cache_tree *root_cache,
2792                             u64 root_id, u64 ref_root, u64 dir, u64 index,
2793                             const char *name, int namelen,
2794                             int item_type, int errors)
2795 {
2796         struct root_record *rec;
2797         struct root_backref *backref;
2798
2799         rec = get_root_rec(root_cache, root_id);
2800         BUG_ON(IS_ERR(rec));
2801         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
2802         BUG_ON(!backref);
2803
2804         backref->errors |= errors;
2805
2806         if (item_type != BTRFS_DIR_ITEM_KEY) {
2807                 if (backref->found_dir_index || backref->found_back_ref ||
2808                     backref->found_forward_ref) {
2809                         if (backref->index != index)
2810                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
2811                 } else {
2812                         backref->index = index;
2813                 }
2814         }
2815
2816         if (item_type == BTRFS_DIR_ITEM_KEY) {
2817                 if (backref->found_forward_ref)
2818                         rec->found_ref++;
2819                 backref->found_dir_item = 1;
2820         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
2821                 backref->found_dir_index = 1;
2822         } else if (item_type == BTRFS_ROOT_REF_KEY) {
2823                 if (backref->found_forward_ref)
2824                         backref->errors |= REF_ERR_DUP_ROOT_REF;
2825                 else if (backref->found_dir_item)
2826                         rec->found_ref++;
2827                 backref->found_forward_ref = 1;
2828         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
2829                 if (backref->found_back_ref)
2830                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
2831                 backref->found_back_ref = 1;
2832         } else {
2833                 BUG_ON(1);
2834         }
2835
2836         if (backref->found_forward_ref && backref->found_dir_item)
2837                 backref->reachable = 1;
2838         return 0;
2839 }
2840
2841 static int merge_root_recs(struct btrfs_root *root,
2842                            struct cache_tree *src_cache,
2843                            struct cache_tree *dst_cache)
2844 {
2845         struct cache_extent *cache;
2846         struct ptr_node *node;
2847         struct inode_record *rec;
2848         struct inode_backref *backref;
2849         int ret = 0;
2850
2851         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
2852                 free_inode_recs_tree(src_cache);
2853                 return 0;
2854         }
2855
2856         while (1) {
2857                 cache = search_cache_extent(src_cache, 0);
2858                 if (!cache)
2859                         break;
2860                 node = container_of(cache, struct ptr_node, cache);
2861                 rec = node->data;
2862                 remove_cache_extent(src_cache, &node->cache);
2863                 free(node);
2864
2865                 ret = is_child_root(root, root->objectid, rec->ino);
2866                 if (ret < 0)
2867                         break;
2868                 else if (ret == 0)
2869                         goto skip;
2870
2871                 list_for_each_entry(backref, &rec->backrefs, list) {
2872                         BUG_ON(backref->found_inode_ref);
2873                         if (backref->found_dir_item)
2874                                 add_root_backref(dst_cache, rec->ino,
2875                                         root->root_key.objectid, backref->dir,
2876                                         backref->index, backref->name,
2877                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
2878                                         backref->errors);
2879                         if (backref->found_dir_index)
2880                                 add_root_backref(dst_cache, rec->ino,
2881                                         root->root_key.objectid, backref->dir,
2882                                         backref->index, backref->name,
2883                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
2884                                         backref->errors);
2885                 }
2886 skip:
2887                 free_inode_rec(rec);
2888         }
2889         if (ret < 0)
2890                 return ret;
2891         return 0;
2892 }
2893
2894 static int check_root_refs(struct btrfs_root *root,
2895                            struct cache_tree *root_cache)
2896 {
2897         struct root_record *rec;
2898         struct root_record *ref_root;
2899         struct root_backref *backref;
2900         struct cache_extent *cache;
2901         int loop = 1;
2902         int ret;
2903         int error;
2904         int errors = 0;
2905
2906         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
2907         BUG_ON(IS_ERR(rec));
2908         rec->found_ref = 1;
2909
2910         /* fixme: this can not detect circular references */
2911         while (loop) {
2912                 loop = 0;
2913                 cache = search_cache_extent(root_cache, 0);
2914                 while (1) {
2915                         if (!cache)
2916                                 break;
2917                         rec = container_of(cache, struct root_record, cache);
2918                         cache = next_cache_extent(cache);
2919
2920                         if (rec->found_ref == 0)
2921                                 continue;
2922
2923                         list_for_each_entry(backref, &rec->backrefs, list) {
2924                                 if (!backref->reachable)
2925                                         continue;
2926
2927                                 ref_root = get_root_rec(root_cache,
2928                                                         backref->ref_root);
2929                                 BUG_ON(IS_ERR(ref_root));
2930                                 if (ref_root->found_ref > 0)
2931                                         continue;
2932
2933                                 backref->reachable = 0;
2934                                 rec->found_ref--;
2935                                 if (rec->found_ref == 0)
2936                                         loop = 1;
2937                         }
2938                 }
2939         }
2940
2941         cache = search_cache_extent(root_cache, 0);
2942         while (1) {
2943                 if (!cache)
2944                         break;
2945                 rec = container_of(cache, struct root_record, cache);
2946                 cache = next_cache_extent(cache);
2947
2948                 if (rec->found_ref == 0 &&
2949                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
2950                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
2951                         ret = check_orphan_item(root->fs_info->tree_root,
2952                                                 rec->objectid);
2953                         if (ret == 0)
2954                                 continue;
2955
2956                         /*
2957                          * If we don't have a root item then we likely just have
2958                          * a dir item in a snapshot for this root but no actual
2959                          * ref key or anything so it's meaningless.
2960                          */
2961                         if (!rec->found_root_item)
2962                                 continue;
2963                         errors++;
2964                         fprintf(stderr, "fs tree %llu not referenced\n",
2965                                 (unsigned long long)rec->objectid);
2966                 }
2967
2968                 error = 0;
2969                 if (rec->found_ref > 0 && !rec->found_root_item)
2970                         error = 1;
2971                 list_for_each_entry(backref, &rec->backrefs, list) {
2972                         if (!backref->found_dir_item)
2973                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
2974                         if (!backref->found_dir_index)
2975                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
2976                         if (!backref->found_back_ref)
2977                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
2978                         if (!backref->found_forward_ref)
2979                                 backref->errors |= REF_ERR_NO_ROOT_REF;
2980                         if (backref->reachable && backref->errors)
2981                                 error = 1;
2982                 }
2983                 if (!error)
2984                         continue;
2985
2986                 errors++;
2987                 fprintf(stderr, "fs tree %llu refs %u %s\n",
2988                         (unsigned long long)rec->objectid, rec->found_ref,
2989                          rec->found_root_item ? "" : "not found");
2990
2991                 list_for_each_entry(backref, &rec->backrefs, list) {
2992                         if (!backref->reachable)
2993                                 continue;
2994                         if (!backref->errors && rec->found_root_item)
2995                                 continue;
2996                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
2997                                 " index %llu namelen %u name %s errors %x\n",
2998                                 (unsigned long long)backref->ref_root,
2999                                 (unsigned long long)backref->dir,
3000                                 (unsigned long long)backref->index,
3001                                 backref->namelen, backref->name,
3002                                 backref->errors);
3003                         print_ref_error(backref->errors);
3004                 }
3005         }
3006         return errors > 0 ? 1 : 0;
3007 }
3008
3009 static int process_root_ref(struct extent_buffer *eb, int slot,
3010                             struct btrfs_key *key,
3011                             struct cache_tree *root_cache)
3012 {
3013         u64 dirid;
3014         u64 index;
3015         u32 len;
3016         u32 name_len;
3017         struct btrfs_root_ref *ref;
3018         char namebuf[BTRFS_NAME_LEN];
3019         int error;
3020
3021         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3022
3023         dirid = btrfs_root_ref_dirid(eb, ref);
3024         index = btrfs_root_ref_sequence(eb, ref);
3025         name_len = btrfs_root_ref_name_len(eb, ref);
3026
3027         if (name_len <= BTRFS_NAME_LEN) {
3028                 len = name_len;
3029                 error = 0;
3030         } else {
3031                 len = BTRFS_NAME_LEN;
3032                 error = REF_ERR_NAME_TOO_LONG;
3033         }
3034         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3035
3036         if (key->type == BTRFS_ROOT_REF_KEY) {
3037                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3038                                  index, namebuf, len, key->type, error);
3039         } else {
3040                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3041                                  index, namebuf, len, key->type, error);
3042         }
3043         return 0;
3044 }
3045
3046 static void free_corrupt_block(struct cache_extent *cache)
3047 {
3048         struct btrfs_corrupt_block *corrupt;
3049
3050         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3051         free(corrupt);
3052 }
3053
3054 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3055
3056 /*
3057  * Repair the btree of the given root.
3058  *
3059  * The fix is to remove the node key in corrupt_blocks cache_tree.
3060  * and rebalance the tree.
3061  * After the fix, the btree should be writeable.
3062  */
3063 static int repair_btree(struct btrfs_root *root,
3064                         struct cache_tree *corrupt_blocks)
3065 {
3066         struct btrfs_trans_handle *trans;
3067         struct btrfs_path path;
3068         struct btrfs_corrupt_block *corrupt;
3069         struct cache_extent *cache;
3070         struct btrfs_key key;
3071         u64 offset;
3072         int level;
3073         int ret = 0;
3074
3075         if (cache_tree_empty(corrupt_blocks))
3076                 return 0;
3077
3078         trans = btrfs_start_transaction(root, 1);
3079         if (IS_ERR(trans)) {
3080                 ret = PTR_ERR(trans);
3081                 fprintf(stderr, "Error starting transaction: %s\n",
3082                         strerror(-ret));
3083                 return ret;
3084         }
3085         btrfs_init_path(&path);
3086         cache = first_cache_extent(corrupt_blocks);
3087         while (cache) {
3088                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3089                                        cache);
3090                 level = corrupt->level;
3091                 path.lowest_level = level;
3092                 key.objectid = corrupt->key.objectid;
3093                 key.type = corrupt->key.type;
3094                 key.offset = corrupt->key.offset;
3095
3096                 /*
3097                  * Here we don't want to do any tree balance, since it may
3098                  * cause a balance with corrupted brother leaf/node,
3099                  * so ins_len set to 0 here.
3100                  * Balance will be done after all corrupt node/leaf is deleted.
3101                  */
3102                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3103                 if (ret < 0)
3104                         goto out;
3105                 offset = btrfs_node_blockptr(path.nodes[level],
3106                                              path.slots[level]);
3107
3108                 /* Remove the ptr */
3109                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3110                 if (ret < 0)
3111                         goto out;
3112                 /*
3113                  * Remove the corresponding extent
3114                  * return value is not concerned.
3115                  */
3116                 btrfs_release_path(&path);
3117                 ret = btrfs_free_extent(trans, root, offset,
3118                                 root->fs_info->nodesize, 0,
3119                                 root->root_key.objectid, level - 1, 0);
3120                 cache = next_cache_extent(cache);
3121         }
3122
3123         /* Balance the btree using btrfs_search_slot() */
3124         cache = first_cache_extent(corrupt_blocks);
3125         while (cache) {
3126                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3127                                        cache);
3128                 memcpy(&key, &corrupt->key, sizeof(key));
3129                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3130                 if (ret < 0)
3131                         goto out;
3132                 /* return will always >0 since it won't find the item */
3133                 ret = 0;
3134                 btrfs_release_path(&path);
3135                 cache = next_cache_extent(cache);
3136         }
3137 out:
3138         btrfs_commit_transaction(trans, root);
3139         btrfs_release_path(&path);
3140         return ret;
3141 }
3142
3143 static int check_fs_root(struct btrfs_root *root,
3144                          struct cache_tree *root_cache,
3145                          struct walk_control *wc)
3146 {
3147         int ret = 0;
3148         int err = 0;
3149         int wret;
3150         int level;
3151         struct btrfs_path path;
3152         struct shared_node root_node;
3153         struct root_record *rec;
3154         struct btrfs_root_item *root_item = &root->root_item;
3155         struct cache_tree corrupt_blocks;
3156         struct orphan_data_extent *orphan;
3157         struct orphan_data_extent *tmp;
3158         enum btrfs_tree_block_status status;
3159         struct node_refs nrefs;
3160
3161         /*
3162          * Reuse the corrupt_block cache tree to record corrupted tree block
3163          *
3164          * Unlike the usage in extent tree check, here we do it in a per
3165          * fs/subvol tree base.
3166          */
3167         cache_tree_init(&corrupt_blocks);
3168         root->fs_info->corrupt_blocks = &corrupt_blocks;
3169
3170         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3171                 rec = get_root_rec(root_cache, root->root_key.objectid);
3172                 BUG_ON(IS_ERR(rec));
3173                 if (btrfs_root_refs(root_item) > 0)
3174                         rec->found_root_item = 1;
3175         }
3176
3177         btrfs_init_path(&path);
3178         memset(&root_node, 0, sizeof(root_node));
3179         cache_tree_init(&root_node.root_cache);
3180         cache_tree_init(&root_node.inode_cache);
3181         memset(&nrefs, 0, sizeof(nrefs));
3182
3183         /* Move the orphan extent record to corresponding inode_record */
3184         list_for_each_entry_safe(orphan, tmp,
3185                                  &root->orphan_data_extents, list) {
3186                 struct inode_record *inode;
3187
3188                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3189                                       1);
3190                 BUG_ON(IS_ERR(inode));
3191                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3192                 list_move(&orphan->list, &inode->orphan_extents);
3193         }
3194
3195         level = btrfs_header_level(root->node);
3196         memset(wc->nodes, 0, sizeof(wc->nodes));
3197         wc->nodes[level] = &root_node;
3198         wc->active_node = level;
3199         wc->root_level = level;
3200
3201         /* We may not have checked the root block, lets do that now */
3202         if (btrfs_is_leaf(root->node))
3203                 status = btrfs_check_leaf(root, NULL, root->node);
3204         else
3205                 status = btrfs_check_node(root, NULL, root->node);
3206         if (status != BTRFS_TREE_BLOCK_CLEAN)
3207                 return -EIO;
3208
3209         if (btrfs_root_refs(root_item) > 0 ||
3210             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3211                 path.nodes[level] = root->node;
3212                 extent_buffer_get(root->node);
3213                 path.slots[level] = 0;
3214         } else {
3215                 struct btrfs_key key;
3216                 struct btrfs_disk_key found_key;
3217
3218                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3219                 level = root_item->drop_level;
3220                 path.lowest_level = level;
3221                 if (level > btrfs_header_level(root->node) ||
3222                     level >= BTRFS_MAX_LEVEL) {
3223                         error("ignoring invalid drop level: %u", level);
3224                         goto skip_walking;
3225                 }
3226                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3227                 if (wret < 0)
3228                         goto skip_walking;
3229                 btrfs_node_key(path.nodes[level], &found_key,
3230                                 path.slots[level]);
3231                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3232                                         sizeof(found_key)));
3233         }
3234
3235         while (1) {
3236                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3237                 if (wret < 0)
3238                         ret = wret;
3239                 if (wret != 0)
3240                         break;
3241
3242                 wret = walk_up_tree(root, &path, wc, &level);
3243                 if (wret < 0)
3244                         ret = wret;
3245                 if (wret != 0)
3246                         break;
3247         }
3248 skip_walking:
3249         btrfs_release_path(&path);
3250
3251         if (!cache_tree_empty(&corrupt_blocks)) {
3252                 struct cache_extent *cache;
3253                 struct btrfs_corrupt_block *corrupt;
3254
3255                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3256                        root->root_key.objectid);
3257                 cache = first_cache_extent(&corrupt_blocks);
3258                 while (cache) {
3259                         corrupt = container_of(cache,
3260                                                struct btrfs_corrupt_block,
3261                                                cache);
3262                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3263                                cache->start, corrupt->level,
3264                                corrupt->key.objectid, corrupt->key.type,
3265                                corrupt->key.offset);
3266                         cache = next_cache_extent(cache);
3267                 }
3268                 if (repair) {
3269                         printf("Try to repair the btree for root %llu\n",
3270                                root->root_key.objectid);
3271                         ret = repair_btree(root, &corrupt_blocks);
3272                         if (ret < 0)
3273                                 fprintf(stderr, "Failed to repair btree: %s\n",
3274                                         strerror(-ret));
3275                         if (!ret)
3276                                 printf("Btree for root %llu is fixed\n",
3277                                        root->root_key.objectid);
3278                 }
3279         }
3280
3281         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3282         if (err < 0)
3283                 ret = err;
3284
3285         if (root_node.current) {
3286                 root_node.current->checked = 1;
3287                 maybe_free_inode_rec(&root_node.inode_cache,
3288                                 root_node.current);
3289         }
3290
3291         err = check_inode_recs(root, &root_node.inode_cache);
3292         if (!ret)
3293                 ret = err;
3294
3295         free_corrupt_blocks_tree(&corrupt_blocks);
3296         root->fs_info->corrupt_blocks = NULL;
3297         free_orphan_data_extents(&root->orphan_data_extents);
3298         return ret;
3299 }
3300
3301 static int check_fs_roots(struct btrfs_fs_info *fs_info,
3302                           struct cache_tree *root_cache)
3303 {
3304         struct btrfs_path path;
3305         struct btrfs_key key;
3306         struct walk_control wc;
3307         struct extent_buffer *leaf, *tree_node;
3308         struct btrfs_root *tmp_root;
3309         struct btrfs_root *tree_root = fs_info->tree_root;
3310         int ret;
3311         int err = 0;
3312
3313         if (ctx.progress_enabled) {
3314                 ctx.tp = TASK_FS_ROOTS;
3315                 task_start(ctx.info);
3316         }
3317
3318         /*
3319          * Just in case we made any changes to the extent tree that weren't
3320          * reflected into the free space cache yet.
3321          */
3322         if (repair)
3323                 reset_cached_block_groups(fs_info);
3324         memset(&wc, 0, sizeof(wc));
3325         cache_tree_init(&wc.shared);
3326         btrfs_init_path(&path);
3327
3328 again:
3329         key.offset = 0;
3330         key.objectid = 0;
3331         key.type = BTRFS_ROOT_ITEM_KEY;
3332         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3333         if (ret < 0) {
3334                 err = 1;
3335                 goto out;
3336         }
3337         tree_node = tree_root->node;
3338         while (1) {
3339                 if (tree_node != tree_root->node) {
3340                         free_root_recs_tree(root_cache);
3341                         btrfs_release_path(&path);
3342                         goto again;
3343                 }
3344                 leaf = path.nodes[0];
3345                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3346                         ret = btrfs_next_leaf(tree_root, &path);
3347                         if (ret) {
3348                                 if (ret < 0)
3349                                         err = 1;
3350                                 break;
3351                         }
3352                         leaf = path.nodes[0];
3353                 }
3354                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3355                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3356                     fs_root_objectid(key.objectid)) {
3357                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3358                                 tmp_root = btrfs_read_fs_root_no_cache(
3359                                                 fs_info, &key);
3360                         } else {
3361                                 key.offset = (u64)-1;
3362                                 tmp_root = btrfs_read_fs_root(
3363                                                 fs_info, &key);
3364                         }
3365                         if (IS_ERR(tmp_root)) {
3366                                 err = 1;
3367                                 goto next;
3368                         }
3369                         ret = check_fs_root(tmp_root, root_cache, &wc);
3370                         if (ret == -EAGAIN) {
3371                                 free_root_recs_tree(root_cache);
3372                                 btrfs_release_path(&path);
3373                                 goto again;
3374                         }
3375                         if (ret)
3376                                 err = 1;
3377                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3378                                 btrfs_free_fs_root(tmp_root);
3379                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3380                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3381                         process_root_ref(leaf, path.slots[0], &key,
3382                                          root_cache);
3383                 }
3384 next:
3385                 path.slots[0]++;
3386         }
3387 out:
3388         btrfs_release_path(&path);
3389         if (err)
3390                 free_extent_cache_tree(&wc.shared);
3391         if (!cache_tree_empty(&wc.shared))
3392                 fprintf(stderr, "warning line %d\n", __LINE__);
3393
3394         task_stop(ctx.info);
3395
3396         return err;
3397 }
3398
3399 static struct tree_backref *find_tree_backref(struct extent_record *rec,
3400                                                 u64 parent, u64 root)
3401 {
3402         struct rb_node *node;
3403         struct tree_backref *back = NULL;
3404         struct tree_backref match = {
3405                 .node = {
3406                         .is_data = 0,
3407                 },
3408         };
3409
3410         if (parent) {
3411                 match.parent = parent;
3412                 match.node.full_backref = 1;
3413         } else {
3414                 match.root = root;
3415         }
3416
3417         node = rb_search(&rec->backref_tree, &match.node.node,
3418                          (rb_compare_keys)compare_extent_backref, NULL);
3419         if (node)
3420                 back = to_tree_backref(rb_node_to_extent_backref(node));
3421
3422         return back;
3423 }
3424
3425 static struct data_backref *find_data_backref(struct extent_record *rec,
3426                                                 u64 parent, u64 root,
3427                                                 u64 owner, u64 offset,
3428                                                 int found_ref,
3429                                                 u64 disk_bytenr, u64 bytes)
3430 {
3431         struct rb_node *node;
3432         struct data_backref *back = NULL;
3433         struct data_backref match = {
3434                 .node = {
3435                         .is_data = 1,
3436                 },
3437                 .owner = owner,
3438                 .offset = offset,
3439                 .bytes = bytes,
3440                 .found_ref = found_ref,
3441                 .disk_bytenr = disk_bytenr,
3442         };
3443
3444         if (parent) {
3445                 match.parent = parent;
3446                 match.node.full_backref = 1;
3447         } else {
3448                 match.root = root;
3449         }
3450
3451         node = rb_search(&rec->backref_tree, &match.node.node,
3452                          (rb_compare_keys)compare_extent_backref, NULL);
3453         if (node)
3454                 back = to_data_backref(rb_node_to_extent_backref(node));
3455
3456         return back;
3457 }
3458
3459 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
3460                           struct cache_tree *root_cache)
3461 {
3462         int ret;
3463
3464         if (!ctx.progress_enabled)
3465                 fprintf(stderr, "checking fs roots\n");
3466         if (check_mode == CHECK_MODE_LOWMEM)
3467                 ret = check_fs_roots_lowmem(fs_info);
3468         else
3469                 ret = check_fs_roots(fs_info, root_cache);
3470
3471         return ret;
3472 }
3473
3474 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3475 {
3476         struct extent_backref *back, *tmp;
3477         struct tree_backref *tback;
3478         struct data_backref *dback;
3479         u64 found = 0;
3480         int err = 0;
3481
3482         rbtree_postorder_for_each_entry_safe(back, tmp,
3483                                              &rec->backref_tree, node) {
3484                 if (!back->found_extent_tree) {
3485                         err = 1;
3486                         if (!print_errs)
3487                                 goto out;
3488                         if (back->is_data) {
3489                                 dback = to_data_backref(back);
3490                                 fprintf(stderr,
3491 "data backref %llu %s %llu owner %llu offset %llu num_refs %lu not found in extent tree\n",
3492                                         (unsigned long long)rec->start,
3493                                         back->full_backref ?
3494                                         "parent" : "root",
3495                                         back->full_backref ?
3496                                         (unsigned long long)dback->parent :
3497                                         (unsigned long long)dback->root,
3498                                         (unsigned long long)dback->owner,
3499                                         (unsigned long long)dback->offset,
3500                                         (unsigned long)dback->num_refs);
3501                         } else {
3502                                 tback = to_tree_backref(back);
3503                                 fprintf(stderr,
3504 "tree backref %llu parent %llu root %llu not found in extent tree\n",
3505                                         (unsigned long long)rec->start,
3506                                         (unsigned long long)tback->parent,
3507                                         (unsigned long long)tback->root);
3508                         }
3509                 }
3510                 if (!back->is_data && !back->found_ref) {
3511                         err = 1;
3512                         if (!print_errs)
3513                                 goto out;
3514                         tback = to_tree_backref(back);
3515                         fprintf(stderr,
3516                                 "backref %llu %s %llu not referenced back %p\n",
3517                                 (unsigned long long)rec->start,
3518                                 back->full_backref ? "parent" : "root",
3519                                 back->full_backref ?
3520                                 (unsigned long long)tback->parent :
3521                                 (unsigned long long)tback->root, back);
3522                 }
3523                 if (back->is_data) {
3524                         dback = to_data_backref(back);
3525                         if (dback->found_ref != dback->num_refs) {
3526                                 err = 1;
3527                                 if (!print_errs)
3528                                         goto out;
3529                                 fprintf(stderr,
3530 "incorrect local backref count on %llu %s %llu owner %llu offset %llu found %u wanted %u back %p\n",
3531                                         (unsigned long long)rec->start,
3532                                         back->full_backref ?
3533                                         "parent" : "root",
3534                                         back->full_backref ?
3535                                         (unsigned long long)dback->parent :
3536                                         (unsigned long long)dback->root,
3537                                         (unsigned long long)dback->owner,
3538                                         (unsigned long long)dback->offset,
3539                                         dback->found_ref, dback->num_refs,
3540                                         back);
3541                         }
3542                         if (dback->disk_bytenr != rec->start) {
3543                                 err = 1;
3544                                 if (!print_errs)
3545                                         goto out;
3546                                 fprintf(stderr,
3547 "backref disk bytenr does not match extent record, bytenr=%llu, ref bytenr=%llu\n",
3548                                         (unsigned long long)rec->start,
3549                                         (unsigned long long)dback->disk_bytenr);
3550                         }
3551
3552                         if (dback->bytes != rec->nr) {
3553                                 err = 1;
3554                                 if (!print_errs)
3555                                         goto out;
3556                                 fprintf(stderr,
3557 "backref bytes do not match extent backref, bytenr=%llu, ref bytes=%llu, backref bytes=%llu\n",
3558                                         (unsigned long long)rec->start,
3559                                         (unsigned long long)rec->nr,
3560                                         (unsigned long long)dback->bytes);
3561                         }
3562                 }
3563                 if (!back->is_data) {
3564                         found += 1;
3565                 } else {
3566                         dback = to_data_backref(back);
3567                         found += dback->found_ref;
3568                 }
3569         }
3570         if (found != rec->refs) {
3571                 err = 1;
3572                 if (!print_errs)
3573                         goto out;
3574                 fprintf(stderr,
3575         "incorrect global backref count on %llu found %llu wanted %llu\n",
3576                         (unsigned long long)rec->start,
3577                         (unsigned long long)found,
3578                         (unsigned long long)rec->refs);
3579         }
3580 out:
3581         return err;
3582 }
3583
/*
 * rb_free_nodes() callback: free the extent_backref that embeds @node.
 */
static void __free_one_backref(struct rb_node *node)
{
	free(rb_node_to_extent_backref(node));
}
3590
3591 static void free_all_extent_backrefs(struct extent_record *rec)
3592 {
3593         rb_free_nodes(&rec->backref_tree, __free_one_backref);
3594 }
3595
3596 static void free_extent_record_cache(struct cache_tree *extent_cache)
3597 {
3598         struct cache_extent *cache;
3599         struct extent_record *rec;
3600
3601         while (1) {
3602                 cache = first_cache_extent(extent_cache);
3603                 if (!cache)
3604                         break;
3605                 rec = container_of(cache, struct extent_record, cache);
3606                 remove_cache_extent(extent_cache, cache);
3607                 free_all_extent_backrefs(rec);
3608                 free(rec);
3609         }
3610 }
3611
3612 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3613                                  struct extent_record *rec)
3614 {
3615         if (rec->content_checked && rec->owner_ref_checked &&
3616             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3617             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3618             !rec->bad_full_backref && !rec->crossing_stripes &&
3619             !rec->wrong_chunk_type) {
3620                 remove_cache_extent(extent_cache, &rec->cache);
3621                 free_all_extent_backrefs(rec);
3622                 list_del_init(&rec->list);
3623                 free(rec);
3624         }
3625         return 0;
3626 }
3627
3628 static int check_owner_ref(struct btrfs_root *root,
3629                             struct extent_record *rec,
3630                             struct extent_buffer *buf)
3631 {
3632         struct extent_backref *node, *tmp;
3633         struct tree_backref *back;
3634         struct btrfs_root *ref_root;
3635         struct btrfs_key key;
3636         struct btrfs_path path;
3637         struct extent_buffer *parent;
3638         int level;
3639         int found = 0;
3640         int ret;
3641
3642         rbtree_postorder_for_each_entry_safe(node, tmp,
3643                                              &rec->backref_tree, node) {
3644                 if (node->is_data)
3645                         continue;
3646                 if (!node->found_ref)
3647                         continue;
3648                 if (node->full_backref)
3649                         continue;
3650                 back = to_tree_backref(node);
3651                 if (btrfs_header_owner(buf) == back->root)
3652                         return 0;
3653         }
3654         BUG_ON(rec->is_root);
3655
3656         /* try to find the block by search corresponding fs tree */
3657         key.objectid = btrfs_header_owner(buf);
3658         key.type = BTRFS_ROOT_ITEM_KEY;
3659         key.offset = (u64)-1;
3660
3661         ref_root = btrfs_read_fs_root(root->fs_info, &key);
3662         if (IS_ERR(ref_root))
3663                 return 1;
3664
3665         level = btrfs_header_level(buf);
3666         if (level == 0)
3667                 btrfs_item_key_to_cpu(buf, &key, 0);
3668         else
3669                 btrfs_node_key_to_cpu(buf, &key, 0);
3670
3671         btrfs_init_path(&path);
3672         path.lowest_level = level + 1;
3673         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3674         if (ret < 0)
3675                 return 0;
3676
3677         parent = path.nodes[level + 1];
3678         if (parent && buf->start == btrfs_node_blockptr(parent,
3679                                                         path.slots[level + 1]))
3680                 found = 1;
3681
3682         btrfs_release_path(&path);
3683         return found ? 0 : 1;
3684 }
3685
3686 static int is_extent_tree_record(struct extent_record *rec)
3687 {
3688         struct extent_backref *node, *tmp;
3689         struct tree_backref *back;
3690         int is_extent = 0;
3691
3692         rbtree_postorder_for_each_entry_safe(node, tmp,
3693                                              &rec->backref_tree, node) {
3694                 if (node->is_data)
3695                         return 0;
3696                 back = to_tree_backref(node);
3697                 if (node->full_backref)
3698                         return 0;
3699                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3700                         is_extent = 1;
3701         }
3702         return is_extent;
3703 }
3704
3705
3706 static int record_bad_block_io(struct btrfs_fs_info *info,
3707                                struct cache_tree *extent_cache,
3708                                u64 start, u64 len)
3709 {
3710         struct extent_record *rec;
3711         struct cache_extent *cache;
3712         struct btrfs_key key;
3713
3714         cache = lookup_cache_extent(extent_cache, start, len);
3715         if (!cache)
3716                 return 0;
3717
3718         rec = container_of(cache, struct extent_record, cache);
3719         if (!is_extent_tree_record(rec))
3720                 return 0;
3721
3722         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3723         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
3724 }
3725
3726 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3727                        struct extent_buffer *buf, int slot)
3728 {
3729         if (btrfs_header_level(buf)) {
3730                 struct btrfs_key_ptr ptr1, ptr2;
3731
3732                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3733                                    sizeof(struct btrfs_key_ptr));
3734                 read_extent_buffer(buf, &ptr2,
3735                                    btrfs_node_key_ptr_offset(slot + 1),
3736                                    sizeof(struct btrfs_key_ptr));
3737                 write_extent_buffer(buf, &ptr1,
3738                                     btrfs_node_key_ptr_offset(slot + 1),
3739                                     sizeof(struct btrfs_key_ptr));
3740                 write_extent_buffer(buf, &ptr2,
3741                                     btrfs_node_key_ptr_offset(slot),
3742                                     sizeof(struct btrfs_key_ptr));
3743                 if (slot == 0) {
3744                         struct btrfs_disk_key key;
3745
3746                         btrfs_node_key(buf, &key, 0);
3747                         btrfs_fixup_low_keys(root, path, &key,
3748                                              btrfs_header_level(buf) + 1);
3749                 }
3750         } else {
3751                 struct btrfs_item *item1, *item2;
3752                 struct btrfs_key k1, k2;
3753                 char *item1_data, *item2_data;
3754                 u32 item1_offset, item2_offset, item1_size, item2_size;
3755
3756                 item1 = btrfs_item_nr(slot);
3757                 item2 = btrfs_item_nr(slot + 1);
3758                 btrfs_item_key_to_cpu(buf, &k1, slot);
3759                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
3760                 item1_offset = btrfs_item_offset(buf, item1);
3761                 item2_offset = btrfs_item_offset(buf, item2);
3762                 item1_size = btrfs_item_size(buf, item1);
3763                 item2_size = btrfs_item_size(buf, item2);
3764
3765                 item1_data = malloc(item1_size);
3766                 if (!item1_data)
3767                         return -ENOMEM;
3768                 item2_data = malloc(item2_size);
3769                 if (!item2_data) {
3770                         free(item1_data);
3771                         return -ENOMEM;
3772                 }
3773
3774                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
3775                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
3776
3777                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
3778                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
3779                 free(item1_data);
3780                 free(item2_data);
3781
3782                 btrfs_set_item_offset(buf, item1, item2_offset);
3783                 btrfs_set_item_offset(buf, item2, item1_offset);
3784                 btrfs_set_item_size(buf, item1, item2_size);
3785                 btrfs_set_item_size(buf, item2, item1_size);
3786
3787                 path->slots[0] = slot;
3788                 btrfs_set_item_key_unsafe(root, path, &k2);
3789                 path->slots[0] = slot + 1;
3790                 btrfs_set_item_key_unsafe(root, path, &k1);
3791         }
3792         return 0;
3793 }
3794
3795 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
3796 {
3797         struct extent_buffer *buf;
3798         struct btrfs_key k1, k2;
3799         int i;
3800         int level = path->lowest_level;
3801         int ret = -EIO;
3802
3803         buf = path->nodes[level];
3804         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
3805                 if (level) {
3806                         btrfs_node_key_to_cpu(buf, &k1, i);
3807                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
3808                 } else {
3809                         btrfs_item_key_to_cpu(buf, &k1, i);
3810                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
3811                 }
3812                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
3813                         continue;
3814                 ret = swap_values(root, path, buf, i);
3815                 if (ret)
3816                         break;
3817                 btrfs_mark_buffer_dirty(buf);
3818                 i = 0;
3819         }
3820         return ret;
3821 }
3822
3823 static int delete_bogus_item(struct btrfs_root *root,
3824                              struct btrfs_path *path,
3825                              struct extent_buffer *buf, int slot)
3826 {
3827         struct btrfs_key key;
3828         int nritems = btrfs_header_nritems(buf);
3829
3830         btrfs_item_key_to_cpu(buf, &key, slot);
3831
3832         /* These are all the keys we can deal with missing. */
3833         if (key.type != BTRFS_DIR_INDEX_KEY &&
3834             key.type != BTRFS_EXTENT_ITEM_KEY &&
3835             key.type != BTRFS_METADATA_ITEM_KEY &&
3836             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
3837             key.type != BTRFS_EXTENT_DATA_REF_KEY)
3838                 return -1;
3839
3840         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
3841                (unsigned long long)key.objectid, key.type,
3842                (unsigned long long)key.offset, slot, buf->start);
3843         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
3844                               btrfs_item_nr_offset(slot + 1),
3845                               sizeof(struct btrfs_item) *
3846                               (nritems - slot - 1));
3847         btrfs_set_header_nritems(buf, nritems - 1);
3848         if (slot == 0) {
3849                 struct btrfs_disk_key disk_key;
3850
3851                 btrfs_item_key(buf, &disk_key, 0);
3852                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
3853         }
3854         btrfs_mark_buffer_dirty(buf);
3855         return 0;
3856 }
3857
3858 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
3859 {
3860         struct extent_buffer *buf;
3861         int i;
3862         int ret = 0;
3863
3864         /* We should only get this for leaves */
3865         BUG_ON(path->lowest_level);
3866         buf = path->nodes[0];
3867 again:
3868         for (i = 0; i < btrfs_header_nritems(buf); i++) {
3869                 unsigned int shift = 0, offset;
3870
3871                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
3872                     BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
3873                         if (btrfs_item_end_nr(buf, i) >
3874                             BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
3875                                 ret = delete_bogus_item(root, path, buf, i);
3876                                 if (!ret)
3877                                         goto again;
3878                                 fprintf(stderr,
3879                                 "item is off the end of the leaf, can't fix\n");
3880                                 ret = -EIO;
3881                                 break;
3882                         }
3883                         shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
3884                                 btrfs_item_end_nr(buf, i);
3885                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
3886                            btrfs_item_offset_nr(buf, i - 1)) {
3887                         if (btrfs_item_end_nr(buf, i) >
3888                             btrfs_item_offset_nr(buf, i - 1)) {
3889                                 ret = delete_bogus_item(root, path, buf, i);
3890                                 if (!ret)
3891                                         goto again;
3892                                 fprintf(stderr, "items overlap, can't fix\n");
3893                                 ret = -EIO;
3894                                 break;
3895                         }
3896                         shift = btrfs_item_offset_nr(buf, i - 1) -
3897                                 btrfs_item_end_nr(buf, i);
3898                 }
3899                 if (!shift)
3900                         continue;
3901
3902                 printf("Shifting item nr %d by %u bytes in block %llu\n",
3903                        i, shift, (unsigned long long)buf->start);
3904                 offset = btrfs_item_offset_nr(buf, i);
3905                 memmove_extent_buffer(buf,
3906                                       btrfs_leaf_data(buf) + offset + shift,
3907                                       btrfs_leaf_data(buf) + offset,
3908                                       btrfs_item_size_nr(buf, i));
3909                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
3910                                       offset + shift);
3911                 btrfs_mark_buffer_dirty(buf);
3912         }
3913
3914         /*
3915          * We may have moved things, in which case we want to exit so we don't
3916          * write those changes out.  Once we have proper abort functionality in
3917          * progs this can be changed to something nicer.
3918          */
3919         BUG_ON(ret);
3920         return ret;
3921 }
3922
3923 /*
3924  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
3925  * then just return -EIO.
3926  */
3927 static int try_to_fix_bad_block(struct btrfs_root *root,
3928                                 struct extent_buffer *buf,
3929                                 enum btrfs_tree_block_status status)
3930 {
3931         struct btrfs_trans_handle *trans;
3932         struct ulist *roots;
3933         struct ulist_node *node;
3934         struct btrfs_root *search_root;
3935         struct btrfs_path path;
3936         struct ulist_iterator iter;
3937         struct btrfs_key root_key, key;
3938         int ret;
3939
3940         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
3941             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
3942                 return -EIO;
3943
3944         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
3945         if (ret)
3946                 return -EIO;
3947
3948         btrfs_init_path(&path);
3949         ULIST_ITER_INIT(&iter);
3950         while ((node = ulist_next(roots, &iter))) {
3951                 root_key.objectid = node->val;
3952                 root_key.type = BTRFS_ROOT_ITEM_KEY;
3953                 root_key.offset = (u64)-1;
3954
3955                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
3956                 if (IS_ERR(root)) {
3957                         ret = -EIO;
3958                         break;
3959                 }
3960
3961
3962                 trans = btrfs_start_transaction(search_root, 0);
3963                 if (IS_ERR(trans)) {
3964                         ret = PTR_ERR(trans);
3965                         break;
3966                 }
3967
3968                 path.lowest_level = btrfs_header_level(buf);
3969                 path.skip_check_block = 1;
3970                 if (path.lowest_level)
3971                         btrfs_node_key_to_cpu(buf, &key, 0);
3972                 else
3973                         btrfs_item_key_to_cpu(buf, &key, 0);
3974                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
3975                 if (ret) {
3976                         ret = -EIO;
3977                         btrfs_commit_transaction(trans, search_root);
3978                         break;
3979                 }
3980                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
3981                         ret = fix_key_order(search_root, &path);
3982                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
3983                         ret = fix_item_offset(search_root, &path);
3984                 if (ret) {
3985                         btrfs_commit_transaction(trans, search_root);
3986                         break;
3987                 }
3988                 btrfs_release_path(&path);
3989                 btrfs_commit_transaction(trans, search_root);
3990         }
3991         ulist_free(roots);
3992         btrfs_release_path(&path);
3993         return ret;
3994 }
3995
3996 static int check_block(struct btrfs_root *root,
3997                        struct cache_tree *extent_cache,
3998                        struct extent_buffer *buf, u64 flags)
3999 {
4000         struct extent_record *rec;
4001         struct cache_extent *cache;
4002         struct btrfs_key key;
4003         enum btrfs_tree_block_status status;
4004         int ret = 0;
4005         int level;
4006
4007         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4008         if (!cache)
4009                 return 1;
4010         rec = container_of(cache, struct extent_record, cache);
4011         rec->generation = btrfs_header_generation(buf);
4012
4013         level = btrfs_header_level(buf);
4014         if (btrfs_header_nritems(buf) > 0) {
4015
4016                 if (level == 0)
4017                         btrfs_item_key_to_cpu(buf, &key, 0);
4018                 else
4019                         btrfs_node_key_to_cpu(buf, &key, 0);
4020
4021                 rec->info_objectid = key.objectid;
4022         }
4023         rec->info_level = level;
4024
4025         if (btrfs_is_leaf(buf))
4026                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4027         else
4028                 status = btrfs_check_node(root, &rec->parent_key, buf);
4029
4030         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4031                 if (repair)
4032                         status = try_to_fix_bad_block(root, buf, status);
4033                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4034                         ret = -EIO;
4035                         fprintf(stderr, "bad block %llu\n",
4036                                 (unsigned long long)buf->start);
4037                 } else {
4038                         /*
4039                          * Signal to callers we need to start the scan over
4040                          * again since we'll have cowed blocks.
4041                          */
4042                         ret = -EAGAIN;
4043                 }
4044         } else {
4045                 rec->content_checked = 1;
4046                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4047                         rec->owner_ref_checked = 1;
4048                 else {
4049                         ret = check_owner_ref(root, rec, buf);
4050                         if (!ret)
4051                                 rec->owner_ref_checked = 1;
4052                 }
4053         }
4054         if (!ret)
4055                 maybe_free_extent_rec(extent_cache, rec);
4056         return ret;
4057 }
4058
#if 0
/*
 * Compiled out by the surrounding "#if 0".
 *
 * Walks the rec->backrefs list looking for a tree (non-data) backref.
 * With @parent > 0 only full backrefs are considered and matched on
 * back->parent; otherwise only non-full backrefs are considered and matched
 * on back->root.  Returns the match or NULL.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
                                                u64 parent, u64 root)
{
        struct list_head *cur = rec->backrefs.next;
        struct extent_backref *node;
        struct tree_backref *back;

        while (cur != &rec->backrefs) {
                node = to_extent_backref(cur);
                cur = cur->next;
                if (node->is_data)
                        continue;
                back = to_tree_backref(node);
                if (parent > 0) {
                        if (!node->full_backref)
                                continue;
                        if (parent == back->parent)
                                return back;
                } else {
                        if (node->full_backref)
                                continue;
                        if (back->root == root)
                                return back;
                }
        }
        return NULL;
}
#endif
4088
4089 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4090                                                 u64 parent, u64 root)
4091 {
4092         struct tree_backref *ref = malloc(sizeof(*ref));
4093
4094         if (!ref)
4095                 return NULL;
4096         memset(&ref->node, 0, sizeof(ref->node));
4097         if (parent > 0) {
4098                 ref->parent = parent;
4099                 ref->node.full_backref = 1;
4100         } else {
4101                 ref->root = root;
4102                 ref->node.full_backref = 0;
4103         }
4104
4105         return ref;
4106 }
4107
#if 0
/*
 * Compiled out by the surrounding "#if 0".
 *
 * Walks the rec->backrefs list looking for a data backref.  With
 * @parent > 0 only full backrefs are considered and matched on back->parent.
 * Otherwise non-full backrefs are matched on root, owner and offset; when
 * both @found_ref and the candidate's found_ref are set, a candidate whose
 * bytes or disk_bytenr differ from @bytes/@disk_bytenr is skipped.
 * Returns the match or NULL.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
                                                u64 parent, u64 root,
                                                u64 owner, u64 offset,
                                                int found_ref,
                                                u64 disk_bytenr, u64 bytes)
{
        struct list_head *cur = rec->backrefs.next;
        struct extent_backref *node;
        struct data_backref *back;

        while (cur != &rec->backrefs) {
                node = to_extent_backref(cur);
                cur = cur->next;
                if (!node->is_data)
                        continue;
                back = to_data_backref(node);
                if (parent > 0) {
                        if (!node->full_backref)
                                continue;
                        if (parent == back->parent)
                                return back;
                } else {
                        if (node->full_backref)
                                continue;
                        if (back->root == root && back->owner == owner &&
                            back->offset == offset) {
                                if (found_ref && node->found_ref &&
                                    (back->bytes != bytes ||
                                    back->disk_bytenr != disk_bytenr))
                                        continue;
                                return back;
                        }
                }
        }
        return NULL;
}
#endif
4146
4147 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4148                                                 u64 parent, u64 root,
4149                                                 u64 owner, u64 offset,
4150                                                 u64 max_size)
4151 {
4152         struct data_backref *ref = malloc(sizeof(*ref));
4153
4154         if (!ref)
4155                 return NULL;
4156         memset(&ref->node, 0, sizeof(ref->node));
4157         ref->node.is_data = 1;
4158
4159         if (parent > 0) {
4160                 ref->parent = parent;
4161                 ref->owner = 0;
4162                 ref->offset = 0;
4163                 ref->node.full_backref = 1;
4164         } else {
4165                 ref->root = root;
4166                 ref->owner = owner;
4167                 ref->offset = offset;
4168                 ref->node.full_backref = 0;
4169         }
4170         ref->bytes = max_size;
4171         ref->found_ref = 0;
4172         ref->num_refs = 0;
4173         if (max_size > rec->max_size)
4174                 rec->max_size = max_size;
4175         return ref;
4176 }
4177
4178 /* Check if the type of extent matches with its chunk */
4179 static void check_extent_type(struct extent_record *rec)
4180 {
4181         struct btrfs_block_group_cache *bg_cache;
4182
4183         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4184         if (!bg_cache)
4185                 return;
4186
4187         /* data extent, check chunk directly*/
4188         if (!rec->metadata) {
4189                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4190                         rec->wrong_chunk_type = 1;
4191                 return;
4192         }
4193
4194         /* metadata extent, check the obvious case first */
4195         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4196                                  BTRFS_BLOCK_GROUP_METADATA))) {
4197                 rec->wrong_chunk_type = 1;
4198                 return;
4199         }
4200
4201         /*
4202          * Check SYSTEM extent, as it's also marked as metadata, we can only
4203          * make sure it's a SYSTEM extent by its backref
4204          */
4205         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4206                 struct extent_backref *node;
4207                 struct tree_backref *tback;
4208                 u64 bg_type;
4209
4210                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4211                 if (node->is_data) {
4212                         /* tree block shouldn't have data backref */
4213                         rec->wrong_chunk_type = 1;
4214                         return;
4215                 }
4216                 tback = container_of(node, struct tree_backref, node);
4217
4218                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4219                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4220                 else
4221                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4222                 if (!(bg_cache->flags & bg_type))
4223                         rec->wrong_chunk_type = 1;
4224         }
4225 }
4226
4227 /*
4228  * Allocate a new extent record, fill default values from @tmpl and insert int
4229  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4230  * the cache, otherwise it fails.
4231  */
4232 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4233                 struct extent_record *tmpl)
4234 {
4235         struct extent_record *rec;
4236         int ret = 0;
4237
4238         BUG_ON(tmpl->max_size == 0);
4239         rec = malloc(sizeof(*rec));
4240         if (!rec)
4241                 return -ENOMEM;
4242         rec->start = tmpl->start;
4243         rec->max_size = tmpl->max_size;
4244         rec->nr = max(tmpl->nr, tmpl->max_size);
4245         rec->found_rec = tmpl->found_rec;
4246         rec->content_checked = tmpl->content_checked;
4247         rec->owner_ref_checked = tmpl->owner_ref_checked;
4248         rec->num_duplicates = 0;
4249         rec->metadata = tmpl->metadata;
4250         rec->flag_block_full_backref = FLAG_UNSET;
4251         rec->bad_full_backref = 0;
4252         rec->crossing_stripes = 0;
4253         rec->wrong_chunk_type = 0;
4254         rec->is_root = tmpl->is_root;
4255         rec->refs = tmpl->refs;
4256         rec->extent_item_refs = tmpl->extent_item_refs;
4257         rec->parent_generation = tmpl->parent_generation;
4258         INIT_LIST_HEAD(&rec->backrefs);
4259         INIT_LIST_HEAD(&rec->dups);
4260         INIT_LIST_HEAD(&rec->list);
4261         rec->backref_tree = RB_ROOT;
4262         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4263         rec->cache.start = tmpl->start;
4264         rec->cache.size = tmpl->nr;
4265         ret = insert_cache_extent(extent_cache, &rec->cache);
4266         if (ret) {
4267                 free(rec);
4268                 return ret;
4269         }
4270         bytes_used += rec->nr;
4271
4272         if (tmpl->metadata)
4273                 rec->crossing_stripes = check_crossing_stripes(global_info,
4274                                 rec->start, global_info->nodesize);
4275         check_extent_type(rec);
4276         return ret;
4277 }
4278
4279 /*
4280  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4281  * some are hints:
4282  * - refs              - if found, increase refs
4283  * - is_root           - if found, set
4284  * - content_checked   - if found, set
4285  * - owner_ref_checked - if found, set
4286  *
4287  * If not found, create a new one, initialize and insert.
4288  */
4289 static int add_extent_rec(struct cache_tree *extent_cache,
4290                 struct extent_record *tmpl)
4291 {
4292         struct extent_record *rec;
4293         struct cache_extent *cache;
4294         int ret = 0;
4295         int dup = 0;
4296
4297         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4298         if (cache) {
4299                 rec = container_of(cache, struct extent_record, cache);
4300                 if (tmpl->refs)
4301                         rec->refs++;
4302                 if (rec->nr == 1)
4303                         rec->nr = max(tmpl->nr, tmpl->max_size);
4304
4305                 /*
4306                  * We need to make sure to reset nr to whatever the extent
4307                  * record says was the real size, this way we can compare it to
4308                  * the backrefs.
4309                  */
4310                 if (tmpl->found_rec) {
4311                         if (tmpl->start != rec->start || rec->found_rec) {
4312                                 struct extent_record *tmp;
4313
4314                                 dup = 1;
4315                                 if (list_empty(&rec->list))
4316                                         list_add_tail(&rec->list,
4317                                                       &duplicate_extents);
4318
4319                                 /*
4320                                  * We have to do this song and dance in case we
4321                                  * find an extent record that falls inside of
4322                                  * our current extent record but does not have
4323                                  * the same objectid.
4324                                  */
4325                                 tmp = malloc(sizeof(*tmp));
4326                                 if (!tmp)
4327                                         return -ENOMEM;
4328                                 tmp->start = tmpl->start;
4329                                 tmp->max_size = tmpl->max_size;
4330                                 tmp->nr = tmpl->nr;
4331                                 tmp->found_rec = 1;
4332                                 tmp->metadata = tmpl->metadata;
4333                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4334                                 INIT_LIST_HEAD(&tmp->list);
4335                                 list_add_tail(&tmp->list, &rec->dups);
4336                                 rec->num_duplicates++;
4337                         } else {
4338                                 rec->nr = tmpl->nr;
4339                                 rec->found_rec = 1;
4340                         }
4341                 }
4342
4343                 if (tmpl->extent_item_refs && !dup) {
4344                         if (rec->extent_item_refs) {
4345                                 fprintf(stderr,
4346                         "block %llu rec extent_item_refs %llu, passed %llu\n",
4347                                         (unsigned long long)tmpl->start,
4348                                         (unsigned long long)
4349                                                         rec->extent_item_refs,
4350                                         (unsigned long long)
4351                                                         tmpl->extent_item_refs);
4352                         }
4353                         rec->extent_item_refs = tmpl->extent_item_refs;
4354                 }
4355                 if (tmpl->is_root)
4356                         rec->is_root = 1;
4357                 if (tmpl->content_checked)
4358                         rec->content_checked = 1;
4359                 if (tmpl->owner_ref_checked)
4360                         rec->owner_ref_checked = 1;
4361                 memcpy(&rec->parent_key, &tmpl->parent_key,
4362                                 sizeof(tmpl->parent_key));
4363                 if (tmpl->parent_generation)
4364                         rec->parent_generation = tmpl->parent_generation;
4365                 if (rec->max_size < tmpl->max_size)
4366                         rec->max_size = tmpl->max_size;
4367
4368                 /*
4369                  * A metadata extent can't cross stripe_len boundary, otherwise
4370                  * kernel scrub won't be able to handle it.
4371                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4372                  * it.
4373                  */
4374                 if (tmpl->metadata)
4375                         rec->crossing_stripes = check_crossing_stripes(
4376                                         global_info, rec->start,
4377                                         global_info->nodesize);
4378                 check_extent_type(rec);
4379                 maybe_free_extent_rec(extent_cache, rec);
4380                 return ret;
4381         }
4382
4383         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4384
4385         return ret;
4386 }
4387
4388 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4389                             u64 parent, u64 root, int found_ref)
4390 {
4391         struct extent_record *rec;
4392         struct tree_backref *back;
4393         struct cache_extent *cache;
4394         int ret;
4395         bool insert = false;
4396
4397         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4398         if (!cache) {
4399                 struct extent_record tmpl;
4400
4401                 memset(&tmpl, 0, sizeof(tmpl));
4402                 tmpl.start = bytenr;
4403                 tmpl.nr = 1;
4404                 tmpl.metadata = 1;
4405                 tmpl.max_size = 1;
4406
4407                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4408                 if (ret)
4409                         return ret;
4410
4411                 /* really a bug in cache_extent implement now */
4412                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4413                 if (!cache)
4414                         return -ENOENT;
4415         }
4416
4417         rec = container_of(cache, struct extent_record, cache);
4418         if (rec->start != bytenr) {
4419                 /*
4420                  * Several cause, from unaligned bytenr to over lapping extents
4421                  */
4422                 return -EEXIST;
4423         }
4424
4425         back = find_tree_backref(rec, parent, root);
4426         if (!back) {
4427                 back = alloc_tree_backref(rec, parent, root);
4428                 if (!back)
4429                         return -ENOMEM;
4430                 insert = true;
4431         }
4432
4433         if (found_ref) {
4434                 if (back->node.found_ref) {
4435                         fprintf(stderr,
4436         "Extent back ref already exists for %llu parent %llu root %llu\n",
4437                                 (unsigned long long)bytenr,
4438                                 (unsigned long long)parent,
4439                                 (unsigned long long)root);
4440                 }
4441                 back->node.found_ref = 1;
4442         } else {
4443                 if (back->node.found_extent_tree) {
4444                         fprintf(stderr,
4445         "extent back ref already exists for %llu parent %llu root %llu\n",
4446                                 (unsigned long long)bytenr,
4447                                 (unsigned long long)parent,
4448                                 (unsigned long long)root);
4449                 }
4450                 back->node.found_extent_tree = 1;
4451         }
4452         if (insert)
4453                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
4454                         compare_extent_backref));
4455         check_extent_type(rec);
4456         maybe_free_extent_rec(extent_cache, rec);
4457         return 0;
4458 }
4459
4460 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4461                             u64 parent, u64 root, u64 owner, u64 offset,
4462                             u32 num_refs, int found_ref, u64 max_size)
4463 {
4464         struct extent_record *rec;
4465         struct data_backref *back;
4466         struct cache_extent *cache;
4467         int ret;
4468         bool insert = false;
4469
4470         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4471         if (!cache) {
4472                 struct extent_record tmpl;
4473
4474                 memset(&tmpl, 0, sizeof(tmpl));
4475                 tmpl.start = bytenr;
4476                 tmpl.nr = 1;
4477                 tmpl.max_size = max_size;
4478
4479                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4480                 if (ret)
4481                         return ret;
4482
4483                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4484                 if (!cache)
4485                         abort();
4486         }
4487
4488         rec = container_of(cache, struct extent_record, cache);
4489         if (rec->max_size < max_size)
4490                 rec->max_size = max_size;
4491
4492         /*
4493          * If found_ref is set then max_size is the real size and must match the
4494          * existing refs.  So if we have already found a ref then we need to
4495          * make sure that this ref matches the existing one, otherwise we need
4496          * to add a new backref so we can notice that the backrefs don't match
4497          * and we need to figure out who is telling the truth.  This is to
4498          * account for that awful fsync bug I introduced where we'd end up with
4499          * a btrfs_file_extent_item that would have its length include multiple
4500          * prealloc extents or point inside of a prealloc extent.
4501          */
4502         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4503                                  bytenr, max_size);
4504         if (!back) {
4505                 back = alloc_data_backref(rec, parent, root, owner, offset,
4506                                           max_size);
4507                 BUG_ON(!back);
4508                 insert = true;
4509         }
4510
4511         if (found_ref) {
4512                 BUG_ON(num_refs != 1);
4513                 if (back->node.found_ref)
4514                         BUG_ON(back->bytes != max_size);
4515                 back->node.found_ref = 1;
4516                 back->found_ref += 1;
4517                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
4518                         back->bytes = max_size;
4519                         back->disk_bytenr = bytenr;
4520
4521                         /* Need to reinsert if not already in the tree */
4522                         if (!insert) {
4523                                 rb_erase(&back->node.node, &rec->backref_tree);
4524                                 insert = true;
4525                         }
4526                 }
4527                 rec->refs += 1;
4528                 rec->content_checked = 1;
4529                 rec->owner_ref_checked = 1;
4530         } else {
4531                 if (back->node.found_extent_tree) {
4532                         fprintf(stderr,
4533 "Extent back ref already exists for %llu parent %llu root %llu owner %llu offset %llu num_refs %lu\n",
4534                                 (unsigned long long)bytenr,
4535                                 (unsigned long long)parent,
4536                                 (unsigned long long)root,
4537                                 (unsigned long long)owner,
4538                                 (unsigned long long)offset,
4539                                 (unsigned long)num_refs);
4540                 }
4541                 back->num_refs = num_refs;
4542                 back->node.found_extent_tree = 1;
4543         }
4544         if (insert)
4545                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
4546                         compare_extent_backref));
4547
4548         maybe_free_extent_rec(extent_cache, rec);
4549         return 0;
4550 }
4551
4552 static int add_pending(struct cache_tree *pending,
4553                        struct cache_tree *seen, u64 bytenr, u32 size)
4554 {
4555         int ret;
4556
4557         ret = add_cache_extent(seen, bytenr, size);
4558         if (ret)
4559                 return ret;
4560         add_cache_extent(pending, bytenr, size);
4561         return 0;
4562 }
4563
4564 static int pick_next_pending(struct cache_tree *pending,
4565                         struct cache_tree *reada,
4566                         struct cache_tree *nodes,
4567                         u64 last, struct block_info *bits, int bits_nr,
4568                         int *reada_bits)
4569 {
4570         unsigned long node_start = last;
4571         struct cache_extent *cache;
4572         int ret;
4573
4574         cache = search_cache_extent(reada, 0);
4575         if (cache) {
4576                 bits[0].start = cache->start;
4577                 bits[0].size = cache->size;
4578                 *reada_bits = 1;
4579                 return 1;
4580         }
4581         *reada_bits = 0;
4582         if (node_start > 32768)
4583                 node_start -= 32768;
4584
4585         cache = search_cache_extent(nodes, node_start);
4586         if (!cache)
4587                 cache = search_cache_extent(nodes, 0);
4588
4589         if (!cache) {
4590                 cache = search_cache_extent(pending, 0);
4591                 if (!cache)
4592                         return 0;
4593                 ret = 0;
4594                 do {
4595                         bits[ret].start = cache->start;
4596                         bits[ret].size = cache->size;
4597                         cache = next_cache_extent(cache);
4598                         ret++;
4599                 } while (cache && ret < bits_nr);
4600                 return ret;
4601         }
4602
4603         ret = 0;
4604         do {
4605                 bits[ret].start = cache->start;
4606                 bits[ret].size = cache->size;
4607                 cache = next_cache_extent(cache);
4608                 ret++;
4609         } while (cache && ret < bits_nr);
4610
4611         if (bits_nr - ret > 8) {
4612                 u64 lookup = bits[0].start + bits[0].size;
4613                 struct cache_extent *next;
4614
4615                 next = search_cache_extent(pending, lookup);
4616                 while (next) {
4617                         if (next->start - lookup > 32768)
4618                                 break;
4619                         bits[ret].start = next->start;
4620                         bits[ret].size = next->size;
4621                         lookup = next->start + next->size;
4622                         ret++;
4623                         if (ret == bits_nr)
4624                                 break;
4625                         next = next_cache_extent(next);
4626                         if (!next)
4627                                 break;
4628                 }
4629         }
4630         return ret;
4631 }
4632
4633 static void free_chunk_record(struct cache_extent *cache)
4634 {
4635         struct chunk_record *rec;
4636
4637         rec = container_of(cache, struct chunk_record, cache);
4638         list_del_init(&rec->list);
4639         list_del_init(&rec->dextents);
4640         free(rec);
4641 }
4642
4643 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4644 {
4645         cache_tree_free_extents(chunk_cache, free_chunk_record);
4646 }
4647
4648 static void free_device_record(struct rb_node *node)
4649 {
4650         struct device_record *rec;
4651
4652         rec = container_of(node, struct device_record, node);
4653         free(rec);
4654 }
4655
4656 FREE_RB_BASED_TREE(device_cache, free_device_record);
4657
4658 int insert_block_group_record(struct block_group_tree *tree,
4659                               struct block_group_record *bg_rec)
4660 {
4661         int ret;
4662
4663         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4664         if (ret)
4665                 return ret;
4666
4667         list_add_tail(&bg_rec->list, &tree->block_groups);
4668         return 0;
4669 }
4670
4671 static void free_block_group_record(struct cache_extent *cache)
4672 {
4673         struct block_group_record *rec;
4674
4675         rec = container_of(cache, struct block_group_record, cache);
4676         list_del_init(&rec->list);
4677         free(rec);
4678 }
4679
4680 void free_block_group_tree(struct block_group_tree *tree)
4681 {
4682         cache_tree_free_extents(&tree->tree, free_block_group_record);
4683 }
4684
4685 int insert_device_extent_record(struct device_extent_tree *tree,
4686                                 struct device_extent_record *de_rec)
4687 {
4688         int ret;
4689
4690         /*
4691          * Device extent is a bit different from the other extents, because
4692          * the extents which belong to the different devices may have the
4693          * same start and size, so we need use the special extent cache
4694          * search/insert functions.
4695          */
4696         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4697         if (ret)
4698                 return ret;
4699
4700         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4701         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4702         return 0;
4703 }
4704
4705 static void free_device_extent_record(struct cache_extent *cache)
4706 {
4707         struct device_extent_record *rec;
4708
4709         rec = container_of(cache, struct device_extent_record, cache);
4710         if (!list_empty(&rec->chunk_list))
4711                 list_del_init(&rec->chunk_list);
4712         if (!list_empty(&rec->device_list))
4713                 list_del_init(&rec->device_list);
4714         free(rec);
4715 }
4716
4717 void free_device_extent_tree(struct device_extent_tree *tree)
4718 {
4719         cache_tree_free_extents(&tree->tree, free_device_extent_record);
4720 }
4721
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Record the backref described by the v0 extent ref item at @slot of @leaf.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is added as a
 * tree backref, anything else as a data backref carrying the v0 ref count.
 * Returns the result of the respective add_*_backref() call.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid,
					key.offset, 0, 0, 0,
					btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
}
#endif
4742
4743 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4744                                             struct btrfs_key *key,
4745                                             int slot)
4746 {
4747         struct btrfs_chunk *ptr;
4748         struct chunk_record *rec;
4749         int num_stripes, i;
4750
4751         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4752         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4753
4754         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4755         if (!rec) {
4756                 fprintf(stderr, "memory allocation failed\n");
4757                 exit(-1);
4758         }
4759
4760         INIT_LIST_HEAD(&rec->list);
4761         INIT_LIST_HEAD(&rec->dextents);
4762         rec->bg_rec = NULL;
4763
4764         rec->cache.start = key->offset;
4765         rec->cache.size = btrfs_chunk_length(leaf, ptr);
4766
4767         rec->generation = btrfs_header_generation(leaf);
4768
4769         rec->objectid = key->objectid;
4770         rec->type = key->type;
4771         rec->offset = key->offset;
4772
4773         rec->length = rec->cache.size;
4774         rec->owner = btrfs_chunk_owner(leaf, ptr);
4775         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4776         rec->type_flags = btrfs_chunk_type(leaf, ptr);
4777         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4778         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4779         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4780         rec->num_stripes = num_stripes;
4781         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4782
4783         for (i = 0; i < rec->num_stripes; ++i) {
4784                 rec->stripes[i].devid =
4785                         btrfs_stripe_devid_nr(leaf, ptr, i);
4786                 rec->stripes[i].offset =
4787                         btrfs_stripe_offset_nr(leaf, ptr, i);
4788                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4789                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
4790                                 BTRFS_UUID_SIZE);
4791         }
4792
4793         return rec;
4794 }
4795
4796 static int process_chunk_item(struct cache_tree *chunk_cache,
4797                               struct btrfs_key *key, struct extent_buffer *eb,
4798                               int slot)
4799 {
4800         struct chunk_record *rec;
4801         struct btrfs_chunk *chunk;
4802         int ret = 0;
4803
4804         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
4805         /*
4806          * Do extra check for this chunk item,
4807          *
4808          * It's still possible one can craft a leaf with CHUNK_ITEM, with
4809          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
4810          * and owner<->key_type check.
4811          */
4812         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
4813                                       key->offset);
4814         if (ret < 0) {
4815                 error("chunk(%llu, %llu) is not valid, ignore it",
4816                       key->offset, btrfs_chunk_length(eb, chunk));
4817                 return 0;
4818         }
4819         rec = btrfs_new_chunk_record(eb, key, slot);
4820         ret = insert_cache_extent(chunk_cache, &rec->cache);
4821         if (ret) {
4822                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
4823                         rec->offset, rec->length);
4824                 free(rec);
4825         }
4826
4827         return ret;
4828 }
4829
4830 static int process_device_item(struct rb_root *dev_cache,
4831                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
4832 {
4833         struct btrfs_dev_item *ptr;
4834         struct device_record *rec;
4835         int ret = 0;
4836
4837         ptr = btrfs_item_ptr(eb,
4838                 slot, struct btrfs_dev_item);
4839
4840         rec = malloc(sizeof(*rec));
4841         if (!rec) {
4842                 fprintf(stderr, "memory allocation failed\n");
4843                 return -ENOMEM;
4844         }
4845
4846         rec->devid = key->offset;
4847         rec->generation = btrfs_header_generation(eb);
4848
4849         rec->objectid = key->objectid;
4850         rec->type = key->type;
4851         rec->offset = key->offset;
4852
4853         rec->devid = btrfs_device_id(eb, ptr);
4854         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
4855         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
4856
4857         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
4858         if (ret) {
4859                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
4860                 free(rec);
4861         }
4862
4863         return ret;
4864 }
4865
4866 struct block_group_record *
4867 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
4868                              int slot)
4869 {
4870         struct btrfs_block_group_item *ptr;
4871         struct block_group_record *rec;
4872
4873         rec = calloc(1, sizeof(*rec));
4874         if (!rec) {
4875                 fprintf(stderr, "memory allocation failed\n");
4876                 exit(-1);
4877         }
4878
4879         rec->cache.start = key->objectid;
4880         rec->cache.size = key->offset;
4881
4882         rec->generation = btrfs_header_generation(leaf);
4883
4884         rec->objectid = key->objectid;
4885         rec->type = key->type;
4886         rec->offset = key->offset;
4887
4888         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
4889         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
4890
4891         INIT_LIST_HEAD(&rec->list);
4892
4893         return rec;
4894 }
4895
4896 static int process_block_group_item(struct block_group_tree *block_group_cache,
4897                                     struct btrfs_key *key,
4898                                     struct extent_buffer *eb, int slot)
4899 {
4900         struct block_group_record *rec;
4901         int ret = 0;
4902
4903         rec = btrfs_new_block_group_record(eb, key, slot);
4904         ret = insert_block_group_record(block_group_cache, rec);
4905         if (ret) {
4906                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
4907                         rec->objectid, rec->offset);
4908                 free(rec);
4909         }
4910
4911         return ret;
4912 }
4913
4914 struct device_extent_record *
4915 btrfs_new_device_extent_record(struct extent_buffer *leaf,
4916                                struct btrfs_key *key, int slot)
4917 {
4918         struct device_extent_record *rec;
4919         struct btrfs_dev_extent *ptr;
4920
4921         rec = calloc(1, sizeof(*rec));
4922         if (!rec) {
4923                 fprintf(stderr, "memory allocation failed\n");
4924                 exit(-1);
4925         }
4926
4927         rec->cache.objectid = key->objectid;
4928         rec->cache.start = key->offset;
4929
4930         rec->generation = btrfs_header_generation(leaf);
4931
4932         rec->objectid = key->objectid;
4933         rec->type = key->type;
4934         rec->offset = key->offset;
4935
4936         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
4937         rec->chunk_objecteid =
4938                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
4939         rec->chunk_offset =
4940                 btrfs_dev_extent_chunk_offset(leaf, ptr);
4941         rec->length = btrfs_dev_extent_length(leaf, ptr);
4942         rec->cache.size = rec->length;
4943
4944         INIT_LIST_HEAD(&rec->chunk_list);
4945         INIT_LIST_HEAD(&rec->device_list);
4946
4947         return rec;
4948 }
4949
4950 static int
4951 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
4952                            struct btrfs_key *key, struct extent_buffer *eb,
4953                            int slot)
4954 {
4955         struct device_extent_record *rec;
4956         int ret;
4957
4958         rec = btrfs_new_device_extent_record(eb, key, slot);
4959         ret = insert_device_extent_record(dev_extent_cache, rec);
4960         if (ret) {
4961                 fprintf(stderr,
4962                         "Device extent[%llu, %llu, %llu] existed.\n",
4963                         rec->objectid, rec->offset, rec->length);
4964                 free(rec);
4965         }
4966
4967         return ret;
4968 }
4969
4970 static int process_extent_item(struct btrfs_root *root,
4971                                struct cache_tree *extent_cache,
4972                                struct extent_buffer *eb, int slot)
4973 {
4974         struct btrfs_extent_item *ei;
4975         struct btrfs_extent_inline_ref *iref;
4976         struct btrfs_extent_data_ref *dref;
4977         struct btrfs_shared_data_ref *sref;
4978         struct btrfs_key key;
4979         struct extent_record tmpl;
4980         unsigned long end;
4981         unsigned long ptr;
4982         int ret;
4983         int type;
4984         u32 item_size = btrfs_item_size_nr(eb, slot);
4985         u64 refs = 0;
4986         u64 offset;
4987         u64 num_bytes;
4988         int metadata = 0;
4989
4990         btrfs_item_key_to_cpu(eb, &key, slot);
4991
4992         if (key.type == BTRFS_METADATA_ITEM_KEY) {
4993                 metadata = 1;
4994                 num_bytes = root->fs_info->nodesize;
4995         } else {
4996                 num_bytes = key.offset;
4997         }
4998
4999         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
5000                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5001                       key.objectid, root->fs_info->sectorsize);
5002                 return -EIO;
5003         }
5004         if (item_size < sizeof(*ei)) {
5005 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5006                 struct btrfs_extent_item_v0 *ei0;
5007
5008                 if (item_size != sizeof(*ei0)) {
5009                         error(
5010         "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
5011                                 key.objectid, key.type, key.offset,
5012                                 btrfs_header_bytenr(eb), slot);
5013                         BUG();
5014                 }
5015                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5016                 refs = btrfs_extent_refs_v0(eb, ei0);
5017 #else
5018                 BUG();
5019 #endif
5020                 memset(&tmpl, 0, sizeof(tmpl));
5021                 tmpl.start = key.objectid;
5022                 tmpl.nr = num_bytes;
5023                 tmpl.extent_item_refs = refs;
5024                 tmpl.metadata = metadata;
5025                 tmpl.found_rec = 1;
5026                 tmpl.max_size = num_bytes;
5027
5028                 return add_extent_rec(extent_cache, &tmpl);
5029         }
5030
5031         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5032         refs = btrfs_extent_refs(eb, ei);
5033         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5034                 metadata = 1;
5035         else
5036                 metadata = 0;
5037         if (metadata && num_bytes != root->fs_info->nodesize) {
5038                 error("ignore invalid metadata extent, length %llu does not equal to %u",
5039                       num_bytes, root->fs_info->nodesize);
5040                 return -EIO;
5041         }
5042         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
5043                 error("ignore invalid data extent, length %llu is not aligned to %u",
5044                       num_bytes, root->fs_info->sectorsize);
5045                 return -EIO;
5046         }
5047
5048         memset(&tmpl, 0, sizeof(tmpl));
5049         tmpl.start = key.objectid;
5050         tmpl.nr = num_bytes;
5051         tmpl.extent_item_refs = refs;
5052         tmpl.metadata = metadata;
5053         tmpl.found_rec = 1;
5054         tmpl.max_size = num_bytes;
5055         add_extent_rec(extent_cache, &tmpl);
5056
5057         ptr = (unsigned long)(ei + 1);
5058         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5059             key.type == BTRFS_EXTENT_ITEM_KEY)
5060                 ptr += sizeof(struct btrfs_tree_block_info);
5061
5062         end = (unsigned long)ei + item_size;
5063         while (ptr < end) {
5064                 iref = (struct btrfs_extent_inline_ref *)ptr;
5065                 type = btrfs_extent_inline_ref_type(eb, iref);
5066                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5067                 switch (type) {
5068                 case BTRFS_TREE_BLOCK_REF_KEY:
5069                         ret = add_tree_backref(extent_cache, key.objectid,
5070                                         0, offset, 0);
5071                         if (ret < 0)
5072                                 error(
5073                         "add_tree_backref failed (extent items tree block): %s",
5074                                       strerror(-ret));
5075                         break;
5076                 case BTRFS_SHARED_BLOCK_REF_KEY:
5077                         ret = add_tree_backref(extent_cache, key.objectid,
5078                                         offset, 0, 0);
5079                         if (ret < 0)
5080                                 error(
5081                         "add_tree_backref failed (extent items shared block): %s",
5082                                       strerror(-ret));
5083                         break;
5084                 case BTRFS_EXTENT_DATA_REF_KEY:
5085                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5086                         add_data_backref(extent_cache, key.objectid, 0,
5087                                         btrfs_extent_data_ref_root(eb, dref),
5088                                         btrfs_extent_data_ref_objectid(eb,
5089                                                                        dref),
5090                                         btrfs_extent_data_ref_offset(eb, dref),
5091                                         btrfs_extent_data_ref_count(eb, dref),
5092                                         0, num_bytes);
5093                         break;
5094                 case BTRFS_SHARED_DATA_REF_KEY:
5095                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5096                         add_data_backref(extent_cache, key.objectid, offset,
5097                                         0, 0, 0,
5098                                         btrfs_shared_data_ref_count(eb, sref),
5099                                         0, num_bytes);
5100                         break;
5101                 default:
5102                         fprintf(stderr,
5103                                 "corrupt extent record: key [%llu,%u,%llu]\n",
5104                                 key.objectid, key.type, num_bytes);
5105                         goto out;
5106                 }
5107                 ptr += btrfs_extent_inline_ref_size(type);
5108         }
5109         WARN_ON(ptr > end);
5110 out:
5111         return 0;
5112 }
5113
5114 static int check_cache_range(struct btrfs_root *root,
5115                              struct btrfs_block_group_cache *cache,
5116                              u64 offset, u64 bytes)
5117 {
5118         struct btrfs_free_space *entry;
5119         u64 *logical;
5120         u64 bytenr;
5121         int stripe_len;
5122         int i, nr, ret;
5123
5124         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5125                 bytenr = btrfs_sb_offset(i);
5126                 ret = btrfs_rmap_block(root->fs_info,
5127                                        cache->key.objectid, bytenr, 0,
5128                                        &logical, &nr, &stripe_len);
5129                 if (ret)
5130                         return ret;
5131
5132                 while (nr--) {
5133                         if (logical[nr] + stripe_len <= offset)
5134                                 continue;
5135                         if (offset + bytes <= logical[nr])
5136                                 continue;
5137                         if (logical[nr] == offset) {
5138                                 if (stripe_len >= bytes) {
5139                                         free(logical);
5140                                         return 0;
5141                                 }
5142                                 bytes -= stripe_len;
5143                                 offset += stripe_len;
5144                         } else if (logical[nr] < offset) {
5145                                 if (logical[nr] + stripe_len >=
5146                                     offset + bytes) {
5147                                         free(logical);
5148                                         return 0;
5149                                 }
5150                                 bytes = (offset + bytes) -
5151                                         (logical[nr] + stripe_len);
5152                                 offset = logical[nr] + stripe_len;
5153                         } else {
5154                                 /*
5155                                  * Could be tricky, the super may land in the
5156                                  * middle of the area we're checking.  First
5157                                  * check the easiest case, it's at the end.
5158                                  */
5159                                 if (logical[nr] + stripe_len >=
5160                                     bytes + offset) {
5161                                         bytes = logical[nr] - offset;
5162                                         continue;
5163                                 }
5164
5165                                 /* Check the left side */
5166                                 ret = check_cache_range(root, cache,
5167                                                         offset,
5168                                                         logical[nr] - offset);
5169                                 if (ret) {
5170                                         free(logical);
5171                                         return ret;
5172                                 }
5173
5174                                 /* Now we continue with the right side */
5175                                 bytes = (offset + bytes) -
5176                                         (logical[nr] + stripe_len);
5177                                 offset = logical[nr] + stripe_len;
5178                         }
5179                 }
5180
5181                 free(logical);
5182         }
5183
5184         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5185         if (!entry) {
5186                 fprintf(stderr, "there is no free space entry for %llu-%llu\n",
5187                         offset, offset+bytes);
5188                 return -EINVAL;
5189         }
5190
5191         if (entry->offset != offset) {
5192                 fprintf(stderr, "wanted offset %llu, found %llu\n", offset,
5193                         entry->offset);
5194                 return -EINVAL;
5195         }
5196
5197         if (entry->bytes != bytes) {
5198                 fprintf(stderr, "wanted bytes %llu, found %llu for off %llu\n",
5199                         bytes, entry->bytes, offset);
5200                 return -EINVAL;
5201         }
5202
5203         unlink_free_space(cache->free_space_ctl, entry);
5204         free(entry);
5205         return 0;
5206 }
5207
5208 static int verify_space_cache(struct btrfs_root *root,
5209                               struct btrfs_block_group_cache *cache)
5210 {
5211         struct btrfs_path path;
5212         struct extent_buffer *leaf;
5213         struct btrfs_key key;
5214         u64 last;
5215         int ret = 0;
5216
5217         root = root->fs_info->extent_root;
5218
5219         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5220
5221         btrfs_init_path(&path);
5222         key.objectid = last;
5223         key.offset = 0;
5224         key.type = BTRFS_EXTENT_ITEM_KEY;
5225         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5226         if (ret < 0)
5227                 goto out;
5228         ret = 0;
5229         while (1) {
5230                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
5231                         ret = btrfs_next_leaf(root, &path);
5232                         if (ret < 0)
5233                                 goto out;
5234                         if (ret > 0) {
5235                                 ret = 0;
5236                                 break;
5237                         }
5238                 }
5239                 leaf = path.nodes[0];
5240                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
5241                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5242                         break;
5243                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5244                     key.type != BTRFS_METADATA_ITEM_KEY) {
5245                         path.slots[0]++;
5246                         continue;
5247                 }
5248
5249                 if (last == key.objectid) {
5250                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5251                                 last = key.objectid + key.offset;
5252                         else
5253                                 last = key.objectid + root->fs_info->nodesize;
5254                         path.slots[0]++;
5255                         continue;
5256                 }
5257
5258                 ret = check_cache_range(root, cache, last,
5259                                         key.objectid - last);
5260                 if (ret)
5261                         break;
5262                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5263                         last = key.objectid + key.offset;
5264                 else
5265                         last = key.objectid + root->fs_info->nodesize;
5266                 path.slots[0]++;
5267         }
5268
5269         if (last < cache->key.objectid + cache->key.offset)
5270                 ret = check_cache_range(root, cache, last,
5271                                         cache->key.objectid +
5272                                         cache->key.offset - last);
5273
5274 out:
5275         btrfs_release_path(&path);
5276
5277         if (!ret &&
5278             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5279                 fprintf(stderr, "There are still entries left in the space "
5280                         "cache\n");
5281                 ret = -EINVAL;
5282         }
5283
5284         return ret;
5285 }
5286
5287 static int check_space_cache(struct btrfs_root *root)
5288 {
5289         struct btrfs_block_group_cache *cache;
5290         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5291         int ret;
5292         int error = 0;
5293
5294         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5295             btrfs_super_generation(root->fs_info->super_copy) !=
5296             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5297                 printf("cache and super generation don't match, space cache "
5298                        "will be invalidated\n");
5299                 return 0;
5300         }
5301
5302         if (ctx.progress_enabled) {
5303                 ctx.tp = TASK_FREE_SPACE;
5304                 task_start(ctx.info);
5305         }
5306
5307         while (1) {
5308                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5309                 if (!cache)
5310                         break;
5311
5312                 start = cache->key.objectid + cache->key.offset;
5313                 if (!cache->free_space_ctl) {
5314                         if (btrfs_init_free_space_ctl(cache,
5315                                                 root->fs_info->sectorsize)) {
5316                                 ret = -ENOMEM;
5317                                 break;
5318                         }
5319                 } else {
5320                         btrfs_remove_free_space_cache(cache);
5321                 }
5322
5323                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
5324                         ret = exclude_super_stripes(root, cache);
5325                         if (ret) {
5326                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5327                                         strerror(-ret));
5328                                 error++;
5329                                 continue;
5330                         }
5331                         ret = load_free_space_tree(root->fs_info, cache);
5332                         free_excluded_extents(root, cache);
5333                         if (ret < 0) {
5334                                 fprintf(stderr, "could not load free space tree: %s\n",
5335                                         strerror(-ret));
5336                                 error++;
5337                                 continue;
5338                         }
5339                         error += ret;
5340                 } else {
5341                         ret = load_free_space_cache(root->fs_info, cache);
5342                         if (!ret)
5343                                 continue;
5344                 }
5345
5346                 ret = verify_space_cache(root, cache);
5347                 if (ret) {
5348                         fprintf(stderr, "cache appears valid but isn't %llu\n",
5349                                 cache->key.objectid);
5350                         error++;
5351                 }
5352         }
5353
5354         task_stop(ctx.info);
5355
5356         return error ? -EINVAL : 0;
5357 }
5358
5359 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5360                         u64 num_bytes, unsigned long leaf_offset,
5361                         struct extent_buffer *eb)
5362 {
5363         struct btrfs_fs_info *fs_info = root->fs_info;
5364         u64 offset = 0;
5365         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
5366         char *data;
5367         unsigned long csum_offset;
5368         u32 csum;
5369         u32 csum_expected;
5370         u64 read_len;
5371         u64 data_checked = 0;
5372         u64 tmp;
5373         int ret = 0;
5374         int mirror;
5375         int num_copies;
5376
5377         if (num_bytes % fs_info->sectorsize)
5378                 return -EINVAL;
5379
5380         data = malloc(num_bytes);
5381         if (!data)
5382                 return -ENOMEM;
5383
5384         while (offset < num_bytes) {
5385                 mirror = 0;
5386 again:
5387                 read_len = num_bytes - offset;
5388                 /* read as much space once a time */
5389                 ret = read_extent_data(fs_info, data + offset,
5390                                 bytenr + offset, &read_len, mirror);
5391                 if (ret)
5392                         goto out;
5393                 data_checked = 0;
5394                 /* verify every 4k data's checksum */
5395                 while (data_checked < read_len) {
5396                         csum = ~(u32)0;
5397                         tmp = offset + data_checked;
5398
5399                         csum = btrfs_csum_data((char *)data + tmp,
5400                                                csum, fs_info->sectorsize);
5401                         btrfs_csum_final(csum, (u8 *)&csum);
5402
5403                         csum_offset = leaf_offset +
5404                                  tmp / fs_info->sectorsize * csum_size;
5405                         read_extent_buffer(eb, (char *)&csum_expected,
5406                                            csum_offset, csum_size);
5407                         /* try another mirror */
5408                         if (csum != csum_expected) {
5409                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5410                                                 mirror, bytenr + tmp,
5411                                                 csum, csum_expected);
5412                                 num_copies = btrfs_num_copies(root->fs_info,
5413                                                 bytenr, num_bytes);
5414                                 if (mirror < num_copies - 1) {
5415                                         mirror += 1;
5416                                         goto again;
5417                                 }
5418                         }
5419                         data_checked += fs_info->sectorsize;
5420                 }
5421                 offset += read_len;
5422         }
5423 out:
5424         free(data);
5425         return ret;
5426 }
5427
5428 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5429                                u64 num_bytes)
5430 {
5431         struct btrfs_path path;
5432         struct extent_buffer *leaf;
5433         struct btrfs_key key;
5434         int ret;
5435
5436         btrfs_init_path(&path);
5437         key.objectid = bytenr;
5438         key.type = BTRFS_EXTENT_ITEM_KEY;
5439         key.offset = (u64)-1;
5440
5441 again:
5442         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
5443                                 0, 0);
5444         if (ret < 0) {
5445                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5446                 btrfs_release_path(&path);
5447                 return ret;
5448         } else if (ret) {
5449                 if (path.slots[0] > 0) {
5450                         path.slots[0]--;
5451                 } else {
5452                         ret = btrfs_prev_leaf(root, &path);
5453                         if (ret < 0) {
5454                                 goto out;
5455                         } else if (ret > 0) {
5456                                 ret = 0;
5457                                 goto out;
5458                         }
5459                 }
5460         }
5461
5462         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
5463
5464         /*
5465          * Block group items come before extent items if they have the same
5466          * bytenr, so walk back one more just in case.  Dear future traveller,
5467          * first congrats on mastering time travel.  Now if it's not too much
5468          * trouble could you go back to 2006 and tell Chris to make the
5469          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5470          * EXTENT_ITEM_KEY please?
5471          */
5472         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5473                 if (path.slots[0] > 0) {
5474                         path.slots[0]--;
5475                 } else {
5476                         ret = btrfs_prev_leaf(root, &path);
5477                         if (ret < 0) {
5478                                 goto out;
5479                         } else if (ret > 0) {
5480                                 ret = 0;
5481                                 goto out;
5482                         }
5483                 }
5484                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
5485         }
5486
5487         while (num_bytes) {
5488                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
5489                         ret = btrfs_next_leaf(root, &path);
5490                         if (ret < 0) {
5491                                 fprintf(stderr, "Error going to next leaf "
5492                                         "%d\n", ret);
5493                                 btrfs_release_path(&path);
5494                                 return ret;
5495                         } else if (ret) {
5496                                 break;
5497                         }
5498                 }
5499                 leaf = path.nodes[0];
5500                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
5501                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5502                         path.slots[0]++;
5503                         continue;
5504                 }
5505                 if (key.objectid + key.offset < bytenr) {
5506                         path.slots[0]++;
5507                         continue;
5508                 }
5509                 if (key.objectid > bytenr + num_bytes)
5510                         break;
5511
5512                 if (key.objectid == bytenr) {
5513                         if (key.offset >= num_bytes) {
5514                                 num_bytes = 0;
5515                                 break;
5516                         }
5517                         num_bytes -= key.offset;
5518                         bytenr += key.offset;
5519                 } else if (key.objectid < bytenr) {
5520                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5521                                 num_bytes = 0;
5522                                 break;
5523                         }
5524                         num_bytes = (bytenr + num_bytes) -
5525                                 (key.objectid + key.offset);
5526                         bytenr = key.objectid + key.offset;
5527                 } else {
5528                         if (key.objectid + key.offset < bytenr + num_bytes) {
5529                                 u64 new_start = key.objectid + key.offset;
5530                                 u64 new_bytes = bytenr + num_bytes - new_start;
5531
5532                                 /*
5533                                  * Weird case, the extent is in the middle of
5534                                  * our range, we'll have to search one side
5535                                  * and then the other.  Not sure if this happens
5536                                  * in real life, but no harm in coding it up
5537                                  * anyway just in case.
5538                                  */
5539                                 btrfs_release_path(&path);
5540                                 ret = check_extent_exists(root, new_start,
5541                                                           new_bytes);
5542                                 if (ret) {
5543                                         fprintf(stderr, "Right section didn't "
5544                                                 "have a record\n");
5545                                         break;
5546                                 }
5547                                 num_bytes = key.objectid - bytenr;
5548                                 goto again;
5549                         }
5550                         num_bytes = key.objectid - bytenr;
5551                 }
5552                 path.slots[0]++;
5553         }
5554         ret = 0;
5555
5556 out:
5557         if (num_bytes && !ret) {
5558                 fprintf(stderr,
5559                         "there are no extents for csum range %llu-%llu\n",
5560                         bytenr, bytenr+num_bytes);
5561                 ret = 1;
5562         }
5563
5564         btrfs_release_path(&path);
5565         return ret;
5566 }
5567
5568 static int check_csums(struct btrfs_root *root)
5569 {
5570         struct btrfs_path path;
5571         struct extent_buffer *leaf;
5572         struct btrfs_key key;
5573         u64 offset = 0, num_bytes = 0;
5574         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5575         int errors = 0;
5576         int ret;
5577         u64 data_len;
5578         unsigned long leaf_offset;
5579
5580         root = root->fs_info->csum_root;
5581         if (!extent_buffer_uptodate(root->node)) {
5582                 fprintf(stderr, "No valid csum tree found\n");
5583                 return -ENOENT;
5584         }
5585
5586         btrfs_init_path(&path);
5587         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5588         key.type = BTRFS_EXTENT_CSUM_KEY;
5589         key.offset = 0;
5590         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5591         if (ret < 0) {
5592                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5593                 btrfs_release_path(&path);
5594                 return ret;
5595         }
5596
5597         if (ret > 0 && path.slots[0])
5598                 path.slots[0]--;
5599         ret = 0;
5600
5601         while (1) {
5602                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
5603                         ret = btrfs_next_leaf(root, &path);
5604                         if (ret < 0) {
5605                                 fprintf(stderr, "Error going to next leaf "
5606                                         "%d\n", ret);
5607                                 break;
5608                         }
5609                         if (ret)
5610                                 break;
5611                 }
5612                 leaf = path.nodes[0];
5613
5614                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
5615                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5616                         path.slots[0]++;
5617                         continue;
5618                 }
5619
5620                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
5621                               csum_size) * root->fs_info->sectorsize;
5622                 if (!check_data_csum)
5623                         goto skip_csum_check;
5624                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
5625                 ret = check_extent_csums(root, key.offset, data_len,
5626                                          leaf_offset, leaf);
5627                 if (ret)
5628                         break;
5629 skip_csum_check:
5630                 if (!num_bytes) {
5631                         offset = key.offset;
5632                 } else if (key.offset != offset + num_bytes) {
5633                         ret = check_extent_exists(root, offset, num_bytes);
5634                         if (ret) {
5635                                 fprintf(stderr,
5636                 "csum exists for %llu-%llu but there is no extent record\n",
5637                                         offset, offset+num_bytes);
5638                                 errors++;
5639                         }
5640                         offset = key.offset;
5641                         num_bytes = 0;
5642                 }
5643                 num_bytes += data_len;
5644                 path.slots[0]++;
5645         }
5646
5647         btrfs_release_path(&path);
5648         return errors;
5649 }
5650
5651 static int is_dropped_key(struct btrfs_key *key,
5652                           struct btrfs_key *drop_key)
5653 {
5654         if (key->objectid < drop_key->objectid)
5655                 return 1;
5656         else if (key->objectid == drop_key->objectid) {
5657                 if (key->type < drop_key->type)
5658                         return 1;
5659                 else if (key->type == drop_key->type) {
5660                         if (key->offset < drop_key->offset)
5661                                 return 1;
5662                 }
5663         }
5664         return 0;
5665 }
5666
5667 /*
5668  * Here are the rules for FULL_BACKREF.
5669  *
5670  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5671  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5672  *      FULL_BACKREF set.
5673  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
5674  *    if it happened after the relocation occurred since we'll have dropped the
5675  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5676  *    have no real way to know for sure.
5677  *
5678  * We process the blocks one root at a time, and we start from the lowest root
5679  * objectid and go to the highest.  So we can just lookup the owner backref for
5680  * the record and if we don't find it then we know it doesn't exist and we have
5681  * a FULL BACKREF.
5682  *
5683  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5684  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5685  * be set or not and then we can check later once we've gathered all the refs.
5686  */
5687 static int calc_extent_flag(struct cache_tree *extent_cache,
5688                            struct extent_buffer *buf,
5689                            struct root_item_record *ri,
5690                            u64 *flags)
5691 {
5692         struct extent_record *rec;
5693         struct cache_extent *cache;
5694         struct tree_backref *tback;
5695         u64 owner = 0;
5696
5697         cache = lookup_cache_extent(extent_cache, buf->start, 1);
5698         /* we have added this extent before */
5699         if (!cache)
5700                 return -ENOENT;
5701
5702         rec = container_of(cache, struct extent_record, cache);
5703
5704         /*
5705          * Except file/reloc tree, we can not have
5706          * FULL BACKREF MODE
5707          */
5708         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5709                 goto normal;
5710         /*
5711          * root node
5712          */
5713         if (buf->start == ri->bytenr)
5714                 goto normal;
5715
5716         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5717                 goto full_backref;
5718
5719         owner = btrfs_header_owner(buf);
5720         if (owner == ri->objectid)
5721                 goto normal;
5722
5723         tback = find_tree_backref(rec, 0, owner);
5724         if (!tback)
5725                 goto full_backref;
5726 normal:
5727         *flags = 0;
5728         if (rec->flag_block_full_backref != FLAG_UNSET &&
5729             rec->flag_block_full_backref != 0)
5730                 rec->bad_full_backref = 1;
5731         return 0;
5732 full_backref:
5733         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5734         if (rec->flag_block_full_backref != FLAG_UNSET &&
5735             rec->flag_block_full_backref != 1)
5736                 rec->bad_full_backref = 1;
5737         return 0;
5738 }
5739
5740 static void report_mismatch_key_root(u8 key_type, u64 rootid)
5741 {
5742         fprintf(stderr, "Invalid key type(");
5743         print_key_type(stderr, 0, key_type);
5744         fprintf(stderr, ") found in root(");
5745         print_objectid(stderr, rootid, 0);
5746         fprintf(stderr, ")\n");
5747 }
5748
5749 /*
5750  * Check if the key is valid with its extent buffer.
5751  *
5752  * This is a early check in case invalid key exists in a extent buffer
5753  * This is not comprehensive yet, but should prevent wrong key/item passed
5754  * further
5755  */
5756 static int check_type_with_root(u64 rootid, u8 key_type)
5757 {
5758         switch (key_type) {
5759         /* Only valid in chunk tree */
5760         case BTRFS_DEV_ITEM_KEY:
5761         case BTRFS_CHUNK_ITEM_KEY:
5762                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
5763                         goto err;
5764                 break;
5765         /* valid in csum and log tree */
5766         case BTRFS_CSUM_TREE_OBJECTID:
5767                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
5768                       is_fstree(rootid)))
5769                         goto err;
5770                 break;
5771         case BTRFS_EXTENT_ITEM_KEY:
5772         case BTRFS_METADATA_ITEM_KEY:
5773         case BTRFS_BLOCK_GROUP_ITEM_KEY:
5774                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
5775                         goto err;
5776                 break;
5777         case BTRFS_ROOT_ITEM_KEY:
5778                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
5779                         goto err;
5780                 break;
5781         case BTRFS_DEV_EXTENT_KEY:
5782                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
5783                         goto err;
5784                 break;
5785         }
5786         return 0;
5787 err:
5788         report_mismatch_key_root(key_type, rootid);
5789         return -EINVAL;
5790 }
5791
5792 static int run_next_block(struct btrfs_root *root,
5793                           struct block_info *bits,
5794                           int bits_nr,
5795                           u64 *last,
5796                           struct cache_tree *pending,
5797                           struct cache_tree *seen,
5798                           struct cache_tree *reada,
5799                           struct cache_tree *nodes,
5800                           struct cache_tree *extent_cache,
5801                           struct cache_tree *chunk_cache,
5802                           struct rb_root *dev_cache,
5803                           struct block_group_tree *block_group_cache,
5804                           struct device_extent_tree *dev_extent_cache,
5805                           struct root_item_record *ri)
5806 {
5807         struct btrfs_fs_info *fs_info = root->fs_info;
5808         struct extent_buffer *buf;
5809         struct extent_record *rec = NULL;
5810         u64 bytenr;
5811         u32 size;
5812         u64 parent;
5813         u64 owner;
5814         u64 flags;
5815         u64 ptr;
5816         u64 gen = 0;
5817         int ret = 0;
5818         int i;
5819         int nritems;
5820         struct btrfs_key key;
5821         struct cache_extent *cache;
5822         int reada_bits;
5823
5824         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5825                                     bits_nr, &reada_bits);
5826         if (nritems == 0)
5827                 return 1;
5828
5829         if (!reada_bits) {
5830                 for (i = 0; i < nritems; i++) {
5831                         ret = add_cache_extent(reada, bits[i].start,
5832                                                bits[i].size);
5833                         if (ret == -EEXIST)
5834                                 continue;
5835
5836                         /* fixme, get the parent transid */
5837                         readahead_tree_block(fs_info, bits[i].start, 0);
5838                 }
5839         }
5840         *last = bits[0].start;
5841         bytenr = bits[0].start;
5842         size = bits[0].size;
5843
5844         cache = lookup_cache_extent(pending, bytenr, size);
5845         if (cache) {
5846                 remove_cache_extent(pending, cache);
5847                 free(cache);
5848         }
5849         cache = lookup_cache_extent(reada, bytenr, size);
5850         if (cache) {
5851                 remove_cache_extent(reada, cache);
5852                 free(cache);
5853         }
5854         cache = lookup_cache_extent(nodes, bytenr, size);
5855         if (cache) {
5856                 remove_cache_extent(nodes, cache);
5857                 free(cache);
5858         }
5859         cache = lookup_cache_extent(extent_cache, bytenr, size);
5860         if (cache) {
5861                 rec = container_of(cache, struct extent_record, cache);
5862                 gen = rec->parent_generation;
5863         }
5864
5865         /* fixme, get the real parent transid */
5866         buf = read_tree_block(root->fs_info, bytenr, gen);
5867         if (!extent_buffer_uptodate(buf)) {
5868                 record_bad_block_io(root->fs_info,
5869                                     extent_cache, bytenr, size);
5870                 goto out;
5871         }
5872
5873         nritems = btrfs_header_nritems(buf);
5874
5875         flags = 0;
5876         if (!init_extent_tree) {
5877                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5878                                        btrfs_header_level(buf), 1, NULL,
5879                                        &flags);
5880                 if (ret < 0) {
5881                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
5882                         if (ret < 0) {
5883                                 fprintf(stderr, "Couldn't calc extent flags\n");
5884                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5885                         }
5886                 }
5887         } else {
5888                 flags = 0;
5889                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
5890                 if (ret < 0) {
5891                         fprintf(stderr, "Couldn't calc extent flags\n");
5892                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5893                 }
5894         }
5895
5896         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5897                 if (ri != NULL &&
5898                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5899                     ri->objectid == btrfs_header_owner(buf)) {
5900                         /*
5901                          * Ok we got to this block from it's original owner and
5902                          * we have FULL_BACKREF set.  Relocation can leave
5903                          * converted blocks over so this is altogether possible,
5904                          * however it's not possible if the generation > the
5905                          * last snapshot, so check for this case.
5906                          */
5907                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5908                             btrfs_header_generation(buf) > ri->last_snapshot) {
5909                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5910                                 rec->bad_full_backref = 1;
5911                         }
5912                 }
5913         } else {
5914                 if (ri != NULL &&
5915                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
5916                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
5917                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5918                         rec->bad_full_backref = 1;
5919                 }
5920         }
5921
5922         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5923                 rec->flag_block_full_backref = 1;
5924                 parent = bytenr;
5925                 owner = 0;
5926         } else {
5927                 rec->flag_block_full_backref = 0;
5928                 parent = 0;
5929                 owner = btrfs_header_owner(buf);
5930         }
5931
5932         ret = check_block(root, extent_cache, buf, flags);
5933         if (ret)
5934                 goto out;
5935
5936         if (btrfs_is_leaf(buf)) {
5937                 btree_space_waste += btrfs_leaf_free_space(root, buf);
5938                 for (i = 0; i < nritems; i++) {
5939                         struct btrfs_file_extent_item *fi;
5940
5941                         btrfs_item_key_to_cpu(buf, &key, i);
5942                         /*
5943                          * Check key type against the leaf owner.
5944                          * Could filter quite a lot of early error if
5945                          * owner is correct
5946                          */
5947                         if (check_type_with_root(btrfs_header_owner(buf),
5948                                                  key.type)) {
5949                                 fprintf(stderr, "ignoring invalid key\n");
5950                                 continue;
5951                         }
5952                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
5953                                 process_extent_item(root, extent_cache, buf,
5954                                                     i);
5955                                 continue;
5956                         }
5957                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5958                                 process_extent_item(root, extent_cache, buf,
5959                                                     i);
5960                                 continue;
5961                         }
5962                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
5963                                 total_csum_bytes +=
5964                                         btrfs_item_size_nr(buf, i);
5965                                 continue;
5966                         }
5967                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
5968                                 process_chunk_item(chunk_cache, &key, buf, i);
5969                                 continue;
5970                         }
5971                         if (key.type == BTRFS_DEV_ITEM_KEY) {
5972                                 process_device_item(dev_cache, &key, buf, i);
5973                                 continue;
5974                         }
5975                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
5976                                 process_block_group_item(block_group_cache,
5977                                         &key, buf, i);
5978                                 continue;
5979                         }
5980                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
5981                                 process_device_extent_item(dev_extent_cache,
5982                                         &key, buf, i);
5983                                 continue;
5984
5985                         }
5986                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
5987 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5988                                 process_extent_ref_v0(extent_cache, buf, i);
5989 #else
5990                                 BUG();
5991 #endif
5992                                 continue;
5993                         }
5994
5995                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
5996                                 ret = add_tree_backref(extent_cache,
5997                                                 key.objectid, 0, key.offset, 0);
5998                                 if (ret < 0)
5999                                         error(
6000                                 "add_tree_backref failed (leaf tree block): %s",
6001                                               strerror(-ret));
6002                                 continue;
6003                         }
6004                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6005                                 ret = add_tree_backref(extent_cache,
6006                                                 key.objectid, key.offset, 0, 0);
6007                                 if (ret < 0)
6008                                         error(
6009                                 "add_tree_backref failed (leaf shared block): %s",
6010                                               strerror(-ret));
6011                                 continue;
6012                         }
6013                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6014                                 struct btrfs_extent_data_ref *ref;
6015
6016                                 ref = btrfs_item_ptr(buf, i,
6017                                                 struct btrfs_extent_data_ref);
6018                                 add_data_backref(extent_cache,
6019                                         key.objectid, 0,
6020                                         btrfs_extent_data_ref_root(buf, ref),
6021                                         btrfs_extent_data_ref_objectid(buf,
6022                                                                        ref),
6023                                         btrfs_extent_data_ref_offset(buf, ref),
6024                                         btrfs_extent_data_ref_count(buf, ref),
6025                                         0, root->fs_info->sectorsize);
6026                                 continue;
6027                         }
6028                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6029                                 struct btrfs_shared_data_ref *ref;
6030
6031                                 ref = btrfs_item_ptr(buf, i,
6032                                                 struct btrfs_shared_data_ref);
6033                                 add_data_backref(extent_cache,
6034                                         key.objectid, key.offset, 0, 0, 0,
6035                                         btrfs_shared_data_ref_count(buf, ref),
6036                                         0, root->fs_info->sectorsize);
6037                                 continue;
6038                         }
6039                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6040                                 struct bad_item *bad;
6041
6042                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6043                                         continue;
6044                                 if (!owner)
6045                                         continue;
6046                                 bad = malloc(sizeof(struct bad_item));
6047                                 if (!bad)
6048                                         continue;
6049                                 INIT_LIST_HEAD(&bad->list);
6050                                 memcpy(&bad->key, &key,
6051                                        sizeof(struct btrfs_key));
6052                                 bad->root_id = owner;
6053                                 list_add_tail(&bad->list, &delete_items);
6054                                 continue;
6055                         }
6056                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6057                                 continue;
6058                         fi = btrfs_item_ptr(buf, i,
6059                                             struct btrfs_file_extent_item);
6060                         if (btrfs_file_extent_type(buf, fi) ==
6061                             BTRFS_FILE_EXTENT_INLINE)
6062                                 continue;
6063                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6064                                 continue;
6065
6066                         data_bytes_allocated +=
6067                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6068                         if (data_bytes_allocated < root->fs_info->sectorsize)
6069                                 abort();
6070
6071                         data_bytes_referenced +=
6072                                 btrfs_file_extent_num_bytes(buf, fi);
6073                         add_data_backref(extent_cache,
6074                                 btrfs_file_extent_disk_bytenr(buf, fi),
6075                                 parent, owner, key.objectid, key.offset -
6076                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6077                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6078                 }
6079         } else {
6080                 int level;
6081                 struct btrfs_key first_key;
6082
6083                 first_key.objectid = 0;
6084
6085                 if (nritems > 0)
6086                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6087                 level = btrfs_header_level(buf);
6088                 for (i = 0; i < nritems; i++) {
6089                         struct extent_record tmpl;
6090
6091                         ptr = btrfs_node_blockptr(buf, i);
6092                         size = root->fs_info->nodesize;
6093                         btrfs_node_key_to_cpu(buf, &key, i);
6094                         if (ri != NULL) {
6095                                 if ((level == ri->drop_level)
6096                                     && is_dropped_key(&key, &ri->drop_key)) {
6097                                         continue;
6098                                 }
6099                         }
6100
6101                         memset(&tmpl, 0, sizeof(tmpl));
6102                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6103                         tmpl.parent_generation =
6104                                 btrfs_node_ptr_generation(buf, i);
6105                         tmpl.start = ptr;
6106                         tmpl.nr = size;
6107                         tmpl.refs = 1;
6108                         tmpl.metadata = 1;
6109                         tmpl.max_size = size;
6110                         ret = add_extent_rec(extent_cache, &tmpl);
6111                         if (ret < 0)
6112                                 goto out;
6113
6114                         ret = add_tree_backref(extent_cache, ptr, parent,
6115                                         owner, 1);
6116                         if (ret < 0) {
6117                                 error(
6118                                 "add_tree_backref failed (non-leaf block): %s",
6119                                       strerror(-ret));
6120                                 continue;
6121                         }
6122
6123                         if (level > 1)
6124                                 add_pending(nodes, seen, ptr, size);
6125                         else
6126                                 add_pending(pending, seen, ptr, size);
6127                 }
6128                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
6129                                       nritems) * sizeof(struct btrfs_key_ptr);
6130         }
6131         total_btree_bytes += buf->len;
6132         if (fs_root_objectid(btrfs_header_owner(buf)))
6133                 total_fs_tree_bytes += buf->len;
6134         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6135                 total_extent_tree_bytes += buf->len;
6136 out:
6137         free_extent_buffer(buf);
6138         return ret;
6139 }
6140
6141 static int add_root_to_pending(struct extent_buffer *buf,
6142                                struct cache_tree *extent_cache,
6143                                struct cache_tree *pending,
6144                                struct cache_tree *seen,
6145                                struct cache_tree *nodes,
6146                                u64 objectid)
6147 {
6148         struct extent_record tmpl;
6149         int ret;
6150
6151         if (btrfs_header_level(buf) > 0)
6152                 add_pending(nodes, seen, buf->start, buf->len);
6153         else
6154                 add_pending(pending, seen, buf->start, buf->len);
6155
6156         memset(&tmpl, 0, sizeof(tmpl));
6157         tmpl.start = buf->start;
6158         tmpl.nr = buf->len;
6159         tmpl.is_root = 1;
6160         tmpl.refs = 1;
6161         tmpl.metadata = 1;
6162         tmpl.max_size = buf->len;
6163         add_extent_rec(extent_cache, &tmpl);
6164
6165         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6166             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6167                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6168                                 0, 1);
6169         else
6170                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6171                                 1);
6172         return ret;
6173 }
6174
6175 /* as we fix the tree, we might be deleting blocks that
6176  * we're tracking for repair.  This hook makes sure we
6177  * remove any backrefs for blocks as we are fixing them.
6178  */
6179 static int free_extent_hook(struct btrfs_trans_handle *trans,
6180                             struct btrfs_root *root,
6181                             u64 bytenr, u64 num_bytes, u64 parent,
6182                             u64 root_objectid, u64 owner, u64 offset,
6183                             int refs_to_drop)
6184 {
6185         struct extent_record *rec;
6186         struct cache_extent *cache;
6187         int is_data;
6188         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6189
6190         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6191         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6192         if (!cache)
6193                 return 0;
6194
6195         rec = container_of(cache, struct extent_record, cache);
6196         if (is_data) {
6197                 struct data_backref *back;
6198
6199                 back = find_data_backref(rec, parent, root_objectid, owner,
6200                                          offset, 1, bytenr, num_bytes);
6201                 if (!back)
6202                         goto out;
6203                 if (back->node.found_ref) {
6204                         back->found_ref -= refs_to_drop;
6205                         if (rec->refs)
6206                                 rec->refs -= refs_to_drop;
6207                 }
6208                 if (back->node.found_extent_tree) {
6209                         back->num_refs -= refs_to_drop;
6210                         if (rec->extent_item_refs)
6211                                 rec->extent_item_refs -= refs_to_drop;
6212                 }
6213                 if (back->found_ref == 0)
6214                         back->node.found_ref = 0;
6215                 if (back->num_refs == 0)
6216                         back->node.found_extent_tree = 0;
6217
6218                 if (!back->node.found_extent_tree && back->node.found_ref) {
6219                         rb_erase(&back->node.node, &rec->backref_tree);
6220                         free(back);
6221                 }
6222         } else {
6223                 struct tree_backref *back;
6224
6225                 back = find_tree_backref(rec, parent, root_objectid);
6226                 if (!back)
6227                         goto out;
6228                 if (back->node.found_ref) {
6229                         if (rec->refs)
6230                                 rec->refs--;
6231                         back->node.found_ref = 0;
6232                 }
6233                 if (back->node.found_extent_tree) {
6234                         if (rec->extent_item_refs)
6235                                 rec->extent_item_refs--;
6236                         back->node.found_extent_tree = 0;
6237                 }
6238                 if (!back->node.found_extent_tree && back->node.found_ref) {
6239                         rb_erase(&back->node.node, &rec->backref_tree);
6240                         free(back);
6241                 }
6242         }
6243         maybe_free_extent_rec(extent_cache, rec);
6244 out:
6245         return 0;
6246 }
6247
6248 static int delete_extent_records(struct btrfs_trans_handle *trans,
6249                                  struct btrfs_root *root,
6250                                  struct btrfs_path *path,
6251                                  u64 bytenr)
6252 {
6253         struct btrfs_key key;
6254         struct btrfs_key found_key;
6255         struct extent_buffer *leaf;
6256         int ret;
6257         int slot;
6258
6259
6260         key.objectid = bytenr;
6261         key.type = (u8)-1;
6262         key.offset = (u64)-1;
6263
6264         while (1) {
6265                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6266                                         &key, path, 0, 1);
6267                 if (ret < 0)
6268                         break;
6269
6270                 if (ret > 0) {
6271                         ret = 0;
6272                         if (path->slots[0] == 0)
6273                                 break;
6274                         path->slots[0]--;
6275                 }
6276                 ret = 0;
6277
6278                 leaf = path->nodes[0];
6279                 slot = path->slots[0];
6280
6281                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6282                 if (found_key.objectid != bytenr)
6283                         break;
6284
6285                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6286                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6287                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6288                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6289                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6290                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6291                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6292                         btrfs_release_path(path);
6293                         if (found_key.type == 0) {
6294                                 if (found_key.offset == 0)
6295                                         break;
6296                                 key.offset = found_key.offset - 1;
6297                                 key.type = found_key.type;
6298                         }
6299                         key.type = found_key.type - 1;
6300                         key.offset = (u64)-1;
6301                         continue;
6302                 }
6303
6304                 fprintf(stderr,
6305                         "repair deleting extent record: key [%llu,%u,%llu]\n",
6306                         found_key.objectid, found_key.type, found_key.offset);
6307
6308                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6309                 if (ret)
6310                         break;
6311                 btrfs_release_path(path);
6312
6313                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6314                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6315                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6316                                 found_key.offset : root->fs_info->nodesize;
6317
6318                         ret = btrfs_update_block_group(root, bytenr,
6319                                                        bytes, 0, 0);
6320                         if (ret)
6321                                 break;
6322                 }
6323         }
6324
6325         btrfs_release_path(path);
6326         return ret;
6327 }
6328
6329 /*
6330  * for a single backref, this will allocate a new extent
6331  * and add the backref to it.
6332  */
6333 static int record_extent(struct btrfs_trans_handle *trans,
6334                          struct btrfs_fs_info *info,
6335                          struct btrfs_path *path,
6336                          struct extent_record *rec,
6337                          struct extent_backref *back,
6338                          int allocated, u64 flags)
6339 {
6340         int ret = 0;
6341         struct btrfs_root *extent_root = info->extent_root;
6342         struct extent_buffer *leaf;
6343         struct btrfs_key ins_key;
6344         struct btrfs_extent_item *ei;
6345         struct data_backref *dback;
6346         struct btrfs_tree_block_info *bi;
6347
6348         if (!back->is_data)
6349                 rec->max_size = max_t(u64, rec->max_size,
6350                                     info->nodesize);
6351
6352         if (!allocated) {
6353                 u32 item_size = sizeof(*ei);
6354
6355                 if (!back->is_data)
6356                         item_size += sizeof(*bi);
6357
6358                 ins_key.objectid = rec->start;
6359                 ins_key.offset = rec->max_size;
6360                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6361
6362                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6363                                         &ins_key, item_size);
6364                 if (ret)
6365                         goto fail;
6366
6367                 leaf = path->nodes[0];
6368                 ei = btrfs_item_ptr(leaf, path->slots[0],
6369                                     struct btrfs_extent_item);
6370
6371                 btrfs_set_extent_refs(leaf, ei, 0);
6372                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6373
6374                 if (back->is_data) {
6375                         btrfs_set_extent_flags(leaf, ei,
6376                                                BTRFS_EXTENT_FLAG_DATA);
6377                 } else {
6378                         struct btrfs_disk_key copy_key;
6379
6380                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6381                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6382                                              sizeof(*bi));
6383
6384                         btrfs_set_disk_key_objectid(&copy_key,
6385                                                     rec->info_objectid);
6386                         btrfs_set_disk_key_type(&copy_key, 0);
6387                         btrfs_set_disk_key_offset(&copy_key, 0);
6388
6389                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6390                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6391
6392                         btrfs_set_extent_flags(leaf, ei,
6393                                         flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
6394                 }
6395
6396                 btrfs_mark_buffer_dirty(leaf);
6397                 ret = btrfs_update_block_group(extent_root, rec->start,
6398                                                rec->max_size, 1, 0);
6399                 if (ret)
6400                         goto fail;
6401                 btrfs_release_path(path);
6402         }
6403
6404         if (back->is_data) {
6405                 u64 parent;
6406                 int i;
6407
6408                 dback = to_data_backref(back);
6409                 if (back->full_backref)
6410                         parent = dback->parent;
6411                 else
6412                         parent = 0;
6413
6414                 for (i = 0; i < dback->found_ref; i++) {
6415                         /* if parent != 0, we're doing a full backref
6416                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6417                          * just makes the backref allocator create a data
6418                          * backref
6419                          */
6420                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6421                                                    rec->start, rec->max_size,
6422                                                    parent,
6423                                                    dback->root,
6424                                                    parent ?
6425                                                    BTRFS_FIRST_FREE_OBJECTID :
6426                                                    dback->owner,
6427                                                    dback->offset);
6428                         if (ret)
6429                                 break;
6430                 }
6431                 fprintf(stderr,
6432 "adding new data backref on %llu %s %llu owner %llu offset %llu found %d\n",
6433                         (unsigned long long)rec->start,
6434                         back->full_backref ? "parent" : "root",
6435                         back->full_backref ? (unsigned long long)parent :
6436                                              (unsigned long long)dback->root,
6437                         (unsigned long long)dback->owner,
6438                         (unsigned long long)dback->offset, dback->found_ref);
6439         } else {
6440                 u64 parent;
6441                 struct tree_backref *tback;
6442
6443                 tback = to_tree_backref(back);
6444                 if (back->full_backref)
6445                         parent = tback->parent;
6446                 else
6447                         parent = 0;
6448
6449                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6450                                            rec->start, rec->max_size,
6451                                            parent, tback->root, 0, 0);
6452                 fprintf(stderr,
6453 "adding new tree backref on start %llu len %llu parent %llu root %llu\n",
6454                         rec->start, rec->max_size, parent, tback->root);
6455         }
6456 fail:
6457         btrfs_release_path(path);
6458         return ret;
6459 }
6460
6461 static struct extent_entry *find_entry(struct list_head *entries,
6462                                        u64 bytenr, u64 bytes)
6463 {
6464         struct extent_entry *entry = NULL;
6465
6466         list_for_each_entry(entry, entries, list) {
6467                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6468                         return entry;
6469         }
6470
6471         return NULL;
6472 }
6473
6474 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6475 {
6476         struct extent_entry *entry, *best = NULL, *prev = NULL;
6477
6478         list_for_each_entry(entry, entries, list) {
6479                 /*
6480                  * If there are as many broken entries as entries then we know
6481                  * not to trust this particular entry.
6482                  */
6483                 if (entry->broken == entry->count)
6484                         continue;
6485
6486                 /*
6487                  * Special case, when there are only two entries and 'best' is
6488                  * the first one
6489                  */
6490                 if (!prev) {
6491                         best = entry;
6492                         prev = entry;
6493                         continue;
6494                 }
6495
6496                 /*
6497                  * If our current entry == best then we can't be sure our best
6498                  * is really the best, so we need to keep searching.
6499                  */
6500                 if (best && best->count == entry->count) {
6501                         prev = entry;
6502                         best = NULL;
6503                         continue;
6504                 }
6505
6506                 /* Prev == entry, not good enough, have to keep searching */
6507                 if (!prev->broken && prev->count == entry->count)
6508                         continue;
6509
6510                 if (!best)
6511                         best = (prev->count > entry->count) ? prev : entry;
6512                 else if (best->count < entry->count)
6513                         best = entry;
6514                 prev = entry;
6515         }
6516
6517         return best;
6518 }
6519
6520 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6521                       struct data_backref *dback, struct extent_entry *entry)
6522 {
6523         struct btrfs_trans_handle *trans;
6524         struct btrfs_root *root;
6525         struct btrfs_file_extent_item *fi;
6526         struct extent_buffer *leaf;
6527         struct btrfs_key key;
6528         u64 bytenr, bytes;
6529         int ret, err;
6530
6531         key.objectid = dback->root;
6532         key.type = BTRFS_ROOT_ITEM_KEY;
6533         key.offset = (u64)-1;
6534         root = btrfs_read_fs_root(info, &key);
6535         if (IS_ERR(root)) {
6536                 fprintf(stderr, "Couldn't find root for our ref\n");
6537                 return -EINVAL;
6538         }
6539
6540         /*
6541          * The backref points to the original offset of the extent if it was
6542          * split, so we need to search down to the offset we have and then walk
6543          * forward until we find the backref we're looking for.
6544          */
6545         key.objectid = dback->owner;
6546         key.type = BTRFS_EXTENT_DATA_KEY;
6547         key.offset = dback->offset;
6548         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6549         if (ret < 0) {
6550                 fprintf(stderr, "Error looking up ref %d\n", ret);
6551                 return ret;
6552         }
6553
6554         while (1) {
6555                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6556                         ret = btrfs_next_leaf(root, path);
6557                         if (ret) {
6558                                 fprintf(stderr, "Couldn't find our ref, next\n");
6559                                 return -EINVAL;
6560                         }
6561                 }
6562                 leaf = path->nodes[0];
6563                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6564                 if (key.objectid != dback->owner ||
6565                     key.type != BTRFS_EXTENT_DATA_KEY) {
6566                         fprintf(stderr, "Couldn't find our ref, search\n");
6567                         return -EINVAL;
6568                 }
6569                 fi = btrfs_item_ptr(leaf, path->slots[0],
6570                                     struct btrfs_file_extent_item);
6571                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6572                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6573
6574                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6575                         break;
6576                 path->slots[0]++;
6577         }
6578
6579         btrfs_release_path(path);
6580
6581         trans = btrfs_start_transaction(root, 1);
6582         if (IS_ERR(trans))
6583                 return PTR_ERR(trans);
6584
6585         /*
6586          * Ok we have the key of the file extent we want to fix, now we can cow
6587          * down to the thing and fix it.
6588          */
6589         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6590         if (ret < 0) {
6591                 fprintf(stderr, "error cowing down to ref [%llu,%u,%llu]: %d\n",
6592                         key.objectid, key.type, key.offset, ret);
6593                 goto out;
6594         }
6595         if (ret > 0) {
6596                 fprintf(stderr,
6597                 "well that's odd, we just found this key [%llu,%u,%llu]\n",
6598                         key.objectid, key.type, key.offset);
6599                 ret = -EINVAL;
6600                 goto out;
6601         }
6602         leaf = path->nodes[0];
6603         fi = btrfs_item_ptr(leaf, path->slots[0],
6604                             struct btrfs_file_extent_item);
6605
6606         if (btrfs_file_extent_compression(leaf, fi) &&
6607             dback->disk_bytenr != entry->bytenr) {
6608                 fprintf(stderr,
6609 "ref doesn't match the record start and is compressed, please take a btrfs-image of this file system and send it to a btrfs developer so they can complete this functionality for bytenr %llu\n",
6610                         dback->disk_bytenr);
6611                 ret = -EINVAL;
6612                 goto out;
6613         }
6614
6615         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6616                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6617         } else if (dback->disk_bytenr > entry->bytenr) {
6618                 u64 off_diff, offset;
6619
6620                 off_diff = dback->disk_bytenr - entry->bytenr;
6621                 offset = btrfs_file_extent_offset(leaf, fi);
6622                 if (dback->disk_bytenr + offset +
6623                     btrfs_file_extent_num_bytes(leaf, fi) >
6624                     entry->bytenr + entry->bytes) {
6625                         fprintf(stderr,
6626 "ref is past the entry end, please take a btrfs-image of this file system and send it to a btrfs developer, ref %llu\n",
6627                                 dback->disk_bytenr);
6628                         ret = -EINVAL;
6629                         goto out;
6630                 }
6631                 offset += off_diff;
6632                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6633                 btrfs_set_file_extent_offset(leaf, fi, offset);
6634         } else if (dback->disk_bytenr < entry->bytenr) {
6635                 u64 offset;
6636
6637                 offset = btrfs_file_extent_offset(leaf, fi);
6638                 if (dback->disk_bytenr + offset < entry->bytenr) {
6639                         fprintf(stderr,
6640 "ref is before the entry start, please take a btrfs-image of this file system and send it to a btrfs developer, ref %llu\n",
6641                                 dback->disk_bytenr);
6642                         ret = -EINVAL;
6643                         goto out;
6644                 }
6645
6646                 offset += dback->disk_bytenr;
6647                 offset -= entry->bytenr;
6648                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6649                 btrfs_set_file_extent_offset(leaf, fi, offset);
6650         }
6651
6652         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6653
6654         /*
6655          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6656          * only do this if we aren't using compression, otherwise it's a
6657          * trickier case.
6658          */
6659         if (!btrfs_file_extent_compression(leaf, fi))
6660                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6661         else
6662                 printf("ram bytes may be wrong?\n");
6663         btrfs_mark_buffer_dirty(leaf);
6664 out:
6665         err = btrfs_commit_transaction(trans, root);
6666         btrfs_release_path(path);
6667         return ret ? ret : err;
6668 }
6669
6670 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6671                            struct extent_record *rec)
6672 {
6673         struct extent_backref *back, *tmp;
6674         struct data_backref *dback;
6675         struct extent_entry *entry, *best = NULL;
6676         LIST_HEAD(entries);
6677         int nr_entries = 0;
6678         int broken_entries = 0;
6679         int ret = 0;
6680         short mismatch = 0;
6681
6682         /*
6683          * Metadata is easy and the backrefs should always agree on bytenr and
6684          * size, if not we've got bigger issues.
6685          */
6686         if (rec->metadata)
6687                 return 0;
6688
6689         rbtree_postorder_for_each_entry_safe(back, tmp,
6690                                              &rec->backref_tree, node) {
6691                 if (back->full_backref || !back->is_data)
6692                         continue;
6693
6694                 dback = to_data_backref(back);
6695
6696                 /*
6697                  * We only pay attention to backrefs that we found a real
6698                  * backref for.
6699                  */
6700                 if (dback->found_ref == 0)
6701                         continue;
6702
6703                 /*
6704                  * For now we only catch when the bytes don't match, not the
6705                  * bytenr.  We can easily do this at the same time, but I want
6706                  * to have a fs image to test on before we just add repair
6707                  * functionality willy-nilly so we know we won't screw up the
6708                  * repair.
6709                  */
6710
6711                 entry = find_entry(&entries, dback->disk_bytenr,
6712                                    dback->bytes);
6713                 if (!entry) {
6714                         entry = malloc(sizeof(struct extent_entry));
6715                         if (!entry) {
6716                                 ret = -ENOMEM;
6717                                 goto out;
6718                         }
6719                         memset(entry, 0, sizeof(*entry));
6720                         entry->bytenr = dback->disk_bytenr;
6721                         entry->bytes = dback->bytes;
6722                         list_add_tail(&entry->list, &entries);
6723                         nr_entries++;
6724                 }
6725
6726                 /*
6727                  * If we only have on entry we may think the entries agree when
6728                  * in reality they don't so we have to do some extra checking.
6729                  */
6730                 if (dback->disk_bytenr != rec->start ||
6731                     dback->bytes != rec->nr || back->broken)
6732                         mismatch = 1;
6733
6734                 if (back->broken) {
6735                         entry->broken++;
6736                         broken_entries++;
6737                 }
6738
6739                 entry->count++;
6740         }
6741
6742         /* Yay all the backrefs agree, carry on good sir */
6743         if (nr_entries <= 1 && !mismatch)
6744                 goto out;
6745
6746         fprintf(stderr,
6747                 "attempting to repair backref discrepency for bytenr %llu\n",
6748                 rec->start);
6749
6750         /*
6751          * First we want to see if the backrefs can agree amongst themselves who
6752          * is right, so figure out which one of the entries has the highest
6753          * count.
6754          */
6755         best = find_most_right_entry(&entries);
6756
6757         /*
6758          * Ok so we may have an even split between what the backrefs think, so
6759          * this is where we use the extent ref to see what it thinks.
6760          */
6761         if (!best) {
6762                 entry = find_entry(&entries, rec->start, rec->nr);
6763                 if (!entry && (!broken_entries || !rec->found_rec)) {
6764                         fprintf(stderr,
6765 "backrefs don't agree with each other and extent record doesn't agree with anybody, so we can't fix bytenr %llu bytes %llu\n",
6766                                 rec->start, rec->nr);
6767                         ret = -EINVAL;
6768                         goto out;
6769                 } else if (!entry) {
6770                         /*
6771                          * Ok our backrefs were broken, we'll assume this is the
6772                          * correct value and add an entry for this range.
6773                          */
6774                         entry = malloc(sizeof(struct extent_entry));
6775                         if (!entry) {
6776                                 ret = -ENOMEM;
6777                                 goto out;
6778                         }
6779                         memset(entry, 0, sizeof(*entry));
6780                         entry->bytenr = rec->start;
6781                         entry->bytes = rec->nr;
6782                         list_add_tail(&entry->list, &entries);
6783                         nr_entries++;
6784                 }
6785                 entry->count++;
6786                 best = find_most_right_entry(&entries);
6787                 if (!best) {
6788                         fprintf(stderr,
6789 "backrefs and extent record evenly split on who is right, this is going to require user input to fix bytenr %llu bytes %llu\n",
6790                                 rec->start, rec->nr);
6791                         ret = -EINVAL;
6792                         goto out;
6793                 }
6794         }
6795
6796         /*
6797          * I don't think this can happen currently as we'll abort() if we catch
6798          * this case higher up, but in case somebody removes that we still can't
6799          * deal with it properly here yet, so just bail out if that's the case.
6800          */
6801         if (best->bytenr != rec->start) {
6802                 fprintf(stderr,
6803 "extent start and backref starts don't match, please use btrfs-image on this file system and send it to a btrfs developer so they can make fsck fix this particular case.  bytenr is %llu, bytes is %llu\n",
6804                         rec->start, rec->nr);
6805                 ret = -EINVAL;
6806                 goto out;
6807         }
6808
6809         /*
6810          * Ok great we all agreed on an extent record, let's go find the real
6811          * references and fix up the ones that don't match.
6812          */
6813         rbtree_postorder_for_each_entry_safe(back, tmp,
6814                                              &rec->backref_tree, node) {
6815                 if (back->full_backref || !back->is_data)
6816                         continue;
6817
6818                 dback = to_data_backref(back);
6819
6820                 /*
6821                  * Still ignoring backrefs that don't have a real ref attached
6822                  * to them.
6823                  */
6824                 if (dback->found_ref == 0)
6825                         continue;
6826
6827                 if (dback->bytes == best->bytes &&
6828                     dback->disk_bytenr == best->bytenr)
6829                         continue;
6830
6831                 ret = repair_ref(info, path, dback, best);
6832                 if (ret)
6833                         goto out;
6834         }
6835
6836         /*
6837          * Ok we messed with the actual refs, which means we need to drop our
6838          * entire cache and go back and rescan.  I know this is a huge pain and
6839          * adds a lot of extra work, but it's the only way to be safe.  Once all
6840          * the backrefs agree we may not need to do anything to the extent
6841          * record itself.
6842          */
6843         ret = -EAGAIN;
6844 out:
6845         while (!list_empty(&entries)) {
6846                 entry = list_entry(entries.next, struct extent_entry, list);
6847                 list_del_init(&entry->list);
6848                 free(entry);
6849         }
6850         return ret;
6851 }
6852
6853 static int process_duplicates(struct cache_tree *extent_cache,
6854                               struct extent_record *rec)
6855 {
6856         struct extent_record *good, *tmp;
6857         struct cache_extent *cache;
6858         int ret;
6859
6860         /*
6861          * If we found a extent record for this extent then return, or if we
6862          * have more than one duplicate we are likely going to need to delete
6863          * something.
6864          */
6865         if (rec->found_rec || rec->num_duplicates > 1)
6866                 return 0;
6867
6868         /* Shouldn't happen but just in case */
6869         BUG_ON(!rec->num_duplicates);
6870
6871         /*
6872          * So this happens if we end up with a backref that doesn't match the
6873          * actual extent entry.  So either the backref is bad or the extent
6874          * entry is bad.  Either way we want to have the extent_record actually
6875          * reflect what we found in the extent_tree, so we need to take the
6876          * duplicate out and use that as the extent_record since the only way we
6877          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6878          */
6879         remove_cache_extent(extent_cache, &rec->cache);
6880
6881         good = to_extent_record(rec->dups.next);
6882         list_del_init(&good->list);
6883         INIT_LIST_HEAD(&good->backrefs);
6884         INIT_LIST_HEAD(&good->dups);
6885         good->cache.start = good->start;
6886         good->cache.size = good->nr;
6887         good->content_checked = 0;
6888         good->owner_ref_checked = 0;
6889         good->num_duplicates = 0;
6890         good->refs = rec->refs;
6891         list_splice_init(&rec->backrefs, &good->backrefs);
6892         while (1) {
6893                 cache = lookup_cache_extent(extent_cache, good->start,
6894                                             good->nr);
6895                 if (!cache)
6896                         break;
6897                 tmp = container_of(cache, struct extent_record, cache);
6898
6899                 /*
6900                  * If we find another overlapping extent and it's found_rec is
6901                  * set then it's a duplicate and we need to try and delete
6902                  * something.
6903                  */
6904                 if (tmp->found_rec || tmp->num_duplicates > 0) {
6905                         if (list_empty(&good->list))
6906                                 list_add_tail(&good->list,
6907                                               &duplicate_extents);
6908                         good->num_duplicates += tmp->num_duplicates + 1;
6909                         list_splice_init(&tmp->dups, &good->dups);
6910                         list_del_init(&tmp->list);
6911                         list_add_tail(&tmp->list, &good->dups);
6912                         remove_cache_extent(extent_cache, &tmp->cache);
6913                         continue;
6914                 }
6915
6916                 /*
6917                  * Ok we have another non extent item backed extent rec, so lets
6918                  * just add it to this extent and carry on like we did above.
6919                  */
6920                 good->refs += tmp->refs;
6921                 list_splice_init(&tmp->backrefs, &good->backrefs);
6922                 remove_cache_extent(extent_cache, &tmp->cache);
6923                 free(tmp);
6924         }
6925         ret = insert_cache_extent(extent_cache, &good->cache);
6926         BUG_ON(ret);
6927         free(rec);
6928         return good->num_duplicates ? 0 : 1;
6929 }
6930
6931 static int delete_duplicate_records(struct btrfs_root *root,
6932                                     struct extent_record *rec)
6933 {
6934         struct btrfs_trans_handle *trans;
6935         LIST_HEAD(delete_list);
6936         struct btrfs_path path;
6937         struct extent_record *tmp, *good, *n;
6938         int nr_del = 0;
6939         int ret = 0, err;
6940         struct btrfs_key key;
6941
6942         btrfs_init_path(&path);
6943
6944         good = rec;
6945         /* Find the record that covers all of the duplicates. */
6946         list_for_each_entry(tmp, &rec->dups, list) {
6947                 if (good->start < tmp->start)
6948                         continue;
6949                 if (good->nr > tmp->nr)
6950                         continue;
6951
6952                 if (tmp->start + tmp->nr < good->start + good->nr) {
6953                         fprintf(stderr,
6954 "Ok we have overlapping extents that aren't completely covered by each other, this is going to require more careful thought. The extents are [%llu-%llu] and [%llu-%llu]\n",
6955                                 tmp->start, tmp->nr, good->start, good->nr);
6956                         abort();
6957                 }
6958                 good = tmp;
6959         }
6960
6961         if (good != rec)
6962                 list_add_tail(&rec->list, &delete_list);
6963
6964         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
6965                 if (tmp == good)
6966                         continue;
6967                 list_move_tail(&tmp->list, &delete_list);
6968         }
6969
6970         root = root->fs_info->extent_root;
6971         trans = btrfs_start_transaction(root, 1);
6972         if (IS_ERR(trans)) {
6973                 ret = PTR_ERR(trans);
6974                 goto out;
6975         }
6976
6977         list_for_each_entry(tmp, &delete_list, list) {
6978                 if (tmp->found_rec == 0)
6979                         continue;
6980                 key.objectid = tmp->start;
6981                 key.type = BTRFS_EXTENT_ITEM_KEY;
6982                 key.offset = tmp->nr;
6983
6984                 /* Shouldn't happen but just in case */
6985                 if (tmp->metadata) {
6986                         fprintf(stderr,
6987 "well this shouldn't happen, extent record overlaps but is metadata? [%llu, %llu]\n",
6988                                 tmp->start, tmp->nr);
6989                         abort();
6990                 }
6991
6992                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
6993                 if (ret) {
6994                         if (ret > 0)
6995                                 ret = -EINVAL;
6996                         break;
6997                 }
6998                 ret = btrfs_del_item(trans, root, &path);
6999                 if (ret)
7000                         break;
7001                 btrfs_release_path(&path);
7002                 nr_del++;
7003         }
7004         err = btrfs_commit_transaction(trans, root);
7005         if (err && !ret)
7006                 ret = err;
7007 out:
7008         while (!list_empty(&delete_list)) {
7009                 tmp = to_extent_record(delete_list.next);
7010                 list_del_init(&tmp->list);
7011                 if (tmp == rec)
7012                         continue;
7013                 free(tmp);
7014         }
7015
7016         while (!list_empty(&rec->dups)) {
7017                 tmp = to_extent_record(rec->dups.next);
7018                 list_del_init(&tmp->list);
7019                 free(tmp);
7020         }
7021
7022         btrfs_release_path(&path);
7023
7024         if (!ret && !nr_del)
7025                 rec->num_duplicates = 0;
7026
7027         return ret ? ret : nr_del;
7028 }
7029
7030 static int find_possible_backrefs(struct btrfs_fs_info *info,
7031                                   struct btrfs_path *path,
7032                                   struct cache_tree *extent_cache,
7033                                   struct extent_record *rec)
7034 {
7035         struct btrfs_root *root;
7036         struct extent_backref *back, *tmp;
7037         struct data_backref *dback;
7038         struct cache_extent *cache;
7039         struct btrfs_file_extent_item *fi;
7040         struct btrfs_key key;
7041         u64 bytenr, bytes;
7042         int ret;
7043
7044         rbtree_postorder_for_each_entry_safe(back, tmp,
7045                                              &rec->backref_tree, node) {
7046                 /* Don't care about full backrefs (poor unloved backrefs) */
7047                 if (back->full_backref || !back->is_data)
7048                         continue;
7049
7050                 dback = to_data_backref(back);
7051
7052                 /* We found this one, we don't need to do a lookup */
7053                 if (dback->found_ref)
7054                         continue;
7055
7056                 key.objectid = dback->root;
7057                 key.type = BTRFS_ROOT_ITEM_KEY;
7058                 key.offset = (u64)-1;
7059
7060                 root = btrfs_read_fs_root(info, &key);
7061
7062                 /* No root, definitely a bad ref, skip */
7063                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7064                         continue;
7065                 /* Other err, exit */
7066                 if (IS_ERR(root))
7067                         return PTR_ERR(root);
7068
7069                 key.objectid = dback->owner;
7070                 key.type = BTRFS_EXTENT_DATA_KEY;
7071                 key.offset = dback->offset;
7072                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7073                 if (ret) {
7074                         btrfs_release_path(path);
7075                         if (ret < 0)
7076                                 return ret;
7077                         /* Didn't find it, we can carry on */
7078                         ret = 0;
7079                         continue;
7080                 }
7081
7082                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7083                                     struct btrfs_file_extent_item);
7084                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7085                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7086                 btrfs_release_path(path);
7087                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7088                 if (cache) {
7089                         struct extent_record *tmp;
7090
7091                         tmp = container_of(cache, struct extent_record, cache);
7092
7093                         /*
7094                          * If we found an extent record for the bytenr for this
7095                          * particular backref then we can't add it to our
7096                          * current extent record.  We only want to add backrefs
7097                          * that don't have a corresponding extent item in the
7098                          * extent tree since they likely belong to this record
7099                          * and we need to fix it if it doesn't match bytenrs.
7100                          */
7101                         if  (tmp->found_rec)
7102                                 continue;
7103                 }
7104
7105                 dback->found_ref += 1;
7106                 dback->disk_bytenr = bytenr;
7107                 dback->bytes = bytes;
7108
7109                 /*
7110                  * Set this so the verify backref code knows not to trust the
7111                  * values in this backref.
7112                  */
7113                 back->broken = 1;
7114         }
7115
7116         return 0;
7117 }
7118
7119 /*
7120  * Record orphan data ref into corresponding root.
7121  *
7122  * Return 0 if the extent item contains data ref and recorded.
7123  * Return 1 if the extent item contains no useful data ref
7124  *   On that case, it may contains only shared_dataref or metadata backref
7125  *   or the file extent exists(this should be handled by the extent bytenr
7126  *   recovery routine)
7127  * Return <0 if something goes wrong.
7128  */
7129 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7130                                       struct extent_record *rec)
7131 {
7132         struct btrfs_key key;
7133         struct btrfs_root *dest_root;
7134         struct extent_backref *back, *tmp;
7135         struct data_backref *dback;
7136         struct orphan_data_extent *orphan;
7137         struct btrfs_path path;
7138         int recorded_data_ref = 0;
7139         int ret = 0;
7140
7141         if (rec->metadata)
7142                 return 1;
7143         btrfs_init_path(&path);
7144         rbtree_postorder_for_each_entry_safe(back, tmp,
7145                                              &rec->backref_tree, node) {
7146                 if (back->full_backref || !back->is_data ||
7147                     !back->found_extent_tree)
7148                         continue;
7149                 dback = to_data_backref(back);
7150                 if (dback->found_ref)
7151                         continue;
7152                 key.objectid = dback->root;
7153                 key.type = BTRFS_ROOT_ITEM_KEY;
7154                 key.offset = (u64)-1;
7155
7156                 dest_root = btrfs_read_fs_root(fs_info, &key);
7157
7158                 /* For non-exist root we just skip it */
7159                 if (IS_ERR(dest_root) || !dest_root)
7160                         continue;
7161
7162                 key.objectid = dback->owner;
7163                 key.type = BTRFS_EXTENT_DATA_KEY;
7164                 key.offset = dback->offset;
7165
7166                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
7167                 btrfs_release_path(&path);
7168                 /*
7169                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7170                  * we need to record it for inode/file extent rebuild.
7171                  * For ret > 0, we record it only for file extent rebuild.
7172                  * For ret == 0, the file extent exists but only bytenr
7173                  * mismatch, let the original bytenr fix routine to handle,
7174                  * don't record it.
7175                  */
7176                 if (ret == 0)
7177                         continue;
7178                 ret = 0;
7179                 orphan = malloc(sizeof(*orphan));
7180                 if (!orphan) {
7181                         ret = -ENOMEM;
7182                         goto out;
7183                 }
7184                 INIT_LIST_HEAD(&orphan->list);
7185                 orphan->root = dback->root;
7186                 orphan->objectid = dback->owner;
7187                 orphan->offset = dback->offset;
7188                 orphan->disk_bytenr = rec->cache.start;
7189                 orphan->disk_len = rec->cache.size;
7190                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7191                 recorded_data_ref = 1;
7192         }
7193 out:
7194         btrfs_release_path(&path);
7195         if (!ret)
7196                 return !recorded_data_ref;
7197         else
7198                 return ret;
7199 }
7200
7201 /*
7202  * when an incorrect extent item is found, this will delete
7203  * all of the existing entries for it and recreate them
7204  * based on what the tree scan found.
7205  */
7206 static int fixup_extent_refs(struct btrfs_fs_info *info,
7207                              struct cache_tree *extent_cache,
7208                              struct extent_record *rec)
7209 {
7210         struct btrfs_trans_handle *trans = NULL;
7211         int ret;
7212         struct btrfs_path path;
7213         struct cache_extent *cache;
7214         struct extent_backref *back, *tmp;
7215         int allocated = 0;
7216         u64 flags = 0;
7217
7218         if (rec->flag_block_full_backref)
7219                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7220
7221         btrfs_init_path(&path);
7222         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7223                 /*
7224                  * Sometimes the backrefs themselves are so broken they don't
7225                  * get attached to any meaningful rec, so first go back and
7226                  * check any of our backrefs that we couldn't find and throw
7227                  * them into the list if we find the backref so that
7228                  * verify_backrefs can figure out what to do.
7229                  */
7230                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
7231                 if (ret < 0)
7232                         goto out;
7233         }
7234
7235         /* step one, make sure all of the backrefs agree */
7236         ret = verify_backrefs(info, &path, rec);
7237         if (ret < 0)
7238                 goto out;
7239
7240         trans = btrfs_start_transaction(info->extent_root, 1);
7241         if (IS_ERR(trans)) {
7242                 ret = PTR_ERR(trans);
7243                 goto out;
7244         }
7245
7246         /* step two, delete all the existing records */
7247         ret = delete_extent_records(trans, info->extent_root, &path,
7248                                     rec->start);
7249
7250         if (ret < 0)
7251                 goto out;
7252
7253         /* was this block corrupt?  If so, don't add references to it */
7254         cache = lookup_cache_extent(info->corrupt_blocks,
7255                                     rec->start, rec->max_size);
7256         if (cache) {
7257                 ret = 0;
7258                 goto out;
7259         }
7260
7261         /* step three, recreate all the refs we did find */
7262         rbtree_postorder_for_each_entry_safe(back, tmp,
7263                                              &rec->backref_tree, node) {
7264                 /*
7265                  * if we didn't find any references, don't create a
7266                  * new extent record
7267                  */
7268                 if (!back->found_ref)
7269                         continue;
7270
7271                 rec->bad_full_backref = 0;
7272                 ret = record_extent(trans, info, &path, rec, back, allocated,
7273                                     flags);
7274                 allocated = 1;
7275
7276                 if (ret)
7277                         goto out;
7278         }
7279 out:
7280         if (trans) {
7281                 int err = btrfs_commit_transaction(trans, info->extent_root);
7282
7283                 if (!ret)
7284                         ret = err;
7285         }
7286
7287         if (!ret)
7288                 fprintf(stderr, "Repaired extent references for %llu\n",
7289                                 (unsigned long long)rec->start);
7290
7291         btrfs_release_path(&path);
7292         return ret;
7293 }
7294
7295 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7296                               struct extent_record *rec)
7297 {
7298         struct btrfs_trans_handle *trans;
7299         struct btrfs_root *root = fs_info->extent_root;
7300         struct btrfs_path path;
7301         struct btrfs_extent_item *ei;
7302         struct btrfs_key key;
7303         u64 flags;
7304         int ret = 0;
7305
7306         key.objectid = rec->start;
7307         if (rec->metadata) {
7308                 key.type = BTRFS_METADATA_ITEM_KEY;
7309                 key.offset = rec->info_level;
7310         } else {
7311                 key.type = BTRFS_EXTENT_ITEM_KEY;
7312                 key.offset = rec->max_size;
7313         }
7314
7315         trans = btrfs_start_transaction(root, 0);
7316         if (IS_ERR(trans))
7317                 return PTR_ERR(trans);
7318
7319         btrfs_init_path(&path);
7320         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
7321         if (ret < 0) {
7322                 btrfs_release_path(&path);
7323                 btrfs_commit_transaction(trans, root);
7324                 return ret;
7325         } else if (ret) {
7326                 fprintf(stderr, "Didn't find extent for %llu\n",
7327                         (unsigned long long)rec->start);
7328                 btrfs_release_path(&path);
7329                 btrfs_commit_transaction(trans, root);
7330                 return -ENOENT;
7331         }
7332
7333         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
7334                             struct btrfs_extent_item);
7335         flags = btrfs_extent_flags(path.nodes[0], ei);
7336         if (rec->flag_block_full_backref) {
7337                 fprintf(stderr, "setting full backref on %llu\n",
7338                         (unsigned long long)key.objectid);
7339                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7340         } else {
7341                 fprintf(stderr, "clearing full backref on %llu\n",
7342                         (unsigned long long)key.objectid);
7343                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7344         }
7345         btrfs_set_extent_flags(path.nodes[0], ei, flags);
7346         btrfs_mark_buffer_dirty(path.nodes[0]);
7347         btrfs_release_path(&path);
7348         ret = btrfs_commit_transaction(trans, root);
7349         if (!ret)
7350                 fprintf(stderr, "Repaired extent flags for %llu\n",
7351                                 (unsigned long long)rec->start);
7352
7353         return ret;
7354 }
7355
7356 /* right now we only prune from the extent allocation tree */
7357 static int prune_one_block(struct btrfs_trans_handle *trans,
7358                            struct btrfs_fs_info *info,
7359                            struct btrfs_corrupt_block *corrupt)
7360 {
7361         int ret;
7362         struct btrfs_path path;
7363         struct extent_buffer *eb;
7364         u64 found;
7365         int slot;
7366         int nritems;
7367         int level = corrupt->level + 1;
7368
7369         btrfs_init_path(&path);
7370 again:
7371         /* we want to stop at the parent to our busted block */
7372         path.lowest_level = level;
7373
7374         ret = btrfs_search_slot(trans, info->extent_root,
7375                                 &corrupt->key, &path, -1, 1);
7376
7377         if (ret < 0)
7378                 goto out;
7379
7380         eb = path.nodes[level];
7381         if (!eb) {
7382                 ret = -ENOENT;
7383                 goto out;
7384         }
7385
7386         /*
7387          * hopefully the search gave us the block we want to prune,
7388          * lets try that first
7389          */
7390         slot = path.slots[level];
7391         found =  btrfs_node_blockptr(eb, slot);
7392         if (found == corrupt->cache.start)
7393                 goto del_ptr;
7394
7395         nritems = btrfs_header_nritems(eb);
7396
7397         /* the search failed, lets scan this node and hope we find it */
7398         for (slot = 0; slot < nritems; slot++) {
7399                 found =  btrfs_node_blockptr(eb, slot);
7400                 if (found == corrupt->cache.start)
7401                         goto del_ptr;
7402         }
7403         /*
7404          * We couldn't find the bad block.
7405          * TODO: search all the nodes for pointers to this block
7406          */
7407         if (eb == info->extent_root->node) {
7408                 ret = -ENOENT;
7409                 goto out;
7410         } else {
7411                 level++;
7412                 btrfs_release_path(&path);
7413                 goto again;
7414         }
7415
7416 del_ptr:
7417         printk("deleting pointer to block %llu\n", corrupt->cache.start);
7418         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
7419
7420 out:
7421         btrfs_release_path(&path);
7422         return ret;
7423 }
7424
7425 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7426 {
7427         struct btrfs_trans_handle *trans = NULL;
7428         struct cache_extent *cache;
7429         struct btrfs_corrupt_block *corrupt;
7430
7431         while (1) {
7432                 cache = search_cache_extent(info->corrupt_blocks, 0);
7433                 if (!cache)
7434                         break;
7435                 if (!trans) {
7436                         trans = btrfs_start_transaction(info->extent_root, 1);
7437                         if (IS_ERR(trans))
7438                                 return PTR_ERR(trans);
7439                 }
7440                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7441                 prune_one_block(trans, info, corrupt);
7442                 remove_cache_extent(info->corrupt_blocks, cache);
7443         }
7444         if (trans)
7445                 return btrfs_commit_transaction(trans, info->extent_root);
7446         return 0;
7447 }
7448
7449 static int check_extent_refs(struct btrfs_root *root,
7450                              struct cache_tree *extent_cache)
7451 {
7452         struct extent_record *rec;
7453         struct cache_extent *cache;
7454         int ret = 0;
7455         int had_dups = 0;
7456         int err = 0;
7457
7458         if (repair) {
7459                 /*
7460                  * if we're doing a repair, we have to make sure
7461                  * we don't allocate from the problem extents.
7462                  * In the worst case, this will be all the
7463                  * extents in the FS
7464                  */
7465                 cache = search_cache_extent(extent_cache, 0);
7466                 while (cache) {
7467                         rec = container_of(cache, struct extent_record, cache);
7468                         set_extent_dirty(root->fs_info->excluded_extents,
7469                                          rec->start,
7470                                          rec->start + rec->max_size - 1);
7471                         cache = next_cache_extent(cache);
7472                 }
7473
7474                 /* pin down all the corrupted blocks too */
7475                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7476                 while (cache) {
7477                         set_extent_dirty(root->fs_info->excluded_extents,
7478                                          cache->start,
7479                                          cache->start + cache->size - 1);
7480                         cache = next_cache_extent(cache);
7481                 }
7482                 prune_corrupt_blocks(root->fs_info);
7483                 reset_cached_block_groups(root->fs_info);
7484         }
7485
7486         reset_cached_block_groups(root->fs_info);
7487
7488         /*
7489          * We need to delete any duplicate entries we find first otherwise we
7490          * could mess up the extent tree when we have backrefs that actually
7491          * belong to a different extent item and not the weird duplicate one.
7492          */
7493         while (repair && !list_empty(&duplicate_extents)) {
7494                 rec = to_extent_record(duplicate_extents.next);
7495                 list_del_init(&rec->list);
7496
7497                 /* Sometimes we can find a backref before we find an actual
7498                  * extent, so we need to process it a little bit to see if there
7499                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7500                  * if this is a backref screwup.  If we need to delete stuff
7501                  * process_duplicates() will return 0, otherwise it will return
7502                  * 1 and we move on to the next record.
7503                  */
7504                 if (process_duplicates(extent_cache, rec))
7505                         continue;
7506                 ret = delete_duplicate_records(root, rec);
7507                 if (ret < 0)
7508                         return ret;
7509                 /*
7510                  * delete_duplicate_records will return the number of entries
7511                  * deleted, so if it's greater than 0 then we know we actually
7512                  * did something and we need to remove.
7513                  */
7514                 if (ret)
7515                         had_dups = 1;
7516         }
7517
7518         if (had_dups)
7519                 return -EAGAIN;
7520
7521         while (1) {
7522                 int cur_err = 0;
7523                 int fix = 0;
7524
7525                 cache = search_cache_extent(extent_cache, 0);
7526                 if (!cache)
7527                         break;
7528                 rec = container_of(cache, struct extent_record, cache);
7529                 if (rec->num_duplicates) {
7530                         fprintf(stderr,
7531                                 "extent item %llu has multiple extent items\n",
7532                                 (unsigned long long)rec->start);
7533                         cur_err = 1;
7534                 }
7535
7536                 if (rec->refs != rec->extent_item_refs) {
7537                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7538                                 (unsigned long long)rec->start,
7539                                 (unsigned long long)rec->nr);
7540                         fprintf(stderr, "extent item %llu, found %llu\n",
7541                                 (unsigned long long)rec->extent_item_refs,
7542                                 (unsigned long long)rec->refs);
7543                         ret = record_orphan_data_extents(root->fs_info, rec);
7544                         if (ret < 0)
7545                                 goto repair_abort;
7546                         fix = ret;
7547                         cur_err = 1;
7548                 }
7549                 if (all_backpointers_checked(rec, 1)) {
7550                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7551                                 (unsigned long long)rec->start,
7552                                 (unsigned long long)rec->nr);
7553                         fix = 1;
7554                         cur_err = 1;
7555                 }
7556                 if (!rec->owner_ref_checked) {
7557                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7558                                 (unsigned long long)rec->start,
7559                                 (unsigned long long)rec->nr);
7560                         fix = 1;
7561                         cur_err = 1;
7562                 }
7563
7564                 if (repair && fix) {
7565                         ret = fixup_extent_refs(root->fs_info, extent_cache,
7566                                                 rec);
7567                         if (ret)
7568                                 goto repair_abort;
7569                 }
7570
7571
7572                 if (rec->bad_full_backref) {
7573                         fprintf(stderr, "bad full backref, on [%llu]\n",
7574                                 (unsigned long long)rec->start);
7575                         if (repair) {
7576                                 ret = fixup_extent_flags(root->fs_info, rec);
7577                                 if (ret)
7578                                         goto repair_abort;
7579                                 fix = 1;
7580                         }
7581                         cur_err = 1;
7582                 }
7583                 /*
7584                  * Although it's not a extent ref's problem, we reuse this
7585                  * routine for error reporting.
7586                  * No repair function yet.
7587                  */
7588                 if (rec->crossing_stripes) {
7589                         fprintf(stderr,
7590                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7591                                 rec->start, rec->start + rec->max_size);
7592                         cur_err = 1;
7593                 }
7594
7595                 if (rec->wrong_chunk_type) {
7596                         fprintf(stderr,
7597                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7598                                 rec->start, rec->start + rec->max_size);
7599                         cur_err = 1;
7600                 }
7601
7602                 err = cur_err;
7603                 remove_cache_extent(extent_cache, cache);
7604                 free_all_extent_backrefs(rec);
7605                 if (!init_extent_tree && repair && (!cur_err || fix))
7606                         clear_extent_dirty(root->fs_info->excluded_extents,
7607                                            rec->start,
7608                                            rec->start + rec->max_size - 1);
7609                 free(rec);
7610         }
7611 repair_abort:
7612         if (repair) {
7613                 if (ret && ret != -EAGAIN) {
7614                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7615                         exit(1);
7616                 } else if (!ret) {
7617                         struct btrfs_trans_handle *trans;
7618
7619                         root = root->fs_info->extent_root;
7620                         trans = btrfs_start_transaction(root, 1);
7621                         if (IS_ERR(trans)) {
7622                                 ret = PTR_ERR(trans);
7623                                 goto repair_abort;
7624                         }
7625
7626                         ret = btrfs_fix_block_accounting(trans, root);
7627                         if (ret)
7628                                 goto repair_abort;
7629                         ret = btrfs_commit_transaction(trans, root);
7630                         if (ret)
7631                                 goto repair_abort;
7632                 }
7633                 return ret;
7634         }
7635
7636         if (err)
7637                 err = -EIO;
7638         return err;
7639 }
7640
7641 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7642 {
7643         u64 stripe_size;
7644
7645         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7646                 stripe_size = length;
7647                 stripe_size /= num_stripes;
7648         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7649                 stripe_size = length * 2;
7650                 stripe_size /= num_stripes;
7651         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7652                 stripe_size = length;
7653                 stripe_size /= (num_stripes - 1);
7654         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7655                 stripe_size = length;
7656                 stripe_size /= (num_stripes - 2);
7657         } else {
7658                 stripe_size = length;
7659         }
7660         return stripe_size;
7661 }
7662
7663 /*
7664  * Check the chunk with its block group/dev list ref:
7665  * Return 0 if all refs seems valid.
7666  * Return 1 if part of refs seems valid, need later check for rebuild ref
7667  * like missing block group and needs to search extent tree to rebuild them.
7668  * Return -1 if essential refs are missing and unable to rebuild.
7669  */
7670 static int check_chunk_refs(struct chunk_record *chunk_rec,
7671                             struct block_group_tree *block_group_cache,
7672                             struct device_extent_tree *dev_extent_cache,
7673                             int silent)
7674 {
7675         struct cache_extent *block_group_item;
7676         struct block_group_record *block_group_rec;
7677         struct cache_extent *dev_extent_item;
7678         struct device_extent_record *dev_extent_rec;
7679         u64 devid;
7680         u64 offset;
7681         u64 length;
7682         int metadump_v2 = 0;
7683         int i;
7684         int ret = 0;
7685
7686         block_group_item = lookup_cache_extent(&block_group_cache->tree,
7687                                                chunk_rec->offset,
7688                                                chunk_rec->length);
7689         if (block_group_item) {
7690                 block_group_rec = container_of(block_group_item,
7691                                                struct block_group_record,
7692                                                cache);
7693                 if (chunk_rec->length != block_group_rec->offset ||
7694                     chunk_rec->offset != block_group_rec->objectid ||
7695                     (!metadump_v2 &&
7696                      chunk_rec->type_flags != block_group_rec->flags)) {
7697                         if (!silent)
7698                                 fprintf(stderr,
7699                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7700                                         chunk_rec->objectid,
7701                                         chunk_rec->type,
7702                                         chunk_rec->offset,
7703                                         chunk_rec->length,
7704                                         chunk_rec->offset,
7705                                         chunk_rec->type_flags,
7706                                         block_group_rec->objectid,
7707                                         block_group_rec->type,
7708                                         block_group_rec->offset,
7709                                         block_group_rec->offset,
7710                                         block_group_rec->objectid,
7711                                         block_group_rec->flags);
7712                         ret = -1;
7713                 } else {
7714                         list_del_init(&block_group_rec->list);
7715                         chunk_rec->bg_rec = block_group_rec;
7716                 }
7717         } else {
7718                 if (!silent)
7719                         fprintf(stderr,
7720                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7721                                 chunk_rec->objectid,
7722                                 chunk_rec->type,
7723                                 chunk_rec->offset,
7724                                 chunk_rec->length,
7725                                 chunk_rec->offset,
7726                                 chunk_rec->type_flags);
7727                 ret = 1;
7728         }
7729
7730         if (metadump_v2)
7731                 return ret;
7732
7733         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7734                                     chunk_rec->num_stripes);
7735         for (i = 0; i < chunk_rec->num_stripes; ++i) {
7736                 devid = chunk_rec->stripes[i].devid;
7737                 offset = chunk_rec->stripes[i].offset;
7738                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7739                                                        devid, offset, length);
7740                 if (dev_extent_item) {
7741                         dev_extent_rec = container_of(dev_extent_item,
7742                                                 struct device_extent_record,
7743                                                 cache);
7744                         if (dev_extent_rec->objectid != devid ||
7745                             dev_extent_rec->offset != offset ||
7746                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
7747                             dev_extent_rec->length != length) {
7748                                 if (!silent)
7749                                         fprintf(stderr,
7750                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7751                                                 chunk_rec->objectid,
7752                                                 chunk_rec->type,
7753                                                 chunk_rec->offset,
7754                                                 chunk_rec->stripes[i].devid,
7755                                                 chunk_rec->stripes[i].offset,
7756                                                 dev_extent_rec->objectid,
7757                                                 dev_extent_rec->offset,
7758                                                 dev_extent_rec->length);
7759                                 ret = -1;
7760                         } else {
7761                                 list_move(&dev_extent_rec->chunk_list,
7762                                           &chunk_rec->dextents);
7763                         }
7764                 } else {
7765                         if (!silent)
7766                                 fprintf(stderr,
7767                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7768                                         chunk_rec->objectid,
7769                                         chunk_rec->type,
7770                                         chunk_rec->offset,
7771                                         chunk_rec->stripes[i].devid,
7772                                         chunk_rec->stripes[i].offset);
7773                         ret = -1;
7774                 }
7775         }
7776         return ret;
7777 }
7778
7779 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7780 int check_chunks(struct cache_tree *chunk_cache,
7781                  struct block_group_tree *block_group_cache,
7782                  struct device_extent_tree *dev_extent_cache,
7783                  struct list_head *good, struct list_head *bad,
7784                  struct list_head *rebuild, int silent)
7785 {
7786         struct cache_extent *chunk_item;
7787         struct chunk_record *chunk_rec;
7788         struct block_group_record *bg_rec;
7789         struct device_extent_record *dext_rec;
7790         int err;
7791         int ret = 0;
7792
7793         chunk_item = first_cache_extent(chunk_cache);
7794         while (chunk_item) {
7795                 chunk_rec = container_of(chunk_item, struct chunk_record,
7796                                          cache);
7797                 err = check_chunk_refs(chunk_rec, block_group_cache,
7798                                        dev_extent_cache, silent);
7799                 if (err < 0)
7800                         ret = err;
7801                 if (err == 0 && good)
7802                         list_add_tail(&chunk_rec->list, good);
7803                 if (err > 0 && rebuild)
7804                         list_add_tail(&chunk_rec->list, rebuild);
7805                 if (err < 0 && bad)
7806                         list_add_tail(&chunk_rec->list, bad);
7807                 chunk_item = next_cache_extent(chunk_item);
7808         }
7809
7810         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7811                 if (!silent)
7812                         fprintf(stderr,
7813                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7814                                 bg_rec->objectid,
7815                                 bg_rec->offset,
7816                                 bg_rec->flags);
7817                 if (!ret)
7818                         ret = 1;
7819         }
7820
7821         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7822                             chunk_list) {
7823                 if (!silent)
7824                         fprintf(stderr,
7825                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7826                                 dext_rec->objectid,
7827                                 dext_rec->offset,
7828                                 dext_rec->length);
7829                 if (!ret)
7830                         ret = 1;
7831         }
7832         return ret;
7833 }
7834
7835
7836 static int check_device_used(struct device_record *dev_rec,
7837                              struct device_extent_tree *dext_cache)
7838 {
7839         struct cache_extent *cache;
7840         struct device_extent_record *dev_extent_rec;
7841         u64 total_byte = 0;
7842
7843         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7844         while (cache) {
7845                 dev_extent_rec = container_of(cache,
7846                                               struct device_extent_record,
7847                                               cache);
7848                 if (dev_extent_rec->objectid != dev_rec->devid)
7849                         break;
7850
7851                 list_del_init(&dev_extent_rec->device_list);
7852                 total_byte += dev_extent_rec->length;
7853                 cache = next_cache_extent(cache);
7854         }
7855
7856         if (total_byte != dev_rec->byte_used) {
7857                 fprintf(stderr,
7858                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7859                         total_byte, dev_rec->byte_used, dev_rec->objectid,
7860                         dev_rec->type, dev_rec->offset);
7861                 return -1;
7862         } else {
7863                 return 0;
7864         }
7865 }
7866
7867 /*
7868  * Unlike device size alignment check above, some super total_bytes check
7869  * failure can lead to mount failure for newer kernel.
7870  *
7871  * So this function will return the error for a fatal super total_bytes problem.
7872  */
7873 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
7874 {
7875         struct btrfs_device *dev;
7876         struct list_head *dev_list = &fs_info->fs_devices->devices;
7877         u64 total_bytes = 0;
7878         u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
7879
7880         list_for_each_entry(dev, dev_list, dev_list)
7881                 total_bytes += dev->total_bytes;
7882
7883         /* Important check, which can cause unmountable fs */
7884         if (super_bytes < total_bytes) {
7885                 error("super total bytes %llu smaller than real device(s) size %llu",
7886                         super_bytes, total_bytes);
7887                 error("mounting this fs may fail for newer kernels");
7888                 error("this can be fixed by 'btrfs rescue fix-device-size'");
7889                 return false;
7890         }
7891
7892         /*
7893          * Optional check, just to make everything aligned and match with each
7894          * other.
7895          *
7896          * For a btrfs-image restored fs, we don't need to check it anyway.
7897          */
7898         if (btrfs_super_flags(fs_info->super_copy) &
7899             (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
7900                 return true;
7901         if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
7902             !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
7903             super_bytes != total_bytes) {
7904                 warning("minor unaligned/mismatch device size detected");
7905                 warning(
7906                 "recommended to use 'btrfs rescue fix-device-size' to fix it");
7907         }
7908         return true;
7909 }
7910
7911 /* check btrfs_dev_item -> btrfs_dev_extent */
7912 static int check_devices(struct rb_root *dev_cache,
7913                          struct device_extent_tree *dev_extent_cache)
7914 {
7915         struct rb_node *dev_node;
7916         struct device_record *dev_rec;
7917         struct device_extent_record *dext_rec;
7918         int err;
7919         int ret = 0;
7920
7921         dev_node = rb_first(dev_cache);
7922         while (dev_node) {
7923                 dev_rec = container_of(dev_node, struct device_record, node);
7924                 err = check_device_used(dev_rec, dev_extent_cache);
7925                 if (err)
7926                         ret = err;
7927
7928                 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
7929                                          global_info->sectorsize);
7930                 dev_node = rb_next(dev_node);
7931         }
7932         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7933                             device_list) {
7934                 fprintf(stderr,
7935                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7936                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
7937                 if (!ret)
7938                         ret = 1;
7939         }
7940         return ret;
7941 }
7942
7943 static int add_root_item_to_list(struct list_head *head,
7944                                   u64 objectid, u64 bytenr, u64 last_snapshot,
7945                                   u8 level, u8 drop_level,
7946                                   struct btrfs_key *drop_key)
7947 {
7948         struct root_item_record *ri_rec;
7949
7950         ri_rec = malloc(sizeof(*ri_rec));
7951         if (!ri_rec)
7952                 return -ENOMEM;
7953         ri_rec->bytenr = bytenr;
7954         ri_rec->objectid = objectid;
7955         ri_rec->level = level;
7956         ri_rec->drop_level = drop_level;
7957         ri_rec->last_snapshot = last_snapshot;
7958         if (drop_key)
7959                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
7960         list_add_tail(&ri_rec->list, head);
7961
7962         return 0;
7963 }
7964
7965 static void free_root_item_list(struct list_head *list)
7966 {
7967         struct root_item_record *ri_rec;
7968
7969         while (!list_empty(list)) {
7970                 ri_rec = list_first_entry(list, struct root_item_record,
7971                                           list);
7972                 list_del_init(&ri_rec->list);
7973                 free(ri_rec);
7974         }
7975 }
7976
7977 static int deal_root_from_list(struct list_head *list,
7978                                struct btrfs_root *root,
7979                                struct block_info *bits,
7980                                int bits_nr,
7981                                struct cache_tree *pending,
7982                                struct cache_tree *seen,
7983                                struct cache_tree *reada,
7984                                struct cache_tree *nodes,
7985                                struct cache_tree *extent_cache,
7986                                struct cache_tree *chunk_cache,
7987                                struct rb_root *dev_cache,
7988                                struct block_group_tree *block_group_cache,
7989                                struct device_extent_tree *dev_extent_cache)
7990 {
7991         int ret = 0;
7992         u64 last;
7993
7994         while (!list_empty(list)) {
7995                 struct root_item_record *rec;
7996                 struct extent_buffer *buf;
7997
7998                 rec = list_entry(list->next,
7999                                  struct root_item_record, list);
8000                 last = 0;
8001                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
8002                 if (!extent_buffer_uptodate(buf)) {
8003                         free_extent_buffer(buf);
8004                         ret = -EIO;
8005                         break;
8006                 }
8007                 ret = add_root_to_pending(buf, extent_cache, pending,
8008                                     seen, nodes, rec->objectid);
8009                 if (ret < 0)
8010                         break;
8011                 /*
8012                  * To rebuild extent tree, we need deal with snapshot
8013                  * one by one, otherwise we deal with node firstly which
8014                  * can maximize readahead.
8015                  */
8016                 while (1) {
8017                         ret = run_next_block(root, bits, bits_nr, &last,
8018                                              pending, seen, reada, nodes,
8019                                              extent_cache, chunk_cache,
8020                                              dev_cache, block_group_cache,
8021                                              dev_extent_cache, rec);
8022                         if (ret != 0)
8023                                 break;
8024                 }
8025                 free_extent_buffer(buf);
8026                 list_del(&rec->list);
8027                 free(rec);
8028                 if (ret < 0)
8029                         break;
8030         }
8031         while (ret >= 0) {
8032                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8033                                      reada, nodes, extent_cache, chunk_cache,
8034                                      dev_cache, block_group_cache,
8035                                      dev_extent_cache, NULL);
8036                 if (ret != 0) {
8037                         if (ret > 0)
8038                                 ret = 0;
8039                         break;
8040                 }
8041         }
8042         return ret;
8043 }
8044
8045 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
8046 {
8047         struct rb_root dev_cache;
8048         struct cache_tree chunk_cache;
8049         struct block_group_tree block_group_cache;
8050         struct device_extent_tree dev_extent_cache;
8051         struct cache_tree extent_cache;
8052         struct cache_tree seen;
8053         struct cache_tree pending;
8054         struct cache_tree reada;
8055         struct cache_tree nodes;
8056         struct extent_io_tree excluded_extents;
8057         struct cache_tree corrupt_blocks;
8058         struct btrfs_path path;
8059         struct btrfs_key key;
8060         struct btrfs_key found_key;
8061         int ret, err = 0;
8062         struct block_info *bits;
8063         int bits_nr;
8064         struct extent_buffer *leaf;
8065         int slot;
8066         struct btrfs_root_item ri;
8067         struct list_head dropping_trees;
8068         struct list_head normal_trees;
8069         struct btrfs_root *root1;
8070         struct btrfs_root *root;
8071         u64 objectid;
8072         u8 level;
8073
8074         root = fs_info->fs_root;
8075         dev_cache = RB_ROOT;
8076         cache_tree_init(&chunk_cache);
8077         block_group_tree_init(&block_group_cache);
8078         device_extent_tree_init(&dev_extent_cache);
8079
8080         cache_tree_init(&extent_cache);
8081         cache_tree_init(&seen);
8082         cache_tree_init(&pending);
8083         cache_tree_init(&nodes);
8084         cache_tree_init(&reada);
8085         cache_tree_init(&corrupt_blocks);
8086         extent_io_tree_init(&excluded_extents);
8087         INIT_LIST_HEAD(&dropping_trees);
8088         INIT_LIST_HEAD(&normal_trees);
8089
8090         if (repair) {
8091                 fs_info->excluded_extents = &excluded_extents;
8092                 fs_info->fsck_extent_cache = &extent_cache;
8093                 fs_info->free_extent_hook = free_extent_hook;
8094                 fs_info->corrupt_blocks = &corrupt_blocks;
8095         }
8096
8097         bits_nr = 1024;
8098         bits = malloc(bits_nr * sizeof(struct block_info));
8099         if (!bits) {
8100                 perror("malloc");
8101                 exit(1);
8102         }
8103
8104         if (ctx.progress_enabled) {
8105                 ctx.tp = TASK_EXTENTS;
8106                 task_start(ctx.info);
8107         }
8108
8109 again:
8110         root1 = fs_info->tree_root;
8111         level = btrfs_header_level(root1->node);
8112         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8113                                     root1->node->start, 0, level, 0, NULL);
8114         if (ret < 0)
8115                 goto out;
8116         root1 = fs_info->chunk_root;
8117         level = btrfs_header_level(root1->node);
8118         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8119                                     root1->node->start, 0, level, 0, NULL);
8120         if (ret < 0)
8121                 goto out;
8122         btrfs_init_path(&path);
8123         key.offset = 0;
8124         key.objectid = 0;
8125         key.type = BTRFS_ROOT_ITEM_KEY;
8126         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
8127         if (ret < 0)
8128                 goto out;
8129         while (1) {
8130                 leaf = path.nodes[0];
8131                 slot = path.slots[0];
8132                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8133                         ret = btrfs_next_leaf(root, &path);
8134                         if (ret != 0)
8135                                 break;
8136                         leaf = path.nodes[0];
8137                         slot = path.slots[0];
8138                 }
8139                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8140                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8141                         unsigned long offset;
8142                         u64 last_snapshot;
8143
8144                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8145                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8146                         last_snapshot = btrfs_root_last_snapshot(&ri);
8147                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8148                                 level = btrfs_root_level(&ri);
8149                                 ret = add_root_item_to_list(&normal_trees,
8150                                                 found_key.objectid,
8151                                                 btrfs_root_bytenr(&ri),
8152                                                 last_snapshot, level,
8153                                                 0, NULL);
8154                                 if (ret < 0)
8155                                         goto out;
8156                         } else {
8157                                 level = btrfs_root_level(&ri);
8158                                 objectid = found_key.objectid;
8159                                 btrfs_disk_key_to_cpu(&found_key,
8160                                                       &ri.drop_progress);
8161                                 ret = add_root_item_to_list(&dropping_trees,
8162                                                 objectid,
8163                                                 btrfs_root_bytenr(&ri),
8164                                                 last_snapshot, level,
8165                                                 ri.drop_level, &found_key);
8166                                 if (ret < 0)
8167                                         goto out;
8168                         }
8169                 }
8170                 path.slots[0]++;
8171         }
8172         btrfs_release_path(&path);
8173
8174         /*
8175          * check_block can return -EAGAIN if it fixes something, please keep
8176          * this in mind when dealing with return values from these functions, if
8177          * we get -EAGAIN we want to fall through and restart the loop.
8178          */
8179         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8180                                   &seen, &reada, &nodes, &extent_cache,
8181                                   &chunk_cache, &dev_cache, &block_group_cache,
8182                                   &dev_extent_cache);
8183         if (ret < 0) {
8184                 if (ret == -EAGAIN)
8185                         goto loop;
8186                 goto out;
8187         }
8188         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8189                                   &pending, &seen, &reada, &nodes,
8190                                   &extent_cache, &chunk_cache, &dev_cache,
8191                                   &block_group_cache, &dev_extent_cache);
8192         if (ret < 0) {
8193                 if (ret == -EAGAIN)
8194                         goto loop;
8195                 goto out;
8196         }
8197
8198         ret = check_chunks(&chunk_cache, &block_group_cache,
8199                            &dev_extent_cache, NULL, NULL, NULL, 0);
8200         if (ret) {
8201                 if (ret == -EAGAIN)
8202                         goto loop;
8203                 err = ret;
8204         }
8205
8206         ret = check_extent_refs(root, &extent_cache);
8207         if (ret < 0) {
8208                 if (ret == -EAGAIN)
8209                         goto loop;
8210                 goto out;
8211         }
8212
8213         ret = check_devices(&dev_cache, &dev_extent_cache);
8214         if (ret && err)
8215                 ret = err;
8216
8217 out:
8218         task_stop(ctx.info);
8219         if (repair) {
8220                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
8221                 extent_io_tree_cleanup(&excluded_extents);
8222                 fs_info->fsck_extent_cache = NULL;
8223                 fs_info->free_extent_hook = NULL;
8224                 fs_info->corrupt_blocks = NULL;
8225                 fs_info->excluded_extents = NULL;
8226         }
8227         free(bits);
8228         free_chunk_cache_tree(&chunk_cache);
8229         free_device_cache_tree(&dev_cache);
8230         free_block_group_tree(&block_group_cache);
8231         free_device_extent_tree(&dev_extent_cache);
8232         free_extent_cache_tree(&seen);
8233         free_extent_cache_tree(&pending);
8234         free_extent_cache_tree(&reada);
8235         free_extent_cache_tree(&nodes);
8236         free_root_item_list(&normal_trees);
8237         free_root_item_list(&dropping_trees);
8238         return ret;
8239 loop:
8240         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
8241         free_extent_cache_tree(&seen);
8242         free_extent_cache_tree(&pending);
8243         free_extent_cache_tree(&reada);
8244         free_extent_cache_tree(&nodes);
8245         free_chunk_cache_tree(&chunk_cache);
8246         free_block_group_tree(&block_group_cache);
8247         free_device_cache_tree(&dev_cache);
8248         free_device_extent_tree(&dev_extent_cache);
8249         free_extent_record_cache(&extent_cache);
8250         free_root_item_list(&normal_trees);
8251         free_root_item_list(&dropping_trees);
8252         extent_io_tree_cleanup(&excluded_extents);
8253         goto again;
8254 }
8255
8256 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
8257 {
8258         int ret;
8259
8260         if (!ctx.progress_enabled)
8261                 fprintf(stderr, "checking extents\n");
8262         if (check_mode == CHECK_MODE_LOWMEM)
8263                 ret = check_chunks_and_extents_lowmem(fs_info);
8264         else
8265                 ret = check_chunks_and_extents(fs_info);
8266
8267         /* Also repair device size related problems */
8268         if (repair && !ret) {
8269                 ret = btrfs_fix_device_and_super_size(fs_info);
8270                 if (ret > 0)
8271                         ret = 0;
8272         }
8273         return ret;
8274 }
8275
/*
 * Reinitialize @root so that its node is an empty level-0 block.
 *
 * @trans:     transaction whose transid is stamped into the new header
 * @root:      root to reinitialize
 * @overwrite: non-zero to reuse the current root node in place instead of
 *             allocating a new block
 *
 * When @overwrite is 0 a new block is allocated; if that allocation fails the
 * function falls back to overwriting the current root node.
 *
 * Returns 0 on success, or the error returned by btrfs_update_root().
 */
8276 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8277                            struct btrfs_root *root, int overwrite)
8278 {
8279         struct extent_buffer *c;
8280         struct extent_buffer *old = root->node;
8281         int level;
8282         int ret;
8283         struct btrfs_disk_key disk_key = {0,0,0};
8284
8285         level = 0;
8286
             /*
              * Reusing the old node: take an extra reference on it because the
              * reference held through 'old' is dropped at the end.
              */
8287         if (overwrite) {
8288                 c = old;
8289                 extent_buffer_get(c);
8290                 goto init;
8291         }
8292         c = btrfs_alloc_free_block(trans, root,
8293                                    root->fs_info->nodesize,
8294                                    root->root_key.objectid,
8295                                    &disk_key, level, 0, 0);
             /* Allocation failed: fall back to overwriting the current node */
8296         if (IS_ERR(c)) {
8297                 c = old;
8298                 extent_buffer_get(c);
8299                 overwrite = 1;
8300         }
8301 init:
             /* Zero the header, then fill in the fields of an empty leaf */
8302         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8303         btrfs_set_header_level(c, level);
8304         btrfs_set_header_bytenr(c, c->start);
8305         btrfs_set_header_generation(c, trans->transid);
8306         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8307         btrfs_set_header_owner(c, root->root_key.objectid);
8308
8309         write_extent_buffer(c, root->fs_info->fsid,
8310                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8311
8312         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8313                             btrfs_header_chunk_tree_uuid(c),
8314                             BTRFS_UUID_SIZE);
8315
8316         btrfs_mark_buffer_dirty(c);
8317         /*
8318          * this case can happen in the following case:
8319          *
8320          * 1.overwrite previous root.
8321          *
8322          * 2.reinit reloc data root, this is because we skip pin
8323          * down reloc data tree before which means we can allocate
8324          * same block bytenr here.
8325          */
8326         if (old->start == c->start) {
                     /*
                      * The block address did not change, so update the root
                      * item's generation and level in the tree root here.
                      */
8327                 btrfs_set_root_generation(&root->root_item,
8328                                           trans->transid);
8329                 root->root_item.level = btrfs_header_level(root->node);
8330                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8331                                         &root->root_key, &root->root_item);
8332                 if (ret) {
8333                         free_extent_buffer(c);
8334                         return ret;
8335                 }
8336         }
             /* Drop the old node reference and install the reinitialized one */
8337         free_extent_buffer(old);
8338         root->node = c;
8339         add_root_to_dirty_list(root);
8340         return 0;
8341 }
8342
/*
 * Pin @eb and, recursively, the tree blocks reachable from it.
 *
 * @fs_info:   filesystem whose pinned_extents tree is updated
 * @eb:        tree block to start from
 * @tree_root: non-zero when the walk is over the tree root
 *
 * In a leaf, every ROOT_ITEM other than the extent root, tree reloc and data
 * reloc roots is followed: the block it points to is read and walked with
 * @tree_root set to 0.  In a node, each child is read and walked, except that
 * when @tree_root is 0 the children of a level-1 node are pinned without
 * being read.
 *
 * Returns 0 on success, -EIO if a block could not be read, or the error
 * returned by a recursive call.
 */
8343 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8344                                 struct extent_buffer *eb, int tree_root)
8345 {
8346         struct extent_buffer *tmp;
8347         struct btrfs_root_item *ri;
8348         struct btrfs_key key;
8349         u64 bytenr;
8350         int level = btrfs_header_level(eb);
8351         int nritems;
8352         int ret;
8353         int i;
8354
8355         /*
8356          * If we have pinned this block before, don't pin it again.
8357          * This can not only avoid forever loop with broken filesystem
8358          * but also give us some speedups.
8359          */
8360         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8361                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8362                 return 0;
8363
8364         btrfs_pin_extent(fs_info, eb->start, eb->len);
8365
8366         nritems = btrfs_header_nritems(eb);
8367         for (i = 0; i < nritems; i++) {
8368                 if (level == 0) {
                             /* Leaf: only ROOT_ITEMs lead to further blocks */
8369                         btrfs_item_key_to_cpu(eb, &key, i);
8370                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8371                                 continue;
8372                         /* Skip the extent root and reloc roots */
8373                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8374                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8375                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8376                                 continue;
8377                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8378                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8379
8380                         /*
8381                          * If at any point we start needing the real root we
8382                          * will have to build a stump root for the root we are
8383                          * in, but for now this doesn't actually use the root so
8384                          * just pass in extent_root.
8385                          */
8386                         tmp = read_tree_block(fs_info, bytenr, 0);
8387                         if (!extent_buffer_uptodate(tmp)) {
8388                                 fprintf(stderr, "Error reading root block\n");
8389                                 return -EIO;
8390                         }
                             /* The referenced tree is walked as a non-tree-root */
8391                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8392                         free_extent_buffer(tmp);
8393                         if (ret)
8394                                 return ret;
8395                 } else {
8396                         bytenr = btrfs_node_blockptr(eb, i);
8397
8398                         /* If we aren't the tree root don't read the block */
8399                         if (level == 1 && !tree_root) {
8400                                 btrfs_pin_extent(fs_info, bytenr,
8401                                                 fs_info->nodesize);
8402                                 continue;
8403                         }
8404
8405                         tmp = read_tree_block(fs_info, bytenr, 0);
8406                         if (!extent_buffer_uptodate(tmp)) {
8407                                 fprintf(stderr, "Error reading tree block\n");
8408                                 return -EIO;
8409                         }
                             /* Children inherit the caller's tree_root flag */
8410                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8411                         free_extent_buffer(tmp);
8412                         if (ret)
8413                                 return ret;
8414                 }
8415         }
8416
8417         return 0;
8418 }
8419
8420 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8421 {
8422         int ret;
8423
8424         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8425         if (ret)
8426                 return ret;
8427
8428         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8429 }
8430
8431 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8432 {
8433         struct btrfs_block_group_cache *cache;
8434         struct btrfs_path path;
8435         struct extent_buffer *leaf;
8436         struct btrfs_chunk *chunk;
8437         struct btrfs_key key;
8438         int ret;
8439         u64 start;
8440
8441         btrfs_init_path(&path);
8442         key.objectid = 0;
8443         key.type = BTRFS_CHUNK_ITEM_KEY;
8444         key.offset = 0;
8445         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
8446         if (ret < 0) {
8447                 btrfs_release_path(&path);
8448                 return ret;
8449         }
8450
8451         /*
8452          * We do this in case the block groups were screwed up and had alloc
8453          * bits that aren't actually set on the chunks.  This happens with
8454          * restored images every time and could happen in real life I guess.
8455          */
8456         fs_info->avail_data_alloc_bits = 0;
8457         fs_info->avail_metadata_alloc_bits = 0;
8458         fs_info->avail_system_alloc_bits = 0;
8459
8460         /* First we need to create the in-memory block groups */
8461         while (1) {
8462                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8463                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
8464                         if (ret < 0) {
8465                                 btrfs_release_path(&path);
8466                                 return ret;
8467                         }
8468                         if (ret) {
8469                                 ret = 0;
8470                                 break;
8471                         }
8472                 }
8473                 leaf = path.nodes[0];
8474                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8475                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8476                         path.slots[0]++;
8477                         continue;
8478                 }
8479
8480                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
8481                 btrfs_add_block_group(fs_info, 0,
8482                                       btrfs_chunk_type(leaf, chunk), key.offset,
8483                                       btrfs_chunk_length(leaf, chunk));
8484                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8485                                  key.offset + btrfs_chunk_length(leaf, chunk));
8486                 path.slots[0]++;
8487         }
8488         start = 0;
8489         while (1) {
8490                 cache = btrfs_lookup_first_block_group(fs_info, start);
8491                 if (!cache)
8492                         break;
8493                 cache->cached = 1;
8494                 start = cache->key.objectid + cache->key.offset;
8495         }
8496
8497         btrfs_release_path(&path);
8498         return 0;
8499 }
8500
/*
 * Remove a pending balance and reinitialize the data reloc tree.
 *
 * If a balance item exists in the tree root it is deleted, and then every
 * item whose objectid is BTRFS_TREE_RELOC_OBJECTID is deleted from the tree
 * root as well.  Whether or not a balance item was found, the data reloc tree
 * is reinitialized and btrfs_make_root_dir() is called on it.
 *
 * Returns 0 on success or the error of the step that failed.
 */
8501 static int reset_balance(struct btrfs_trans_handle *trans,
8502                          struct btrfs_fs_info *fs_info)
8503 {
8504         struct btrfs_root *root = fs_info->tree_root;
8505         struct btrfs_path path;
8506         struct extent_buffer *leaf;
8507         struct btrfs_key key;
8508         int del_slot, del_nr = 0;
8509         int ret;
8510         int found = 0;
8511
8512         btrfs_init_path(&path);
8513         key.objectid = BTRFS_BALANCE_OBJECTID;
8514         key.type = BTRFS_BALANCE_ITEM_KEY;
8515         key.offset = 0;
8516         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
             /* No balance item: skip the reloc root cleanup entirely */
8517         if (ret) {
8518                 if (ret > 0)
8519                         ret = 0;
8520                 if (!ret)
8521                         goto reinit_data_reloc;
8522                 else
8523                         goto out;
8524         }
8525
8526         ret = btrfs_del_item(trans, root, &path);
8527         if (ret)
8528                 goto out;
8529         btrfs_release_path(&path);
8530
8531         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8532         key.type = BTRFS_ROOT_ITEM_KEY;
8533         key.offset = 0;
8534         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8535         if (ret < 0)
8536                 goto out;
             /*
              * Walk the items leaf by leaf.  Matching slots are counted in
              * del_slot/del_nr and removed with one btrfs_del_items() call
              * when the end of the leaf (or of the loop) is reached.
              */
8537         while (1) {
8538                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
                             /* Nothing was examined since the last search: done */
8539                         if (!found)
8540                                 break;
8541
8542                         if (del_nr) {
8543                                 ret = btrfs_del_items(trans, root, &path,
8544                                                       del_slot, del_nr);
8545                                 del_nr = 0;
8546                                 if (ret)
8547                                         goto out;
8548                         }
                             /* Search again just past the last key that was seen */
8549                         key.offset++;
8550                         btrfs_release_path(&path);
8551
8552                         found = 0;
8553                         ret = btrfs_search_slot(trans, root, &key, &path,
8554                                                 -1, 1);
8555                         if (ret < 0)
8556                                 goto out;
8557                         continue;
8558                 }
8559                 found = 1;
8560                 leaf = path.nodes[0];
8561                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8562                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8563                         break;
8564                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8565                         path.slots[0]++;
8566                         continue;
8567                 }
                     /* Start a new batch at this slot or extend the current one */
8568                 if (!del_nr) {
8569                         del_slot = path.slots[0];
8570                         del_nr = 1;
8571                 } else {
8572                         del_nr++;
8573                 }
8574                 path.slots[0]++;
8575         }
8576
             /* Delete whatever is still batched from the last leaf visited */
8577         if (del_nr) {
8578                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
8579                 if (ret)
8580                         goto out;
8581         }
8582         btrfs_release_path(&path);
8583
8584 reinit_data_reloc:
8585         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8586         key.type = BTRFS_ROOT_ITEM_KEY;
8587         key.offset = (u64)-1;
8588         root = btrfs_read_fs_root(fs_info, &key);
8589         if (IS_ERR(root)) {
8590                 fprintf(stderr, "Error reading data reloc tree\n");
8591                 ret = PTR_ERR(root);
8592                 goto out;
8593         }
8594         record_root_in_trans(trans, root);
8595         ret = btrfs_fsck_reinit_root(trans, root, 0);
8596         if (ret)
8597                 goto out;
8598         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8599 out:
8600         btrfs_release_path(&path);
8601         return ret;
8602 }
8603
8604 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8605                               struct btrfs_fs_info *fs_info)
8606 {
8607         u64 start = 0;
8608         int ret;
8609
8610         /*
8611          * The only reason we don't do this is because right now we're just
8612          * walking the trees we find and pinning down their bytes, we don't look
8613          * at any of the leaves.  In order to do mixed groups we'd have to check
8614          * the leaves of any fs roots and pin down the bytes for any file
8615          * extents we find.  Not hard but why do it if we don't have to?
8616          */
8617         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
8618                 fprintf(stderr, "We don't support re-initing the extent tree "
8619                         "for mixed block groups yet, please notify a btrfs "
8620                         "developer you want to do this so they can add this "
8621                         "functionality.\n");
8622                 return -EINVAL;
8623         }
8624
8625         /*
8626          * first we need to walk all of the trees except the extent tree and pin
8627          * down the bytes that are in use so we don't overwrite any existing
8628          * metadata.
8629          */
8630         ret = pin_metadata_blocks(fs_info);
8631         if (ret) {
8632                 fprintf(stderr, "error pinning down used bytes\n");
8633                 return ret;
8634         }
8635
8636         /*
8637          * Need to drop all the block groups since we're going to recreate all
8638          * of them again.
8639          */
8640         btrfs_free_block_groups(fs_info);
8641         ret = reset_block_groups(fs_info);
8642         if (ret) {
8643                 fprintf(stderr, "error resetting the block groups\n");
8644                 return ret;
8645         }
8646
8647         /* Ok we can allocate now, reinit the extent root */
8648         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8649         if (ret) {
8650                 fprintf(stderr, "extent root initialization failed\n");
8651                 /*
8652                  * When the transaction code is updated we should end the
8653                  * transaction, but for now progs only knows about commit so
8654                  * just return an error.
8655                  */
8656                 return ret;
8657         }
8658
8659         /*
8660          * Now we have all the in-memory block groups setup so we can make
8661          * allocations properly, and the metadata we care about is safe since we
8662          * pinned all of it above.
8663          */
8664         while (1) {
8665                 struct btrfs_block_group_cache *cache;
8666
8667                 cache = btrfs_lookup_first_block_group(fs_info, start);
8668                 if (!cache)
8669                         break;
8670                 start = cache->key.objectid + cache->key.offset;
8671                 ret = btrfs_insert_item(trans, fs_info->extent_root,
8672                                         &cache->key, &cache->item,
8673                                         sizeof(cache->item));
8674                 if (ret) {
8675                         fprintf(stderr, "Error adding block group\n");
8676                         return ret;
8677                 }
8678                 btrfs_extent_post_op(trans, fs_info->extent_root);
8679         }
8680
8681         ret = reset_balance(trans, fs_info);
8682         if (ret)
8683                 fprintf(stderr, "error resetting the pending balance\n");
8684
8685         return ret;
8686 }
8687
8688 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8689 {
8690         struct btrfs_path path;
8691         struct btrfs_trans_handle *trans;
8692         struct btrfs_key key;
8693         int ret;
8694
8695         printf("Recowing metadata block %llu\n", eb->start);
8696         key.objectid = btrfs_header_owner(eb);
8697         key.type = BTRFS_ROOT_ITEM_KEY;
8698         key.offset = (u64)-1;
8699
8700         root = btrfs_read_fs_root(root->fs_info, &key);
8701         if (IS_ERR(root)) {
8702                 fprintf(stderr, "Couldn't find owner root %llu\n",
8703                         key.objectid);
8704                 return PTR_ERR(root);
8705         }
8706
8707         trans = btrfs_start_transaction(root, 1);
8708         if (IS_ERR(trans))
8709                 return PTR_ERR(trans);
8710
8711         btrfs_init_path(&path);
8712         path.lowest_level = btrfs_header_level(eb);
8713         if (path.lowest_level)
8714                 btrfs_node_key_to_cpu(eb, &key, 0);
8715         else
8716                 btrfs_item_key_to_cpu(eb, &key, 0);
8717
8718         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
8719         btrfs_commit_transaction(trans, root);
8720         btrfs_release_path(&path);
8721         return ret;
8722 }
8723
8724 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8725 {
8726         struct btrfs_path path;
8727         struct btrfs_trans_handle *trans;
8728         struct btrfs_key key;
8729         int ret;
8730
8731         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8732                bad->key.type, bad->key.offset);
8733         key.objectid = bad->root_id;
8734         key.type = BTRFS_ROOT_ITEM_KEY;
8735         key.offset = (u64)-1;
8736
8737         root = btrfs_read_fs_root(root->fs_info, &key);
8738         if (IS_ERR(root)) {
8739                 fprintf(stderr, "Couldn't find owner root %llu\n",
8740                         key.objectid);
8741                 return PTR_ERR(root);
8742         }
8743
8744         trans = btrfs_start_transaction(root, 1);
8745         if (IS_ERR(trans))
8746                 return PTR_ERR(trans);
8747
8748         btrfs_init_path(&path);
8749         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
8750         if (ret) {
8751                 if (ret > 0)
8752                         ret = 0;
8753                 goto out;
8754         }
8755         ret = btrfs_del_item(trans, root, &path);
8756 out:
8757         btrfs_commit_transaction(trans, root);
8758         btrfs_release_path(&path);
8759         return ret;
8760 }
8761
8762 static int zero_log_tree(struct btrfs_root *root)
8763 {
8764         struct btrfs_trans_handle *trans;
8765         int ret;
8766
8767         trans = btrfs_start_transaction(root, 1);
8768         if (IS_ERR(trans)) {
8769                 ret = PTR_ERR(trans);
8770                 return ret;
8771         }
8772         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8773         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8774         ret = btrfs_commit_transaction(trans, root);
8775         return ret;
8776 }
8777
8778 static int populate_csum(struct btrfs_trans_handle *trans,
8779                          struct btrfs_root *csum_root, char *buf, u64 start,
8780                          u64 len)
8781 {
8782         struct btrfs_fs_info *fs_info = csum_root->fs_info;
8783         u64 offset = 0;
8784         u64 sectorsize;
8785         int ret = 0;
8786
8787         while (offset < len) {
8788                 sectorsize = fs_info->sectorsize;
8789                 ret = read_extent_data(fs_info, buf, start + offset,
8790                                        &sectorsize, 0);
8791                 if (ret)
8792                         break;
8793                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8794                                             start + offset, buf, sectorsize);
8795                 if (ret)
8796                         break;
8797                 offset += sectorsize;
8798         }
8799         return ret;
8800 }
8801
8802 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8803                                       struct btrfs_root *csum_root,
8804                                       struct btrfs_root *cur_root)
8805 {
8806         struct btrfs_path path;
8807         struct btrfs_key key;
8808         struct extent_buffer *node;
8809         struct btrfs_file_extent_item *fi;
8810         char *buf = NULL;
8811         u64 start = 0;
8812         u64 len = 0;
8813         int slot = 0;
8814         int ret = 0;
8815
8816         buf = malloc(cur_root->fs_info->sectorsize);
8817         if (!buf)
8818                 return -ENOMEM;
8819
8820         btrfs_init_path(&path);
8821         key.objectid = 0;
8822         key.offset = 0;
8823         key.type = 0;
8824         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
8825         if (ret < 0)
8826                 goto out;
8827         /* Iterate all regular file extents and fill its csum */
8828         while (1) {
8829                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8830
8831                 if (key.type != BTRFS_EXTENT_DATA_KEY)
8832                         goto next;
8833                 node = path.nodes[0];
8834                 slot = path.slots[0];
8835                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8836                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8837                         goto next;
8838                 start = btrfs_file_extent_disk_bytenr(node, fi);
8839                 len = btrfs_file_extent_disk_num_bytes(node, fi);
8840
8841                 ret = populate_csum(trans, csum_root, buf, start, len);
8842                 if (ret == -EEXIST)
8843                         ret = 0;
8844                 if (ret < 0)
8845                         goto out;
8846 next:
8847                 /*
8848                  * TODO: if next leaf is corrupted, jump to nearest next valid
8849                  * leaf.
8850                  */
8851                 ret = btrfs_next_item(cur_root, &path);
8852                 if (ret < 0)
8853                         goto out;
8854                 if (ret > 0) {
8855                         ret = 0;
8856                         goto out;
8857                 }
8858         }
8859
8860 out:
8861         btrfs_release_path(&path);
8862         free(buf);
8863         return ret;
8864 }
8865
8866 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8867                                   struct btrfs_root *csum_root)
8868 {
8869         struct btrfs_fs_info *fs_info = csum_root->fs_info;
8870         struct btrfs_path path;
8871         struct btrfs_root *tree_root = fs_info->tree_root;
8872         struct btrfs_root *cur_root;
8873         struct extent_buffer *node;
8874         struct btrfs_key key;
8875         int slot = 0;
8876         int ret = 0;
8877
8878         btrfs_init_path(&path);
8879         key.objectid = BTRFS_FS_TREE_OBJECTID;
8880         key.offset = 0;
8881         key.type = BTRFS_ROOT_ITEM_KEY;
8882         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
8883         if (ret < 0)
8884                 goto out;
8885         if (ret > 0) {
8886                 ret = -ENOENT;
8887                 goto out;
8888         }
8889
8890         while (1) {
8891                 node = path.nodes[0];
8892                 slot = path.slots[0];
8893                 btrfs_item_key_to_cpu(node, &key, slot);
8894                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8895                         goto out;
8896                 if (key.type != BTRFS_ROOT_ITEM_KEY)
8897                         goto next;
8898                 if (!is_fstree(key.objectid))
8899                         goto next;
8900                 key.offset = (u64)-1;
8901
8902                 cur_root = btrfs_read_fs_root(fs_info, &key);
8903                 if (IS_ERR(cur_root) || !cur_root) {
8904                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8905                                 key.objectid);
8906                         goto out;
8907                 }
8908                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8909                                 cur_root);
8910                 if (ret < 0)
8911                         goto out;
8912 next:
8913                 ret = btrfs_next_item(tree_root, &path);
8914                 if (ret > 0) {
8915                         ret = 0;
8916                         goto out;
8917                 }
8918                 if (ret < 0)
8919                         goto out;
8920         }
8921
8922 out:
8923         btrfs_release_path(&path);
8924         return ret;
8925 }
8926
8927 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
8928                                       struct btrfs_root *csum_root)
8929 {
8930         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
8931         struct btrfs_path path;
8932         struct btrfs_extent_item *ei;
8933         struct extent_buffer *leaf;
8934         char *buf;
8935         struct btrfs_key key;
8936         int ret;
8937
8938         btrfs_init_path(&path);
8939         key.objectid = 0;
8940         key.type = BTRFS_EXTENT_ITEM_KEY;
8941         key.offset = 0;
8942         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8943         if (ret < 0) {
8944                 btrfs_release_path(&path);
8945                 return ret;
8946         }
8947
8948         buf = malloc(csum_root->fs_info->sectorsize);
8949         if (!buf) {
8950                 btrfs_release_path(&path);
8951                 return -ENOMEM;
8952         }
8953
8954         while (1) {
8955                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8956                         ret = btrfs_next_leaf(extent_root, &path);
8957                         if (ret < 0)
8958                                 break;
8959                         if (ret) {
8960                                 ret = 0;
8961                                 break;
8962                         }
8963                 }
8964                 leaf = path.nodes[0];
8965
8966                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8967                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8968                         path.slots[0]++;
8969                         continue;
8970                 }
8971
8972                 ei = btrfs_item_ptr(leaf, path.slots[0],
8973                                     struct btrfs_extent_item);
8974                 if (!(btrfs_extent_flags(leaf, ei) &
8975                       BTRFS_EXTENT_FLAG_DATA)) {
8976                         path.slots[0]++;
8977                         continue;
8978                 }
8979
8980                 ret = populate_csum(trans, csum_root, buf, key.objectid,
8981                                     key.offset);
8982                 if (ret)
8983                         break;
8984                 path.slots[0]++;
8985         }
8986
8987         btrfs_release_path(&path);
8988         free(buf);
8989         return ret;
8990 }
8991
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * After an extent tree init the extent tree no longer describes the data
 * extents, so it cannot be used as the source.  A non-zero @search_fs_tree
 * selects the fs/subvolume trees as the source instead; otherwise the extent
 * tree is walked.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ?
                fill_csum_tree_from_fs(trans, csum_root) :
                fill_csum_tree_from_extent(trans, csum_root);
}
9008
9009 static void free_roots_info_cache(void)
9010 {
9011         if (!roots_info_cache)
9012                 return;
9013
9014         while (!cache_tree_empty(roots_info_cache)) {
9015                 struct cache_extent *entry;
9016                 struct root_item_info *rii;
9017
9018                 entry = first_cache_extent(roots_info_cache);
9019                 if (!entry)
9020                         break;
9021                 remove_cache_extent(roots_info_cache, entry);
9022                 rii = container_of(entry, struct root_item_info, cache_extent);
9023                 free(rii);
9024         }
9025
9026         free(roots_info_cache);
9027         roots_info_cache = NULL;
9028 }
9029
9030 static int build_roots_info_cache(struct btrfs_fs_info *info)
9031 {
9032         int ret = 0;
9033         struct btrfs_key key;
9034         struct extent_buffer *leaf;
9035         struct btrfs_path path;
9036
9037         if (!roots_info_cache) {
9038                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9039                 if (!roots_info_cache)
9040                         return -ENOMEM;
9041                 cache_tree_init(roots_info_cache);
9042         }
9043
9044         btrfs_init_path(&path);
9045         key.objectid = 0;
9046         key.type = BTRFS_EXTENT_ITEM_KEY;
9047         key.offset = 0;
9048         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
9049         if (ret < 0)
9050                 goto out;
9051         leaf = path.nodes[0];
9052
9053         while (1) {
9054                 struct btrfs_key found_key;
9055                 struct btrfs_extent_item *ei;
9056                 struct btrfs_extent_inline_ref *iref;
9057                 int slot = path.slots[0];
9058                 int type;
9059                 u64 flags;
9060                 u64 root_id;
9061                 u8 level;
9062                 struct cache_extent *entry;
9063                 struct root_item_info *rii;
9064
9065                 if (slot >= btrfs_header_nritems(leaf)) {
9066                         ret = btrfs_next_leaf(info->extent_root, &path);
9067                         if (ret < 0) {
9068                                 break;
9069                         } else if (ret) {
9070                                 ret = 0;
9071                                 break;
9072                         }
9073                         leaf = path.nodes[0];
9074                         slot = path.slots[0];
9075                 }
9076
9077                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9078
9079                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9080                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9081                         goto next;
9082
9083                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9084                 flags = btrfs_extent_flags(leaf, ei);
9085
9086                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9087                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9088                         goto next;
9089
9090                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9091                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9092                         level = found_key.offset;
9093                 } else {
9094                         struct btrfs_tree_block_info *binfo;
9095
9096                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
9097                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
9098                         level = btrfs_tree_block_level(leaf, binfo);
9099                 }
9100
9101                 /*
9102                  * For a root extent, it must be of the following type and the
9103                  * first (and only one) iref in the item.
9104                  */
9105                 type = btrfs_extent_inline_ref_type(leaf, iref);
9106                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9107                         goto next;
9108
9109                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9110                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9111                 if (!entry) {
9112                         rii = malloc(sizeof(struct root_item_info));
9113                         if (!rii) {
9114                                 ret = -ENOMEM;
9115                                 goto out;
9116                         }
9117                         rii->cache_extent.start = root_id;
9118                         rii->cache_extent.size = 1;
9119                         rii->level = (u8)-1;
9120                         entry = &rii->cache_extent;
9121                         ret = insert_cache_extent(roots_info_cache, entry);
9122                         ASSERT(ret == 0);
9123                 } else {
9124                         rii = container_of(entry, struct root_item_info,
9125                                            cache_extent);
9126                 }
9127
9128                 ASSERT(rii->cache_extent.start == root_id);
9129                 ASSERT(rii->cache_extent.size == 1);
9130
9131                 if (level > rii->level || rii->level == (u8)-1) {
9132                         rii->level = level;
9133                         rii->bytenr = found_key.objectid;
9134                         rii->gen = btrfs_extent_generation(leaf, ei);
9135                         rii->node_count = 1;
9136                 } else if (level == rii->level) {
9137                         rii->node_count++;
9138                 }
9139 next:
9140                 path.slots[0]++;
9141         }
9142
9143 out:
9144         btrfs_release_path(&path);
9145
9146         return ret;
9147 }
9148
9149 static int maybe_repair_root_item(struct btrfs_path *path,
9150                                   const struct btrfs_key *root_key,
9151                                   const int read_only_mode)
9152 {
9153         const u64 root_id = root_key->objectid;
9154         struct cache_extent *entry;
9155         struct root_item_info *rii;
9156         struct btrfs_root_item ri;
9157         unsigned long offset;
9158
9159         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9160         if (!entry) {
9161                 fprintf(stderr,
9162                         "Error: could not find extent items for root %llu\n",
9163                         root_key->objectid);
9164                 return -ENOENT;
9165         }
9166
9167         rii = container_of(entry, struct root_item_info, cache_extent);
9168         ASSERT(rii->cache_extent.start == root_id);
9169         ASSERT(rii->cache_extent.size == 1);
9170
9171         if (rii->node_count != 1) {
9172                 fprintf(stderr,
9173                         "Error: could not find btree root extent for root %llu\n",
9174                         root_id);
9175                 return -ENOENT;
9176         }
9177
9178         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9179         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9180
9181         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9182             btrfs_root_level(&ri) != rii->level ||
9183             btrfs_root_generation(&ri) != rii->gen) {
9184
9185                 /*
9186                  * If we're in repair mode but our caller told us to not update
9187                  * the root item, i.e. just check if it needs to be updated, don't
9188                  * print this message, since the caller will call us again shortly
9189                  * for the same root item without read only mode (the caller will
9190                  * open a transaction first).
9191                  */
9192                 if (!(read_only_mode && repair))
9193                         fprintf(stderr,
9194                                 "%sroot item for root %llu,"
9195                                 " current bytenr %llu, current gen %llu, current level %u,"
9196                                 " new bytenr %llu, new gen %llu, new level %u\n",
9197                                 (read_only_mode ? "" : "fixing "),
9198                                 root_id,
9199                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9200                                 btrfs_root_level(&ri),
9201                                 rii->bytenr, rii->gen, rii->level);
9202
9203                 if (btrfs_root_generation(&ri) > rii->gen) {
9204                         fprintf(stderr,
9205                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9206                                 root_id, btrfs_root_generation(&ri), rii->gen);
9207                         return -EINVAL;
9208                 }
9209
9210                 if (!read_only_mode) {
9211                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9212                         btrfs_set_root_level(&ri, rii->level);
9213                         btrfs_set_root_generation(&ri, rii->gen);
9214                         write_extent_buffer(path->nodes[0], &ri,
9215                                             offset, sizeof(ri));
9216                 }
9217
9218                 return 1;
9219         }
9220
9221         return 0;
9222 }
9223
9224 /*
9225  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9226  * caused read-only snapshots to be corrupted if they were created at a moment
9227  * when the source subvolume/snapshot had orphan items. The issue was that the
9228  * on-disk root items became incorrect, referring to the pre orphan cleanup root
9229  * node instead of the post orphan cleanup root node.
9230  * So this function, and its callees, just detects and fixes those cases. Even
9231  * though the regression was for read-only snapshots, this function applies to
9232  * any snapshot/subvolume root.
9233  * This must be run before any other repair code - not doing it so, makes other
9234  * repair code delete or modify backrefs in the extent tree for example, which
9235  * will result in an inconsistent fs after repairing the root items.
9236  */
9237 static int repair_root_items(struct btrfs_fs_info *info)
9238 {
9239         struct btrfs_path path;
9240         struct btrfs_key key;
9241         struct extent_buffer *leaf;
9242         struct btrfs_trans_handle *trans = NULL;
9243         int ret = 0;
9244         int bad_roots = 0;
9245         int need_trans = 0;
9246
9247         btrfs_init_path(&path);
9248
9249         ret = build_roots_info_cache(info);
9250         if (ret)
9251                 goto out;
9252
9253         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9254         key.type = BTRFS_ROOT_ITEM_KEY;
9255         key.offset = 0;
9256
9257 again:
9258         /*
9259          * Avoid opening and committing transactions if a leaf doesn't have
9260          * any root items that need to be fixed, so that we avoid rotating
9261          * backup roots unnecessarily.
9262          */
9263         if (need_trans) {
9264                 trans = btrfs_start_transaction(info->tree_root, 1);
9265                 if (IS_ERR(trans)) {
9266                         ret = PTR_ERR(trans);
9267                         goto out;
9268                 }
9269         }
9270
9271         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
9272                                 0, trans ? 1 : 0);
9273         if (ret < 0)
9274                 goto out;
9275         leaf = path.nodes[0];
9276
9277         while (1) {
9278                 struct btrfs_key found_key;
9279
9280                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
9281                         int no_more_keys = find_next_key(&path, &key);
9282
9283                         btrfs_release_path(&path);
9284                         if (trans) {
9285                                 ret = btrfs_commit_transaction(trans,
9286                                                                info->tree_root);
9287                                 trans = NULL;
9288                                 if (ret < 0)
9289                                         goto out;
9290                         }
9291                         need_trans = 0;
9292                         if (no_more_keys)
9293                                 break;
9294                         goto again;
9295                 }
9296
9297                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9298
9299                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9300                         goto next;
9301                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9302                         goto next;
9303
9304                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
9305                 if (ret < 0)
9306                         goto out;
9307                 if (ret) {
9308                         if (!trans && repair) {
9309                                 need_trans = 1;
9310                                 key = found_key;
9311                                 btrfs_release_path(&path);
9312                                 goto again;
9313                         }
9314                         bad_roots++;
9315                 }
9316 next:
9317                 path.slots[0]++;
9318         }
9319         ret = 0;
9320 out:
9321         free_roots_info_cache();
9322         btrfs_release_path(&path);
9323         if (trans)
9324                 btrfs_commit_transaction(trans, info->tree_root);
9325         if (ret < 0)
9326                 return ret;
9327
9328         return bad_roots;
9329 }
9330
9331 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
9332 {
9333         struct btrfs_trans_handle *trans;
9334         struct btrfs_block_group_cache *bg_cache;
9335         u64 current = 0;
9336         int ret = 0;
9337
9338         /* Clear all free space cache inodes and its extent data */
9339         while (1) {
9340                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
9341                 if (!bg_cache)
9342                         break;
9343                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
9344                 if (ret < 0)
9345                         return ret;
9346                 current = bg_cache->key.objectid + bg_cache->key.offset;
9347         }
9348
9349         /* Don't forget to set cache_generation to -1 */
9350         trans = btrfs_start_transaction(fs_info->tree_root, 0);
9351         if (IS_ERR(trans)) {
9352                 error("failed to update super block cache generation");
9353                 return PTR_ERR(trans);
9354         }
9355         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
9356         btrfs_commit_transaction(trans, fs_info->tree_root);
9357
9358         return ret;
9359 }
9360
9361 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
9362                 int clear_version)
9363 {
9364         int ret = 0;
9365
9366         if (clear_version == 1) {
9367                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
9368                         error(
9369                 "free space cache v2 detected, use --clear-space-cache v2");
9370                         ret = 1;
9371                         goto close_out;
9372                 }
9373                 printf("Clearing free space cache\n");
9374                 ret = clear_free_space_cache(fs_info);
9375                 if (ret) {
9376                         error("failed to clear free space cache");
9377                         ret = 1;
9378                 } else {
9379                         printf("Free space cache cleared\n");
9380                 }
9381         } else if (clear_version == 2) {
9382                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
9383                         printf("no free space cache v2 to clear\n");
9384                         ret = 0;
9385                         goto close_out;
9386                 }
9387                 printf("Clear free space cache v2\n");
9388                 ret = btrfs_clear_free_space_tree(fs_info);
9389                 if (ret) {
9390                         error("failed to clear free space cache v2: %d", ret);
9391                         ret = 1;
9392                 } else {
9393                         printf("free space cache v2 cleared\n");
9394                 }
9395         }
9396 close_out:
9397         return ret;
9398 }
9399
/*
 * Help text for "btrfs check", as a NULL-terminated list of lines: synopsis,
 * one-line summary, long description, an empty separator line, then one or
 * more lines per option.
 */
const char * const cmd_check_usage[] = {
	/* synopsis and short description */
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",

	/* long description */
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",

	/* options selecting which copies/roots to start from, and safety */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--force                     skip mount checks, repair is not possible",

	/* repair related options */
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",

	/* check mode */
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",

	/* extra checks and reports */
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",

	/* explicit root locations, progress, cache clearing */
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",
	NULL
};
9431
9432 int cmd_check(int argc, char **argv)
9433 {
9434         struct cache_tree root_cache;
9435         struct btrfs_root *root;
9436         struct btrfs_fs_info *info;
9437         u64 bytenr = 0;
9438         u64 subvolid = 0;
9439         u64 tree_root_bytenr = 0;
9440         u64 chunk_root_bytenr = 0;
9441         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9442         int ret = 0;
9443         int err = 0;
9444         u64 num;
9445         int init_csum_tree = 0;
9446         int readonly = 0;
9447         int clear_space_cache = 0;
9448         int qgroup_report = 0;
9449         int qgroups_repaired = 0;
9450         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
9451         int force = 0;
9452
9453         while(1) {
9454                 int c;
9455                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
9456                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
9457                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
9458                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
9459                         GETOPT_VAL_FORCE };
9460                 static const struct option long_options[] = {
9461                         { "super", required_argument, NULL, 's' },
9462                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
9463                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
9464                         { "init-csum-tree", no_argument, NULL,
9465                                 GETOPT_VAL_INIT_CSUM },
9466                         { "init-extent-tree", no_argument, NULL,
9467                                 GETOPT_VAL_INIT_EXTENT },
9468                         { "check-data-csum", no_argument, NULL,
9469                                 GETOPT_VAL_CHECK_CSUM },
9470                         { "backup", no_argument, NULL, 'b' },
9471                         { "subvol-extents", required_argument, NULL, 'E' },
9472                         { "qgroup-report", no_argument, NULL, 'Q' },
9473                         { "tree-root", required_argument, NULL, 'r' },
9474                         { "chunk-root", required_argument, NULL,
9475                                 GETOPT_VAL_CHUNK_TREE },
9476                         { "progress", no_argument, NULL, 'p' },
9477                         { "mode", required_argument, NULL,
9478                                 GETOPT_VAL_MODE },
9479                         { "clear-space-cache", required_argument, NULL,
9480                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
9481                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
9482                         { NULL, 0, NULL, 0}
9483                 };
9484
9485                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
9486                 if (c < 0)
9487                         break;
9488                 switch(c) {
9489                         case 'a': /* ignored */ break;
9490                         case 'b':
9491                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9492                                 break;
9493                         case 's':
9494                                 num = arg_strtou64(optarg);
9495                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9496                                         error(
9497                                         "super mirror should be less than %d",
9498                                                 BTRFS_SUPER_MIRROR_MAX);
9499                                         exit(1);
9500                                 }
9501                                 bytenr = btrfs_sb_offset(((int)num));
9502                                 printf("using SB copy %llu, bytenr %llu\n", num,
9503                                        (unsigned long long)bytenr);
9504                                 break;
9505                         case 'Q':
9506                                 qgroup_report = 1;
9507                                 break;
9508                         case 'E':
9509                                 subvolid = arg_strtou64(optarg);
9510                                 break;
9511                         case 'r':
9512                                 tree_root_bytenr = arg_strtou64(optarg);
9513                                 break;
9514                         case GETOPT_VAL_CHUNK_TREE:
9515                                 chunk_root_bytenr = arg_strtou64(optarg);
9516                                 break;
9517                         case 'p':
9518                                 ctx.progress_enabled = true;
9519                                 break;
9520                         case '?':
9521                         case 'h':
9522                                 usage(cmd_check_usage);
9523                         case GETOPT_VAL_REPAIR:
9524                                 printf("enabling repair mode\n");
9525                                 repair = 1;
9526                                 ctree_flags |= OPEN_CTREE_WRITES;
9527                                 break;
9528                         case GETOPT_VAL_READONLY:
9529                                 readonly = 1;
9530                                 break;
9531                         case GETOPT_VAL_INIT_CSUM:
9532                                 printf("Creating a new CRC tree\n");
9533                                 init_csum_tree = 1;
9534                                 repair = 1;
9535                                 ctree_flags |= OPEN_CTREE_WRITES;
9536                                 break;
9537                         case GETOPT_VAL_INIT_EXTENT:
9538                                 init_extent_tree = 1;
9539                                 ctree_flags |= (OPEN_CTREE_WRITES |
9540                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9541                                 repair = 1;
9542                                 break;
9543                         case GETOPT_VAL_CHECK_CSUM:
9544                                 check_data_csum = 1;
9545                                 break;
9546                         case GETOPT_VAL_MODE:
9547                                 check_mode = parse_check_mode(optarg);
9548                                 if (check_mode == CHECK_MODE_UNKNOWN) {
9549                                         error("unknown mode: %s", optarg);
9550                                         exit(1);
9551                                 }
9552                                 break;
9553                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
9554                                 if (strcmp(optarg, "v1") == 0) {
9555                                         clear_space_cache = 1;
9556                                 } else if (strcmp(optarg, "v2") == 0) {
9557                                         clear_space_cache = 2;
9558                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
9559                                 } else {
9560                                         error(
9561                 "invalid argument to --clear-space-cache, must be v1 or v2");
9562                                         exit(1);
9563                                 }
9564                                 ctree_flags |= OPEN_CTREE_WRITES;
9565                                 break;
9566                         case GETOPT_VAL_FORCE:
9567                                 force = 1;
9568                                 break;
9569                 }
9570         }
9571
9572         if (check_argc_exact(argc - optind, 1))
9573                 usage(cmd_check_usage);
9574
9575         if (ctx.progress_enabled) {
9576                 ctx.tp = TASK_NOTHING;
9577                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9578         }
9579
9580         /* This check is the only reason for --readonly to exist */
9581         if (readonly && repair) {
9582                 error("repair options are not compatible with --readonly");
9583                 exit(1);
9584         }
9585
9586         /*
9587          * experimental and dangerous
9588          */
9589         if (repair && check_mode == CHECK_MODE_LOWMEM)
9590                 warning("low-memory mode repair support is only partial");
9591
9592         radix_tree_init();
9593         cache_tree_init(&root_cache);
9594
9595         ret = check_mounted(argv[optind]);
9596         if (!force) {
9597                 if (ret < 0) {
9598                         error("could not check mount status: %s",
9599                                         strerror(-ret));
9600                         err |= !!ret;
9601                         goto err_out;
9602                 } else if (ret) {
9603                         error(
9604 "%s is currently mounted, use --force if you really intend to check the filesystem",
9605                                 argv[optind]);
9606                         ret = -EBUSY;
9607                         err |= !!ret;
9608                         goto err_out;
9609                 }
9610         } else {
9611                 if (repair) {
9612                         error("repair and --force is not yet supported");
9613                         ret = 1;
9614                         err |= !!ret;
9615                         goto err_out;
9616                 }
9617                 if (ret < 0) {
9618                         warning(
9619 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
9620                                 argv[optind]);
9621                 } else if (ret) {
9622                         warning(
9623                         "filesystem mounted, continuing because of --force");
9624                 }
9625                 /* A block device is mounted in exclusive mode by kernel */
9626                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
9627         }
9628
9629         /* only allow partial opening under repair mode */
9630         if (repair)
9631                 ctree_flags |= OPEN_CTREE_PARTIAL;
9632
9633         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9634                                   chunk_root_bytenr, ctree_flags);
9635         if (!info) {
9636                 error("cannot open file system");
9637                 ret = -EIO;
9638                 err |= !!ret;
9639                 goto err_out;
9640         }
9641
9642         global_info = info;
9643         root = info->fs_root;
9644         uuid_unparse(info->super_copy->fsid, uuidbuf);
9645
9646         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9647
9648         /*
9649          * Check the bare minimum before starting anything else that could rely
9650          * on it, namely the tree roots, any local consistency checks
9651          */
9652         if (!extent_buffer_uptodate(info->tree_root->node) ||
9653             !extent_buffer_uptodate(info->dev_root->node) ||
9654             !extent_buffer_uptodate(info->chunk_root->node)) {
9655                 error("critical roots corrupted, unable to check the filesystem");
9656                 err |= !!ret;
9657                 ret = -EIO;
9658                 goto close_out;
9659         }
9660
9661         if (clear_space_cache) {
9662                 ret = do_clear_free_space_cache(info, clear_space_cache);
9663                 err |= !!ret;
9664                 goto close_out;
9665         }
9666
9667         /*
9668          * repair mode will force us to commit transaction which
9669          * will make us fail to load log tree when mounting.
9670          */
9671         if (repair && btrfs_super_log_root(info->super_copy)) {
9672                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
9673                 if (!ret) {
9674                         ret = 1;
9675                         err |= !!ret;
9676                         goto close_out;
9677                 }
9678                 ret = zero_log_tree(root);
9679                 err |= !!ret;
9680                 if (ret) {
9681                         error("failed to zero log tree: %d", ret);
9682                         goto close_out;
9683                 }
9684         }
9685
9686         if (qgroup_report) {
9687                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9688                        uuidbuf);
9689                 ret = qgroup_verify_all(info);
9690                 err |= !!ret;
9691                 if (ret == 0)
9692                         report_qgroups(1);
9693                 goto close_out;
9694         }
9695         if (subvolid) {
9696                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9697                        subvolid, argv[optind], uuidbuf);
9698                 ret = print_extent_state(info, subvolid);
9699                 err |= !!ret;
9700                 goto close_out;
9701         }
9702
9703         if (init_extent_tree || init_csum_tree) {
9704                 struct btrfs_trans_handle *trans;
9705
9706                 trans = btrfs_start_transaction(info->extent_root, 0);
9707                 if (IS_ERR(trans)) {
9708                         error("error starting transaction");
9709                         ret = PTR_ERR(trans);
9710                         err |= !!ret;
9711                         goto close_out;
9712                 }
9713
9714                 if (init_extent_tree) {
9715                         printf("Creating a new extent tree\n");
9716                         ret = reinit_extent_tree(trans, info);
9717                         err |= !!ret;
9718                         if (ret)
9719                                 goto close_out;
9720                 }
9721
9722                 if (init_csum_tree) {
9723                         printf("Reinitialize checksum tree\n");
9724                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9725                         if (ret) {
9726                                 error("checksum tree initialization failed: %d",
9727                                                 ret);
9728                                 ret = -EIO;
9729                                 err |= !!ret;
9730                                 goto close_out;
9731                         }
9732
9733                         ret = fill_csum_tree(trans, info->csum_root,
9734                                              init_extent_tree);
9735                         err |= !!ret;
9736                         if (ret) {
9737                                 error("checksum tree refilling failed: %d", ret);
9738                                 return -EIO;
9739                         }
9740                 }
9741                 /*
9742                  * Ok now we commit and run the normal fsck, which will add
9743                  * extent entries for all of the items it finds.
9744                  */
9745                 ret = btrfs_commit_transaction(trans, info->extent_root);
9746                 err |= !!ret;
9747                 if (ret)
9748                         goto close_out;
9749         }
9750         if (!extent_buffer_uptodate(info->extent_root->node)) {
9751                 error("critical: extent_root, unable to check the filesystem");
9752                 ret = -EIO;
9753                 err |= !!ret;
9754                 goto close_out;
9755         }
9756         if (!extent_buffer_uptodate(info->csum_root->node)) {
9757                 error("critical: csum_root, unable to check the filesystem");
9758                 ret = -EIO;
9759                 err |= !!ret;
9760                 goto close_out;
9761         }
9762
9763         if (!init_extent_tree) {
9764                 ret = repair_root_items(info);
9765                 if (ret < 0) {
9766                         err = !!ret;
9767                         error("failed to repair root items: %s", strerror(-ret));
9768                         goto close_out;
9769                 }
9770                 if (repair) {
9771                         fprintf(stderr, "Fixed %d roots.\n", ret);
9772                         ret = 0;
9773                 } else if (ret > 0) {
9774                         fprintf(stderr,
9775                                 "Found %d roots with an outdated root item.\n",
9776                                 ret);
9777                         fprintf(stderr,
9778         "Please run a filesystem check with the option --repair to fix them.\n");
9779                         ret = 1;
9780                         err |= ret;
9781                         goto close_out;
9782                 }
9783         }
9784
9785         ret = do_check_chunks_and_extents(info);
9786         err |= !!ret;
9787         if (ret)
9788                 error(
9789                 "errors found in extent allocation tree or chunk allocation");
9790
9791         /* Only re-check super size after we checked and repaired the fs */
9792         err |= !is_super_size_valid(info);
9793
9794         if (!ctx.progress_enabled) {
9795                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
9796                         fprintf(stderr, "checking free space tree\n");
9797                 else
9798                         fprintf(stderr, "checking free space cache\n");
9799         }
9800         ret = check_space_cache(root);
9801         err |= !!ret;
9802         if (ret) {
9803                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
9804                         error("errors found in free space tree");
9805                 else
9806                         error("errors found in free space cache");
9807                 goto out;
9808         }
9809
9810         /*
9811          * We used to have to have these hole extents in between our real
9812          * extents so if we don't have this flag set we need to make sure there
9813          * are no gaps in the file extents for inodes, otherwise we can just
9814          * ignore it when this happens.
9815          */
9816         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
9817         ret = do_check_fs_roots(info, &root_cache);
9818         err |= !!ret;
9819         if (ret) {
9820                 error("errors found in fs roots");
9821                 goto out;
9822         }
9823
9824         fprintf(stderr, "checking csums\n");
9825         ret = check_csums(root);
9826         err |= !!ret;
9827         if (ret) {
9828                 error("errors found in csum tree");
9829                 goto out;
9830         }
9831
9832         fprintf(stderr, "checking root refs\n");
9833         /* For low memory mode, check_fs_roots_v2 handles root refs */
9834         if (check_mode != CHECK_MODE_LOWMEM) {
9835                 ret = check_root_refs(root, &root_cache);
9836                 err |= !!ret;
9837                 if (ret) {
9838                         error("errors found in root refs");
9839                         goto out;
9840                 }
9841         }
9842
9843         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9844                 struct extent_buffer *eb;
9845
9846                 eb = list_first_entry(&root->fs_info->recow_ebs,
9847                                       struct extent_buffer, recow);
9848                 list_del_init(&eb->recow);
9849                 ret = recow_extent_buffer(root, eb);
9850                 err |= !!ret;
9851                 if (ret) {
9852                         error("fails to fix transid errors");
9853                         break;
9854                 }
9855         }
9856
9857         while (!list_empty(&delete_items)) {
9858                 struct bad_item *bad;
9859
9860                 bad = list_first_entry(&delete_items, struct bad_item, list);
9861                 list_del_init(&bad->list);
9862                 if (repair) {
9863                         ret = delete_bad_item(root, bad);
9864                         err |= !!ret;
9865                 }
9866                 free(bad);
9867         }
9868
9869         if (info->quota_enabled) {
9870                 fprintf(stderr, "checking quota groups\n");
9871                 ret = qgroup_verify_all(info);
9872                 err |= !!ret;
9873                 if (ret) {
9874                         error("failed to check quota groups");
9875                         goto out;
9876                 }
9877                 report_qgroups(0);
9878                 ret = repair_qgroups(info, &qgroups_repaired);
9879                 err |= !!ret;
9880                 if (err) {
9881                         error("failed to repair quota groups");
9882                         goto out;
9883                 }
9884                 ret = 0;
9885         }
9886
9887         if (!list_empty(&root->fs_info->recow_ebs)) {
9888                 error("transid errors in file system");
9889                 ret = 1;
9890                 err |= !!ret;
9891         }
9892 out:
9893         printf("found %llu bytes used, ",
9894                (unsigned long long)bytes_used);
9895         if (err)
9896                 printf("error(s) found\n");
9897         else
9898                 printf("no error found\n");
9899         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9900         printf("total tree bytes: %llu\n",
9901                (unsigned long long)total_btree_bytes);
9902         printf("total fs tree bytes: %llu\n",
9903                (unsigned long long)total_fs_tree_bytes);
9904         printf("total extent tree bytes: %llu\n",
9905                (unsigned long long)total_extent_tree_bytes);
9906         printf("btree space waste bytes: %llu\n",
9907                (unsigned long long)btree_space_waste);
9908         printf("file data blocks allocated: %llu\n referenced %llu\n",
9909                 (unsigned long long)data_bytes_allocated,
9910                 (unsigned long long)data_bytes_referenced);
9911
9912         free_qgroup_counts();
9913         free_root_recs_tree(&root_cache);
9914 close_out:
9915         close_ctree(root);
9916 err_out:
9917         if (ctx.progress_enabled)
9918                 task_deinit(ctx.info);
9919
9920         return err;
9921 }