libbtrfsutil: always build libbtrfsutil.so.$MAJOR
[platform/upstream/btrfs-progs.git] / check / main.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46 #include "check/mode-common.h"
47 #include "check/mode-original.h"
48 #include "check/mode-lowmem.h"
49
/*
 * Phase identifiers for the progress indicator.  print_status_check() uses
 * the value as an index into its table of status strings, which has one
 * entry for each phase except TASK_NOTHING.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        /*
         * No status string exists for this value; print_status_check()
         * returns without printing when it sees it.
         */
        TASK_NOTHING, /* have to be the last element */
};
56
/* Context handed to the progress callbacks (see print_status_check()). */
struct task_ctx {
        /* NOTE(review): presumably non-zero when progress output is wanted;
         * the code that reads it is not in this part of the file. */
        int progress_enabled;
        /* phase currently reported; selects the status string to print */
        enum task_position tp;

        /* passed to task_period_start()/task_period_wait() */
        struct task_info *info;
};
63
/*
 * File-scope state with external linkage.  The counters below are only
 * defined here; the code that updates and reports them is outside this part
 * of the file, so their exact meaning should be confirmed there.
 */
u64 bytes_used = 0;
u64 total_csum_bytes = 0;
u64 total_btree_bytes = 0;
u64 total_fs_tree_bytes = 0;
u64 total_extent_tree_bytes = 0;
u64 btree_space_waste = 0;
u64 data_bytes_allocated = 0;
u64 data_bytes_referenced = 0;
LIST_HEAD(duplicate_extents);
LIST_HEAD(delete_items);
/* when non-zero, maybe_free_inode_rec() skips the file extent gap check */
int no_holes = 0;
int init_extent_tree = 0;
int check_data_csum = 0;
struct btrfs_fs_info *global_info;
/* progress reporting context, zero-initialized */
struct task_ctx ctx = { 0 };
struct cache_tree *roots_info_cache = NULL;
80
/*
 * Check implementations that can be selected; parse_check_mode() maps the
 * strings "orig"/"original" and "lowmem" to these values.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        /* returned by parse_check_mode() for an unrecognized string */
        CHECK_MODE_UNKNOWN,
        /* alias, not a distinct mode: the default is the original mode */
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* currently selected mode, starts out as the default */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
89
90 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
91 {
92         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
93         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
94         struct data_backref *back1 = to_data_backref(ext1);
95         struct data_backref *back2 = to_data_backref(ext2);
96
97         WARN_ON(!ext1->is_data);
98         WARN_ON(!ext2->is_data);
99
100         /* parent and root are a union, so this covers both */
101         if (back1->parent > back2->parent)
102                 return 1;
103         if (back1->parent < back2->parent)
104                 return -1;
105
106         /* This is a full backref and the parents match. */
107         if (back1->node.full_backref)
108                 return 0;
109
110         if (back1->owner > back2->owner)
111                 return 1;
112         if (back1->owner < back2->owner)
113                 return -1;
114
115         if (back1->offset > back2->offset)
116                 return 1;
117         if (back1->offset < back2->offset)
118                 return -1;
119
120         if (back1->found_ref && back2->found_ref) {
121                 if (back1->disk_bytenr > back2->disk_bytenr)
122                         return 1;
123                 if (back1->disk_bytenr < back2->disk_bytenr)
124                         return -1;
125
126                 if (back1->bytes > back2->bytes)
127                         return 1;
128                 if (back1->bytes < back2->bytes)
129                         return -1;
130         }
131
132         return 0;
133 }
134
135 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
136 {
137         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
138         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
139         struct tree_backref *back1 = to_tree_backref(ext1);
140         struct tree_backref *back2 = to_tree_backref(ext2);
141
142         WARN_ON(ext1->is_data);
143         WARN_ON(ext2->is_data);
144
145         /* parent and root are a union, so this covers both */
146         if (back1->parent > back2->parent)
147                 return 1;
148         if (back1->parent < back2->parent)
149                 return -1;
150
151         return 0;
152 }
153
154 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
155 {
156         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
157         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
158
159         if (ext1->is_data > ext2->is_data)
160                 return 1;
161
162         if (ext1->is_data < ext2->is_data)
163                 return -1;
164
165         if (ext1->full_backref > ext2->full_backref)
166                 return 1;
167         if (ext1->full_backref < ext2->full_backref)
168                 return -1;
169
170         if (ext1->is_data)
171                 return compare_data_backref(node1, node2);
172         else
173                 return compare_tree_backref(node1, node2);
174 }
175
176
177 static void *print_status_check(void *p)
178 {
179         struct task_ctx *priv = p;
180         const char work_indicator[] = { '.', 'o', 'O', 'o' };
181         uint32_t count = 0;
182         static char *task_position_string[] = {
183                 "checking extents",
184                 "checking free space cache",
185                 "checking fs roots",
186         };
187
188         task_period_start(priv->info, 1000 /* 1s */);
189
190         if (priv->tp == TASK_NOTHING)
191                 return NULL;
192
193         while (1) {
194                 printf("%s [%c]\r", task_position_string[priv->tp],
195                                 work_indicator[count % 4]);
196                 count++;
197                 fflush(stdout);
198                 task_period_wait(priv->info);
199         }
200         return NULL;
201 }
202
/*
 * Companion of print_status_check(): emits a newline on stdout and flushes
 * it.  The argument is not used.  Always returns 0.
 */
static int print_status_return(void *p)
{
        (void)p;

        fputs("\n", stdout);
        fflush(stdout);

        return 0;
}
210
211 static enum btrfs_check_mode parse_check_mode(const char *str)
212 {
213         if (strcmp(str, "lowmem") == 0)
214                 return CHECK_MODE_LOWMEM;
215         if (strcmp(str, "orig") == 0)
216                 return CHECK_MODE_ORIGINAL;
217         if (strcmp(str, "original") == 0)
218                 return CHECK_MODE_ORIGINAL;
219
220         return CHECK_MODE_UNKNOWN;
221 }
222
223 /* Compatible function to allow reuse of old codes */
224 static u64 first_extent_gap(struct rb_root *holes)
225 {
226         struct file_extent_hole *hole;
227
228         if (RB_EMPTY_ROOT(holes))
229                 return (u64)-1;
230
231         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
232         return hole->start;
233 }
234
235 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
236 {
237         struct file_extent_hole *hole1;
238         struct file_extent_hole *hole2;
239
240         hole1 = rb_entry(node1, struct file_extent_hole, node);
241         hole2 = rb_entry(node2, struct file_extent_hole, node);
242
243         if (hole1->start > hole2->start)
244                 return -1;
245         if (hole1->start < hole2->start)
246                 return 1;
247         /* Now hole1->start == hole2->start */
248         if (hole1->len >= hole2->len)
249                 /*
250                  * Hole 1 will be merge center
251                  * Same hole will be merged later
252                  */
253                 return -1;
254         /* Hole 2 will be merge center */
255         return 1;
256 }
257
258 /*
259  * Add a hole to the record
260  *
261  * This will do hole merge for copy_file_extent_holes(),
262  * which will ensure there won't be continuous holes.
263  */
264 static int add_file_extent_hole(struct rb_root *holes,
265                                 u64 start, u64 len)
266 {
267         struct file_extent_hole *hole;
268         struct file_extent_hole *prev = NULL;
269         struct file_extent_hole *next = NULL;
270
271         hole = malloc(sizeof(*hole));
272         if (!hole)
273                 return -ENOMEM;
274         hole->start = start;
275         hole->len = len;
276         /* Since compare will not return 0, no -EEXIST will happen */
277         rb_insert(holes, &hole->node, compare_hole);
278
279         /* simple merge with previous hole */
280         if (rb_prev(&hole->node))
281                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
282                                 node);
283         if (prev && prev->start + prev->len >= hole->start) {
284                 hole->len = hole->start + hole->len - prev->start;
285                 hole->start = prev->start;
286                 rb_erase(&prev->node, holes);
287                 free(prev);
288                 prev = NULL;
289         }
290
291         /* iterate merge with next holes */
292         while (1) {
293                 if (!rb_next(&hole->node))
294                         break;
295                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
296                                         node);
297                 if (hole->start + hole->len >= next->start) {
298                         if (hole->start + hole->len <= next->start + next->len)
299                                 hole->len = next->start + next->len -
300                                             hole->start;
301                         rb_erase(&next->node, holes);
302                         free(next);
303                         next = NULL;
304                 } else
305                         break;
306         }
307         return 0;
308 }
309
310 static int compare_hole_range(struct rb_node *node, void *data)
311 {
312         struct file_extent_hole *hole;
313         u64 start;
314
315         hole = (struct file_extent_hole *)data;
316         start = hole->start;
317
318         hole = rb_entry(node, struct file_extent_hole, node);
319         if (start < hole->start)
320                 return -1;
321         if (start >= hole->start && start < hole->start + hole->len)
322                 return 0;
323         return 1;
324 }
325
326 /*
327  * Delete a hole in the record
328  *
329  * This will do the hole split and is much restrict than add.
330  */
331 static int del_file_extent_hole(struct rb_root *holes,
332                                 u64 start, u64 len)
333 {
334         struct file_extent_hole *hole;
335         struct file_extent_hole tmp;
336         u64 prev_start = 0;
337         u64 prev_len = 0;
338         u64 next_start = 0;
339         u64 next_len = 0;
340         struct rb_node *node;
341         int have_prev = 0;
342         int have_next = 0;
343         int ret = 0;
344
345         tmp.start = start;
346         tmp.len = len;
347         node = rb_search(holes, &tmp, compare_hole_range, NULL);
348         if (!node)
349                 return -EEXIST;
350         hole = rb_entry(node, struct file_extent_hole, node);
351         if (start + len > hole->start + hole->len)
352                 return -EEXIST;
353
354         /*
355          * Now there will be no overlap, delete the hole and re-add the
356          * split(s) if they exists.
357          */
358         if (start > hole->start) {
359                 prev_start = hole->start;
360                 prev_len = start - hole->start;
361                 have_prev = 1;
362         }
363         if (hole->start + hole->len > start + len) {
364                 next_start = start + len;
365                 next_len = hole->start + hole->len - start - len;
366                 have_next = 1;
367         }
368         rb_erase(node, holes);
369         free(hole);
370         if (have_prev) {
371                 ret = add_file_extent_hole(holes, prev_start, prev_len);
372                 if (ret < 0)
373                         return ret;
374         }
375         if (have_next) {
376                 ret = add_file_extent_hole(holes, next_start, next_len);
377                 if (ret < 0)
378                         return ret;
379         }
380         return 0;
381 }
382
383 static int copy_file_extent_holes(struct rb_root *dst,
384                                   struct rb_root *src)
385 {
386         struct file_extent_hole *hole;
387         struct rb_node *node;
388         int ret = 0;
389
390         node = rb_first(src);
391         while (node) {
392                 hole = rb_entry(node, struct file_extent_hole, node);
393                 ret = add_file_extent_hole(dst, hole->start, hole->len);
394                 if (ret)
395                         break;
396                 node = rb_next(node);
397         }
398         return ret;
399 }
400
401 static void free_file_extent_holes(struct rb_root *holes)
402 {
403         struct rb_node *node;
404         struct file_extent_hole *hole;
405
406         node = rb_first(holes);
407         while (node) {
408                 hole = rb_entry(node, struct file_extent_hole, node);
409                 rb_erase(node, holes);
410                 free(hole);
411                 node = rb_first(holes);
412         }
413 }
414
415 static void record_root_in_trans(struct btrfs_trans_handle *trans,
416                                  struct btrfs_root *root)
417 {
418         if (root->last_trans != trans->transid) {
419                 root->track_dirty = 1;
420                 root->last_trans = trans->transid;
421                 root->commit_root = root->node;
422                 extent_buffer_get(root->node);
423         }
424 }
425
426 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
427 {
428         struct device_record *rec1;
429         struct device_record *rec2;
430
431         rec1 = rb_entry(node1, struct device_record, node);
432         rec2 = rb_entry(node2, struct device_record, node);
433         if (rec1->devid > rec2->devid)
434                 return -1;
435         else if (rec1->devid < rec2->devid)
436                 return 1;
437         else
438                 return 0;
439 }
440
441 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
442 {
443         struct inode_record *rec;
444         struct inode_backref *backref;
445         struct inode_backref *orig;
446         struct inode_backref *tmp;
447         struct orphan_data_extent *src_orphan;
448         struct orphan_data_extent *dst_orphan;
449         struct rb_node *rb;
450         size_t size;
451         int ret;
452
453         rec = malloc(sizeof(*rec));
454         if (!rec)
455                 return ERR_PTR(-ENOMEM);
456         memcpy(rec, orig_rec, sizeof(*rec));
457         rec->refs = 1;
458         INIT_LIST_HEAD(&rec->backrefs);
459         INIT_LIST_HEAD(&rec->orphan_extents);
460         rec->holes = RB_ROOT;
461
462         list_for_each_entry(orig, &orig_rec->backrefs, list) {
463                 size = sizeof(*orig) + orig->namelen + 1;
464                 backref = malloc(size);
465                 if (!backref) {
466                         ret = -ENOMEM;
467                         goto cleanup;
468                 }
469                 memcpy(backref, orig, size);
470                 list_add_tail(&backref->list, &rec->backrefs);
471         }
472         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
473                 dst_orphan = malloc(sizeof(*dst_orphan));
474                 if (!dst_orphan) {
475                         ret = -ENOMEM;
476                         goto cleanup;
477                 }
478                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
479                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
480         }
481         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
482         if (ret < 0)
483                 goto cleanup_rb;
484
485         return rec;
486
487 cleanup_rb:
488         rb = rb_first(&rec->holes);
489         while (rb) {
490                 struct file_extent_hole *hole;
491
492                 hole = rb_entry(rb, struct file_extent_hole, node);
493                 rb = rb_next(rb);
494                 free(hole);
495         }
496
497 cleanup:
498         if (!list_empty(&rec->backrefs))
499                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
500                         list_del(&orig->list);
501                         free(orig);
502                 }
503
504         if (!list_empty(&rec->orphan_extents))
505                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
506                         list_del(&orig->list);
507                         free(orig);
508                 }
509
510         free(rec);
511
512         return ERR_PTR(ret);
513 }
514
515 static void print_orphan_data_extents(struct list_head *orphan_extents,
516                                       u64 objectid)
517 {
518         struct orphan_data_extent *orphan;
519
520         if (list_empty(orphan_extents))
521                 return;
522         printf("The following data extent is lost in tree %llu:\n",
523                objectid);
524         list_for_each_entry(orphan, orphan_extents, list) {
525                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
526                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
527                        orphan->disk_len);
528         }
529 }
530
531 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
532 {
533         u64 root_objectid = root->root_key.objectid;
534         int errors = rec->errors;
535
536         if (!errors)
537                 return;
538         /* reloc root errors, we print its corresponding fs root objectid*/
539         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
540                 root_objectid = root->root_key.offset;
541                 fprintf(stderr, "reloc");
542         }
543         fprintf(stderr, "root %llu inode %llu errors %x",
544                 (unsigned long long) root_objectid,
545                 (unsigned long long) rec->ino, rec->errors);
546
547         if (errors & I_ERR_NO_INODE_ITEM)
548                 fprintf(stderr, ", no inode item");
549         if (errors & I_ERR_NO_ORPHAN_ITEM)
550                 fprintf(stderr, ", no orphan item");
551         if (errors & I_ERR_DUP_INODE_ITEM)
552                 fprintf(stderr, ", dup inode item");
553         if (errors & I_ERR_DUP_DIR_INDEX)
554                 fprintf(stderr, ", dup dir index");
555         if (errors & I_ERR_ODD_DIR_ITEM)
556                 fprintf(stderr, ", odd dir item");
557         if (errors & I_ERR_ODD_FILE_EXTENT)
558                 fprintf(stderr, ", odd file extent");
559         if (errors & I_ERR_BAD_FILE_EXTENT)
560                 fprintf(stderr, ", bad file extent");
561         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
562                 fprintf(stderr, ", file extent overlap");
563         if (errors & I_ERR_FILE_EXTENT_TOO_LARGE)
564                 fprintf(stderr, ", inline file extent too large");
565         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
566                 fprintf(stderr, ", file extent discount");
567         if (errors & I_ERR_DIR_ISIZE_WRONG)
568                 fprintf(stderr, ", dir isize wrong");
569         if (errors & I_ERR_FILE_NBYTES_WRONG)
570                 fprintf(stderr, ", nbytes wrong");
571         if (errors & I_ERR_ODD_CSUM_ITEM)
572                 fprintf(stderr, ", odd csum item");
573         if (errors & I_ERR_SOME_CSUM_MISSING)
574                 fprintf(stderr, ", some csum missing");
575         if (errors & I_ERR_LINK_COUNT_WRONG)
576                 fprintf(stderr, ", link count wrong");
577         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
578                 fprintf(stderr, ", orphan file extent");
579         fprintf(stderr, "\n");
580         /* Print the orphan extents if needed */
581         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
582                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
583
584         /* Print the holes if needed */
585         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
586                 struct file_extent_hole *hole;
587                 struct rb_node *node;
588                 int found = 0;
589
590                 node = rb_first(&rec->holes);
591                 fprintf(stderr, "Found file extent holes:\n");
592                 while (node) {
593                         found = 1;
594                         hole = rb_entry(node, struct file_extent_hole, node);
595                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
596                                 hole->start, hole->len);
597                         node = rb_next(node);
598                 }
599                 if (!found)
600                         fprintf(stderr, "\tstart: 0, len: %llu\n",
601                                 round_up(rec->isize,
602                                          root->fs_info->sectorsize));
603         }
604 }
605
606 static void print_ref_error(int errors)
607 {
608         if (errors & REF_ERR_NO_DIR_ITEM)
609                 fprintf(stderr, ", no dir item");
610         if (errors & REF_ERR_NO_DIR_INDEX)
611                 fprintf(stderr, ", no dir index");
612         if (errors & REF_ERR_NO_INODE_REF)
613                 fprintf(stderr, ", no inode ref");
614         if (errors & REF_ERR_DUP_DIR_ITEM)
615                 fprintf(stderr, ", dup dir item");
616         if (errors & REF_ERR_DUP_DIR_INDEX)
617                 fprintf(stderr, ", dup dir index");
618         if (errors & REF_ERR_DUP_INODE_REF)
619                 fprintf(stderr, ", dup inode ref");
620         if (errors & REF_ERR_INDEX_UNMATCH)
621                 fprintf(stderr, ", index mismatch");
622         if (errors & REF_ERR_FILETYPE_UNMATCH)
623                 fprintf(stderr, ", filetype mismatch");
624         if (errors & REF_ERR_NAME_TOO_LONG)
625                 fprintf(stderr, ", name too long");
626         if (errors & REF_ERR_NO_ROOT_REF)
627                 fprintf(stderr, ", no root ref");
628         if (errors & REF_ERR_NO_ROOT_BACKREF)
629                 fprintf(stderr, ", no root backref");
630         if (errors & REF_ERR_DUP_ROOT_REF)
631                 fprintf(stderr, ", dup root ref");
632         if (errors & REF_ERR_DUP_ROOT_BACKREF)
633                 fprintf(stderr, ", dup root backref");
634         fprintf(stderr, "\n");
635 }
636
637 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
638                                           u64 ino, int mod)
639 {
640         struct ptr_node *node;
641         struct cache_extent *cache;
642         struct inode_record *rec = NULL;
643         int ret;
644
645         cache = lookup_cache_extent(inode_cache, ino, 1);
646         if (cache) {
647                 node = container_of(cache, struct ptr_node, cache);
648                 rec = node->data;
649                 if (mod && rec->refs > 1) {
650                         node->data = clone_inode_rec(rec);
651                         if (IS_ERR(node->data))
652                                 return node->data;
653                         rec->refs--;
654                         rec = node->data;
655                 }
656         } else if (mod) {
657                 rec = calloc(1, sizeof(*rec));
658                 if (!rec)
659                         return ERR_PTR(-ENOMEM);
660                 rec->ino = ino;
661                 rec->extent_start = (u64)-1;
662                 rec->refs = 1;
663                 INIT_LIST_HEAD(&rec->backrefs);
664                 INIT_LIST_HEAD(&rec->orphan_extents);
665                 rec->holes = RB_ROOT;
666
667                 node = malloc(sizeof(*node));
668                 if (!node) {
669                         free(rec);
670                         return ERR_PTR(-ENOMEM);
671                 }
672                 node->cache.start = ino;
673                 node->cache.size = 1;
674                 node->data = rec;
675
676                 if (ino == BTRFS_FREE_INO_OBJECTID)
677                         rec->found_link = 1;
678
679                 ret = insert_cache_extent(inode_cache, &node->cache);
680                 if (ret)
681                         return ERR_PTR(-EEXIST);
682         }
683         return rec;
684 }
685
686 static void free_orphan_data_extents(struct list_head *orphan_extents)
687 {
688         struct orphan_data_extent *orphan;
689
690         while (!list_empty(orphan_extents)) {
691                 orphan = list_entry(orphan_extents->next,
692                                     struct orphan_data_extent, list);
693                 list_del(&orphan->list);
694                 free(orphan);
695         }
696 }
697
698 static void free_inode_rec(struct inode_record *rec)
699 {
700         struct inode_backref *backref;
701
702         if (--rec->refs > 0)
703                 return;
704
705         while (!list_empty(&rec->backrefs)) {
706                 backref = to_inode_backref(rec->backrefs.next);
707                 list_del(&backref->list);
708                 free(backref);
709         }
710         free_orphan_data_extents(&rec->orphan_extents);
711         free_file_extent_holes(&rec->holes);
712         free(rec);
713 }
714
715 static int can_free_inode_rec(struct inode_record *rec)
716 {
717         if (!rec->errors && rec->checked && rec->found_inode_item &&
718             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
719                 return 1;
720         return 0;
721 }
722
723 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
724                                  struct inode_record *rec)
725 {
726         struct cache_extent *cache;
727         struct inode_backref *tmp, *backref;
728         struct ptr_node *node;
729         u8 filetype;
730
731         if (!rec->found_inode_item)
732                 return;
733
734         filetype = imode_to_type(rec->imode);
735         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
736                 if (backref->found_dir_item && backref->found_dir_index) {
737                         if (backref->filetype != filetype)
738                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
739                         if (!backref->errors && backref->found_inode_ref &&
740                             rec->nlink == rec->found_link) {
741                                 list_del(&backref->list);
742                                 free(backref);
743                         }
744                 }
745         }
746
747         if (!rec->checked || rec->merging)
748                 return;
749
750         if (S_ISDIR(rec->imode)) {
751                 if (rec->found_size != rec->isize)
752                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
753                 if (rec->found_file_extent)
754                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
755         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
756                 if (rec->found_dir_item)
757                         rec->errors |= I_ERR_ODD_DIR_ITEM;
758                 if (rec->found_size != rec->nbytes)
759                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
760                 if (rec->nlink > 0 && !no_holes &&
761                     (rec->extent_end < rec->isize ||
762                      first_extent_gap(&rec->holes) < rec->isize))
763                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
764         }
765
766         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
767                 if (rec->found_csum_item && rec->nodatasum)
768                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
769                 if (rec->some_csum_missing && !rec->nodatasum)
770                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
771         }
772
773         BUG_ON(rec->refs != 1);
774         if (can_free_inode_rec(rec)) {
775                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
776                 node = container_of(cache, struct ptr_node, cache);
777                 BUG_ON(node->data != rec);
778                 remove_cache_extent(inode_cache, &node->cache);
779                 free(node);
780                 free_inode_rec(rec);
781         }
782 }
783
784 static int check_orphan_item(struct btrfs_root *root, u64 ino)
785 {
786         struct btrfs_path path;
787         struct btrfs_key key;
788         int ret;
789
790         key.objectid = BTRFS_ORPHAN_OBJECTID;
791         key.type = BTRFS_ORPHAN_ITEM_KEY;
792         key.offset = ino;
793
794         btrfs_init_path(&path);
795         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
796         btrfs_release_path(&path);
797         if (ret > 0)
798                 ret = -ENOENT;
799         return ret;
800 }
801
802 static int process_inode_item(struct extent_buffer *eb,
803                               int slot, struct btrfs_key *key,
804                               struct shared_node *active_node)
805 {
806         struct inode_record *rec;
807         struct btrfs_inode_item *item;
808
809         rec = active_node->current;
810         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
811         if (rec->found_inode_item) {
812                 rec->errors |= I_ERR_DUP_INODE_ITEM;
813                 return 1;
814         }
815         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
816         rec->nlink = btrfs_inode_nlink(eb, item);
817         rec->isize = btrfs_inode_size(eb, item);
818         rec->nbytes = btrfs_inode_nbytes(eb, item);
819         rec->imode = btrfs_inode_mode(eb, item);
820         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
821                 rec->nodatasum = 1;
822         rec->found_inode_item = 1;
823         if (rec->nlink == 0)
824                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
825         maybe_free_inode_rec(&active_node->inode_cache, rec);
826         return 0;
827 }
828
829 static struct inode_backref *get_inode_backref(struct inode_record *rec,
830                                                 const char *name,
831                                                 int namelen, u64 dir)
832 {
833         struct inode_backref *backref;
834
835         list_for_each_entry(backref, &rec->backrefs, list) {
836                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
837                         break;
838                 if (backref->dir != dir || backref->namelen != namelen)
839                         continue;
840                 if (memcmp(name, backref->name, namelen))
841                         continue;
842                 return backref;
843         }
844
845         backref = malloc(sizeof(*backref) + namelen + 1);
846         if (!backref)
847                 return NULL;
848         memset(backref, 0, sizeof(*backref));
849         backref->dir = dir;
850         backref->namelen = namelen;
851         memcpy(backref->name, name, namelen);
852         backref->name[namelen] = '\0';
853         list_add_tail(&backref->list, &rec->backrefs);
854         return backref;
855 }
856
857 static int add_inode_backref(struct cache_tree *inode_cache,
858                              u64 ino, u64 dir, u64 index,
859                              const char *name, int namelen,
860                              u8 filetype, u8 itemtype, int errors)
861 {
862         struct inode_record *rec;
863         struct inode_backref *backref;
864
865         rec = get_inode_rec(inode_cache, ino, 1);
866         BUG_ON(IS_ERR(rec));
867         backref = get_inode_backref(rec, name, namelen, dir);
868         BUG_ON(!backref);
869         if (errors)
870                 backref->errors |= errors;
871         if (itemtype == BTRFS_DIR_INDEX_KEY) {
872                 if (backref->found_dir_index)
873                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
874                 if (backref->found_inode_ref && backref->index != index)
875                         backref->errors |= REF_ERR_INDEX_UNMATCH;
876                 if (backref->found_dir_item && backref->filetype != filetype)
877                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
878
879                 backref->index = index;
880                 backref->filetype = filetype;
881                 backref->found_dir_index = 1;
882         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
883                 rec->found_link++;
884                 if (backref->found_dir_item)
885                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
886                 if (backref->found_dir_index && backref->filetype != filetype)
887                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
888
889                 backref->filetype = filetype;
890                 backref->found_dir_item = 1;
891         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
892                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
893                 if (backref->found_inode_ref)
894                         backref->errors |= REF_ERR_DUP_INODE_REF;
895                 if (backref->found_dir_index && backref->index != index)
896                         backref->errors |= REF_ERR_INDEX_UNMATCH;
897                 else
898                         backref->index = index;
899
900                 backref->ref_type = itemtype;
901                 backref->found_inode_ref = 1;
902         } else {
903                 BUG_ON(1);
904         }
905
906         maybe_free_inode_rec(inode_cache, rec);
907         return 0;
908 }
909
910 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
911                             struct cache_tree *dst_cache)
912 {
913         struct inode_backref *backref;
914         u32 dir_count = 0;
915         int ret = 0;
916
917         dst->merging = 1;
918         list_for_each_entry(backref, &src->backrefs, list) {
919                 if (backref->found_dir_index) {
920                         add_inode_backref(dst_cache, dst->ino, backref->dir,
921                                         backref->index, backref->name,
922                                         backref->namelen, backref->filetype,
923                                         BTRFS_DIR_INDEX_KEY, backref->errors);
924                 }
925                 if (backref->found_dir_item) {
926                         dir_count++;
927                         add_inode_backref(dst_cache, dst->ino,
928                                         backref->dir, 0, backref->name,
929                                         backref->namelen, backref->filetype,
930                                         BTRFS_DIR_ITEM_KEY, backref->errors);
931                 }
932                 if (backref->found_inode_ref) {
933                         add_inode_backref(dst_cache, dst->ino,
934                                         backref->dir, backref->index,
935                                         backref->name, backref->namelen, 0,
936                                         backref->ref_type, backref->errors);
937                 }
938         }
939
940         if (src->found_dir_item)
941                 dst->found_dir_item = 1;
942         if (src->found_file_extent)
943                 dst->found_file_extent = 1;
944         if (src->found_csum_item)
945                 dst->found_csum_item = 1;
946         if (src->some_csum_missing)
947                 dst->some_csum_missing = 1;
948         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
949                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
950                 if (ret < 0)
951                         return ret;
952         }
953
954         BUG_ON(src->found_link < dir_count);
955         dst->found_link += src->found_link - dir_count;
956         dst->found_size += src->found_size;
957         if (src->extent_start != (u64)-1) {
958                 if (dst->extent_start == (u64)-1) {
959                         dst->extent_start = src->extent_start;
960                         dst->extent_end = src->extent_end;
961                 } else {
962                         if (dst->extent_end > src->extent_start)
963                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
964                         else if (dst->extent_end < src->extent_start) {
965                                 ret = add_file_extent_hole(&dst->holes,
966                                         dst->extent_end,
967                                         src->extent_start - dst->extent_end);
968                         }
969                         if (dst->extent_end < src->extent_end)
970                                 dst->extent_end = src->extent_end;
971                 }
972         }
973
974         dst->errors |= src->errors;
975         if (src->found_inode_item) {
976                 if (!dst->found_inode_item) {
977                         dst->nlink = src->nlink;
978                         dst->isize = src->isize;
979                         dst->nbytes = src->nbytes;
980                         dst->imode = src->imode;
981                         dst->nodatasum = src->nodatasum;
982                         dst->found_inode_item = 1;
983                 } else {
984                         dst->errors |= I_ERR_DUP_INODE_ITEM;
985                 }
986         }
987         dst->merging = 0;
988
989         return 0;
990 }
991
992 static int splice_shared_node(struct shared_node *src_node,
993                               struct shared_node *dst_node)
994 {
995         struct cache_extent *cache;
996         struct ptr_node *node, *ins;
997         struct cache_tree *src, *dst;
998         struct inode_record *rec, *conflict;
999         u64 current_ino = 0;
1000         int splice = 0;
1001         int ret;
1002
1003         if (--src_node->refs == 0)
1004                 splice = 1;
1005         if (src_node->current)
1006                 current_ino = src_node->current->ino;
1007
1008         src = &src_node->root_cache;
1009         dst = &dst_node->root_cache;
1010 again:
1011         cache = search_cache_extent(src, 0);
1012         while (cache) {
1013                 node = container_of(cache, struct ptr_node, cache);
1014                 rec = node->data;
1015                 cache = next_cache_extent(cache);
1016
1017                 if (splice) {
1018                         remove_cache_extent(src, &node->cache);
1019                         ins = node;
1020                 } else {
1021                         ins = malloc(sizeof(*ins));
1022                         BUG_ON(!ins);
1023                         ins->cache.start = node->cache.start;
1024                         ins->cache.size = node->cache.size;
1025                         ins->data = rec;
1026                         rec->refs++;
1027                 }
1028                 ret = insert_cache_extent(dst, &ins->cache);
1029                 if (ret == -EEXIST) {
1030                         conflict = get_inode_rec(dst, rec->ino, 1);
1031                         BUG_ON(IS_ERR(conflict));
1032                         merge_inode_recs(rec, conflict, dst);
1033                         if (rec->checked) {
1034                                 conflict->checked = 1;
1035                                 if (dst_node->current == conflict)
1036                                         dst_node->current = NULL;
1037                         }
1038                         maybe_free_inode_rec(dst, conflict);
1039                         free_inode_rec(rec);
1040                         free(ins);
1041                 } else {
1042                         BUG_ON(ret);
1043                 }
1044         }
1045
1046         if (src == &src_node->root_cache) {
1047                 src = &src_node->inode_cache;
1048                 dst = &dst_node->inode_cache;
1049                 goto again;
1050         }
1051
1052         if (current_ino > 0 && (!dst_node->current ||
1053             current_ino > dst_node->current->ino)) {
1054                 if (dst_node->current) {
1055                         dst_node->current->checked = 1;
1056                         maybe_free_inode_rec(dst, dst_node->current);
1057                 }
1058                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1059                 BUG_ON(IS_ERR(dst_node->current));
1060         }
1061         return 0;
1062 }
1063
1064 static void free_inode_ptr(struct cache_extent *cache)
1065 {
1066         struct ptr_node *node;
1067         struct inode_record *rec;
1068
1069         node = container_of(cache, struct ptr_node, cache);
1070         rec = node->data;
1071         free_inode_rec(rec);
1072         free(node);
1073 }
1074
1075 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1076
1077 static struct shared_node *find_shared_node(struct cache_tree *shared,
1078                                             u64 bytenr)
1079 {
1080         struct cache_extent *cache;
1081         struct shared_node *node;
1082
1083         cache = lookup_cache_extent(shared, bytenr, 1);
1084         if (cache) {
1085                 node = container_of(cache, struct shared_node, cache);
1086                 return node;
1087         }
1088         return NULL;
1089 }
1090
1091 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1092 {
1093         int ret;
1094         struct shared_node *node;
1095
1096         node = calloc(1, sizeof(*node));
1097         if (!node)
1098                 return -ENOMEM;
1099         node->cache.start = bytenr;
1100         node->cache.size = 1;
1101         cache_tree_init(&node->root_cache);
1102         cache_tree_init(&node->inode_cache);
1103         node->refs = refs;
1104
1105         ret = insert_cache_extent(shared, &node->cache);
1106
1107         return ret;
1108 }
1109
1110 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1111                              struct walk_control *wc, int level)
1112 {
1113         struct shared_node *node;
1114         struct shared_node *dest;
1115         int ret;
1116
1117         if (level == wc->active_node)
1118                 return 0;
1119
1120         BUG_ON(wc->active_node <= level);
1121         node = find_shared_node(&wc->shared, bytenr);
1122         if (!node) {
1123                 ret = add_shared_node(&wc->shared, bytenr, refs);
1124                 BUG_ON(ret);
1125                 node = find_shared_node(&wc->shared, bytenr);
1126                 wc->nodes[level] = node;
1127                 wc->active_node = level;
1128                 return 0;
1129         }
1130
1131         if (wc->root_level == wc->active_node &&
1132             btrfs_root_refs(&root->root_item) == 0) {
1133                 if (--node->refs == 0) {
1134                         free_inode_recs_tree(&node->root_cache);
1135                         free_inode_recs_tree(&node->inode_cache);
1136                         remove_cache_extent(&wc->shared, &node->cache);
1137                         free(node);
1138                 }
1139                 return 1;
1140         }
1141
1142         dest = wc->nodes[wc->active_node];
1143         splice_shared_node(node, dest);
1144         if (node->refs == 0) {
1145                 remove_cache_extent(&wc->shared, &node->cache);
1146                 free(node);
1147         }
1148         return 1;
1149 }
1150
1151 static int leave_shared_node(struct btrfs_root *root,
1152                              struct walk_control *wc, int level)
1153 {
1154         struct shared_node *node;
1155         struct shared_node *dest;
1156         int i;
1157
1158         if (level == wc->root_level)
1159                 return 0;
1160
1161         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1162                 if (wc->nodes[i])
1163                         break;
1164         }
1165         BUG_ON(i >= BTRFS_MAX_LEVEL);
1166
1167         node = wc->nodes[wc->active_node];
1168         wc->nodes[wc->active_node] = NULL;
1169         wc->active_node = i;
1170
1171         dest = wc->nodes[wc->active_node];
1172         if (wc->active_node < wc->root_level ||
1173             btrfs_root_refs(&root->root_item) > 0) {
1174                 BUG_ON(node->refs <= 1);
1175                 splice_shared_node(node, dest);
1176         } else {
1177                 BUG_ON(node->refs < 2);
1178                 node->refs--;
1179         }
1180         return 0;
1181 }
1182
1183 /*
1184  * Returns:
1185  * < 0 - on error
1186  * 1   - if the root with id child_root_id is a child of root parent_root_id
1187  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1188  *       has other root(s) as parent(s)
1189  * 2   - if the root child_root_id doesn't have any parent roots
1190  */
1191 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1192                          u64 child_root_id)
1193 {
1194         struct btrfs_path path;
1195         struct btrfs_key key;
1196         struct extent_buffer *leaf;
1197         int has_parent = 0;
1198         int ret;
1199
1200         btrfs_init_path(&path);
1201
1202         key.objectid = parent_root_id;
1203         key.type = BTRFS_ROOT_REF_KEY;
1204         key.offset = child_root_id;
1205         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1206                                 0, 0);
1207         if (ret < 0)
1208                 return ret;
1209         btrfs_release_path(&path);
1210         if (!ret)
1211                 return 1;
1212
1213         key.objectid = child_root_id;
1214         key.type = BTRFS_ROOT_BACKREF_KEY;
1215         key.offset = 0;
1216         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1217                                 0, 0);
1218         if (ret < 0)
1219                 goto out;
1220
1221         while (1) {
1222                 leaf = path.nodes[0];
1223                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1224                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1225                         if (ret)
1226                                 break;
1227                         leaf = path.nodes[0];
1228                 }
1229
1230                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1231                 if (key.objectid != child_root_id ||
1232                     key.type != BTRFS_ROOT_BACKREF_KEY)
1233                         break;
1234
1235                 has_parent = 1;
1236
1237                 if (key.offset == parent_root_id) {
1238                         btrfs_release_path(&path);
1239                         return 1;
1240                 }
1241
1242                 path.slots[0]++;
1243         }
1244 out:
1245         btrfs_release_path(&path);
1246         if (ret < 0)
1247                 return ret;
1248         return has_parent ? 0 : 2;
1249 }
1250
1251 static int process_dir_item(struct extent_buffer *eb,
1252                             int slot, struct btrfs_key *key,
1253                             struct shared_node *active_node)
1254 {
1255         u32 total;
1256         u32 cur = 0;
1257         u32 len;
1258         u32 name_len;
1259         u32 data_len;
1260         int error;
1261         int nritems = 0;
1262         u8 filetype;
1263         struct btrfs_dir_item *di;
1264         struct inode_record *rec;
1265         struct cache_tree *root_cache;
1266         struct cache_tree *inode_cache;
1267         struct btrfs_key location;
1268         char namebuf[BTRFS_NAME_LEN];
1269
1270         root_cache = &active_node->root_cache;
1271         inode_cache = &active_node->inode_cache;
1272         rec = active_node->current;
1273         rec->found_dir_item = 1;
1274
1275         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1276         total = btrfs_item_size_nr(eb, slot);
1277         while (cur < total) {
1278                 nritems++;
1279                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1280                 name_len = btrfs_dir_name_len(eb, di);
1281                 data_len = btrfs_dir_data_len(eb, di);
1282                 filetype = btrfs_dir_type(eb, di);
1283
1284                 rec->found_size += name_len;
1285                 if (cur + sizeof(*di) + name_len > total ||
1286                     name_len > BTRFS_NAME_LEN) {
1287                         error = REF_ERR_NAME_TOO_LONG;
1288
1289                         if (cur + sizeof(*di) > total)
1290                                 break;
1291                         len = min_t(u32, total - cur - sizeof(*di),
1292                                     BTRFS_NAME_LEN);
1293                 } else {
1294                         len = name_len;
1295                         error = 0;
1296                 }
1297
1298                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1299
1300                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1301                     key->offset != btrfs_name_hash(namebuf, len)) {
1302                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1303                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1304                         key->objectid, key->offset, namebuf, len, filetype,
1305                         key->offset, btrfs_name_hash(namebuf, len));
1306                 }
1307
1308                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1309                         add_inode_backref(inode_cache, location.objectid,
1310                                           key->objectid, key->offset, namebuf,
1311                                           len, filetype, key->type, error);
1312                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1313                         add_inode_backref(root_cache, location.objectid,
1314                                           key->objectid, key->offset,
1315                                           namebuf, len, filetype,
1316                                           key->type, error);
1317                 } else {
1318                         fprintf(stderr,
1319                                 "unknown location type %d in DIR_ITEM[%llu %llu]\n",
1320                                 location.type, key->objectid, key->offset);
1321                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1322                                           key->objectid, key->offset, namebuf,
1323                                           len, filetype, key->type, error);
1324                 }
1325
1326                 len = sizeof(*di) + name_len + data_len;
1327                 di = (struct btrfs_dir_item *)((char *)di + len);
1328                 cur += len;
1329         }
1330         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1331                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1332
1333         return 0;
1334 }
1335
1336 static int process_inode_ref(struct extent_buffer *eb,
1337                              int slot, struct btrfs_key *key,
1338                              struct shared_node *active_node)
1339 {
1340         u32 total;
1341         u32 cur = 0;
1342         u32 len;
1343         u32 name_len;
1344         u64 index;
1345         int error;
1346         struct cache_tree *inode_cache;
1347         struct btrfs_inode_ref *ref;
1348         char namebuf[BTRFS_NAME_LEN];
1349
1350         inode_cache = &active_node->inode_cache;
1351
1352         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1353         total = btrfs_item_size_nr(eb, slot);
1354         while (cur < total) {
1355                 name_len = btrfs_inode_ref_name_len(eb, ref);
1356                 index = btrfs_inode_ref_index(eb, ref);
1357
1358                 /* inode_ref + namelen should not cross item boundary */
1359                 if (cur + sizeof(*ref) + name_len > total ||
1360                     name_len > BTRFS_NAME_LEN) {
1361                         if (total < cur + sizeof(*ref))
1362                                 break;
1363
1364                         /* Still try to read out the remaining part */
1365                         len = min_t(u32, total - cur - sizeof(*ref),
1366                                     BTRFS_NAME_LEN);
1367                         error = REF_ERR_NAME_TOO_LONG;
1368                 } else {
1369                         len = name_len;
1370                         error = 0;
1371                 }
1372
1373                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1374                 add_inode_backref(inode_cache, key->objectid, key->offset,
1375                                   index, namebuf, len, 0, key->type, error);
1376
1377                 len = sizeof(*ref) + name_len;
1378                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1379                 cur += len;
1380         }
1381         return 0;
1382 }
1383
1384 static int process_inode_extref(struct extent_buffer *eb,
1385                                 int slot, struct btrfs_key *key,
1386                                 struct shared_node *active_node)
1387 {
1388         u32 total;
1389         u32 cur = 0;
1390         u32 len;
1391         u32 name_len;
1392         u64 index;
1393         u64 parent;
1394         int error;
1395         struct cache_tree *inode_cache;
1396         struct btrfs_inode_extref *extref;
1397         char namebuf[BTRFS_NAME_LEN];
1398
1399         inode_cache = &active_node->inode_cache;
1400
1401         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1402         total = btrfs_item_size_nr(eb, slot);
1403         while (cur < total) {
1404                 name_len = btrfs_inode_extref_name_len(eb, extref);
1405                 index = btrfs_inode_extref_index(eb, extref);
1406                 parent = btrfs_inode_extref_parent(eb, extref);
1407                 if (name_len <= BTRFS_NAME_LEN) {
1408                         len = name_len;
1409                         error = 0;
1410                 } else {
1411                         len = BTRFS_NAME_LEN;
1412                         error = REF_ERR_NAME_TOO_LONG;
1413                 }
1414                 read_extent_buffer(eb, namebuf,
1415                                    (unsigned long)(extref + 1), len);
1416                 add_inode_backref(inode_cache, key->objectid, parent,
1417                                   index, namebuf, len, 0, key->type, error);
1418
1419                 len = sizeof(*extref) + name_len;
1420                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1421                 cur += len;
1422         }
1423         return 0;
1424
1425 }
1426
1427 static int process_file_extent(struct btrfs_root *root,
1428                                 struct extent_buffer *eb,
1429                                 int slot, struct btrfs_key *key,
1430                                 struct shared_node *active_node)
1431 {
1432         struct inode_record *rec;
1433         struct btrfs_file_extent_item *fi;
1434         u64 num_bytes = 0;
1435         u64 disk_bytenr = 0;
1436         u64 extent_offset = 0;
1437         u64 mask = root->fs_info->sectorsize - 1;
1438         u32 max_inline_size = min_t(u32, mask,
1439                                 BTRFS_MAX_INLINE_DATA_SIZE(root->fs_info));
1440         int extent_type;
1441         int ret;
1442
1443         rec = active_node->current;
1444         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1445         rec->found_file_extent = 1;
1446
1447         if (rec->extent_start == (u64)-1) {
1448                 rec->extent_start = key->offset;
1449                 rec->extent_end = key->offset;
1450         }
1451
1452         if (rec->extent_end > key->offset)
1453                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1454         else if (rec->extent_end < key->offset) {
1455                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1456                                            key->offset - rec->extent_end);
1457                 if (ret < 0)
1458                         return ret;
1459         }
1460
1461         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1462         extent_type = btrfs_file_extent_type(eb, fi);
1463
1464         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1465                 u8 compression = btrfs_file_extent_compression(eb, fi);
1466                 struct btrfs_item *item = btrfs_item_nr(slot);
1467
1468                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1469                 if (num_bytes == 0)
1470                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1471                 if (compression) {
1472                         if (btrfs_file_extent_inline_item_len(eb, item) >
1473                             max_inline_size ||
1474                             num_bytes > root->fs_info->sectorsize)
1475                                 rec->errors |= I_ERR_FILE_EXTENT_TOO_LARGE;
1476                 } else {
1477                         if (num_bytes > max_inline_size)
1478                                 rec->errors |= I_ERR_FILE_EXTENT_TOO_LARGE;
1479                 }
1480                 rec->found_size += num_bytes;
1481                 num_bytes = (num_bytes + mask) & ~mask;
1482         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1483                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1484                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1485                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1486                 extent_offset = btrfs_file_extent_offset(eb, fi);
1487                 if (num_bytes == 0 || (num_bytes & mask))
1488                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1489                 if (num_bytes + extent_offset >
1490                     btrfs_file_extent_ram_bytes(eb, fi))
1491                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1492                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1493                     (btrfs_file_extent_compression(eb, fi) ||
1494                      btrfs_file_extent_encryption(eb, fi) ||
1495                      btrfs_file_extent_other_encoding(eb, fi)))
1496                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1497                 if (disk_bytenr > 0)
1498                         rec->found_size += num_bytes;
1499         } else {
1500                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1501         }
1502         rec->extent_end = key->offset + num_bytes;
1503
1504         /*
1505          * The data reloc tree will copy full extents into its inode and then
1506          * copy the corresponding csums.  Because the extent it copied could be
1507          * a preallocated extent that hasn't been written to yet there may be no
1508          * csums to copy, ergo we won't have csums for our file extent.  This is
1509          * ok so just don't bother checking csums if the inode belongs to the
1510          * data reloc tree.
1511          */
1512         if (disk_bytenr > 0 &&
1513             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1514                 u64 found;
1515                 if (btrfs_file_extent_compression(eb, fi))
1516                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1517                 else
1518                         disk_bytenr += extent_offset;
1519
1520                 ret = count_csum_range(root->fs_info, disk_bytenr, num_bytes,
1521                                        &found);
1522                 if (ret < 0)
1523                         return ret;
1524                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1525                         if (found > 0)
1526                                 rec->found_csum_item = 1;
1527                         if (found < num_bytes)
1528                                 rec->some_csum_missing = 1;
1529                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1530                         if (found > 0) {
1531                                 ret = check_prealloc_extent_written(root->fs_info,
1532                                                                     disk_bytenr,
1533                                                                     num_bytes);
1534                                 if (ret < 0)
1535                                         return ret;
1536                                 if (ret == 0)
1537                                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1538                         }
1539                 }
1540         }
1541         return 0;
1542 }
1543
1544 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1545                             struct walk_control *wc)
1546 {
1547         struct btrfs_key key;
1548         u32 nritems;
1549         int i;
1550         int ret = 0;
1551         struct cache_tree *inode_cache;
1552         struct shared_node *active_node;
1553
1554         if (wc->root_level == wc->active_node &&
1555             btrfs_root_refs(&root->root_item) == 0)
1556                 return 0;
1557
1558         active_node = wc->nodes[wc->active_node];
1559         inode_cache = &active_node->inode_cache;
1560         nritems = btrfs_header_nritems(eb);
1561         for (i = 0; i < nritems; i++) {
1562                 btrfs_item_key_to_cpu(eb, &key, i);
1563
1564                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1565                         continue;
1566                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1567                         continue;
1568
1569                 if (active_node->current == NULL ||
1570                     active_node->current->ino < key.objectid) {
1571                         if (active_node->current) {
1572                                 active_node->current->checked = 1;
1573                                 maybe_free_inode_rec(inode_cache,
1574                                                      active_node->current);
1575                         }
1576                         active_node->current = get_inode_rec(inode_cache,
1577                                                              key.objectid, 1);
1578                         BUG_ON(IS_ERR(active_node->current));
1579                 }
1580                 switch (key.type) {
1581                 case BTRFS_DIR_ITEM_KEY:
1582                 case BTRFS_DIR_INDEX_KEY:
1583                         ret = process_dir_item(eb, i, &key, active_node);
1584                         break;
1585                 case BTRFS_INODE_REF_KEY:
1586                         ret = process_inode_ref(eb, i, &key, active_node);
1587                         break;
1588                 case BTRFS_INODE_EXTREF_KEY:
1589                         ret = process_inode_extref(eb, i, &key, active_node);
1590                         break;
1591                 case BTRFS_INODE_ITEM_KEY:
1592                         ret = process_inode_item(eb, i, &key, active_node);
1593                         break;
1594                 case BTRFS_EXTENT_DATA_KEY:
1595                         ret = process_file_extent(root, eb, i, &key,
1596                                                   active_node);
1597                         break;
1598                 default:
1599                         break;
1600                 };
1601         }
1602         return ret;
1603 }
1604
1605 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1606                           struct walk_control *wc, int *level,
1607                           struct node_refs *nrefs)
1608 {
1609         enum btrfs_tree_block_status status;
1610         u64 bytenr;
1611         u64 ptr_gen;
1612         struct btrfs_fs_info *fs_info = root->fs_info;
1613         struct extent_buffer *next;
1614         struct extent_buffer *cur;
1615         int ret, err = 0;
1616         u64 refs;
1617
1618         WARN_ON(*level < 0);
1619         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1620
1621         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1622                 refs = nrefs->refs[*level];
1623                 ret = 0;
1624         } else {
1625                 ret = btrfs_lookup_extent_info(NULL, root,
1626                                        path->nodes[*level]->start,
1627                                        *level, 1, &refs, NULL);
1628                 if (ret < 0) {
1629                         err = ret;
1630                         goto out;
1631                 }
1632                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1633                 nrefs->refs[*level] = refs;
1634         }
1635
1636         if (refs > 1) {
1637                 ret = enter_shared_node(root, path->nodes[*level]->start,
1638                                         refs, wc, *level);
1639                 if (ret > 0) {
1640                         err = ret;
1641                         goto out;
1642                 }
1643         }
1644
1645         while (*level >= 0) {
1646                 WARN_ON(*level < 0);
1647                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1648                 cur = path->nodes[*level];
1649
1650                 if (btrfs_header_level(cur) != *level)
1651                         WARN_ON(1);
1652
1653                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1654                         break;
1655                 if (*level == 0) {
1656                         ret = process_one_leaf(root, cur, wc);
1657                         if (ret < 0)
1658                                 err = ret;
1659                         break;
1660                 }
1661                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1662                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1663
1664                 if (bytenr == nrefs->bytenr[*level - 1]) {
1665                         refs = nrefs->refs[*level - 1];
1666                 } else {
1667                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1668                                         *level - 1, 1, &refs, NULL);
1669                         if (ret < 0) {
1670                                 refs = 0;
1671                         } else {
1672                                 nrefs->bytenr[*level - 1] = bytenr;
1673                                 nrefs->refs[*level - 1] = refs;
1674                         }
1675                 }
1676
1677                 if (refs > 1) {
1678                         ret = enter_shared_node(root, bytenr, refs,
1679                                                 wc, *level - 1);
1680                         if (ret > 0) {
1681                                 path->slots[*level]++;
1682                                 continue;
1683                         }
1684                 }
1685
1686                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
1687                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1688                         free_extent_buffer(next);
1689                         reada_walk_down(root, cur, path->slots[*level]);
1690                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
1691                         if (!extent_buffer_uptodate(next)) {
1692                                 struct btrfs_key node_key;
1693
1694                                 btrfs_node_key_to_cpu(path->nodes[*level],
1695                                                       &node_key,
1696                                                       path->slots[*level]);
1697                                 btrfs_add_corrupt_extent_record(root->fs_info,
1698                                                 &node_key,
1699                                                 path->nodes[*level]->start,
1700                                                 root->fs_info->nodesize,
1701                                                 *level);
1702                                 err = -EIO;
1703                                 goto out;
1704                         }
1705                 }
1706
1707                 ret = check_child_node(cur, path->slots[*level], next);
1708                 if (ret) {
1709                         free_extent_buffer(next);
1710                         err = ret;
1711                         goto out;
1712                 }
1713
1714                 if (btrfs_is_leaf(next))
1715                         status = btrfs_check_leaf(root, NULL, next);
1716                 else
1717                         status = btrfs_check_node(root, NULL, next);
1718                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1719                         free_extent_buffer(next);
1720                         err = -EIO;
1721                         goto out;
1722                 }
1723
1724                 *level = *level - 1;
1725                 free_extent_buffer(path->nodes[*level]);
1726                 path->nodes[*level] = next;
1727                 path->slots[*level] = 0;
1728         }
1729 out:
1730         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1731         return err;
1732 }
1733
1734 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1735                         struct walk_control *wc, int *level)
1736 {
1737         int i;
1738         struct extent_buffer *leaf;
1739
1740         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1741                 leaf = path->nodes[i];
1742                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1743                         path->slots[i]++;
1744                         *level = i;
1745                         return 0;
1746                 }
1747                 free_extent_buffer(path->nodes[*level]);
1748                 path->nodes[*level] = NULL;
1749                 BUG_ON(*level > wc->active_node);
1750                 if (*level == wc->active_node)
1751                         leave_shared_node(root, wc, *level);
1752                 *level = i + 1;
1753         }
1754         return 1;
1755 }
1756
1757 static int check_root_dir(struct inode_record *rec)
1758 {
1759         struct inode_backref *backref;
1760         int ret = -1;
1761
1762         if (!rec->found_inode_item || rec->errors)
1763                 goto out;
1764         if (rec->nlink != 1 || rec->found_link != 0)
1765                 goto out;
1766         if (list_empty(&rec->backrefs))
1767                 goto out;
1768         backref = to_inode_backref(rec->backrefs.next);
1769         if (!backref->found_inode_ref)
1770                 goto out;
1771         if (backref->index != 0 || backref->namelen != 2 ||
1772             memcmp(backref->name, "..", 2))
1773                 goto out;
1774         if (backref->found_dir_index || backref->found_dir_item)
1775                 goto out;
1776         ret = 0;
1777 out:
1778         return ret;
1779 }
1780
1781 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1782                               struct btrfs_root *root, struct btrfs_path *path,
1783                               struct inode_record *rec)
1784 {
1785         struct btrfs_inode_item *ei;
1786         struct btrfs_key key;
1787         int ret;
1788
1789         key.objectid = rec->ino;
1790         key.type = BTRFS_INODE_ITEM_KEY;
1791         key.offset = (u64)-1;
1792
1793         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1794         if (ret < 0)
1795                 goto out;
1796         if (ret) {
1797                 if (!path->slots[0]) {
1798                         ret = -ENOENT;
1799                         goto out;
1800                 }
1801                 path->slots[0]--;
1802                 ret = 0;
1803         }
1804         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1805         if (key.objectid != rec->ino) {
1806                 ret = -ENOENT;
1807                 goto out;
1808         }
1809
1810         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1811                             struct btrfs_inode_item);
1812         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1813         btrfs_mark_buffer_dirty(path->nodes[0]);
1814         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1815         printf("reset isize for dir %llu root %llu\n", rec->ino,
1816                root->root_key.objectid);
1817 out:
1818         btrfs_release_path(path);
1819         return ret;
1820 }
1821
1822 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1823                                     struct btrfs_root *root,
1824                                     struct btrfs_path *path,
1825                                     struct inode_record *rec)
1826 {
1827         int ret;
1828
1829         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
1830         btrfs_release_path(path);
1831         if (!ret)
1832                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1833         return ret;
1834 }
1835
1836 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
1837                                struct btrfs_root *root,
1838                                struct btrfs_path *path,
1839                                struct inode_record *rec)
1840 {
1841         struct btrfs_inode_item *ei;
1842         struct btrfs_key key;
1843         int ret = 0;
1844
1845         key.objectid = rec->ino;
1846         key.type = BTRFS_INODE_ITEM_KEY;
1847         key.offset = 0;
1848
1849         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1850         if (ret) {
1851                 if (ret > 0)
1852                         ret = -ENOENT;
1853                 goto out;
1854         }
1855
1856         /* Since ret == 0, no need to check anything */
1857         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1858                             struct btrfs_inode_item);
1859         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
1860         btrfs_mark_buffer_dirty(path->nodes[0]);
1861         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
1862         printf("reset nbytes for ino %llu root %llu\n",
1863                rec->ino, root->root_key.objectid);
1864 out:
1865         btrfs_release_path(path);
1866         return ret;
1867 }
1868
1869 static int add_missing_dir_index(struct btrfs_root *root,
1870                                  struct cache_tree *inode_cache,
1871                                  struct inode_record *rec,
1872                                  struct inode_backref *backref)
1873 {
1874         struct btrfs_path path;
1875         struct btrfs_trans_handle *trans;
1876         struct btrfs_dir_item *dir_item;
1877         struct extent_buffer *leaf;
1878         struct btrfs_key key;
1879         struct btrfs_disk_key disk_key;
1880         struct inode_record *dir_rec;
1881         unsigned long name_ptr;
1882         u32 data_size = sizeof(*dir_item) + backref->namelen;
1883         int ret;
1884
1885         trans = btrfs_start_transaction(root, 1);
1886         if (IS_ERR(trans))
1887                 return PTR_ERR(trans);
1888
1889         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
1890                 (unsigned long long)rec->ino);
1891
1892         btrfs_init_path(&path);
1893         key.objectid = backref->dir;
1894         key.type = BTRFS_DIR_INDEX_KEY;
1895         key.offset = backref->index;
1896         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
1897         BUG_ON(ret);
1898
1899         leaf = path.nodes[0];
1900         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
1901
1902         disk_key.objectid = cpu_to_le64(rec->ino);
1903         disk_key.type = BTRFS_INODE_ITEM_KEY;
1904         disk_key.offset = 0;
1905
1906         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
1907         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
1908         btrfs_set_dir_data_len(leaf, dir_item, 0);
1909         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
1910         name_ptr = (unsigned long)(dir_item + 1);
1911         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
1912         btrfs_mark_buffer_dirty(leaf);
1913         btrfs_release_path(&path);
1914         btrfs_commit_transaction(trans, root);
1915
1916         backref->found_dir_index = 1;
1917         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
1918         BUG_ON(IS_ERR(dir_rec));
1919         if (!dir_rec)
1920                 return 0;
1921         dir_rec->found_size += backref->namelen;
1922         if (dir_rec->found_size == dir_rec->isize &&
1923             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
1924                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1925         if (dir_rec->found_size != dir_rec->isize)
1926                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1927
1928         return 0;
1929 }
1930
1931 static int delete_dir_index(struct btrfs_root *root,
1932                             struct inode_backref *backref)
1933 {
1934         struct btrfs_trans_handle *trans;
1935         struct btrfs_dir_item *di;
1936         struct btrfs_path path;
1937         int ret = 0;
1938
1939         trans = btrfs_start_transaction(root, 1);
1940         if (IS_ERR(trans))
1941                 return PTR_ERR(trans);
1942
1943         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
1944                 (unsigned long long)backref->dir,
1945                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
1946                 (unsigned long long)root->objectid);
1947
1948         btrfs_init_path(&path);
1949         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
1950                                     backref->name, backref->namelen,
1951                                     backref->index, -1);
1952         if (IS_ERR(di)) {
1953                 ret = PTR_ERR(di);
1954                 btrfs_release_path(&path);
1955                 btrfs_commit_transaction(trans, root);
1956                 if (ret == -ENOENT)
1957                         return 0;
1958                 return ret;
1959         }
1960
1961         if (!di)
1962                 ret = btrfs_del_item(trans, root, &path);
1963         else
1964                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
1965         BUG_ON(ret);
1966         btrfs_release_path(&path);
1967         btrfs_commit_transaction(trans, root);
1968         return ret;
1969 }
1970
1971 static int create_inode_item(struct btrfs_root *root,
1972                              struct inode_record *rec, int root_dir)
1973 {
1974         struct btrfs_trans_handle *trans;
1975         u64 nlink = 0;
1976         u32 mode = 0;
1977         u64 size = 0;
1978         int ret;
1979
1980         trans = btrfs_start_transaction(root, 1);
1981         if (IS_ERR(trans)) {
1982                 ret = PTR_ERR(trans);
1983                 return ret;
1984         }
1985
1986         nlink = root_dir ? 1 : rec->found_link;
1987         if (rec->found_dir_item) {
1988                 if (rec->found_file_extent)
1989                         fprintf(stderr, "root %llu inode %llu has both a dir "
1990                                 "item and extents, unsure if it is a dir or a "
1991                                 "regular file so setting it as a directory\n",
1992                                 (unsigned long long)root->objectid,
1993                                 (unsigned long long)rec->ino);
1994                 mode = S_IFDIR | 0755;
1995                 size = rec->found_size;
1996         } else if (!rec->found_dir_item) {
1997                 size = rec->extent_end;
1998                 mode =  S_IFREG | 0755;
1999         }
2000
2001         ret = insert_inode_item(trans, root, rec->ino, size, rec->nbytes,
2002                                   nlink, mode);
2003         btrfs_commit_transaction(trans, root);
2004         return 0;
2005 }
2006
2007 static int repair_inode_backrefs(struct btrfs_root *root,
2008                                  struct inode_record *rec,
2009                                  struct cache_tree *inode_cache,
2010                                  int delete)
2011 {
2012         struct inode_backref *tmp, *backref;
2013         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2014         int ret = 0;
2015         int repaired = 0;
2016
2017         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2018                 if (!delete && rec->ino == root_dirid) {
2019                         if (!rec->found_inode_item) {
2020                                 ret = create_inode_item(root, rec, 1);
2021                                 if (ret)
2022                                         break;
2023                                 repaired++;
2024                         }
2025                 }
2026
2027                 /* Index 0 for root dir's are special, don't mess with it */
2028                 if (rec->ino == root_dirid && backref->index == 0)
2029                         continue;
2030
2031                 if (delete &&
2032                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2033                      (backref->found_dir_index && backref->found_inode_ref &&
2034                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2035                         ret = delete_dir_index(root, backref);
2036                         if (ret)
2037                                 break;
2038                         repaired++;
2039                         list_del(&backref->list);
2040                         free(backref);
2041                         continue;
2042                 }
2043
2044                 if (!delete && !backref->found_dir_index &&
2045                     backref->found_dir_item && backref->found_inode_ref) {
2046                         ret = add_missing_dir_index(root, inode_cache, rec,
2047                                                     backref);
2048                         if (ret)
2049                                 break;
2050                         repaired++;
2051                         if (backref->found_dir_item &&
2052                             backref->found_dir_index) {
2053                                 if (!backref->errors &&
2054                                     backref->found_inode_ref) {
2055                                         list_del(&backref->list);
2056                                         free(backref);
2057                                         continue;
2058                                 }
2059                         }
2060                 }
2061
2062                 if (!delete && (!backref->found_dir_index &&
2063                                 !backref->found_dir_item &&
2064                                 backref->found_inode_ref)) {
2065                         struct btrfs_trans_handle *trans;
2066                         struct btrfs_key location;
2067
2068                         ret = check_dir_conflict(root, backref->name,
2069                                                  backref->namelen,
2070                                                  backref->dir,
2071                                                  backref->index);
2072                         if (ret) {
2073                                 /*
2074                                  * let nlink fixing routine to handle it,
2075                                  * which can do it better.
2076                                  */
2077                                 ret = 0;
2078                                 break;
2079                         }
2080                         location.objectid = rec->ino;
2081                         location.type = BTRFS_INODE_ITEM_KEY;
2082                         location.offset = 0;
2083
2084                         trans = btrfs_start_transaction(root, 1);
2085                         if (IS_ERR(trans)) {
2086                                 ret = PTR_ERR(trans);
2087                                 break;
2088                         }
2089                         fprintf(stderr, "adding missing dir index/item pair "
2090                                 "for inode %llu\n",
2091                                 (unsigned long long)rec->ino);
2092                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2093                                                     backref->namelen,
2094                                                     backref->dir, &location,
2095                                                     imode_to_type(rec->imode),
2096                                                     backref->index);
2097                         BUG_ON(ret);
2098                         btrfs_commit_transaction(trans, root);
2099                         repaired++;
2100                 }
2101
2102                 if (!delete && (backref->found_inode_ref &&
2103                                 backref->found_dir_index &&
2104                                 backref->found_dir_item &&
2105                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2106                                 !rec->found_inode_item)) {
2107                         ret = create_inode_item(root, rec, 0);
2108                         if (ret)
2109                                 break;
2110                         repaired++;
2111                 }
2112
2113         }
2114         return ret ? ret : repaired;
2115 }
2116
2117 /*
2118  * To determine the file type for nlink/inode_item repair
2119  *
2120  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2121  * Return -ENOENT if file type is not found.
2122  */
2123 static int find_file_type(struct inode_record *rec, u8 *type)
2124 {
2125         struct inode_backref *backref;
2126
2127         /* For inode item recovered case */
2128         if (rec->found_inode_item) {
2129                 *type = imode_to_type(rec->imode);
2130                 return 0;
2131         }
2132
2133         list_for_each_entry(backref, &rec->backrefs, list) {
2134                 if (backref->found_dir_index || backref->found_dir_item) {
2135                         *type = backref->filetype;
2136                         return 0;
2137                 }
2138         }
2139         return -ENOENT;
2140 }
2141
2142 /*
2143  * To determine the file name for nlink repair
2144  *
2145  * Return 0 if file name is found, set name and namelen.
2146  * Return -ENOENT if file name is not found.
2147  */
2148 static int find_file_name(struct inode_record *rec,
2149                           char *name, int *namelen)
2150 {
2151         struct inode_backref *backref;
2152
2153         list_for_each_entry(backref, &rec->backrefs, list) {
2154                 if (backref->found_dir_index || backref->found_dir_item ||
2155                     backref->found_inode_ref) {
2156                         memcpy(name, backref->name, backref->namelen);
2157                         *namelen = backref->namelen;
2158                         return 0;
2159                 }
2160         }
2161         return -ENOENT;
2162 }
2163
2164 /* Reset the nlink of the inode to the correct one */
2165 static int reset_nlink(struct btrfs_trans_handle *trans,
2166                        struct btrfs_root *root,
2167                        struct btrfs_path *path,
2168                        struct inode_record *rec)
2169 {
2170         struct inode_backref *backref;
2171         struct inode_backref *tmp;
2172         struct btrfs_key key;
2173         struct btrfs_inode_item *inode_item;
2174         int ret = 0;
2175
2176         /* We don't believe this either, reset it and iterate backref */
2177         rec->found_link = 0;
2178
2179         /* Remove all backref including the valid ones */
2180         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2181                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2182                                    backref->index, backref->name,
2183                                    backref->namelen, 0);
2184                 if (ret < 0)
2185                         goto out;
2186
2187                 /* remove invalid backref, so it won't be added back */
2188                 if (!(backref->found_dir_index &&
2189                       backref->found_dir_item &&
2190                       backref->found_inode_ref)) {
2191                         list_del(&backref->list);
2192                         free(backref);
2193                 } else {
2194                         rec->found_link++;
2195                 }
2196         }
2197
2198         /* Set nlink to 0 */
2199         key.objectid = rec->ino;
2200         key.type = BTRFS_INODE_ITEM_KEY;
2201         key.offset = 0;
2202         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2203         if (ret < 0)
2204                 goto out;
2205         if (ret > 0) {
2206                 ret = -ENOENT;
2207                 goto out;
2208         }
2209         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2210                                     struct btrfs_inode_item);
2211         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2212         btrfs_mark_buffer_dirty(path->nodes[0]);
2213         btrfs_release_path(path);
2214
2215         /*
2216          * Add back valid inode_ref/dir_item/dir_index,
2217          * add_link() will handle the nlink inc, so new nlink must be correct
2218          */
2219         list_for_each_entry(backref, &rec->backrefs, list) {
2220                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2221                                      backref->name, backref->namelen,
2222                                      backref->filetype, &backref->index, 1, 0);
2223                 if (ret < 0)
2224                         goto out;
2225         }
2226 out:
2227         btrfs_release_path(path);
2228         return ret;
2229 }
2230
2231 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2232                                struct btrfs_root *root,
2233                                struct btrfs_path *path,
2234                                struct inode_record *rec)
2235 {
2236         char namebuf[BTRFS_NAME_LEN] = {0};
2237         u8 type = 0;
2238         int namelen = 0;
2239         int name_recovered = 0;
2240         int type_recovered = 0;
2241         int ret = 0;
2242
2243         /*
2244          * Get file name and type first before these invalid inode ref
2245          * are deleted by remove_all_invalid_backref()
2246          */
2247         name_recovered = !find_file_name(rec, namebuf, &namelen);
2248         type_recovered = !find_file_type(rec, &type);
2249
2250         if (!name_recovered) {
2251                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2252                        rec->ino, rec->ino);
2253                 namelen = count_digits(rec->ino);
2254                 sprintf(namebuf, "%llu", rec->ino);
2255                 name_recovered = 1;
2256         }
2257         if (!type_recovered) {
2258                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2259                        rec->ino);
2260                 type = BTRFS_FT_REG_FILE;
2261                 type_recovered = 1;
2262         }
2263
2264         ret = reset_nlink(trans, root, path, rec);
2265         if (ret < 0) {
2266                 fprintf(stderr,
2267                         "Failed to reset nlink for inode %llu: %s\n",
2268                         rec->ino, strerror(-ret));
2269                 goto out;
2270         }
2271
2272         if (rec->found_link == 0) {
2273                 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
2274                                               namebuf, namelen, type,
2275                                               (u64 *)&rec->found_link);
2276                 if (ret)
2277                         goto out;
2278         }
2279         printf("Fixed the nlink of inode %llu\n", rec->ino);
2280 out:
2281         /*
2282          * Clear the flag anyway, or we will loop forever for the same inode
2283          * as it will not be removed from the bad inode list and the dead loop
2284          * happens.
2285          */
2286         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2287         btrfs_release_path(path);
2288         return ret;
2289 }
2290
2291 /*
2292  * Check if there is any normal(reg or prealloc) file extent for given
2293  * ino.
2294  * This is used to determine the file type when neither its dir_index/item or
2295  * inode_item exists.
2296  *
2297  * This will *NOT* report error, if any error happens, just consider it does
2298  * not have any normal file extent.
2299  */
2300 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2301 {
2302         struct btrfs_path path;
2303         struct btrfs_key key;
2304         struct btrfs_key found_key;
2305         struct btrfs_file_extent_item *fi;
2306         u8 type;
2307         int ret = 0;
2308
2309         btrfs_init_path(&path);
2310         key.objectid = ino;
2311         key.type = BTRFS_EXTENT_DATA_KEY;
2312         key.offset = 0;
2313
2314         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2315         if (ret < 0) {
2316                 ret = 0;
2317                 goto out;
2318         }
2319         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2320                 ret = btrfs_next_leaf(root, &path);
2321                 if (ret) {
2322                         ret = 0;
2323                         goto out;
2324                 }
2325         }
2326         while (1) {
2327                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2328                                       path.slots[0]);
2329                 if (found_key.objectid != ino ||
2330                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2331                         break;
2332                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2333                                     struct btrfs_file_extent_item);
2334                 type = btrfs_file_extent_type(path.nodes[0], fi);
2335                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2336                         ret = 1;
2337                         goto out;
2338                 }
2339         }
2340 out:
2341         btrfs_release_path(&path);
2342         return ret;
2343 }
2344
2345 static u32 btrfs_type_to_imode(u8 type)
2346 {
2347         static u32 imode_by_btrfs_type[] = {
2348                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2349                 [BTRFS_FT_DIR]          = S_IFDIR,
2350                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2351                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2352                 [BTRFS_FT_FIFO]         = S_IFIFO,
2353                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2354                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2355         };
2356
2357         return imode_by_btrfs_type[(type)];
2358 }
2359
2360 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2361                                 struct btrfs_root *root,
2362                                 struct btrfs_path *path,
2363                                 struct inode_record *rec)
2364 {
2365         u8 filetype;
2366         u32 mode = 0700;
2367         int type_recovered = 0;
2368         int ret = 0;
2369
2370         printf("Trying to rebuild inode:%llu\n", rec->ino);
2371
2372         type_recovered = !find_file_type(rec, &filetype);
2373
2374         /*
2375          * Try to determine inode type if type not found.
2376          *
2377          * For found regular file extent, it must be FILE.
2378          * For found dir_item/index, it must be DIR.
2379          *
2380          * For undetermined one, use FILE as fallback.
2381          *
2382          * TODO:
2383          * 1. If found backref(inode_index/item is already handled) to it,
2384          *    it must be DIR.
2385          *    Need new inode-inode ref structure to allow search for that.
2386          */
2387         if (!type_recovered) {
2388                 if (rec->found_file_extent &&
2389                     find_normal_file_extent(root, rec->ino)) {
2390                         type_recovered = 1;
2391                         filetype = BTRFS_FT_REG_FILE;
2392                 } else if (rec->found_dir_item) {
2393                         type_recovered = 1;
2394                         filetype = BTRFS_FT_DIR;
2395                 } else if (!list_empty(&rec->orphan_extents)) {
2396                         type_recovered = 1;
2397                         filetype = BTRFS_FT_REG_FILE;
2398                 } else{
2399                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2400                                rec->ino);
2401                         type_recovered = 1;
2402                         filetype = BTRFS_FT_REG_FILE;
2403                 }
2404         }
2405
2406         ret = btrfs_new_inode(trans, root, rec->ino,
2407                               mode | btrfs_type_to_imode(filetype));
2408         if (ret < 0)
2409                 goto out;
2410
2411         /*
2412          * Here inode rebuild is done, we only rebuild the inode item,
2413          * don't repair the nlink(like move to lost+found).
2414          * That is the job of nlink repair.
2415          *
2416          * We just fill the record and return
2417          */
2418         rec->found_dir_item = 1;
2419         rec->imode = mode | btrfs_type_to_imode(filetype);
2420         rec->nlink = 0;
2421         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2422         /* Ensure the inode_nlinks repair function will be called */
2423         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2424 out:
2425         return ret;
2426 }
2427
2428 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2429                                       struct btrfs_root *root,
2430                                       struct btrfs_path *path,
2431                                       struct inode_record *rec)
2432 {
2433         struct orphan_data_extent *orphan;
2434         struct orphan_data_extent *tmp;
2435         int ret = 0;
2436
2437         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2438                 /*
2439                  * Check for conflicting file extents
2440                  *
2441                  * Here we don't know whether the extents is compressed or not,
2442                  * so we can only assume it not compressed nor data offset,
2443                  * and use its disk_len as extent length.
2444                  */
2445                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2446                                        orphan->offset, orphan->disk_len, 0);
2447                 btrfs_release_path(path);
2448                 if (ret < 0)
2449                         goto out;
2450                 if (!ret) {
2451                         fprintf(stderr,
2452                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2453                                 orphan->disk_bytenr, orphan->disk_len);
2454                         ret = btrfs_free_extent(trans,
2455                                         root->fs_info->extent_root,
2456                                         orphan->disk_bytenr, orphan->disk_len,
2457                                         0, root->objectid, orphan->objectid,
2458                                         orphan->offset);
2459                         if (ret < 0)
2460                                 goto out;
2461                 }
2462                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2463                                 orphan->offset, orphan->disk_bytenr,
2464                                 orphan->disk_len, orphan->disk_len);
2465                 if (ret < 0)
2466                         goto out;
2467
2468                 /* Update file size info */
2469                 rec->found_size += orphan->disk_len;
2470                 if (rec->found_size == rec->nbytes)
2471                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2472
2473                 /* Update the file extent hole info too */
2474                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2475                                            orphan->disk_len);
2476                 if (ret < 0)
2477                         goto out;
2478                 if (RB_EMPTY_ROOT(&rec->holes))
2479                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2480
2481                 list_del(&orphan->list);
2482                 free(orphan);
2483         }
2484         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2485 out:
2486         return ret;
2487 }
2488
2489 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2490                                         struct btrfs_root *root,
2491                                         struct btrfs_path *path,
2492                                         struct inode_record *rec)
2493 {
2494         struct rb_node *node;
2495         struct file_extent_hole *hole;
2496         int found = 0;
2497         int ret = 0;
2498
2499         node = rb_first(&rec->holes);
2500
2501         while (node) {
2502                 found = 1;
2503                 hole = rb_entry(node, struct file_extent_hole, node);
2504                 ret = btrfs_punch_hole(trans, root, rec->ino,
2505                                        hole->start, hole->len);
2506                 if (ret < 0)
2507                         goto out;
2508                 ret = del_file_extent_hole(&rec->holes, hole->start,
2509                                            hole->len);
2510                 if (ret < 0)
2511                         goto out;
2512                 if (RB_EMPTY_ROOT(&rec->holes))
2513                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2514                 node = rb_first(&rec->holes);
2515         }
2516         /* special case for a file losing all its file extent */
2517         if (!found) {
2518                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2519                                        round_up(rec->isize,
2520                                                 root->fs_info->sectorsize));
2521                 if (ret < 0)
2522                         goto out;
2523         }
2524         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2525                rec->ino, root->objectid);
2526 out:
2527         return ret;
2528 }
2529
2530 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2531 {
2532         struct btrfs_trans_handle *trans;
2533         struct btrfs_path path;
2534         int ret = 0;
2535
2536         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2537                              I_ERR_NO_ORPHAN_ITEM |
2538                              I_ERR_LINK_COUNT_WRONG |
2539                              I_ERR_NO_INODE_ITEM |
2540                              I_ERR_FILE_EXTENT_ORPHAN |
2541                              I_ERR_FILE_EXTENT_DISCOUNT|
2542                              I_ERR_FILE_NBYTES_WRONG)))
2543                 return rec->errors;
2544
2545         /*
2546          * For nlink repair, it may create a dir and add link, so
2547          * 2 for parent(256)'s dir_index and dir_item
2548          * 2 for lost+found dir's inode_item and inode_ref
2549          * 1 for the new inode_ref of the file
2550          * 2 for lost+found dir's dir_index and dir_item for the file
2551          */
2552         trans = btrfs_start_transaction(root, 7);
2553         if (IS_ERR(trans))
2554                 return PTR_ERR(trans);
2555
2556         btrfs_init_path(&path);
2557         if (rec->errors & I_ERR_NO_INODE_ITEM)
2558                 ret = repair_inode_no_item(trans, root, &path, rec);
2559         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2560                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2561         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2562                 ret = repair_inode_discount_extent(trans, root, &path, rec);
2563         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2564                 ret = repair_inode_isize(trans, root, &path, rec);
2565         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2566                 ret = repair_inode_orphan_item(trans, root, &path, rec);
2567         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2568                 ret = repair_inode_nlinks(trans, root, &path, rec);
2569         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2570                 ret = repair_inode_nbytes(trans, root, &path, rec);
2571         btrfs_commit_transaction(trans, root);
2572         btrfs_release_path(&path);
2573         return ret;
2574 }
2575
2576 static int check_inode_recs(struct btrfs_root *root,
2577                             struct cache_tree *inode_cache)
2578 {
2579         struct cache_extent *cache;
2580         struct ptr_node *node;
2581         struct inode_record *rec;
2582         struct inode_backref *backref;
2583         int stage = 0;
2584         int ret = 0;
2585         int err = 0;
2586         u64 error = 0;
2587         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2588
2589         if (btrfs_root_refs(&root->root_item) == 0) {
2590                 if (!cache_tree_empty(inode_cache))
2591                         fprintf(stderr, "warning line %d\n", __LINE__);
2592                 return 0;
2593         }
2594
2595         /*
2596          * We need to repair backrefs first because we could change some of the
2597          * errors in the inode recs.
2598          *
2599          * We also need to go through and delete invalid backrefs first and then
2600          * add the correct ones second.  We do this because we may get EEXIST
2601          * when adding back the correct index because we hadn't yet deleted the
2602          * invalid index.
2603          *
2604          * For example, if we were missing a dir index then the directories
2605          * isize would be wrong, so if we fixed the isize to what we thought it
2606          * would be and then fixed the backref we'd still have a invalid fs, so
2607          * we need to add back the dir index and then check to see if the isize
2608          * is still wrong.
2609          */
2610         while (stage < 3) {
2611                 stage++;
2612                 if (stage == 3 && !err)
2613                         break;
2614
2615                 cache = search_cache_extent(inode_cache, 0);
2616                 while (repair && cache) {
2617                         node = container_of(cache, struct ptr_node, cache);
2618                         rec = node->data;
2619                         cache = next_cache_extent(cache);
2620
2621                         /* Need to free everything up and rescan */
2622                         if (stage == 3) {
2623                                 remove_cache_extent(inode_cache, &node->cache);
2624                                 free(node);
2625                                 free_inode_rec(rec);
2626                                 continue;
2627                         }
2628
2629                         if (list_empty(&rec->backrefs))
2630                                 continue;
2631
2632                         ret = repair_inode_backrefs(root, rec, inode_cache,
2633                                                     stage == 1);
2634                         if (ret < 0) {
2635                                 err = ret;
2636                                 stage = 2;
2637                                 break;
2638                         } if (ret > 0) {
2639                                 err = -EAGAIN;
2640                         }
2641                 }
2642         }
2643         if (err)
2644                 return err;
2645
2646         rec = get_inode_rec(inode_cache, root_dirid, 0);
2647         BUG_ON(IS_ERR(rec));
2648         if (rec) {
2649                 ret = check_root_dir(rec);
2650                 if (ret) {
2651                         fprintf(stderr, "root %llu root dir %llu error\n",
2652                                 (unsigned long long)root->root_key.objectid,
2653                                 (unsigned long long)root_dirid);
2654                         print_inode_error(root, rec);
2655                         error++;
2656                 }
2657         } else {
2658                 if (repair) {
2659                         struct btrfs_trans_handle *trans;
2660
2661                         trans = btrfs_start_transaction(root, 1);
2662                         if (IS_ERR(trans)) {
2663                                 err = PTR_ERR(trans);
2664                                 return err;
2665                         }
2666
2667                         fprintf(stderr,
2668                                 "root %llu missing its root dir, recreating\n",
2669                                 (unsigned long long)root->objectid);
2670
2671                         ret = btrfs_make_root_dir(trans, root, root_dirid);
2672                         BUG_ON(ret);
2673
2674                         btrfs_commit_transaction(trans, root);
2675                         return -EAGAIN;
2676                 }
2677
2678                 fprintf(stderr, "root %llu root dir %llu not found\n",
2679                         (unsigned long long)root->root_key.objectid,
2680                         (unsigned long long)root_dirid);
2681         }
2682
2683         while (1) {
2684                 cache = search_cache_extent(inode_cache, 0);
2685                 if (!cache)
2686                         break;
2687                 node = container_of(cache, struct ptr_node, cache);
2688                 rec = node->data;
2689                 remove_cache_extent(inode_cache, &node->cache);
2690                 free(node);
2691                 if (rec->ino == root_dirid ||
2692                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
2693                         free_inode_rec(rec);
2694                         continue;
2695                 }
2696
2697                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2698                         ret = check_orphan_item(root, rec->ino);
2699                         if (ret == 0)
2700                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2701                         if (can_free_inode_rec(rec)) {
2702                                 free_inode_rec(rec);
2703                                 continue;
2704                         }
2705                 }
2706
2707                 if (!rec->found_inode_item)
2708                         rec->errors |= I_ERR_NO_INODE_ITEM;
2709                 if (rec->found_link != rec->nlink)
2710                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2711                 if (repair) {
2712                         ret = try_repair_inode(root, rec);
2713                         if (ret == 0 && can_free_inode_rec(rec)) {
2714                                 free_inode_rec(rec);
2715                                 continue;
2716                         }
2717                         ret = 0;
2718                 }
2719
2720                 if (!(repair && ret == 0))
2721                         error++;
2722                 print_inode_error(root, rec);
2723                 list_for_each_entry(backref, &rec->backrefs, list) {
2724                         if (!backref->found_dir_item)
2725                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
2726                         if (!backref->found_dir_index)
2727                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
2728                         if (!backref->found_inode_ref)
2729                                 backref->errors |= REF_ERR_NO_INODE_REF;
2730                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
2731                                 " namelen %u name %s filetype %d errors %x",
2732                                 (unsigned long long)backref->dir,
2733                                 (unsigned long long)backref->index,
2734                                 backref->namelen, backref->name,
2735                                 backref->filetype, backref->errors);
2736                         print_ref_error(backref->errors);
2737                 }
2738                 free_inode_rec(rec);
2739         }
2740         return (error > 0) ? -1 : 0;
2741 }
2742
2743 static struct root_record *get_root_rec(struct cache_tree *root_cache,
2744                                         u64 objectid)
2745 {
2746         struct cache_extent *cache;
2747         struct root_record *rec = NULL;
2748         int ret;
2749
2750         cache = lookup_cache_extent(root_cache, objectid, 1);
2751         if (cache) {
2752                 rec = container_of(cache, struct root_record, cache);
2753         } else {
2754                 rec = calloc(1, sizeof(*rec));
2755                 if (!rec)
2756                         return ERR_PTR(-ENOMEM);
2757                 rec->objectid = objectid;
2758                 INIT_LIST_HEAD(&rec->backrefs);
2759                 rec->cache.start = objectid;
2760                 rec->cache.size = 1;
2761
2762                 ret = insert_cache_extent(root_cache, &rec->cache);
2763                 if (ret)
2764                         return ERR_PTR(-EEXIST);
2765         }
2766         return rec;
2767 }
2768
2769 static struct root_backref *get_root_backref(struct root_record *rec,
2770                                              u64 ref_root, u64 dir, u64 index,
2771                                              const char *name, int namelen)
2772 {
2773         struct root_backref *backref;
2774
2775         list_for_each_entry(backref, &rec->backrefs, list) {
2776                 if (backref->ref_root != ref_root || backref->dir != dir ||
2777                     backref->namelen != namelen)
2778                         continue;
2779                 if (memcmp(name, backref->name, namelen))
2780                         continue;
2781                 return backref;
2782         }
2783
2784         backref = calloc(1, sizeof(*backref) + namelen + 1);
2785         if (!backref)
2786                 return NULL;
2787         backref->ref_root = ref_root;
2788         backref->dir = dir;
2789         backref->index = index;
2790         backref->namelen = namelen;
2791         memcpy(backref->name, name, namelen);
2792         backref->name[namelen] = '\0';
2793         list_add_tail(&backref->list, &rec->backrefs);
2794         return backref;
2795 }
2796
2797 static void free_root_record(struct cache_extent *cache)
2798 {
2799         struct root_record *rec;
2800         struct root_backref *backref;
2801
2802         rec = container_of(cache, struct root_record, cache);
2803         while (!list_empty(&rec->backrefs)) {
2804                 backref = to_root_backref(rec->backrefs.next);
2805                 list_del(&backref->list);
2806                 free(backref);
2807         }
2808
2809         free(rec);
2810 }
2811
2812 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
2813
2814 static int add_root_backref(struct cache_tree *root_cache,
2815                             u64 root_id, u64 ref_root, u64 dir, u64 index,
2816                             const char *name, int namelen,
2817                             int item_type, int errors)
2818 {
2819         struct root_record *rec;
2820         struct root_backref *backref;
2821
2822         rec = get_root_rec(root_cache, root_id);
2823         BUG_ON(IS_ERR(rec));
2824         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
2825         BUG_ON(!backref);
2826
2827         backref->errors |= errors;
2828
2829         if (item_type != BTRFS_DIR_ITEM_KEY) {
2830                 if (backref->found_dir_index || backref->found_back_ref ||
2831                     backref->found_forward_ref) {
2832                         if (backref->index != index)
2833                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
2834                 } else {
2835                         backref->index = index;
2836                 }
2837         }
2838
2839         if (item_type == BTRFS_DIR_ITEM_KEY) {
2840                 if (backref->found_forward_ref)
2841                         rec->found_ref++;
2842                 backref->found_dir_item = 1;
2843         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
2844                 backref->found_dir_index = 1;
2845         } else if (item_type == BTRFS_ROOT_REF_KEY) {
2846                 if (backref->found_forward_ref)
2847                         backref->errors |= REF_ERR_DUP_ROOT_REF;
2848                 else if (backref->found_dir_item)
2849                         rec->found_ref++;
2850                 backref->found_forward_ref = 1;
2851         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
2852                 if (backref->found_back_ref)
2853                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
2854                 backref->found_back_ref = 1;
2855         } else {
2856                 BUG_ON(1);
2857         }
2858
2859         if (backref->found_forward_ref && backref->found_dir_item)
2860                 backref->reachable = 1;
2861         return 0;
2862 }
2863
2864 static int merge_root_recs(struct btrfs_root *root,
2865                            struct cache_tree *src_cache,
2866                            struct cache_tree *dst_cache)
2867 {
2868         struct cache_extent *cache;
2869         struct ptr_node *node;
2870         struct inode_record *rec;
2871         struct inode_backref *backref;
2872         int ret = 0;
2873
2874         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
2875                 free_inode_recs_tree(src_cache);
2876                 return 0;
2877         }
2878
2879         while (1) {
2880                 cache = search_cache_extent(src_cache, 0);
2881                 if (!cache)
2882                         break;
2883                 node = container_of(cache, struct ptr_node, cache);
2884                 rec = node->data;
2885                 remove_cache_extent(src_cache, &node->cache);
2886                 free(node);
2887
2888                 ret = is_child_root(root, root->objectid, rec->ino);
2889                 if (ret < 0)
2890                         break;
2891                 else if (ret == 0)
2892                         goto skip;
2893
2894                 list_for_each_entry(backref, &rec->backrefs, list) {
2895                         BUG_ON(backref->found_inode_ref);
2896                         if (backref->found_dir_item)
2897                                 add_root_backref(dst_cache, rec->ino,
2898                                         root->root_key.objectid, backref->dir,
2899                                         backref->index, backref->name,
2900                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
2901                                         backref->errors);
2902                         if (backref->found_dir_index)
2903                                 add_root_backref(dst_cache, rec->ino,
2904                                         root->root_key.objectid, backref->dir,
2905                                         backref->index, backref->name,
2906                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
2907                                         backref->errors);
2908                 }
2909 skip:
2910                 free_inode_rec(rec);
2911         }
2912         if (ret < 0)
2913                 return ret;
2914         return 0;
2915 }
2916
2917 static int check_root_refs(struct btrfs_root *root,
2918                            struct cache_tree *root_cache)
2919 {
2920         struct root_record *rec;
2921         struct root_record *ref_root;
2922         struct root_backref *backref;
2923         struct cache_extent *cache;
2924         int loop = 1;
2925         int ret;
2926         int error;
2927         int errors = 0;
2928
2929         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
2930         BUG_ON(IS_ERR(rec));
2931         rec->found_ref = 1;
2932
2933         /* fixme: this can not detect circular references */
2934         while (loop) {
2935                 loop = 0;
2936                 cache = search_cache_extent(root_cache, 0);
2937                 while (1) {
2938                         if (!cache)
2939                                 break;
2940                         rec = container_of(cache, struct root_record, cache);
2941                         cache = next_cache_extent(cache);
2942
2943                         if (rec->found_ref == 0)
2944                                 continue;
2945
2946                         list_for_each_entry(backref, &rec->backrefs, list) {
2947                                 if (!backref->reachable)
2948                                         continue;
2949
2950                                 ref_root = get_root_rec(root_cache,
2951                                                         backref->ref_root);
2952                                 BUG_ON(IS_ERR(ref_root));
2953                                 if (ref_root->found_ref > 0)
2954                                         continue;
2955
2956                                 backref->reachable = 0;
2957                                 rec->found_ref--;
2958                                 if (rec->found_ref == 0)
2959                                         loop = 1;
2960                         }
2961                 }
2962         }
2963
2964         cache = search_cache_extent(root_cache, 0);
2965         while (1) {
2966                 if (!cache)
2967                         break;
2968                 rec = container_of(cache, struct root_record, cache);
2969                 cache = next_cache_extent(cache);
2970
2971                 if (rec->found_ref == 0 &&
2972                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
2973                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
2974                         ret = check_orphan_item(root->fs_info->tree_root,
2975                                                 rec->objectid);
2976                         if (ret == 0)
2977                                 continue;
2978
2979                         /*
2980                          * If we don't have a root item then we likely just have
2981                          * a dir item in a snapshot for this root but no actual
2982                          * ref key or anything so it's meaningless.
2983                          */
2984                         if (!rec->found_root_item)
2985                                 continue;
2986                         errors++;
2987                         fprintf(stderr, "fs tree %llu not referenced\n",
2988                                 (unsigned long long)rec->objectid);
2989                 }
2990
2991                 error = 0;
2992                 if (rec->found_ref > 0 && !rec->found_root_item)
2993                         error = 1;
2994                 list_for_each_entry(backref, &rec->backrefs, list) {
2995                         if (!backref->found_dir_item)
2996                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
2997                         if (!backref->found_dir_index)
2998                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
2999                         if (!backref->found_back_ref)
3000                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3001                         if (!backref->found_forward_ref)
3002                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3003                         if (backref->reachable && backref->errors)
3004                                 error = 1;
3005                 }
3006                 if (!error)
3007                         continue;
3008
3009                 errors++;
3010                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3011                         (unsigned long long)rec->objectid, rec->found_ref,
3012                          rec->found_root_item ? "" : "not found");
3013
3014                 list_for_each_entry(backref, &rec->backrefs, list) {
3015                         if (!backref->reachable)
3016                                 continue;
3017                         if (!backref->errors && rec->found_root_item)
3018                                 continue;
3019                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3020                                 " index %llu namelen %u name %s errors %x\n",
3021                                 (unsigned long long)backref->ref_root,
3022                                 (unsigned long long)backref->dir,
3023                                 (unsigned long long)backref->index,
3024                                 backref->namelen, backref->name,
3025                                 backref->errors);
3026                         print_ref_error(backref->errors);
3027                 }
3028         }
3029         return errors > 0 ? 1 : 0;
3030 }
3031
3032 static int process_root_ref(struct extent_buffer *eb, int slot,
3033                             struct btrfs_key *key,
3034                             struct cache_tree *root_cache)
3035 {
3036         u64 dirid;
3037         u64 index;
3038         u32 len;
3039         u32 name_len;
3040         struct btrfs_root_ref *ref;
3041         char namebuf[BTRFS_NAME_LEN];
3042         int error;
3043
3044         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3045
3046         dirid = btrfs_root_ref_dirid(eb, ref);
3047         index = btrfs_root_ref_sequence(eb, ref);
3048         name_len = btrfs_root_ref_name_len(eb, ref);
3049
3050         if (name_len <= BTRFS_NAME_LEN) {
3051                 len = name_len;
3052                 error = 0;
3053         } else {
3054                 len = BTRFS_NAME_LEN;
3055                 error = REF_ERR_NAME_TOO_LONG;
3056         }
3057         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3058
3059         if (key->type == BTRFS_ROOT_REF_KEY) {
3060                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3061                                  index, namebuf, len, key->type, error);
3062         } else {
3063                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3064                                  index, namebuf, len, key->type, error);
3065         }
3066         return 0;
3067 }
3068
3069 static void free_corrupt_block(struct cache_extent *cache)
3070 {
3071         struct btrfs_corrupt_block *corrupt;
3072
3073         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3074         free(corrupt);
3075 }
3076
3077 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3078
3079 /*
3080  * Repair the btree of the given root.
3081  *
3082  * The fix is to remove the node key in corrupt_blocks cache_tree.
3083  * and rebalance the tree.
3084  * After the fix, the btree should be writeable.
3085  */
3086 static int repair_btree(struct btrfs_root *root,
3087                         struct cache_tree *corrupt_blocks)
3088 {
3089         struct btrfs_trans_handle *trans;
3090         struct btrfs_path path;
3091         struct btrfs_corrupt_block *corrupt;
3092         struct cache_extent *cache;
3093         struct btrfs_key key;
3094         u64 offset;
3095         int level;
3096         int ret = 0;
3097
3098         if (cache_tree_empty(corrupt_blocks))
3099                 return 0;
3100
3101         trans = btrfs_start_transaction(root, 1);
3102         if (IS_ERR(trans)) {
3103                 ret = PTR_ERR(trans);
3104                 fprintf(stderr, "Error starting transaction: %s\n",
3105                         strerror(-ret));
3106                 return ret;
3107         }
3108         btrfs_init_path(&path);
3109         cache = first_cache_extent(corrupt_blocks);
3110         while (cache) {
3111                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3112                                        cache);
3113                 level = corrupt->level;
3114                 path.lowest_level = level;
3115                 key.objectid = corrupt->key.objectid;
3116                 key.type = corrupt->key.type;
3117                 key.offset = corrupt->key.offset;
3118
3119                 /*
3120                  * Here we don't want to do any tree balance, since it may
3121                  * cause a balance with corrupted brother leaf/node,
3122                  * so ins_len set to 0 here.
3123                  * Balance will be done after all corrupt node/leaf is deleted.
3124                  */
3125                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3126                 if (ret < 0)
3127                         goto out;
3128                 offset = btrfs_node_blockptr(path.nodes[level],
3129                                              path.slots[level]);
3130
3131                 /* Remove the ptr */
3132                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3133                 if (ret < 0)
3134                         goto out;
3135                 /*
3136                  * Remove the corresponding extent
3137                  * return value is not concerned.
3138                  */
3139                 btrfs_release_path(&path);
3140                 ret = btrfs_free_extent(trans, root, offset,
3141                                 root->fs_info->nodesize, 0,
3142                                 root->root_key.objectid, level - 1, 0);
3143                 cache = next_cache_extent(cache);
3144         }
3145
3146         /* Balance the btree using btrfs_search_slot() */
3147         cache = first_cache_extent(corrupt_blocks);
3148         while (cache) {
3149                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3150                                        cache);
3151                 memcpy(&key, &corrupt->key, sizeof(key));
3152                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3153                 if (ret < 0)
3154                         goto out;
3155                 /* return will always >0 since it won't find the item */
3156                 ret = 0;
3157                 btrfs_release_path(&path);
3158                 cache = next_cache_extent(cache);
3159         }
3160 out:
3161         btrfs_commit_transaction(trans, root);
3162         btrfs_release_path(&path);
3163         return ret;
3164 }
3165
3166 static int check_fs_root(struct btrfs_root *root,
3167                          struct cache_tree *root_cache,
3168                          struct walk_control *wc)
3169 {
3170         int ret = 0;
3171         int err = 0;
3172         int wret;
3173         int level;
3174         struct btrfs_path path;
3175         struct shared_node root_node;
3176         struct root_record *rec;
3177         struct btrfs_root_item *root_item = &root->root_item;
3178         struct cache_tree corrupt_blocks;
3179         struct orphan_data_extent *orphan;
3180         struct orphan_data_extent *tmp;
3181         enum btrfs_tree_block_status status;
3182         struct node_refs nrefs;
3183
3184         /*
3185          * Reuse the corrupt_block cache tree to record corrupted tree block
3186          *
3187          * Unlike the usage in extent tree check, here we do it in a per
3188          * fs/subvol tree base.
3189          */
3190         cache_tree_init(&corrupt_blocks);
3191         root->fs_info->corrupt_blocks = &corrupt_blocks;
3192
3193         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3194                 rec = get_root_rec(root_cache, root->root_key.objectid);
3195                 BUG_ON(IS_ERR(rec));
3196                 if (btrfs_root_refs(root_item) > 0)
3197                         rec->found_root_item = 1;
3198         }
3199
3200         btrfs_init_path(&path);
3201         memset(&root_node, 0, sizeof(root_node));
3202         cache_tree_init(&root_node.root_cache);
3203         cache_tree_init(&root_node.inode_cache);
3204         memset(&nrefs, 0, sizeof(nrefs));
3205
3206         /* Move the orphan extent record to corresponding inode_record */
3207         list_for_each_entry_safe(orphan, tmp,
3208                                  &root->orphan_data_extents, list) {
3209                 struct inode_record *inode;
3210
3211                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3212                                       1);
3213                 BUG_ON(IS_ERR(inode));
3214                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3215                 list_move(&orphan->list, &inode->orphan_extents);
3216         }
3217
3218         level = btrfs_header_level(root->node);
3219         memset(wc->nodes, 0, sizeof(wc->nodes));
3220         wc->nodes[level] = &root_node;
3221         wc->active_node = level;
3222         wc->root_level = level;
3223
3224         /* We may not have checked the root block, lets do that now */
3225         if (btrfs_is_leaf(root->node))
3226                 status = btrfs_check_leaf(root, NULL, root->node);
3227         else
3228                 status = btrfs_check_node(root, NULL, root->node);
3229         if (status != BTRFS_TREE_BLOCK_CLEAN)
3230                 return -EIO;
3231
3232         if (btrfs_root_refs(root_item) > 0 ||
3233             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3234                 path.nodes[level] = root->node;
3235                 extent_buffer_get(root->node);
3236                 path.slots[level] = 0;
3237         } else {
3238                 struct btrfs_key key;
3239                 struct btrfs_disk_key found_key;
3240
3241                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3242                 level = root_item->drop_level;
3243                 path.lowest_level = level;
3244                 if (level > btrfs_header_level(root->node) ||
3245                     level >= BTRFS_MAX_LEVEL) {
3246                         error("ignoring invalid drop level: %u", level);
3247                         goto skip_walking;
3248                 }
3249                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3250                 if (wret < 0)
3251                         goto skip_walking;
3252                 btrfs_node_key(path.nodes[level], &found_key,
3253                                 path.slots[level]);
3254                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3255                                         sizeof(found_key)));
3256         }
3257
3258         while (1) {
3259                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3260                 if (wret < 0)
3261                         ret = wret;
3262                 if (wret != 0)
3263                         break;
3264
3265                 wret = walk_up_tree(root, &path, wc, &level);
3266                 if (wret < 0)
3267                         ret = wret;
3268                 if (wret != 0)
3269                         break;
3270         }
3271 skip_walking:
3272         btrfs_release_path(&path);
3273
3274         if (!cache_tree_empty(&corrupt_blocks)) {
3275                 struct cache_extent *cache;
3276                 struct btrfs_corrupt_block *corrupt;
3277
3278                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3279                        root->root_key.objectid);
3280                 cache = first_cache_extent(&corrupt_blocks);
3281                 while (cache) {
3282                         corrupt = container_of(cache,
3283                                                struct btrfs_corrupt_block,
3284                                                cache);
3285                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3286                                cache->start, corrupt->level,
3287                                corrupt->key.objectid, corrupt->key.type,
3288                                corrupt->key.offset);
3289                         cache = next_cache_extent(cache);
3290                 }
3291                 if (repair) {
3292                         printf("Try to repair the btree for root %llu\n",
3293                                root->root_key.objectid);
3294                         ret = repair_btree(root, &corrupt_blocks);
3295                         if (ret < 0)
3296                                 fprintf(stderr, "Failed to repair btree: %s\n",
3297                                         strerror(-ret));
3298                         if (!ret)
3299                                 printf("Btree for root %llu is fixed\n",
3300                                        root->root_key.objectid);
3301                 }
3302         }
3303
3304         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3305         if (err < 0)
3306                 ret = err;
3307
3308         if (root_node.current) {
3309                 root_node.current->checked = 1;
3310                 maybe_free_inode_rec(&root_node.inode_cache,
3311                                 root_node.current);
3312         }
3313
3314         err = check_inode_recs(root, &root_node.inode_cache);
3315         if (!ret)
3316                 ret = err;
3317
3318         free_corrupt_blocks_tree(&corrupt_blocks);
3319         root->fs_info->corrupt_blocks = NULL;
3320         free_orphan_data_extents(&root->orphan_data_extents);
3321         return ret;
3322 }
3323
3324 static int check_fs_roots(struct btrfs_fs_info *fs_info,
3325                           struct cache_tree *root_cache)
3326 {
3327         struct btrfs_path path;
3328         struct btrfs_key key;
3329         struct walk_control wc;
3330         struct extent_buffer *leaf, *tree_node;
3331         struct btrfs_root *tmp_root;
3332         struct btrfs_root *tree_root = fs_info->tree_root;
3333         int ret;
3334         int err = 0;
3335
3336         if (ctx.progress_enabled) {
3337                 ctx.tp = TASK_FS_ROOTS;
3338                 task_start(ctx.info);
3339         }
3340
3341         /*
3342          * Just in case we made any changes to the extent tree that weren't
3343          * reflected into the free space cache yet.
3344          */
3345         if (repair)
3346                 reset_cached_block_groups(fs_info);
3347         memset(&wc, 0, sizeof(wc));
3348         cache_tree_init(&wc.shared);
3349         btrfs_init_path(&path);
3350
3351 again:
3352         key.offset = 0;
3353         key.objectid = 0;
3354         key.type = BTRFS_ROOT_ITEM_KEY;
3355         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3356         if (ret < 0) {
3357                 err = 1;
3358                 goto out;
3359         }
3360         tree_node = tree_root->node;
3361         while (1) {
3362                 if (tree_node != tree_root->node) {
3363                         free_root_recs_tree(root_cache);
3364                         btrfs_release_path(&path);
3365                         goto again;
3366                 }
3367                 leaf = path.nodes[0];
3368                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3369                         ret = btrfs_next_leaf(tree_root, &path);
3370                         if (ret) {
3371                                 if (ret < 0)
3372                                         err = 1;
3373                                 break;
3374                         }
3375                         leaf = path.nodes[0];
3376                 }
3377                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3378                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3379                     fs_root_objectid(key.objectid)) {
3380                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3381                                 tmp_root = btrfs_read_fs_root_no_cache(
3382                                                 fs_info, &key);
3383                         } else {
3384                                 key.offset = (u64)-1;
3385                                 tmp_root = btrfs_read_fs_root(
3386                                                 fs_info, &key);
3387                         }
3388                         if (IS_ERR(tmp_root)) {
3389                                 err = 1;
3390                                 goto next;
3391                         }
3392                         ret = check_fs_root(tmp_root, root_cache, &wc);
3393                         if (ret == -EAGAIN) {
3394                                 free_root_recs_tree(root_cache);
3395                                 btrfs_release_path(&path);
3396                                 goto again;
3397                         }
3398                         if (ret)
3399                                 err = 1;
3400                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3401                                 btrfs_free_fs_root(tmp_root);
3402                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3403                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3404                         process_root_ref(leaf, path.slots[0], &key,
3405                                          root_cache);
3406                 }
3407 next:
3408                 path.slots[0]++;
3409         }
3410 out:
3411         btrfs_release_path(&path);
3412         if (err)
3413                 free_extent_cache_tree(&wc.shared);
3414         if (!cache_tree_empty(&wc.shared))
3415                 fprintf(stderr, "warning line %d\n", __LINE__);
3416
3417         task_stop(ctx.info);
3418
3419         return err;
3420 }
3421
3422 static struct tree_backref *find_tree_backref(struct extent_record *rec,
3423                                                 u64 parent, u64 root)
3424 {
3425         struct rb_node *node;
3426         struct tree_backref *back = NULL;
3427         struct tree_backref match = {
3428                 .node = {
3429                         .is_data = 0,
3430                 },
3431         };
3432
3433         if (parent) {
3434                 match.parent = parent;
3435                 match.node.full_backref = 1;
3436         } else {
3437                 match.root = root;
3438         }
3439
3440         node = rb_search(&rec->backref_tree, &match.node.node,
3441                          (rb_compare_keys)compare_extent_backref, NULL);
3442         if (node)
3443                 back = to_tree_backref(rb_node_to_extent_backref(node));
3444
3445         return back;
3446 }
3447
3448 static struct data_backref *find_data_backref(struct extent_record *rec,
3449                                                 u64 parent, u64 root,
3450                                                 u64 owner, u64 offset,
3451                                                 int found_ref,
3452                                                 u64 disk_bytenr, u64 bytes)
3453 {
3454         struct rb_node *node;
3455         struct data_backref *back = NULL;
3456         struct data_backref match = {
3457                 .node = {
3458                         .is_data = 1,
3459                 },
3460                 .owner = owner,
3461                 .offset = offset,
3462                 .bytes = bytes,
3463                 .found_ref = found_ref,
3464                 .disk_bytenr = disk_bytenr,
3465         };
3466
3467         if (parent) {
3468                 match.parent = parent;
3469                 match.node.full_backref = 1;
3470         } else {
3471                 match.root = root;
3472         }
3473
3474         node = rb_search(&rec->backref_tree, &match.node.node,
3475                          (rb_compare_keys)compare_extent_backref, NULL);
3476         if (node)
3477                 back = to_data_backref(rb_node_to_extent_backref(node));
3478
3479         return back;
3480 }
3481
3482 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
3483                           struct cache_tree *root_cache)
3484 {
3485         int ret;
3486
3487         if (!ctx.progress_enabled)
3488                 fprintf(stderr, "checking fs roots\n");
3489         if (check_mode == CHECK_MODE_LOWMEM)
3490                 ret = check_fs_roots_lowmem(fs_info);
3491         else
3492                 ret = check_fs_roots(fs_info, root_cache);
3493
3494         return ret;
3495 }
3496
3497 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3498 {
3499         struct extent_backref *back, *tmp;
3500         struct tree_backref *tback;
3501         struct data_backref *dback;
3502         u64 found = 0;
3503         int err = 0;
3504
3505         rbtree_postorder_for_each_entry_safe(back, tmp,
3506                                              &rec->backref_tree, node) {
3507                 if (!back->found_extent_tree) {
3508                         err = 1;
3509                         if (!print_errs)
3510                                 goto out;
3511                         if (back->is_data) {
3512                                 dback = to_data_backref(back);
3513                                 fprintf(stderr,
3514 "data backref %llu %s %llu owner %llu offset %llu num_refs %lu not found in extent tree\n",
3515                                         (unsigned long long)rec->start,
3516                                         back->full_backref ?
3517                                         "parent" : "root",
3518                                         back->full_backref ?
3519                                         (unsigned long long)dback->parent :
3520                                         (unsigned long long)dback->root,
3521                                         (unsigned long long)dback->owner,
3522                                         (unsigned long long)dback->offset,
3523                                         (unsigned long)dback->num_refs);
3524                         } else {
3525                                 tback = to_tree_backref(back);
3526                                 fprintf(stderr,
3527 "tree backref %llu parent %llu root %llu not found in extent tree\n",
3528                                         (unsigned long long)rec->start,
3529                                         (unsigned long long)tback->parent,
3530                                         (unsigned long long)tback->root);
3531                         }
3532                 }
3533                 if (!back->is_data && !back->found_ref) {
3534                         err = 1;
3535                         if (!print_errs)
3536                                 goto out;
3537                         tback = to_tree_backref(back);
3538                         fprintf(stderr,
3539                                 "backref %llu %s %llu not referenced back %p\n",
3540                                 (unsigned long long)rec->start,
3541                                 back->full_backref ? "parent" : "root",
3542                                 back->full_backref ?
3543                                 (unsigned long long)tback->parent :
3544                                 (unsigned long long)tback->root, back);
3545                 }
3546                 if (back->is_data) {
3547                         dback = to_data_backref(back);
3548                         if (dback->found_ref != dback->num_refs) {
3549                                 err = 1;
3550                                 if (!print_errs)
3551                                         goto out;
3552                                 fprintf(stderr,
3553 "incorrect local backref count on %llu %s %llu owner %llu offset %llu found %u wanted %u back %p\n",
3554                                         (unsigned long long)rec->start,
3555                                         back->full_backref ?
3556                                         "parent" : "root",
3557                                         back->full_backref ?
3558                                         (unsigned long long)dback->parent :
3559                                         (unsigned long long)dback->root,
3560                                         (unsigned long long)dback->owner,
3561                                         (unsigned long long)dback->offset,
3562                                         dback->found_ref, dback->num_refs,
3563                                         back);
3564                         }
3565                         if (dback->disk_bytenr != rec->start) {
3566                                 err = 1;
3567                                 if (!print_errs)
3568                                         goto out;
3569                                 fprintf(stderr,
3570 "backref disk bytenr does not match extent record, bytenr=%llu, ref bytenr=%llu\n",
3571                                         (unsigned long long)rec->start,
3572                                         (unsigned long long)dback->disk_bytenr);
3573                         }
3574
3575                         if (dback->bytes != rec->nr) {
3576                                 err = 1;
3577                                 if (!print_errs)
3578                                         goto out;
3579                                 fprintf(stderr,
3580 "backref bytes do not match extent backref, bytenr=%llu, ref bytes=%llu, backref bytes=%llu\n",
3581                                         (unsigned long long)rec->start,
3582                                         (unsigned long long)rec->nr,
3583                                         (unsigned long long)dback->bytes);
3584                         }
3585                 }
3586                 if (!back->is_data) {
3587                         found += 1;
3588                 } else {
3589                         dback = to_data_backref(back);
3590                         found += dback->found_ref;
3591                 }
3592         }
3593         if (found != rec->refs) {
3594                 err = 1;
3595                 if (!print_errs)
3596                         goto out;
3597                 fprintf(stderr,
3598         "incorrect global backref count on %llu found %llu wanted %llu\n",
3599                         (unsigned long long)rec->start,
3600                         (unsigned long long)found,
3601                         (unsigned long long)rec->refs);
3602         }
3603 out:
3604         return err;
3605 }
3606
/* rb_free_nodes() callback: free the extent_backref that embeds @node. */
static void __free_one_backref(struct rb_node *node)
{
	free(rb_node_to_extent_backref(node));
}
3613
/* Free every backref stored in the backref tree of @rec. */
static void free_all_extent_backrefs(struct extent_record *rec)
{
	rb_free_nodes(&rec->backref_tree, __free_one_backref);
}
3618
3619 static void free_extent_record_cache(struct cache_tree *extent_cache)
3620 {
3621         struct cache_extent *cache;
3622         struct extent_record *rec;
3623
3624         while (1) {
3625                 cache = first_cache_extent(extent_cache);
3626                 if (!cache)
3627                         break;
3628                 rec = container_of(cache, struct extent_record, cache);
3629                 remove_cache_extent(extent_cache, cache);
3630                 free_all_extent_backrefs(rec);
3631                 free(rec);
3632         }
3633 }
3634
3635 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3636                                  struct extent_record *rec)
3637 {
3638         if (rec->content_checked && rec->owner_ref_checked &&
3639             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3640             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3641             !rec->bad_full_backref && !rec->crossing_stripes &&
3642             !rec->wrong_chunk_type) {
3643                 remove_cache_extent(extent_cache, &rec->cache);
3644                 free_all_extent_backrefs(rec);
3645                 list_del_init(&rec->list);
3646                 free(rec);
3647         }
3648         return 0;
3649 }
3650
3651 static int check_owner_ref(struct btrfs_root *root,
3652                             struct extent_record *rec,
3653                             struct extent_buffer *buf)
3654 {
3655         struct extent_backref *node, *tmp;
3656         struct tree_backref *back;
3657         struct btrfs_root *ref_root;
3658         struct btrfs_key key;
3659         struct btrfs_path path;
3660         struct extent_buffer *parent;
3661         int level;
3662         int found = 0;
3663         int ret;
3664
3665         rbtree_postorder_for_each_entry_safe(node, tmp,
3666                                              &rec->backref_tree, node) {
3667                 if (node->is_data)
3668                         continue;
3669                 if (!node->found_ref)
3670                         continue;
3671                 if (node->full_backref)
3672                         continue;
3673                 back = to_tree_backref(node);
3674                 if (btrfs_header_owner(buf) == back->root)
3675                         return 0;
3676         }
3677         BUG_ON(rec->is_root);
3678
3679         /* try to find the block by search corresponding fs tree */
3680         key.objectid = btrfs_header_owner(buf);
3681         key.type = BTRFS_ROOT_ITEM_KEY;
3682         key.offset = (u64)-1;
3683
3684         ref_root = btrfs_read_fs_root(root->fs_info, &key);
3685         if (IS_ERR(ref_root))
3686                 return 1;
3687
3688         level = btrfs_header_level(buf);
3689         if (level == 0)
3690                 btrfs_item_key_to_cpu(buf, &key, 0);
3691         else
3692                 btrfs_node_key_to_cpu(buf, &key, 0);
3693
3694         btrfs_init_path(&path);
3695         path.lowest_level = level + 1;
3696         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3697         if (ret < 0)
3698                 return 0;
3699
3700         parent = path.nodes[level + 1];
3701         if (parent && buf->start == btrfs_node_blockptr(parent,
3702                                                         path.slots[level + 1]))
3703                 found = 1;
3704
3705         btrfs_release_path(&path);
3706         return found ? 0 : 1;
3707 }
3708
3709 static int is_extent_tree_record(struct extent_record *rec)
3710 {
3711         struct extent_backref *node, *tmp;
3712         struct tree_backref *back;
3713         int is_extent = 0;
3714
3715         rbtree_postorder_for_each_entry_safe(node, tmp,
3716                                              &rec->backref_tree, node) {
3717                 if (node->is_data)
3718                         return 0;
3719                 back = to_tree_backref(node);
3720                 if (node->full_backref)
3721                         return 0;
3722                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3723                         is_extent = 1;
3724         }
3725         return is_extent;
3726 }
3727
3728
3729 static int record_bad_block_io(struct btrfs_fs_info *info,
3730                                struct cache_tree *extent_cache,
3731                                u64 start, u64 len)
3732 {
3733         struct extent_record *rec;
3734         struct cache_extent *cache;
3735         struct btrfs_key key;
3736
3737         cache = lookup_cache_extent(extent_cache, start, len);
3738         if (!cache)
3739                 return 0;
3740
3741         rec = container_of(cache, struct extent_record, cache);
3742         if (!is_extent_tree_record(rec))
3743                 return 0;
3744
3745         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3746         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
3747 }
3748
3749 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3750                        struct extent_buffer *buf, int slot)
3751 {
3752         if (btrfs_header_level(buf)) {
3753                 struct btrfs_key_ptr ptr1, ptr2;
3754
3755                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3756                                    sizeof(struct btrfs_key_ptr));
3757                 read_extent_buffer(buf, &ptr2,
3758                                    btrfs_node_key_ptr_offset(slot + 1),
3759                                    sizeof(struct btrfs_key_ptr));
3760                 write_extent_buffer(buf, &ptr1,
3761                                     btrfs_node_key_ptr_offset(slot + 1),
3762                                     sizeof(struct btrfs_key_ptr));
3763                 write_extent_buffer(buf, &ptr2,
3764                                     btrfs_node_key_ptr_offset(slot),
3765                                     sizeof(struct btrfs_key_ptr));
3766                 if (slot == 0) {
3767                         struct btrfs_disk_key key;
3768
3769                         btrfs_node_key(buf, &key, 0);
3770                         btrfs_fixup_low_keys(root, path, &key,
3771                                              btrfs_header_level(buf) + 1);
3772                 }
3773         } else {
3774                 struct btrfs_item *item1, *item2;
3775                 struct btrfs_key k1, k2;
3776                 char *item1_data, *item2_data;
3777                 u32 item1_offset, item2_offset, item1_size, item2_size;
3778
3779                 item1 = btrfs_item_nr(slot);
3780                 item2 = btrfs_item_nr(slot + 1);
3781                 btrfs_item_key_to_cpu(buf, &k1, slot);
3782                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
3783                 item1_offset = btrfs_item_offset(buf, item1);
3784                 item2_offset = btrfs_item_offset(buf, item2);
3785                 item1_size = btrfs_item_size(buf, item1);
3786                 item2_size = btrfs_item_size(buf, item2);
3787
3788                 item1_data = malloc(item1_size);
3789                 if (!item1_data)
3790                         return -ENOMEM;
3791                 item2_data = malloc(item2_size);
3792                 if (!item2_data) {
3793                         free(item1_data);
3794                         return -ENOMEM;
3795                 }
3796
3797                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
3798                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
3799
3800                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
3801                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
3802                 free(item1_data);
3803                 free(item2_data);
3804
3805                 btrfs_set_item_offset(buf, item1, item2_offset);
3806                 btrfs_set_item_offset(buf, item2, item1_offset);
3807                 btrfs_set_item_size(buf, item1, item2_size);
3808                 btrfs_set_item_size(buf, item2, item1_size);
3809
3810                 path->slots[0] = slot;
3811                 btrfs_set_item_key_unsafe(root, path, &k2);
3812                 path->slots[0] = slot + 1;
3813                 btrfs_set_item_key_unsafe(root, path, &k1);
3814         }
3815         return 0;
3816 }
3817
3818 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
3819 {
3820         struct extent_buffer *buf;
3821         struct btrfs_key k1, k2;
3822         int i;
3823         int level = path->lowest_level;
3824         int ret = -EIO;
3825
3826         buf = path->nodes[level];
3827         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
3828                 if (level) {
3829                         btrfs_node_key_to_cpu(buf, &k1, i);
3830                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
3831                 } else {
3832                         btrfs_item_key_to_cpu(buf, &k1, i);
3833                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
3834                 }
3835                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
3836                         continue;
3837                 ret = swap_values(root, path, buf, i);
3838                 if (ret)
3839                         break;
3840                 btrfs_mark_buffer_dirty(buf);
3841                 i = 0;
3842         }
3843         return ret;
3844 }
3845
3846 static int delete_bogus_item(struct btrfs_root *root,
3847                              struct btrfs_path *path,
3848                              struct extent_buffer *buf, int slot)
3849 {
3850         struct btrfs_key key;
3851         int nritems = btrfs_header_nritems(buf);
3852
3853         btrfs_item_key_to_cpu(buf, &key, slot);
3854
3855         /* These are all the keys we can deal with missing. */
3856         if (key.type != BTRFS_DIR_INDEX_KEY &&
3857             key.type != BTRFS_EXTENT_ITEM_KEY &&
3858             key.type != BTRFS_METADATA_ITEM_KEY &&
3859             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
3860             key.type != BTRFS_EXTENT_DATA_REF_KEY)
3861                 return -1;
3862
3863         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
3864                (unsigned long long)key.objectid, key.type,
3865                (unsigned long long)key.offset, slot, buf->start);
3866         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
3867                               btrfs_item_nr_offset(slot + 1),
3868                               sizeof(struct btrfs_item) *
3869                               (nritems - slot - 1));
3870         btrfs_set_header_nritems(buf, nritems - 1);
3871         if (slot == 0) {
3872                 struct btrfs_disk_key disk_key;
3873
3874                 btrfs_item_key(buf, &disk_key, 0);
3875                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
3876         }
3877         btrfs_mark_buffer_dirty(buf);
3878         return 0;
3879 }
3880
3881 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
3882 {
3883         struct extent_buffer *buf;
3884         int i;
3885         int ret = 0;
3886
3887         /* We should only get this for leaves */
3888         BUG_ON(path->lowest_level);
3889         buf = path->nodes[0];
3890 again:
3891         for (i = 0; i < btrfs_header_nritems(buf); i++) {
3892                 unsigned int shift = 0, offset;
3893
3894                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
3895                     BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
3896                         if (btrfs_item_end_nr(buf, i) >
3897                             BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
3898                                 ret = delete_bogus_item(root, path, buf, i);
3899                                 if (!ret)
3900                                         goto again;
3901                                 fprintf(stderr,
3902                                 "item is off the end of the leaf, can't fix\n");
3903                                 ret = -EIO;
3904                                 break;
3905                         }
3906                         shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
3907                                 btrfs_item_end_nr(buf, i);
3908                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
3909                            btrfs_item_offset_nr(buf, i - 1)) {
3910                         if (btrfs_item_end_nr(buf, i) >
3911                             btrfs_item_offset_nr(buf, i - 1)) {
3912                                 ret = delete_bogus_item(root, path, buf, i);
3913                                 if (!ret)
3914                                         goto again;
3915                                 fprintf(stderr, "items overlap, can't fix\n");
3916                                 ret = -EIO;
3917                                 break;
3918                         }
3919                         shift = btrfs_item_offset_nr(buf, i - 1) -
3920                                 btrfs_item_end_nr(buf, i);
3921                 }
3922                 if (!shift)
3923                         continue;
3924
3925                 printf("Shifting item nr %d by %u bytes in block %llu\n",
3926                        i, shift, (unsigned long long)buf->start);
3927                 offset = btrfs_item_offset_nr(buf, i);
3928                 memmove_extent_buffer(buf,
3929                                       btrfs_leaf_data(buf) + offset + shift,
3930                                       btrfs_leaf_data(buf) + offset,
3931                                       btrfs_item_size_nr(buf, i));
3932                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
3933                                       offset + shift);
3934                 btrfs_mark_buffer_dirty(buf);
3935         }
3936
3937         /*
3938          * We may have moved things, in which case we want to exit so we don't
3939          * write those changes out.  Once we have proper abort functionality in
3940          * progs this can be changed to something nicer.
3941          */
3942         BUG_ON(ret);
3943         return ret;
3944 }
3945
3946 /*
3947  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
3948  * then just return -EIO.
3949  */
3950 static int try_to_fix_bad_block(struct btrfs_root *root,
3951                                 struct extent_buffer *buf,
3952                                 enum btrfs_tree_block_status status)
3953 {
3954         struct btrfs_trans_handle *trans;
3955         struct ulist *roots;
3956         struct ulist_node *node;
3957         struct btrfs_root *search_root;
3958         struct btrfs_path path;
3959         struct ulist_iterator iter;
3960         struct btrfs_key root_key, key;
3961         int ret;
3962
3963         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
3964             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
3965                 return -EIO;
3966
3967         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
3968         if (ret)
3969                 return -EIO;
3970
3971         btrfs_init_path(&path);
3972         ULIST_ITER_INIT(&iter);
3973         while ((node = ulist_next(roots, &iter))) {
3974                 root_key.objectid = node->val;
3975                 root_key.type = BTRFS_ROOT_ITEM_KEY;
3976                 root_key.offset = (u64)-1;
3977
3978                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
3979                 if (IS_ERR(root)) {
3980                         ret = -EIO;
3981                         break;
3982                 }
3983
3984
3985                 trans = btrfs_start_transaction(search_root, 0);
3986                 if (IS_ERR(trans)) {
3987                         ret = PTR_ERR(trans);
3988                         break;
3989                 }
3990
3991                 path.lowest_level = btrfs_header_level(buf);
3992                 path.skip_check_block = 1;
3993                 if (path.lowest_level)
3994                         btrfs_node_key_to_cpu(buf, &key, 0);
3995                 else
3996                         btrfs_item_key_to_cpu(buf, &key, 0);
3997                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
3998                 if (ret) {
3999                         ret = -EIO;
4000                         btrfs_commit_transaction(trans, search_root);
4001                         break;
4002                 }
4003                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4004                         ret = fix_key_order(search_root, &path);
4005                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4006                         ret = fix_item_offset(search_root, &path);
4007                 if (ret) {
4008                         btrfs_commit_transaction(trans, search_root);
4009                         break;
4010                 }
4011                 btrfs_release_path(&path);
4012                 btrfs_commit_transaction(trans, search_root);
4013         }
4014         ulist_free(roots);
4015         btrfs_release_path(&path);
4016         return ret;
4017 }
4018
4019 static int check_block(struct btrfs_root *root,
4020                        struct cache_tree *extent_cache,
4021                        struct extent_buffer *buf, u64 flags)
4022 {
4023         struct extent_record *rec;
4024         struct cache_extent *cache;
4025         struct btrfs_key key;
4026         enum btrfs_tree_block_status status;
4027         int ret = 0;
4028         int level;
4029
4030         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4031         if (!cache)
4032                 return 1;
4033         rec = container_of(cache, struct extent_record, cache);
4034         rec->generation = btrfs_header_generation(buf);
4035
4036         level = btrfs_header_level(buf);
4037         if (btrfs_header_nritems(buf) > 0) {
4038
4039                 if (level == 0)
4040                         btrfs_item_key_to_cpu(buf, &key, 0);
4041                 else
4042                         btrfs_node_key_to_cpu(buf, &key, 0);
4043
4044                 rec->info_objectid = key.objectid;
4045         }
4046         rec->info_level = level;
4047
4048         if (btrfs_is_leaf(buf))
4049                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4050         else
4051                 status = btrfs_check_node(root, &rec->parent_key, buf);
4052
4053         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4054                 if (repair)
4055                         status = try_to_fix_bad_block(root, buf, status);
4056                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4057                         ret = -EIO;
4058                         fprintf(stderr, "bad block %llu\n",
4059                                 (unsigned long long)buf->start);
4060                 } else {
4061                         /*
4062                          * Signal to callers we need to start the scan over
4063                          * again since we'll have cowed blocks.
4064                          */
4065                         ret = -EAGAIN;
4066                 }
4067         } else {
4068                 rec->content_checked = 1;
4069                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4070                         rec->owner_ref_checked = 1;
4071                 else {
4072                         ret = check_owner_ref(root, rec, buf);
4073                         if (!ret)
4074                                 rec->owner_ref_checked = 1;
4075                 }
4076         }
4077         if (!ret)
4078                 maybe_free_extent_rec(extent_cache, rec);
4079         return ret;
4080 }
4081
#if 0
/*
 * Compiled out.  Walks the rec->backrefs list looking for a tree backref:
 * with a non-zero @parent it matches a full backref with that parent,
 * otherwise a non-full backref with the given @root.  Returns NULL if
 * nothing matches.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
						u64 parent, u64 root)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs;
	     pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct tree_backref *back;

		if (node->is_data)
			continue;
		back = to_tree_backref(node);
		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
		} else if (!node->full_backref && back->root == root) {
			return back;
		}
	}
	return NULL;
}
#endif
4111
4112 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4113                                                 u64 parent, u64 root)
4114 {
4115         struct tree_backref *ref = malloc(sizeof(*ref));
4116
4117         if (!ref)
4118                 return NULL;
4119         memset(&ref->node, 0, sizeof(ref->node));
4120         if (parent > 0) {
4121                 ref->parent = parent;
4122                 ref->node.full_backref = 1;
4123         } else {
4124                 ref->root = root;
4125                 ref->node.full_backref = 0;
4126         }
4127
4128         return ref;
4129 }
4130
#if 0
/*
 * Compiled out.  Walks the rec->backrefs list looking for a data backref.
 * With a non-zero @parent it matches a full backref with that parent.
 * Otherwise it matches a non-full backref on @root/@owner/@offset; when both
 * the caller and the candidate already have found_ref set, @bytes and
 * @disk_bytenr must match as well.  Returns NULL if nothing matches.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
						u64 parent, u64 root,
						u64 owner, u64 offset,
						int found_ref,
						u64 disk_bytenr, u64 bytes)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs;
	     pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct data_backref *back;

		if (!node->is_data)
			continue;
		back = to_data_backref(node);

		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
			continue;
		}

		if (node->full_backref)
			continue;
		if (back->root != root || back->owner != owner ||
		    back->offset != offset)
			continue;
		if (found_ref && node->found_ref &&
		    (back->bytes != bytes ||
		     back->disk_bytenr != disk_bytenr))
			continue;
		return back;
	}
	return NULL;
}
#endif
4169
4170 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4171                                                 u64 parent, u64 root,
4172                                                 u64 owner, u64 offset,
4173                                                 u64 max_size)
4174 {
4175         struct data_backref *ref = malloc(sizeof(*ref));
4176
4177         if (!ref)
4178                 return NULL;
4179         memset(&ref->node, 0, sizeof(ref->node));
4180         ref->node.is_data = 1;
4181
4182         if (parent > 0) {
4183                 ref->parent = parent;
4184                 ref->owner = 0;
4185                 ref->offset = 0;
4186                 ref->node.full_backref = 1;
4187         } else {
4188                 ref->root = root;
4189                 ref->owner = owner;
4190                 ref->offset = offset;
4191                 ref->node.full_backref = 0;
4192         }
4193         ref->bytes = max_size;
4194         ref->found_ref = 0;
4195         ref->num_refs = 0;
4196         if (max_size > rec->max_size)
4197                 rec->max_size = max_size;
4198         return ref;
4199 }
4200
4201 /* Check if the type of extent matches with its chunk */
4202 static void check_extent_type(struct extent_record *rec)
4203 {
4204         struct btrfs_block_group_cache *bg_cache;
4205
4206         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4207         if (!bg_cache)
4208                 return;
4209
4210         /* data extent, check chunk directly*/
4211         if (!rec->metadata) {
4212                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4213                         rec->wrong_chunk_type = 1;
4214                 return;
4215         }
4216
4217         /* metadata extent, check the obvious case first */
4218         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4219                                  BTRFS_BLOCK_GROUP_METADATA))) {
4220                 rec->wrong_chunk_type = 1;
4221                 return;
4222         }
4223
4224         /*
4225          * Check SYSTEM extent, as it's also marked as metadata, we can only
4226          * make sure it's a SYSTEM extent by its backref
4227          */
4228         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4229                 struct extent_backref *node;
4230                 struct tree_backref *tback;
4231                 u64 bg_type;
4232
4233                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4234                 if (node->is_data) {
4235                         /* tree block shouldn't have data backref */
4236                         rec->wrong_chunk_type = 1;
4237                         return;
4238                 }
4239                 tback = container_of(node, struct tree_backref, node);
4240
4241                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4242                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4243                 else
4244                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4245                 if (!(bg_cache->flags & bg_type))
4246                         rec->wrong_chunk_type = 1;
4247         }
4248 }
4249
4250 /*
4251  * Allocate a new extent record, fill default values from @tmpl and insert int
4252  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4253  * the cache, otherwise it fails.
4254  */
4255 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4256                 struct extent_record *tmpl)
4257 {
4258         struct extent_record *rec;
4259         int ret = 0;
4260
4261         BUG_ON(tmpl->max_size == 0);
4262         rec = malloc(sizeof(*rec));
4263         if (!rec)
4264                 return -ENOMEM;
4265         rec->start = tmpl->start;
4266         rec->max_size = tmpl->max_size;
4267         rec->nr = max(tmpl->nr, tmpl->max_size);
4268         rec->found_rec = tmpl->found_rec;
4269         rec->content_checked = tmpl->content_checked;
4270         rec->owner_ref_checked = tmpl->owner_ref_checked;
4271         rec->num_duplicates = 0;
4272         rec->metadata = tmpl->metadata;
4273         rec->flag_block_full_backref = FLAG_UNSET;
4274         rec->bad_full_backref = 0;
4275         rec->crossing_stripes = 0;
4276         rec->wrong_chunk_type = 0;
4277         rec->is_root = tmpl->is_root;
4278         rec->refs = tmpl->refs;
4279         rec->extent_item_refs = tmpl->extent_item_refs;
4280         rec->parent_generation = tmpl->parent_generation;
4281         INIT_LIST_HEAD(&rec->backrefs);
4282         INIT_LIST_HEAD(&rec->dups);
4283         INIT_LIST_HEAD(&rec->list);
4284         rec->backref_tree = RB_ROOT;
4285         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4286         rec->cache.start = tmpl->start;
4287         rec->cache.size = tmpl->nr;
4288         ret = insert_cache_extent(extent_cache, &rec->cache);
4289         if (ret) {
4290                 free(rec);
4291                 return ret;
4292         }
4293         bytes_used += rec->nr;
4294
4295         if (tmpl->metadata)
4296                 rec->crossing_stripes = check_crossing_stripes(global_info,
4297                                 rec->start, global_info->nodesize);
4298         check_extent_type(rec);
4299         return ret;
4300 }
4301
4302 /*
4303  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4304  * some are hints:
4305  * - refs              - if found, increase refs
4306  * - is_root           - if found, set
4307  * - content_checked   - if found, set
4308  * - owner_ref_checked - if found, set
4309  *
4310  * If not found, create a new one, initialize and insert.
4311  */
4312 static int add_extent_rec(struct cache_tree *extent_cache,
4313                 struct extent_record *tmpl)
4314 {
4315         struct extent_record *rec;
4316         struct cache_extent *cache;
4317         int ret = 0;
4318         int dup = 0;
4319
4320         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4321         if (cache) {
4322                 rec = container_of(cache, struct extent_record, cache);
4323                 if (tmpl->refs)
4324                         rec->refs++;
4325                 if (rec->nr == 1)
4326                         rec->nr = max(tmpl->nr, tmpl->max_size);
4327
4328                 /*
4329                  * We need to make sure to reset nr to whatever the extent
4330                  * record says was the real size, this way we can compare it to
4331                  * the backrefs.
4332                  */
4333                 if (tmpl->found_rec) {
4334                         if (tmpl->start != rec->start || rec->found_rec) {
4335                                 struct extent_record *tmp;
4336
4337                                 dup = 1;
4338                                 if (list_empty(&rec->list))
4339                                         list_add_tail(&rec->list,
4340                                                       &duplicate_extents);
4341
4342                                 /*
4343                                  * We have to do this song and dance in case we
4344                                  * find an extent record that falls inside of
4345                                  * our current extent record but does not have
4346                                  * the same objectid.
4347                                  */
4348                                 tmp = malloc(sizeof(*tmp));
4349                                 if (!tmp)
4350                                         return -ENOMEM;
4351                                 tmp->start = tmpl->start;
4352                                 tmp->max_size = tmpl->max_size;
4353                                 tmp->nr = tmpl->nr;
4354                                 tmp->found_rec = 1;
4355                                 tmp->metadata = tmpl->metadata;
4356                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4357                                 INIT_LIST_HEAD(&tmp->list);
4358                                 list_add_tail(&tmp->list, &rec->dups);
4359                                 rec->num_duplicates++;
4360                         } else {
4361                                 rec->nr = tmpl->nr;
4362                                 rec->found_rec = 1;
4363                         }
4364                 }
4365
4366                 if (tmpl->extent_item_refs && !dup) {
4367                         if (rec->extent_item_refs) {
4368                                 fprintf(stderr,
4369                         "block %llu rec extent_item_refs %llu, passed %llu\n",
4370                                         (unsigned long long)tmpl->start,
4371                                         (unsigned long long)
4372                                                         rec->extent_item_refs,
4373                                         (unsigned long long)
4374                                                         tmpl->extent_item_refs);
4375                         }
4376                         rec->extent_item_refs = tmpl->extent_item_refs;
4377                 }
4378                 if (tmpl->is_root)
4379                         rec->is_root = 1;
4380                 if (tmpl->content_checked)
4381                         rec->content_checked = 1;
4382                 if (tmpl->owner_ref_checked)
4383                         rec->owner_ref_checked = 1;
4384                 memcpy(&rec->parent_key, &tmpl->parent_key,
4385                                 sizeof(tmpl->parent_key));
4386                 if (tmpl->parent_generation)
4387                         rec->parent_generation = tmpl->parent_generation;
4388                 if (rec->max_size < tmpl->max_size)
4389                         rec->max_size = tmpl->max_size;
4390
4391                 /*
4392                  * A metadata extent can't cross stripe_len boundary, otherwise
4393                  * kernel scrub won't be able to handle it.
4394                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4395                  * it.
4396                  */
4397                 if (tmpl->metadata)
4398                         rec->crossing_stripes = check_crossing_stripes(
4399                                         global_info, rec->start,
4400                                         global_info->nodesize);
4401                 check_extent_type(rec);
4402                 maybe_free_extent_rec(extent_cache, rec);
4403                 return ret;
4404         }
4405
4406         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4407
4408         return ret;
4409 }
4410
4411 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4412                             u64 parent, u64 root, int found_ref)
4413 {
4414         struct extent_record *rec;
4415         struct tree_backref *back;
4416         struct cache_extent *cache;
4417         int ret;
4418         bool insert = false;
4419
4420         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4421         if (!cache) {
4422                 struct extent_record tmpl;
4423
4424                 memset(&tmpl, 0, sizeof(tmpl));
4425                 tmpl.start = bytenr;
4426                 tmpl.nr = 1;
4427                 tmpl.metadata = 1;
4428                 tmpl.max_size = 1;
4429
4430                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4431                 if (ret)
4432                         return ret;
4433
4434                 /* really a bug in cache_extent implement now */
4435                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4436                 if (!cache)
4437                         return -ENOENT;
4438         }
4439
4440         rec = container_of(cache, struct extent_record, cache);
4441         if (rec->start != bytenr) {
4442                 /*
4443                  * Several cause, from unaligned bytenr to over lapping extents
4444                  */
4445                 return -EEXIST;
4446         }
4447
4448         back = find_tree_backref(rec, parent, root);
4449         if (!back) {
4450                 back = alloc_tree_backref(rec, parent, root);
4451                 if (!back)
4452                         return -ENOMEM;
4453                 insert = true;
4454         }
4455
4456         if (found_ref) {
4457                 if (back->node.found_ref) {
4458                         fprintf(stderr,
4459         "Extent back ref already exists for %llu parent %llu root %llu\n",
4460                                 (unsigned long long)bytenr,
4461                                 (unsigned long long)parent,
4462                                 (unsigned long long)root);
4463                 }
4464                 back->node.found_ref = 1;
4465         } else {
4466                 if (back->node.found_extent_tree) {
4467                         fprintf(stderr,
4468         "extent back ref already exists for %llu parent %llu root %llu\n",
4469                                 (unsigned long long)bytenr,
4470                                 (unsigned long long)parent,
4471                                 (unsigned long long)root);
4472                 }
4473                 back->node.found_extent_tree = 1;
4474         }
4475         if (insert)
4476                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
4477                         compare_extent_backref));
4478         check_extent_type(rec);
4479         maybe_free_extent_rec(extent_cache, rec);
4480         return 0;
4481 }
4482
4483 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4484                             u64 parent, u64 root, u64 owner, u64 offset,
4485                             u32 num_refs, int found_ref, u64 max_size)
4486 {
4487         struct extent_record *rec;
4488         struct data_backref *back;
4489         struct cache_extent *cache;
4490         int ret;
4491         bool insert = false;
4492
4493         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4494         if (!cache) {
4495                 struct extent_record tmpl;
4496
4497                 memset(&tmpl, 0, sizeof(tmpl));
4498                 tmpl.start = bytenr;
4499                 tmpl.nr = 1;
4500                 tmpl.max_size = max_size;
4501
4502                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4503                 if (ret)
4504                         return ret;
4505
4506                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4507                 if (!cache)
4508                         abort();
4509         }
4510
4511         rec = container_of(cache, struct extent_record, cache);
4512         if (rec->max_size < max_size)
4513                 rec->max_size = max_size;
4514
4515         /*
4516          * If found_ref is set then max_size is the real size and must match the
4517          * existing refs.  So if we have already found a ref then we need to
4518          * make sure that this ref matches the existing one, otherwise we need
4519          * to add a new backref so we can notice that the backrefs don't match
4520          * and we need to figure out who is telling the truth.  This is to
4521          * account for that awful fsync bug I introduced where we'd end up with
4522          * a btrfs_file_extent_item that would have its length include multiple
4523          * prealloc extents or point inside of a prealloc extent.
4524          */
4525         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4526                                  bytenr, max_size);
4527         if (!back) {
4528                 back = alloc_data_backref(rec, parent, root, owner, offset,
4529                                           max_size);
4530                 BUG_ON(!back);
4531                 insert = true;
4532         }
4533
4534         if (found_ref) {
4535                 BUG_ON(num_refs != 1);
4536                 if (back->node.found_ref)
4537                         BUG_ON(back->bytes != max_size);
4538                 back->node.found_ref = 1;
4539                 back->found_ref += 1;
4540                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
4541                         back->bytes = max_size;
4542                         back->disk_bytenr = bytenr;
4543
4544                         /* Need to reinsert if not already in the tree */
4545                         if (!insert) {
4546                                 rb_erase(&back->node.node, &rec->backref_tree);
4547                                 insert = true;
4548                         }
4549                 }
4550                 rec->refs += 1;
4551                 rec->content_checked = 1;
4552                 rec->owner_ref_checked = 1;
4553         } else {
4554                 if (back->node.found_extent_tree) {
4555                         fprintf(stderr,
4556 "Extent back ref already exists for %llu parent %llu root %llu owner %llu offset %llu num_refs %lu\n",
4557                                 (unsigned long long)bytenr,
4558                                 (unsigned long long)parent,
4559                                 (unsigned long long)root,
4560                                 (unsigned long long)owner,
4561                                 (unsigned long long)offset,
4562                                 (unsigned long)num_refs);
4563                 }
4564                 back->num_refs = num_refs;
4565                 back->node.found_extent_tree = 1;
4566         }
4567         if (insert)
4568                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
4569                         compare_extent_backref));
4570
4571         maybe_free_extent_rec(extent_cache, rec);
4572         return 0;
4573 }
4574
4575 static int add_pending(struct cache_tree *pending,
4576                        struct cache_tree *seen, u64 bytenr, u32 size)
4577 {
4578         int ret;
4579
4580         ret = add_cache_extent(seen, bytenr, size);
4581         if (ret)
4582                 return ret;
4583         add_cache_extent(pending, bytenr, size);
4584         return 0;
4585 }
4586
4587 static int pick_next_pending(struct cache_tree *pending,
4588                         struct cache_tree *reada,
4589                         struct cache_tree *nodes,
4590                         u64 last, struct block_info *bits, int bits_nr,
4591                         int *reada_bits)
4592 {
4593         unsigned long node_start = last;
4594         struct cache_extent *cache;
4595         int ret;
4596
4597         cache = search_cache_extent(reada, 0);
4598         if (cache) {
4599                 bits[0].start = cache->start;
4600                 bits[0].size = cache->size;
4601                 *reada_bits = 1;
4602                 return 1;
4603         }
4604         *reada_bits = 0;
4605         if (node_start > 32768)
4606                 node_start -= 32768;
4607
4608         cache = search_cache_extent(nodes, node_start);
4609         if (!cache)
4610                 cache = search_cache_extent(nodes, 0);
4611
4612         if (!cache) {
4613                 cache = search_cache_extent(pending, 0);
4614                 if (!cache)
4615                         return 0;
4616                 ret = 0;
4617                 do {
4618                         bits[ret].start = cache->start;
4619                         bits[ret].size = cache->size;
4620                         cache = next_cache_extent(cache);
4621                         ret++;
4622                 } while (cache && ret < bits_nr);
4623                 return ret;
4624         }
4625
4626         ret = 0;
4627         do {
4628                 bits[ret].start = cache->start;
4629                 bits[ret].size = cache->size;
4630                 cache = next_cache_extent(cache);
4631                 ret++;
4632         } while (cache && ret < bits_nr);
4633
4634         if (bits_nr - ret > 8) {
4635                 u64 lookup = bits[0].start + bits[0].size;
4636                 struct cache_extent *next;
4637
4638                 next = search_cache_extent(pending, lookup);
4639                 while (next) {
4640                         if (next->start - lookup > 32768)
4641                                 break;
4642                         bits[ret].start = next->start;
4643                         bits[ret].size = next->size;
4644                         lookup = next->start + next->size;
4645                         ret++;
4646                         if (ret == bits_nr)
4647                                 break;
4648                         next = next_cache_extent(next);
4649                         if (!next)
4650                                 break;
4651                 }
4652         }
4653         return ret;
4654 }
4655
4656 static void free_chunk_record(struct cache_extent *cache)
4657 {
4658         struct chunk_record *rec;
4659
4660         rec = container_of(cache, struct chunk_record, cache);
4661         list_del_init(&rec->list);
4662         list_del_init(&rec->dextents);
4663         free(rec);
4664 }
4665
/*
 * Release the contents of @chunk_cache by handing free_chunk_record() to
 * cache_tree_free_extents() (presumably called once per cached extent).
 */
void free_chunk_cache_tree(struct cache_tree *chunk_cache)
{
        cache_tree_free_extents(chunk_cache, free_chunk_record);
}
4670
4671 static void free_device_record(struct rb_node *node)
4672 {
4673         struct device_record *rec;
4674
4675         rec = container_of(node, struct device_record, node);
4676         free(rec);
4677 }
4678
4679 FREE_RB_BASED_TREE(device_cache, free_device_record);
4680
4681 int insert_block_group_record(struct block_group_tree *tree,
4682                               struct block_group_record *bg_rec)
4683 {
4684         int ret;
4685
4686         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4687         if (ret)
4688                 return ret;
4689
4690         list_add_tail(&bg_rec->list, &tree->block_groups);
4691         return 0;
4692 }
4693
4694 static void free_block_group_record(struct cache_extent *cache)
4695 {
4696         struct block_group_record *rec;
4697
4698         rec = container_of(cache, struct block_group_record, cache);
4699         list_del_init(&rec->list);
4700         free(rec);
4701 }
4702
/*
 * Release the records held in @tree's cache tree by handing
 * free_block_group_record() to cache_tree_free_extents() (presumably called
 * once per cached extent).
 */
void free_block_group_tree(struct block_group_tree *tree)
{
        cache_tree_free_extents(&tree->tree, free_block_group_record);
}
4707
4708 int insert_device_extent_record(struct device_extent_tree *tree,
4709                                 struct device_extent_record *de_rec)
4710 {
4711         int ret;
4712
4713         /*
4714          * Device extent is a bit different from the other extents, because
4715          * the extents which belong to the different devices may have the
4716          * same start and size, so we need use the special extent cache
4717          * search/insert functions.
4718          */
4719         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4720         if (ret)
4721                 return ret;
4722
4723         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4724         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4725         return 0;
4726 }
4727
4728 static void free_device_extent_record(struct cache_extent *cache)
4729 {
4730         struct device_extent_record *rec;
4731
4732         rec = container_of(cache, struct device_extent_record, cache);
4733         if (!list_empty(&rec->chunk_list))
4734                 list_del_init(&rec->chunk_list);
4735         if (!list_empty(&rec->device_list))
4736                 list_del_init(&rec->device_list);
4737         free(rec);
4738 }
4739
/*
 * Release the records held in @tree's cache tree by handing
 * free_device_extent_record() to cache_tree_free_extents() (presumably
 * called once per cached extent).
 */
void free_device_extent_tree(struct device_extent_tree *tree)
{
        cache_tree_free_extents(&tree->tree, free_device_extent_record);
}
4744
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref in
 * @extent_cache.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref, anything else as a data backref carrying the ref count stored
 * in the item.  The key's objectid and offset are passed through as the first
 * two arguments after the cache in both cases.  Returns whatever the chosen
 * add_*_backref() helper returns.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                        key.offset, 0, 0, 0,
                                        btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
}
#endif
4765
4766 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4767                                             struct btrfs_key *key,
4768                                             int slot)
4769 {
4770         struct btrfs_chunk *ptr;
4771         struct chunk_record *rec;
4772         int num_stripes, i;
4773
4774         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4775         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4776
4777         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4778         if (!rec) {
4779                 fprintf(stderr, "memory allocation failed\n");
4780                 exit(-1);
4781         }
4782
4783         INIT_LIST_HEAD(&rec->list);
4784         INIT_LIST_HEAD(&rec->dextents);
4785         rec->bg_rec = NULL;
4786
4787         rec->cache.start = key->offset;
4788         rec->cache.size = btrfs_chunk_length(leaf, ptr);
4789
4790         rec->generation = btrfs_header_generation(leaf);
4791
4792         rec->objectid = key->objectid;
4793         rec->type = key->type;
4794         rec->offset = key->offset;
4795
4796         rec->length = rec->cache.size;
4797         rec->owner = btrfs_chunk_owner(leaf, ptr);
4798         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4799         rec->type_flags = btrfs_chunk_type(leaf, ptr);
4800         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4801         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4802         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4803         rec->num_stripes = num_stripes;
4804         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4805
4806         for (i = 0; i < rec->num_stripes; ++i) {
4807                 rec->stripes[i].devid =
4808                         btrfs_stripe_devid_nr(leaf, ptr, i);
4809                 rec->stripes[i].offset =
4810                         btrfs_stripe_offset_nr(leaf, ptr, i);
4811                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4812                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
4813                                 BTRFS_UUID_SIZE);
4814         }
4815
4816         return rec;
4817 }
4818
4819 static int process_chunk_item(struct cache_tree *chunk_cache,
4820                               struct btrfs_key *key, struct extent_buffer *eb,
4821                               int slot)
4822 {
4823         struct chunk_record *rec;
4824         struct btrfs_chunk *chunk;
4825         int ret = 0;
4826
4827         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
4828         /*
4829          * Do extra check for this chunk item,
4830          *
4831          * It's still possible one can craft a leaf with CHUNK_ITEM, with
4832          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
4833          * and owner<->key_type check.
4834          */
4835         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
4836                                       key->offset);
4837         if (ret < 0) {
4838                 error("chunk(%llu, %llu) is not valid, ignore it",
4839                       key->offset, btrfs_chunk_length(eb, chunk));
4840                 return 0;
4841         }
4842         rec = btrfs_new_chunk_record(eb, key, slot);
4843         ret = insert_cache_extent(chunk_cache, &rec->cache);
4844         if (ret) {
4845                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
4846                         rec->offset, rec->length);
4847                 free(rec);
4848         }
4849
4850         return ret;
4851 }
4852
4853 static int process_device_item(struct rb_root *dev_cache,
4854                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
4855 {
4856         struct btrfs_dev_item *ptr;
4857         struct device_record *rec;
4858         int ret = 0;
4859
4860         ptr = btrfs_item_ptr(eb,
4861                 slot, struct btrfs_dev_item);
4862
4863         rec = malloc(sizeof(*rec));
4864         if (!rec) {
4865                 fprintf(stderr, "memory allocation failed\n");
4866                 return -ENOMEM;
4867         }
4868
4869         rec->devid = key->offset;
4870         rec->generation = btrfs_header_generation(eb);
4871
4872         rec->objectid = key->objectid;
4873         rec->type = key->type;
4874         rec->offset = key->offset;
4875
4876         rec->devid = btrfs_device_id(eb, ptr);
4877         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
4878         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
4879
4880         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
4881         if (ret) {
4882                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
4883                 free(rec);
4884         }
4885
4886         return ret;
4887 }
4888
4889 struct block_group_record *
4890 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
4891                              int slot)
4892 {
4893         struct btrfs_block_group_item *ptr;
4894         struct block_group_record *rec;
4895
4896         rec = calloc(1, sizeof(*rec));
4897         if (!rec) {
4898                 fprintf(stderr, "memory allocation failed\n");
4899                 exit(-1);
4900         }
4901
4902         rec->cache.start = key->objectid;
4903         rec->cache.size = key->offset;
4904
4905         rec->generation = btrfs_header_generation(leaf);
4906
4907         rec->objectid = key->objectid;
4908         rec->type = key->type;
4909         rec->offset = key->offset;
4910
4911         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
4912         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
4913
4914         INIT_LIST_HEAD(&rec->list);
4915
4916         return rec;
4917 }
4918
4919 static int process_block_group_item(struct block_group_tree *block_group_cache,
4920                                     struct btrfs_key *key,
4921                                     struct extent_buffer *eb, int slot)
4922 {
4923         struct block_group_record *rec;
4924         int ret = 0;
4925
4926         rec = btrfs_new_block_group_record(eb, key, slot);
4927         ret = insert_block_group_record(block_group_cache, rec);
4928         if (ret) {
4929                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
4930                         rec->objectid, rec->offset);
4931                 free(rec);
4932         }
4933
4934         return ret;
4935 }
4936
4937 struct device_extent_record *
4938 btrfs_new_device_extent_record(struct extent_buffer *leaf,
4939                                struct btrfs_key *key, int slot)
4940 {
4941         struct device_extent_record *rec;
4942         struct btrfs_dev_extent *ptr;
4943
4944         rec = calloc(1, sizeof(*rec));
4945         if (!rec) {
4946                 fprintf(stderr, "memory allocation failed\n");
4947                 exit(-1);
4948         }
4949
4950         rec->cache.objectid = key->objectid;
4951         rec->cache.start = key->offset;
4952
4953         rec->generation = btrfs_header_generation(leaf);
4954
4955         rec->objectid = key->objectid;
4956         rec->type = key->type;
4957         rec->offset = key->offset;
4958
4959         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
4960         rec->chunk_objecteid =
4961                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
4962         rec->chunk_offset =
4963                 btrfs_dev_extent_chunk_offset(leaf, ptr);
4964         rec->length = btrfs_dev_extent_length(leaf, ptr);
4965         rec->cache.size = rec->length;
4966
4967         INIT_LIST_HEAD(&rec->chunk_list);
4968         INIT_LIST_HEAD(&rec->device_list);
4969
4970         return rec;
4971 }
4972
4973 static int
4974 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
4975                            struct btrfs_key *key, struct extent_buffer *eb,
4976                            int slot)
4977 {
4978         struct device_extent_record *rec;
4979         int ret;
4980
4981         rec = btrfs_new_device_extent_record(eb, key, slot);
4982         ret = insert_device_extent_record(dev_extent_cache, rec);
4983         if (ret) {
4984                 fprintf(stderr,
4985                         "Device extent[%llu, %llu, %llu] existed.\n",
4986                         rec->objectid, rec->offset, rec->length);
4987                 free(rec);
4988         }
4989
4990         return ret;
4991 }
4992
/*
 * Record the extent item at @slot of leaf @eb in @extent_cache, together with
 * the inline back references stored inside the item.
 *
 * The extent length is the filesystem nodesize for a METADATA_ITEM key and
 * key.offset otherwise.
 *
 * Returns:
 *  -EIO  if the start bytenr is not sector aligned, if a tree block extent
 *        has a length other than nodesize, or if a data extent has a length
 *        that is not sector aligned;
 *  the result of add_extent_rec() for an item smaller than
 *        struct btrfs_extent_item (v0 format, only when
 *        BTRFS_COMPAT_EXTENT_TREE_V0 is defined; otherwise BUG());
 *  0     in every other case.  Failures while adding the extent record or a
 *        backref, and an unknown inline ref type, are at most reported and
 *        do not change the return value.
 */
static int process_extent_item(struct btrfs_root *root,
                               struct cache_tree *extent_cache,
                               struct extent_buffer *eb, int slot)
{
        struct btrfs_extent_item *ei;
        struct btrfs_extent_inline_ref *iref;
        struct btrfs_extent_data_ref *dref;
        struct btrfs_shared_data_ref *sref;
        struct btrfs_key key;
        struct extent_record tmpl;
        unsigned long end;
        unsigned long ptr;
        int ret;
        int type;
        u32 item_size = btrfs_item_size_nr(eb, slot);
        u64 refs = 0;
        u64 offset;
        u64 num_bytes;
        int metadata = 0;

        btrfs_item_key_to_cpu(eb, &key, slot);

        /* A METADATA_ITEM key carries no length, it is always one node */
        if (key.type == BTRFS_METADATA_ITEM_KEY) {
                metadata = 1;
                num_bytes = root->fs_info->nodesize;
        } else {
                num_bytes = key.offset;
        }

        if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
                error("ignoring invalid extent, bytenr %llu is not aligned to %u",
                      key.objectid, root->fs_info->sectorsize);
                return -EIO;
        }
        /* Item too small for the current format: v0 item or corruption */
        if (item_size < sizeof(*ei)) {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
                struct btrfs_extent_item_v0 *ei0;

                if (item_size != sizeof(*ei0)) {
                        error(
        "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
                                key.objectid, key.type, key.offset,
                                btrfs_header_bytenr(eb), slot);
                        BUG();
                }
                ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
                refs = btrfs_extent_refs_v0(eb, ei0);
#else
                BUG();
#endif
                /* v0 items have no inline refs, only the record is added */
                memset(&tmpl, 0, sizeof(tmpl));
                tmpl.start = key.objectid;
                tmpl.nr = num_bytes;
                tmpl.extent_item_refs = refs;
                tmpl.metadata = metadata;
                tmpl.found_rec = 1;
                tmpl.max_size = num_bytes;

                return add_extent_rec(extent_cache, &tmpl);
        }

        ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
        refs = btrfs_extent_refs(eb, ei);
        /* From here on the item's own flags decide metadata vs. data */
        if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
                metadata = 1;
        else
                metadata = 0;
        if (metadata && num_bytes != root->fs_info->nodesize) {
                error("ignore invalid metadata extent, length %llu does not equal to %u",
                      num_bytes, root->fs_info->nodesize);
                return -EIO;
        }
        if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
                error("ignore invalid data extent, length %llu is not aligned to %u",
                      num_bytes, root->fs_info->sectorsize);
                return -EIO;
        }

        memset(&tmpl, 0, sizeof(tmpl));
        tmpl.start = key.objectid;
        tmpl.nr = num_bytes;
        tmpl.extent_item_refs = refs;
        tmpl.metadata = metadata;
        tmpl.found_rec = 1;
        tmpl.max_size = num_bytes;
        /* NOTE(review): the result of add_extent_rec() is not checked here */
        add_extent_rec(extent_cache, &tmpl);

        /*
         * Inline refs start right behind the extent item.  A tree block that
         * is keyed as EXTENT_ITEM additionally has a btrfs_tree_block_info in
         * between, which is skipped.
         */
        ptr = (unsigned long)(ei + 1);
        if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
            key.type == BTRFS_EXTENT_ITEM_KEY)
                ptr += sizeof(struct btrfs_tree_block_info);

        end = (unsigned long)ei + item_size;
        /* Walk the inline refs one by one until the end of the item */
        while (ptr < end) {
                iref = (struct btrfs_extent_inline_ref *)ptr;
                type = btrfs_extent_inline_ref_type(eb, iref);
                offset = btrfs_extent_inline_ref_offset(eb, iref);
                switch (type) {
                case BTRFS_TREE_BLOCK_REF_KEY:
                        /* The inline offset is passed as the 4th argument */
                        ret = add_tree_backref(extent_cache, key.objectid,
                                        0, offset, 0);
                        if (ret < 0)
                                error(
                        "add_tree_backref failed (extent items tree block): %s",
                                      strerror(-ret));
                        break;
                case BTRFS_SHARED_BLOCK_REF_KEY:
                        /* The inline offset is passed as the 3rd argument */
                        ret = add_tree_backref(extent_cache, key.objectid,
                                        offset, 0, 0);
                        if (ret < 0)
                                error(
                        "add_tree_backref failed (extent items shared block): %s",
                                      strerror(-ret));
                        break;
                case BTRFS_EXTENT_DATA_REF_KEY:
                        /* The data ref overlays the inline ref's offset field */
                        dref = (struct btrfs_extent_data_ref *)(&iref->offset);
                        add_data_backref(extent_cache, key.objectid, 0,
                                        btrfs_extent_data_ref_root(eb, dref),
                                        btrfs_extent_data_ref_objectid(eb,
                                                                       dref),
                                        btrfs_extent_data_ref_offset(eb, dref),
                                        btrfs_extent_data_ref_count(eb, dref),
                                        0, num_bytes);
                        break;
                case BTRFS_SHARED_DATA_REF_KEY:
                        /* The shared data ref follows the inline ref header */
                        sref = (struct btrfs_shared_data_ref *)(iref + 1);
                        add_data_backref(extent_cache, key.objectid, offset,
                                        0, 0, 0,
                                        btrfs_shared_data_ref_count(eb, sref),
                                        0, num_bytes);
                        break;
                default:
                        /* Unknown type: its size is unknown too, stop walking */
                        fprintf(stderr,
                                "corrupt extent record: key [%llu,%u,%llu]\n",
                                key.objectid, key.type, num_bytes);
                        goto out;
                }
                ptr += btrfs_extent_inline_ref_size(type);
        }
        /* The last inline ref must not extend past the end of the item */
        WARN_ON(ptr > end);
out:
        return 0;
}
5136
/*
 * Verify that the range starting at @offset and spanning @bytes is described
 * by exactly one matching entry in @cache's free space ctl, and remove that
 * entry when it matches.
 *
 * Before the lookup, the range is trimmed against the logical ranges
 * [logical[nr], logical[nr] + stripe_len) that btrfs_rmap_block() reports for
 * each superblock mirror offset (presumably the parts of this block group
 * that hold superblock copies -- confirm against btrfs_rmap_block()).  When
 * such a range lies strictly inside the checked range, the left part is
 * verified by a recursive call and the walk continues with the right part.
 *
 * Returns:
 *  0        if the range is completely covered by such stripes, or if a
 *           matching free space entry was found and removed;
 *  non-zero result of btrfs_rmap_block() or of the recursive call on failure;
 *  -EINVAL  if no free space entry is found, or if the entry's offset or
 *           size differs from what is expected (reported on stderr).
 */
static int check_cache_range(struct btrfs_root *root,
                             struct btrfs_block_group_cache *cache,
                             u64 offset, u64 bytes)
{
        struct btrfs_free_space *entry;
        u64 *logical;
        u64 bytenr;
        int stripe_len;
        int i, nr, ret;

        for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
                bytenr = btrfs_sb_offset(i);
                ret = btrfs_rmap_block(root->fs_info,
                                       cache->key.objectid, bytenr, 0,
                                       &logical, &nr, &stripe_len);
                if (ret)
                        return ret;

                /* The array handed back in logical is freed on every path below */
                while (nr--) {
                        /* Stripe ends at or before the range: no overlap */
                        if (logical[nr] + stripe_len <= offset)
                                continue;
                        /* Stripe starts at or after the range end: no overlap */
                        if (offset + bytes <= logical[nr])
                                continue;
                        if (logical[nr] == offset) {
                                /* Stripe covers the whole range, nothing left */
                                if (stripe_len >= bytes) {
                                        free(logical);
                                        return 0;
                                }
                                /* Cut the stripe off the front of the range */
                                bytes -= stripe_len;
                                offset += stripe_len;
                        } else if (logical[nr] < offset) {
                                /* Stripe starts before the range and covers it all */
                                if (logical[nr] + stripe_len >=
                                    offset + bytes) {
                                        free(logical);
                                        return 0;
                                }
                                /* Keep only what lies behind the stripe */
                                bytes = (offset + bytes) -
                                        (logical[nr] + stripe_len);
                                offset = logical[nr] + stripe_len;
                        } else {
                                /*
                                 * Could be tricky, the super may land in the
                                 * middle of the area we're checking.  First
                                 * check the easiest case, it's at the end.
                                 */
                                if (logical[nr] + stripe_len >=
                                    bytes + offset) {
                                        bytes = logical[nr] - offset;
                                        continue;
                                }

                                /* Check the left side */
                                ret = check_cache_range(root, cache,
                                                        offset,
                                                        logical[nr] - offset);
                                if (ret) {
                                        free(logical);
                                        return ret;
                                }

                                /* Now we continue with the right side */
                                bytes = (offset + bytes) -
                                        (logical[nr] + stripe_len);
                                offset = logical[nr] + stripe_len;
                        }
                }

                free(logical);
        }

        /* What is left of the range must match one free space entry exactly */
        entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
        if (!entry) {
                fprintf(stderr, "there is no free space entry for %llu-%llu\n",
                        offset, offset+bytes);
                return -EINVAL;
        }

        if (entry->offset != offset) {
                fprintf(stderr, "wanted offset %llu, found %llu\n", offset,
                        entry->offset);
                return -EINVAL;
        }

        if (entry->bytes != bytes) {
                fprintf(stderr, "wanted bytes %llu, found %llu for off %llu\n",
                        bytes, entry->bytes, offset);
                return -EINVAL;
        }

        /* Consume the verified entry so that leftovers can be detected later */
        unlink_free_space(cache->free_space_ctl, entry);
        free(entry);
        return 0;
}
5230
5231 static int verify_space_cache(struct btrfs_root *root,
5232                               struct btrfs_block_group_cache *cache)
5233 {
5234         struct btrfs_path path;
5235         struct extent_buffer *leaf;
5236         struct btrfs_key key;
5237         u64 last;
5238         int ret = 0;
5239
5240         root = root->fs_info->extent_root;
5241
5242         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5243
5244         btrfs_init_path(&path);
5245         key.objectid = last;
5246         key.offset = 0;
5247         key.type = BTRFS_EXTENT_ITEM_KEY;
5248         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5249         if (ret < 0)
5250                 goto out;
5251         ret = 0;
5252         while (1) {
5253                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
5254                         ret = btrfs_next_leaf(root, &path);
5255                         if (ret < 0)
5256                                 goto out;
5257                         if (ret > 0) {
5258                                 ret = 0;
5259                                 break;
5260                         }
5261                 }
5262                 leaf = path.nodes[0];
5263                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
5264                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5265                         break;
5266                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5267                     key.type != BTRFS_METADATA_ITEM_KEY) {
5268                         path.slots[0]++;
5269                         continue;
5270                 }
5271
5272                 if (last == key.objectid) {
5273                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5274                                 last = key.objectid + key.offset;
5275                         else
5276                                 last = key.objectid + root->fs_info->nodesize;
5277                         path.slots[0]++;
5278                         continue;
5279                 }
5280
5281                 ret = check_cache_range(root, cache, last,
5282                                         key.objectid - last);
5283                 if (ret)
5284                         break;
5285                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5286                         last = key.objectid + key.offset;
5287                 else
5288                         last = key.objectid + root->fs_info->nodesize;
5289                 path.slots[0]++;
5290         }
5291
5292         if (last < cache->key.objectid + cache->key.offset)
5293                 ret = check_cache_range(root, cache, last,
5294                                         cache->key.objectid +
5295                                         cache->key.offset - last);
5296
5297 out:
5298         btrfs_release_path(&path);
5299
5300         if (!ret &&
5301             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5302                 fprintf(stderr, "There are still entries left in the space "
5303                         "cache\n");
5304                 ret = -EINVAL;
5305         }
5306
5307         return ret;
5308 }
5309
5310 static int check_space_cache(struct btrfs_root *root)
5311 {
5312         struct btrfs_block_group_cache *cache;
5313         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5314         int ret;
5315         int error = 0;
5316
5317         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5318             btrfs_super_generation(root->fs_info->super_copy) !=
5319             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5320                 printf("cache and super generation don't match, space cache "
5321                        "will be invalidated\n");
5322                 return 0;
5323         }
5324
5325         if (ctx.progress_enabled) {
5326                 ctx.tp = TASK_FREE_SPACE;
5327                 task_start(ctx.info);
5328         }
5329
5330         while (1) {
5331                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5332                 if (!cache)
5333                         break;
5334
5335                 start = cache->key.objectid + cache->key.offset;
5336                 if (!cache->free_space_ctl) {
5337                         if (btrfs_init_free_space_ctl(cache,
5338                                                 root->fs_info->sectorsize)) {
5339                                 ret = -ENOMEM;
5340                                 break;
5341                         }
5342                 } else {
5343                         btrfs_remove_free_space_cache(cache);
5344                 }
5345
5346                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
5347                         ret = exclude_super_stripes(root, cache);
5348                         if (ret) {
5349                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5350                                         strerror(-ret));
5351                                 error++;
5352                                 continue;
5353                         }
5354                         ret = load_free_space_tree(root->fs_info, cache);
5355                         free_excluded_extents(root, cache);
5356                         if (ret < 0) {
5357                                 fprintf(stderr, "could not load free space tree: %s\n",
5358                                         strerror(-ret));
5359                                 error++;
5360                                 continue;
5361                         }
5362                         error += ret;
5363                 } else {
5364                         ret = load_free_space_cache(root->fs_info, cache);
5365                         if (ret < 0)
5366                                 error++;
5367                         if (ret <= 0)
5368                                 continue;
5369                 }
5370
5371                 ret = verify_space_cache(root, cache);
5372                 if (ret) {
5373                         fprintf(stderr, "cache appears valid but isn't %llu\n",
5374                                 cache->key.objectid);
5375                         error++;
5376                 }
5377         }
5378
5379         task_stop(ctx.info);
5380
5381         return error ? -EINVAL : 0;
5382 }
5383
5384 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5385                         u64 num_bytes, unsigned long leaf_offset,
5386                         struct extent_buffer *eb)
5387 {
5388         struct btrfs_fs_info *fs_info = root->fs_info;
5389         u64 offset = 0;
5390         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
5391         char *data;
5392         unsigned long csum_offset;
5393         u32 csum;
5394         u32 csum_expected;
5395         u64 read_len;
5396         u64 data_checked = 0;
5397         u64 tmp;
5398         int ret = 0;
5399         int mirror;
5400         int num_copies;
5401
5402         if (num_bytes % fs_info->sectorsize)
5403                 return -EINVAL;
5404
5405         data = malloc(num_bytes);
5406         if (!data)
5407                 return -ENOMEM;
5408
5409         while (offset < num_bytes) {
5410                 mirror = 0;
5411 again:
5412                 read_len = num_bytes - offset;
5413                 /* read as much space once a time */
5414                 ret = read_extent_data(fs_info, data + offset,
5415                                 bytenr + offset, &read_len, mirror);
5416                 if (ret)
5417                         goto out;
5418                 data_checked = 0;
5419                 /* verify every 4k data's checksum */
5420                 while (data_checked < read_len) {
5421                         csum = ~(u32)0;
5422                         tmp = offset + data_checked;
5423
5424                         csum = btrfs_csum_data((char *)data + tmp,
5425                                                csum, fs_info->sectorsize);
5426                         btrfs_csum_final(csum, (u8 *)&csum);
5427
5428                         csum_offset = leaf_offset +
5429                                  tmp / fs_info->sectorsize * csum_size;
5430                         read_extent_buffer(eb, (char *)&csum_expected,
5431                                            csum_offset, csum_size);
5432                         /* try another mirror */
5433                         if (csum != csum_expected) {
5434                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5435                                                 mirror, bytenr + tmp,
5436                                                 csum, csum_expected);
5437                                 num_copies = btrfs_num_copies(root->fs_info,
5438                                                 bytenr, num_bytes);
5439                                 if (mirror < num_copies - 1) {
5440                                         mirror += 1;
5441                                         goto again;
5442                                 }
5443                         }
5444                         data_checked += fs_info->sectorsize;
5445                 }
5446                 offset += read_len;
5447         }
5448 out:
5449         free(data);
5450         return ret;
5451 }
5452
/*
 * Check that the byte range [bytenr, bytenr + num_bytes) is covered by
 * EXTENT_ITEMs of the extent tree.
 *
 * @root:      used to reach fs_info->extent_root and passed to the leaf
 *             walking helpers
 * @bytenr:    start of the range to look for
 * @num_bytes: length of the range
 *
 * The function starts at the last item at or before
 * (bytenr, EXTENT_ITEM, -1), walks forward and shrinks the range by every
 * extent item that overlaps it.
 *
 * Returns 0 when the whole range was accounted for, 1 when part of it has no
 * extent item (a message is printed), or a negative value when searching or
 * walking the tree failed.
 */
static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
                               u64 num_bytes)
{
        struct btrfs_path path;
        struct extent_buffer *leaf;
        struct btrfs_key key;
        int ret;

        btrfs_init_path(&path);
        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_ITEM_KEY;
        key.offset = (u64)-1;

        /*
         * NOTE(review): when re-entered through "goto again" below, key still
         * holds the last item key read from the leaf, not the initial search
         * key set up above.
         */
again:
        ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
                                0, 0);
        if (ret < 0) {
                fprintf(stderr, "Error looking up extent record %d\n", ret);
                btrfs_release_path(&path);
                return ret;
        } else if (ret) {
                /* No exact match: step back to the preceding item. */
                if (path.slots[0] > 0) {
                        path.slots[0]--;
                } else {
                        ret = btrfs_prev_leaf(root, &path);
                        if (ret < 0) {
                                goto out;
                        } else if (ret > 0) {
                                /*
                                 * No previous leaf; "out" reports the range
                                 * as missing.
                                 */
                                ret = 0;
                                goto out;
                        }
                }
        }

        btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);

        /*
         * Block group items come before extent items if they have the same
         * bytenr, so walk back one more just in case.  Dear future traveller,
         * first congrats on mastering time travel.  Now if it's not too much
         * trouble could you go back to 2006 and tell Chris to make the
         * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
         * EXTENT_ITEM_KEY please?
         */
        while (key.type > BTRFS_EXTENT_ITEM_KEY) {
                if (path.slots[0] > 0) {
                        path.slots[0]--;
                } else {
                        ret = btrfs_prev_leaf(root, &path);
                        if (ret < 0) {
                                goto out;
                        } else if (ret > 0) {
                                ret = 0;
                                goto out;
                        }
                }
                btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
        }

        /* Walk forward until the range is used up or no item can overlap. */
        while (num_bytes) {
                if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
                        ret = btrfs_next_leaf(root, &path);
                        if (ret < 0) {
                                fprintf(stderr, "Error going to next leaf "
                                        "%d\n", ret);
                                btrfs_release_path(&path);
                                return ret;
                        } else if (ret) {
                                /* End of the tree. */
                                break;
                        }
                }
                leaf = path.nodes[0];
                btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
                if (key.type != BTRFS_EXTENT_ITEM_KEY) {
                        path.slots[0]++;
                        continue;
                }
                /* Extent ends before our range starts. */
                if (key.objectid + key.offset < bytenr) {
                        path.slots[0]++;
                        continue;
                }
                /* Extent starts behind our range; later items do as well. */
                if (key.objectid > bytenr + num_bytes)
                        break;

                if (key.objectid == bytenr) {
                        /* Extent starts exactly at the range start. */
                        if (key.offset >= num_bytes) {
                                num_bytes = 0;
                                break;
                        }
                        num_bytes -= key.offset;
                        bytenr += key.offset;
                } else if (key.objectid < bytenr) {
                        /* Extent starts before the range and reaches into it. */
                        if (key.objectid + key.offset >= bytenr + num_bytes) {
                                num_bytes = 0;
                                break;
                        }
                        num_bytes = (bytenr + num_bytes) -
                                (key.objectid + key.offset);
                        bytenr = key.objectid + key.offset;
                } else {
                        /* Extent starts inside the range. */
                        if (key.objectid + key.offset < bytenr + num_bytes) {
                                u64 new_start = key.objectid + key.offset;
                                u64 new_bytes = bytenr + num_bytes - new_start;

                                /*
                                 * Weird case, the extent is in the middle of
                                 * our range, we'll have to search one side
                                 * and then the other.  Not sure if this happens
                                 * in real life, but no harm in coding it up
                                 * anyway just in case.
                                 */
                                btrfs_release_path(&path);
                                ret = check_extent_exists(root, new_start,
                                                          new_bytes);
                                if (ret) {
                                        fprintf(stderr, "Right section didn't "
                                                "have a record\n");
                                        /*
                                         * ret is reset to 0 after the loop,
                                         * but num_bytes is still non-zero, so
                                         * "out" turns this into a return of 1.
                                         */
                                        break;
                                }
                                /* Continue with the part left of the extent. */
                                num_bytes = key.objectid - bytenr;
                                goto again;
                        }
                        /* Extent covers the tail; the head is still open. */
                        num_bytes = key.objectid - bytenr;
                }
                path.slots[0]++;
        }
        ret = 0;

out:
        /* Anything left in num_bytes was not covered by an extent item. */
        if (num_bytes && !ret) {
                fprintf(stderr,
                        "there are no extents for csum range %llu-%llu\n",
                        bytenr, bytenr+num_bytes);
                ret = 1;
        }

        btrfs_release_path(&path);
        return ret;
}
5592
5593 static int check_csums(struct btrfs_root *root)
5594 {
5595         struct btrfs_path path;
5596         struct extent_buffer *leaf;
5597         struct btrfs_key key;
5598         u64 offset = 0, num_bytes = 0;
5599         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5600         int errors = 0;
5601         int ret;
5602         u64 data_len;
5603         unsigned long leaf_offset;
5604
5605         root = root->fs_info->csum_root;
5606         if (!extent_buffer_uptodate(root->node)) {
5607                 fprintf(stderr, "No valid csum tree found\n");
5608                 return -ENOENT;
5609         }
5610
5611         btrfs_init_path(&path);
5612         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5613         key.type = BTRFS_EXTENT_CSUM_KEY;
5614         key.offset = 0;
5615         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5616         if (ret < 0) {
5617                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5618                 btrfs_release_path(&path);
5619                 return ret;
5620         }
5621
5622         if (ret > 0 && path.slots[0])
5623                 path.slots[0]--;
5624         ret = 0;
5625
5626         while (1) {
5627                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
5628                         ret = btrfs_next_leaf(root, &path);
5629                         if (ret < 0) {
5630                                 fprintf(stderr, "Error going to next leaf "
5631                                         "%d\n", ret);
5632                                 break;
5633                         }
5634                         if (ret)
5635                                 break;
5636                 }
5637                 leaf = path.nodes[0];
5638
5639                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
5640                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5641                         path.slots[0]++;
5642                         continue;
5643                 }
5644
5645                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
5646                               csum_size) * root->fs_info->sectorsize;
5647                 if (!check_data_csum)
5648                         goto skip_csum_check;
5649                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
5650                 ret = check_extent_csums(root, key.offset, data_len,
5651                                          leaf_offset, leaf);
5652                 if (ret)
5653                         break;
5654 skip_csum_check:
5655                 if (!num_bytes) {
5656                         offset = key.offset;
5657                 } else if (key.offset != offset + num_bytes) {
5658                         ret = check_extent_exists(root, offset, num_bytes);
5659                         if (ret) {
5660                                 fprintf(stderr,
5661                 "csum exists for %llu-%llu but there is no extent record\n",
5662                                         offset, offset+num_bytes);
5663                                 errors++;
5664                         }
5665                         offset = key.offset;
5666                         num_bytes = 0;
5667                 }
5668                 num_bytes += data_len;
5669                 path.slots[0]++;
5670         }
5671
5672         btrfs_release_path(&path);
5673         return errors;
5674 }
5675
5676 static int is_dropped_key(struct btrfs_key *key,
5677                           struct btrfs_key *drop_key)
5678 {
5679         if (key->objectid < drop_key->objectid)
5680                 return 1;
5681         else if (key->objectid == drop_key->objectid) {
5682                 if (key->type < drop_key->type)
5683                         return 1;
5684                 else if (key->type == drop_key->type) {
5685                         if (key->offset < drop_key->offset)
5686                                 return 1;
5687                 }
5688         }
5689         return 0;
5690 }
5691
5692 /*
5693  * Here are the rules for FULL_BACKREF.
5694  *
5695  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5696  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5697  *      FULL_BACKREF set.
5698  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
5699  *    if it happened after the relocation occurred since we'll have dropped the
5700  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5701  *    have no real way to know for sure.
5702  *
5703  * We process the blocks one root at a time, and we start from the lowest root
5704  * objectid and go to the highest.  So we can just lookup the owner backref for
5705  * the record and if we don't find it then we know it doesn't exist and we have
5706  * a FULL BACKREF.
5707  *
5708  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5709  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5710  * be set or not and then we can check later once we've gathered all the refs.
5711  */
5712 static int calc_extent_flag(struct cache_tree *extent_cache,
5713                            struct extent_buffer *buf,
5714                            struct root_item_record *ri,
5715                            u64 *flags)
5716 {
5717         struct extent_record *rec;
5718         struct cache_extent *cache;
5719         struct tree_backref *tback;
5720         u64 owner = 0;
5721
5722         cache = lookup_cache_extent(extent_cache, buf->start, 1);
5723         /* we have added this extent before */
5724         if (!cache)
5725                 return -ENOENT;
5726
5727         rec = container_of(cache, struct extent_record, cache);
5728
5729         /*
5730          * Except file/reloc tree, we can not have
5731          * FULL BACKREF MODE
5732          */
5733         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5734                 goto normal;
5735         /*
5736          * root node
5737          */
5738         if (buf->start == ri->bytenr)
5739                 goto normal;
5740
5741         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5742                 goto full_backref;
5743
5744         owner = btrfs_header_owner(buf);
5745         if (owner == ri->objectid)
5746                 goto normal;
5747
5748         tback = find_tree_backref(rec, 0, owner);
5749         if (!tback)
5750                 goto full_backref;
5751 normal:
5752         *flags = 0;
5753         if (rec->flag_block_full_backref != FLAG_UNSET &&
5754             rec->flag_block_full_backref != 0)
5755                 rec->bad_full_backref = 1;
5756         return 0;
5757 full_backref:
5758         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5759         if (rec->flag_block_full_backref != FLAG_UNSET &&
5760             rec->flag_block_full_backref != 1)
5761                 rec->bad_full_backref = 1;
5762         return 0;
5763 }
5764
5765 static void report_mismatch_key_root(u8 key_type, u64 rootid)
5766 {
5767         fprintf(stderr, "Invalid key type(");
5768         print_key_type(stderr, 0, key_type);
5769         fprintf(stderr, ") found in root(");
5770         print_objectid(stderr, rootid, 0);
5771         fprintf(stderr, ")\n");
5772 }
5773
5774 /*
5775  * Check if the key is valid with its extent buffer.
5776  *
5777  * This is a early check in case invalid key exists in a extent buffer
5778  * This is not comprehensive yet, but should prevent wrong key/item passed
5779  * further
5780  */
5781 static int check_type_with_root(u64 rootid, u8 key_type)
5782 {
5783         switch (key_type) {
5784         /* Only valid in chunk tree */
5785         case BTRFS_DEV_ITEM_KEY:
5786         case BTRFS_CHUNK_ITEM_KEY:
5787                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
5788                         goto err;
5789                 break;
5790         /* valid in csum and log tree */
5791         case BTRFS_CSUM_TREE_OBJECTID:
5792                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
5793                       is_fstree(rootid)))
5794                         goto err;
5795                 break;
5796         case BTRFS_EXTENT_ITEM_KEY:
5797         case BTRFS_METADATA_ITEM_KEY:
5798         case BTRFS_BLOCK_GROUP_ITEM_KEY:
5799                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
5800                         goto err;
5801                 break;
5802         case BTRFS_ROOT_ITEM_KEY:
5803                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
5804                         goto err;
5805                 break;
5806         case BTRFS_DEV_EXTENT_KEY:
5807                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
5808                         goto err;
5809                 break;
5810         }
5811         return 0;
5812 err:
5813         report_mismatch_key_root(key_type, rootid);
5814         return -EINVAL;
5815 }
5816
5817 static int run_next_block(struct btrfs_root *root,
5818                           struct block_info *bits,
5819                           int bits_nr,
5820                           u64 *last,
5821                           struct cache_tree *pending,
5822                           struct cache_tree *seen,
5823                           struct cache_tree *reada,
5824                           struct cache_tree *nodes,
5825                           struct cache_tree *extent_cache,
5826                           struct cache_tree *chunk_cache,
5827                           struct rb_root *dev_cache,
5828                           struct block_group_tree *block_group_cache,
5829                           struct device_extent_tree *dev_extent_cache,
5830                           struct root_item_record *ri)
5831 {
5832         struct btrfs_fs_info *fs_info = root->fs_info;
5833         struct extent_buffer *buf;
5834         struct extent_record *rec = NULL;
5835         u64 bytenr;
5836         u32 size;
5837         u64 parent;
5838         u64 owner;
5839         u64 flags;
5840         u64 ptr;
5841         u64 gen = 0;
5842         int ret = 0;
5843         int i;
5844         int nritems;
5845         struct btrfs_key key;
5846         struct cache_extent *cache;
5847         int reada_bits;
5848
5849         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5850                                     bits_nr, &reada_bits);
5851         if (nritems == 0)
5852                 return 1;
5853
5854         if (!reada_bits) {
5855                 for (i = 0; i < nritems; i++) {
5856                         ret = add_cache_extent(reada, bits[i].start,
5857                                                bits[i].size);
5858                         if (ret == -EEXIST)
5859                                 continue;
5860
5861                         /* fixme, get the parent transid */
5862                         readahead_tree_block(fs_info, bits[i].start, 0);
5863                 }
5864         }
5865         *last = bits[0].start;
5866         bytenr = bits[0].start;
5867         size = bits[0].size;
5868
5869         cache = lookup_cache_extent(pending, bytenr, size);
5870         if (cache) {
5871                 remove_cache_extent(pending, cache);
5872                 free(cache);
5873         }
5874         cache = lookup_cache_extent(reada, bytenr, size);
5875         if (cache) {
5876                 remove_cache_extent(reada, cache);
5877                 free(cache);
5878         }
5879         cache = lookup_cache_extent(nodes, bytenr, size);
5880         if (cache) {
5881                 remove_cache_extent(nodes, cache);
5882                 free(cache);
5883         }
5884         cache = lookup_cache_extent(extent_cache, bytenr, size);
5885         if (cache) {
5886                 rec = container_of(cache, struct extent_record, cache);
5887                 gen = rec->parent_generation;
5888         }
5889
5890         /* fixme, get the real parent transid */
5891         buf = read_tree_block(root->fs_info, bytenr, gen);
5892         if (!extent_buffer_uptodate(buf)) {
5893                 record_bad_block_io(root->fs_info,
5894                                     extent_cache, bytenr, size);
5895                 goto out;
5896         }
5897
5898         nritems = btrfs_header_nritems(buf);
5899
5900         flags = 0;
5901         if (!init_extent_tree) {
5902                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5903                                        btrfs_header_level(buf), 1, NULL,
5904                                        &flags);
5905                 if (ret < 0) {
5906                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
5907                         if (ret < 0) {
5908                                 fprintf(stderr, "Couldn't calc extent flags\n");
5909                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5910                         }
5911                 }
5912         } else {
5913                 flags = 0;
5914                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
5915                 if (ret < 0) {
5916                         fprintf(stderr, "Couldn't calc extent flags\n");
5917                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5918                 }
5919         }
5920
5921         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5922                 if (ri != NULL &&
5923                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5924                     ri->objectid == btrfs_header_owner(buf)) {
5925                         /*
5926                          * Ok we got to this block from it's original owner and
5927                          * we have FULL_BACKREF set.  Relocation can leave
5928                          * converted blocks over so this is altogether possible,
5929                          * however it's not possible if the generation > the
5930                          * last snapshot, so check for this case.
5931                          */
5932                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5933                             btrfs_header_generation(buf) > ri->last_snapshot) {
5934                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5935                                 rec->bad_full_backref = 1;
5936                         }
5937                 }
5938         } else {
5939                 if (ri != NULL &&
5940                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
5941                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
5942                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5943                         rec->bad_full_backref = 1;
5944                 }
5945         }
5946
5947         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5948                 rec->flag_block_full_backref = 1;
5949                 parent = bytenr;
5950                 owner = 0;
5951         } else {
5952                 rec->flag_block_full_backref = 0;
5953                 parent = 0;
5954                 owner = btrfs_header_owner(buf);
5955         }
5956
5957         ret = check_block(root, extent_cache, buf, flags);
5958         if (ret)
5959                 goto out;
5960
5961         if (btrfs_is_leaf(buf)) {
5962                 btree_space_waste += btrfs_leaf_free_space(root, buf);
5963                 for (i = 0; i < nritems; i++) {
5964                         struct btrfs_file_extent_item *fi;
5965
5966                         btrfs_item_key_to_cpu(buf, &key, i);
5967                         /*
5968                          * Check key type against the leaf owner.
5969                          * Could filter quite a lot of early error if
5970                          * owner is correct
5971                          */
5972                         if (check_type_with_root(btrfs_header_owner(buf),
5973                                                  key.type)) {
5974                                 fprintf(stderr, "ignoring invalid key\n");
5975                                 continue;
5976                         }
5977                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
5978                                 process_extent_item(root, extent_cache, buf,
5979                                                     i);
5980                                 continue;
5981                         }
5982                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5983                                 process_extent_item(root, extent_cache, buf,
5984                                                     i);
5985                                 continue;
5986                         }
5987                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
5988                                 total_csum_bytes +=
5989                                         btrfs_item_size_nr(buf, i);
5990                                 continue;
5991                         }
5992                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
5993                                 process_chunk_item(chunk_cache, &key, buf, i);
5994                                 continue;
5995                         }
5996                         if (key.type == BTRFS_DEV_ITEM_KEY) {
5997                                 process_device_item(dev_cache, &key, buf, i);
5998                                 continue;
5999                         }
6000                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6001                                 process_block_group_item(block_group_cache,
6002                                         &key, buf, i);
6003                                 continue;
6004                         }
6005                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6006                                 process_device_extent_item(dev_extent_cache,
6007                                         &key, buf, i);
6008                                 continue;
6009
6010                         }
6011                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6012 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6013                                 process_extent_ref_v0(extent_cache, buf, i);
6014 #else
6015                                 BUG();
6016 #endif
6017                                 continue;
6018                         }
6019
6020                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6021                                 ret = add_tree_backref(extent_cache,
6022                                                 key.objectid, 0, key.offset, 0);
6023                                 if (ret < 0)
6024                                         error(
6025                                 "add_tree_backref failed (leaf tree block): %s",
6026                                               strerror(-ret));
6027                                 continue;
6028                         }
6029                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6030                                 ret = add_tree_backref(extent_cache,
6031                                                 key.objectid, key.offset, 0, 0);
6032                                 if (ret < 0)
6033                                         error(
6034                                 "add_tree_backref failed (leaf shared block): %s",
6035                                               strerror(-ret));
6036                                 continue;
6037                         }
6038                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6039                                 struct btrfs_extent_data_ref *ref;
6040
6041                                 ref = btrfs_item_ptr(buf, i,
6042                                                 struct btrfs_extent_data_ref);
6043                                 add_data_backref(extent_cache,
6044                                         key.objectid, 0,
6045                                         btrfs_extent_data_ref_root(buf, ref),
6046                                         btrfs_extent_data_ref_objectid(buf,
6047                                                                        ref),
6048                                         btrfs_extent_data_ref_offset(buf, ref),
6049                                         btrfs_extent_data_ref_count(buf, ref),
6050                                         0, root->fs_info->sectorsize);
6051                                 continue;
6052                         }
6053                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6054                                 struct btrfs_shared_data_ref *ref;
6055
6056                                 ref = btrfs_item_ptr(buf, i,
6057                                                 struct btrfs_shared_data_ref);
6058                                 add_data_backref(extent_cache,
6059                                         key.objectid, key.offset, 0, 0, 0,
6060                                         btrfs_shared_data_ref_count(buf, ref),
6061                                         0, root->fs_info->sectorsize);
6062                                 continue;
6063                         }
6064                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6065                                 struct bad_item *bad;
6066
6067                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6068                                         continue;
6069                                 if (!owner)
6070                                         continue;
6071                                 bad = malloc(sizeof(struct bad_item));
6072                                 if (!bad)
6073                                         continue;
6074                                 INIT_LIST_HEAD(&bad->list);
6075                                 memcpy(&bad->key, &key,
6076                                        sizeof(struct btrfs_key));
6077                                 bad->root_id = owner;
6078                                 list_add_tail(&bad->list, &delete_items);
6079                                 continue;
6080                         }
6081                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6082                                 continue;
6083                         fi = btrfs_item_ptr(buf, i,
6084                                             struct btrfs_file_extent_item);
6085                         if (btrfs_file_extent_type(buf, fi) ==
6086                             BTRFS_FILE_EXTENT_INLINE)
6087                                 continue;
6088                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6089                                 continue;
6090
6091                         data_bytes_allocated +=
6092                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6093                         if (data_bytes_allocated < root->fs_info->sectorsize)
6094                                 abort();
6095
6096                         data_bytes_referenced +=
6097                                 btrfs_file_extent_num_bytes(buf, fi);
6098                         add_data_backref(extent_cache,
6099                                 btrfs_file_extent_disk_bytenr(buf, fi),
6100                                 parent, owner, key.objectid, key.offset -
6101                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6102                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6103                 }
6104         } else {
6105                 int level;
6106
6107                 level = btrfs_header_level(buf);
6108                 for (i = 0; i < nritems; i++) {
6109                         struct extent_record tmpl;
6110
6111                         ptr = btrfs_node_blockptr(buf, i);
6112                         size = root->fs_info->nodesize;
6113                         btrfs_node_key_to_cpu(buf, &key, i);
6114                         if (ri != NULL) {
6115                                 if ((level == ri->drop_level)
6116                                     && is_dropped_key(&key, &ri->drop_key)) {
6117                                         continue;
6118                                 }
6119                         }
6120
6121                         memset(&tmpl, 0, sizeof(tmpl));
6122                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6123                         tmpl.parent_generation =
6124                                 btrfs_node_ptr_generation(buf, i);
6125                         tmpl.start = ptr;
6126                         tmpl.nr = size;
6127                         tmpl.refs = 1;
6128                         tmpl.metadata = 1;
6129                         tmpl.max_size = size;
6130                         ret = add_extent_rec(extent_cache, &tmpl);
6131                         if (ret < 0)
6132                                 goto out;
6133
6134                         ret = add_tree_backref(extent_cache, ptr, parent,
6135                                         owner, 1);
6136                         if (ret < 0) {
6137                                 error(
6138                                 "add_tree_backref failed (non-leaf block): %s",
6139                                       strerror(-ret));
6140                                 continue;
6141                         }
6142
6143                         if (level > 1)
6144                                 add_pending(nodes, seen, ptr, size);
6145                         else
6146                                 add_pending(pending, seen, ptr, size);
6147                 }
6148                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
6149                                       nritems) * sizeof(struct btrfs_key_ptr);
6150         }
6151         total_btree_bytes += buf->len;
6152         if (fs_root_objectid(btrfs_header_owner(buf)))
6153                 total_fs_tree_bytes += buf->len;
6154         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6155                 total_extent_tree_bytes += buf->len;
6156 out:
6157         free_extent_buffer(buf);
6158         return ret;
6159 }
6160
6161 static int add_root_to_pending(struct extent_buffer *buf,
6162                                struct cache_tree *extent_cache,
6163                                struct cache_tree *pending,
6164                                struct cache_tree *seen,
6165                                struct cache_tree *nodes,
6166                                u64 objectid)
6167 {
6168         struct extent_record tmpl;
6169         int ret;
6170
6171         if (btrfs_header_level(buf) > 0)
6172                 add_pending(nodes, seen, buf->start, buf->len);
6173         else
6174                 add_pending(pending, seen, buf->start, buf->len);
6175
6176         memset(&tmpl, 0, sizeof(tmpl));
6177         tmpl.start = buf->start;
6178         tmpl.nr = buf->len;
6179         tmpl.is_root = 1;
6180         tmpl.refs = 1;
6181         tmpl.metadata = 1;
6182         tmpl.max_size = buf->len;
6183         add_extent_rec(extent_cache, &tmpl);
6184
6185         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6186             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6187                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6188                                 0, 1);
6189         else
6190                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6191                                 1);
6192         return ret;
6193 }
6194
6195 /* as we fix the tree, we might be deleting blocks that
6196  * we're tracking for repair.  This hook makes sure we
6197  * remove any backrefs for blocks as we are fixing them.
6198  */
6199 static int free_extent_hook(struct btrfs_trans_handle *trans,
6200                             struct btrfs_root *root,
6201                             u64 bytenr, u64 num_bytes, u64 parent,
6202                             u64 root_objectid, u64 owner, u64 offset,
6203                             int refs_to_drop)
6204 {
6205         struct extent_record *rec;
6206         struct cache_extent *cache;
6207         int is_data;
6208         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6209
6210         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6211         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6212         if (!cache)
6213                 return 0;
6214
6215         rec = container_of(cache, struct extent_record, cache);
6216         if (is_data) {
6217                 struct data_backref *back;
6218
6219                 back = find_data_backref(rec, parent, root_objectid, owner,
6220                                          offset, 1, bytenr, num_bytes);
6221                 if (!back)
6222                         goto out;
6223                 if (back->node.found_ref) {
6224                         back->found_ref -= refs_to_drop;
6225                         if (rec->refs)
6226                                 rec->refs -= refs_to_drop;
6227                 }
6228                 if (back->node.found_extent_tree) {
6229                         back->num_refs -= refs_to_drop;
6230                         if (rec->extent_item_refs)
6231                                 rec->extent_item_refs -= refs_to_drop;
6232                 }
6233                 if (back->found_ref == 0)
6234                         back->node.found_ref = 0;
6235                 if (back->num_refs == 0)
6236                         back->node.found_extent_tree = 0;
6237
6238                 if (!back->node.found_extent_tree && back->node.found_ref) {
6239                         rb_erase(&back->node.node, &rec->backref_tree);
6240                         free(back);
6241                 }
6242         } else {
6243                 struct tree_backref *back;
6244
6245                 back = find_tree_backref(rec, parent, root_objectid);
6246                 if (!back)
6247                         goto out;
6248                 if (back->node.found_ref) {
6249                         if (rec->refs)
6250                                 rec->refs--;
6251                         back->node.found_ref = 0;
6252                 }
6253                 if (back->node.found_extent_tree) {
6254                         if (rec->extent_item_refs)
6255                                 rec->extent_item_refs--;
6256                         back->node.found_extent_tree = 0;
6257                 }
6258                 if (!back->node.found_extent_tree && back->node.found_ref) {
6259                         rb_erase(&back->node.node, &rec->backref_tree);
6260                         free(back);
6261                 }
6262         }
6263         maybe_free_extent_rec(extent_cache, rec);
6264 out:
6265         return 0;
6266 }
6267
6268 static int delete_extent_records(struct btrfs_trans_handle *trans,
6269                                  struct btrfs_root *root,
6270                                  struct btrfs_path *path,
6271                                  u64 bytenr)
6272 {
6273         struct btrfs_key key;
6274         struct btrfs_key found_key;
6275         struct extent_buffer *leaf;
6276         int ret;
6277         int slot;
6278
6279
6280         key.objectid = bytenr;
6281         key.type = (u8)-1;
6282         key.offset = (u64)-1;
6283
6284         while (1) {
6285                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6286                                         &key, path, 0, 1);
6287                 if (ret < 0)
6288                         break;
6289
6290                 if (ret > 0) {
6291                         ret = 0;
6292                         if (path->slots[0] == 0)
6293                                 break;
6294                         path->slots[0]--;
6295                 }
6296                 ret = 0;
6297
6298                 leaf = path->nodes[0];
6299                 slot = path->slots[0];
6300
6301                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6302                 if (found_key.objectid != bytenr)
6303                         break;
6304
6305                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6306                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6307                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6308                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6309                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6310                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6311                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6312                         btrfs_release_path(path);
6313                         if (found_key.type == 0) {
6314                                 if (found_key.offset == 0)
6315                                         break;
6316                                 key.offset = found_key.offset - 1;
6317                                 key.type = found_key.type;
6318                         }
6319                         key.type = found_key.type - 1;
6320                         key.offset = (u64)-1;
6321                         continue;
6322                 }
6323
6324                 fprintf(stderr,
6325                         "repair deleting extent record: key [%llu,%u,%llu]\n",
6326                         found_key.objectid, found_key.type, found_key.offset);
6327
6328                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6329                 if (ret)
6330                         break;
6331                 btrfs_release_path(path);
6332
6333                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6334                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6335                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6336                                 found_key.offset : root->fs_info->nodesize;
6337
6338                         ret = btrfs_update_block_group(root, bytenr,
6339                                                        bytes, 0, 0);
6340                         if (ret)
6341                                 break;
6342                 }
6343         }
6344
6345         btrfs_release_path(path);
6346         return ret;
6347 }
6348
6349 /*
6350  * for a single backref, this will allocate a new extent
6351  * and add the backref to it.
6352  */
6353 static int record_extent(struct btrfs_trans_handle *trans,
6354                          struct btrfs_fs_info *info,
6355                          struct btrfs_path *path,
6356                          struct extent_record *rec,
6357                          struct extent_backref *back,
6358                          int allocated, u64 flags)
6359 {
6360         int ret = 0;
6361         struct btrfs_root *extent_root = info->extent_root;
6362         struct extent_buffer *leaf;
6363         struct btrfs_key ins_key;
6364         struct btrfs_extent_item *ei;
6365         struct data_backref *dback;
6366         struct btrfs_tree_block_info *bi;
6367
6368         if (!back->is_data)
6369                 rec->max_size = max_t(u64, rec->max_size,
6370                                     info->nodesize);
6371
6372         if (!allocated) {
6373                 u32 item_size = sizeof(*ei);
6374
6375                 if (!back->is_data)
6376                         item_size += sizeof(*bi);
6377
6378                 ins_key.objectid = rec->start;
6379                 ins_key.offset = rec->max_size;
6380                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6381
6382                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6383                                         &ins_key, item_size);
6384                 if (ret)
6385                         goto fail;
6386
6387                 leaf = path->nodes[0];
6388                 ei = btrfs_item_ptr(leaf, path->slots[0],
6389                                     struct btrfs_extent_item);
6390
6391                 btrfs_set_extent_refs(leaf, ei, 0);
6392                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6393
6394                 if (back->is_data) {
6395                         btrfs_set_extent_flags(leaf, ei,
6396                                                BTRFS_EXTENT_FLAG_DATA);
6397                 } else {
6398                         struct btrfs_disk_key copy_key;
6399
6400                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6401                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6402                                              sizeof(*bi));
6403
6404                         btrfs_set_disk_key_objectid(&copy_key,
6405                                                     rec->info_objectid);
6406                         btrfs_set_disk_key_type(&copy_key, 0);
6407                         btrfs_set_disk_key_offset(&copy_key, 0);
6408
6409                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6410                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6411
6412                         btrfs_set_extent_flags(leaf, ei,
6413                                         flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
6414                 }
6415
6416                 btrfs_mark_buffer_dirty(leaf);
6417                 ret = btrfs_update_block_group(extent_root, rec->start,
6418                                                rec->max_size, 1, 0);
6419                 if (ret)
6420                         goto fail;
6421                 btrfs_release_path(path);
6422         }
6423
6424         if (back->is_data) {
6425                 u64 parent;
6426                 int i;
6427
6428                 dback = to_data_backref(back);
6429                 if (back->full_backref)
6430                         parent = dback->parent;
6431                 else
6432                         parent = 0;
6433
6434                 for (i = 0; i < dback->found_ref; i++) {
6435                         /* if parent != 0, we're doing a full backref
6436                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6437                          * just makes the backref allocator create a data
6438                          * backref
6439                          */
6440                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6441                                                    rec->start, rec->max_size,
6442                                                    parent,
6443                                                    dback->root,
6444                                                    parent ?
6445                                                    BTRFS_FIRST_FREE_OBJECTID :
6446                                                    dback->owner,
6447                                                    dback->offset);
6448                         if (ret)
6449                                 break;
6450                 }
6451                 fprintf(stderr,
6452 "adding new data backref on %llu %s %llu owner %llu offset %llu found %d\n",
6453                         (unsigned long long)rec->start,
6454                         back->full_backref ? "parent" : "root",
6455                         back->full_backref ? (unsigned long long)parent :
6456                                              (unsigned long long)dback->root,
6457                         (unsigned long long)dback->owner,
6458                         (unsigned long long)dback->offset, dback->found_ref);
6459         } else {
6460                 u64 parent;
6461                 struct tree_backref *tback;
6462
6463                 tback = to_tree_backref(back);
6464                 if (back->full_backref)
6465                         parent = tback->parent;
6466                 else
6467                         parent = 0;
6468
6469                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6470                                            rec->start, rec->max_size,
6471                                            parent, tback->root, 0, 0);
6472                 fprintf(stderr,
6473 "adding new tree backref on start %llu len %llu parent %llu root %llu\n",
6474                         rec->start, rec->max_size, parent, tback->root);
6475         }
6476 fail:
6477         btrfs_release_path(path);
6478         return ret;
6479 }
6480
6481 static struct extent_entry *find_entry(struct list_head *entries,
6482                                        u64 bytenr, u64 bytes)
6483 {
6484         struct extent_entry *entry = NULL;
6485
6486         list_for_each_entry(entry, entries, list) {
6487                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6488                         return entry;
6489         }
6490
6491         return NULL;
6492 }
6493
6494 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6495 {
6496         struct extent_entry *entry, *best = NULL, *prev = NULL;
6497
6498         list_for_each_entry(entry, entries, list) {
6499                 /*
6500                  * If there are as many broken entries as entries then we know
6501                  * not to trust this particular entry.
6502                  */
6503                 if (entry->broken == entry->count)
6504                         continue;
6505
6506                 /*
6507                  * Special case, when there are only two entries and 'best' is
6508                  * the first one
6509                  */
6510                 if (!prev) {
6511                         best = entry;
6512                         prev = entry;
6513                         continue;
6514                 }
6515
6516                 /*
6517                  * If our current entry == best then we can't be sure our best
6518                  * is really the best, so we need to keep searching.
6519                  */
6520                 if (best && best->count == entry->count) {
6521                         prev = entry;
6522                         best = NULL;
6523                         continue;
6524                 }
6525
6526                 /* Prev == entry, not good enough, have to keep searching */
6527                 if (!prev->broken && prev->count == entry->count)
6528                         continue;
6529
6530                 if (!best)
6531                         best = (prev->count > entry->count) ? prev : entry;
6532                 else if (best->count < entry->count)
6533                         best = entry;
6534                 prev = entry;
6535         }
6536
6537         return best;
6538 }
6539
6540 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6541                       struct data_backref *dback, struct extent_entry *entry)
6542 {
6543         struct btrfs_trans_handle *trans;
6544         struct btrfs_root *root;
6545         struct btrfs_file_extent_item *fi;
6546         struct extent_buffer *leaf;
6547         struct btrfs_key key;
6548         u64 bytenr, bytes;
6549         int ret, err;
6550
6551         key.objectid = dback->root;
6552         key.type = BTRFS_ROOT_ITEM_KEY;
6553         key.offset = (u64)-1;
6554         root = btrfs_read_fs_root(info, &key);
6555         if (IS_ERR(root)) {
6556                 fprintf(stderr, "Couldn't find root for our ref\n");
6557                 return -EINVAL;
6558         }
6559
6560         /*
6561          * The backref points to the original offset of the extent if it was
6562          * split, so we need to search down to the offset we have and then walk
6563          * forward until we find the backref we're looking for.
6564          */
6565         key.objectid = dback->owner;
6566         key.type = BTRFS_EXTENT_DATA_KEY;
6567         key.offset = dback->offset;
6568         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6569         if (ret < 0) {
6570                 fprintf(stderr, "Error looking up ref %d\n", ret);
6571                 return ret;
6572         }
6573
6574         while (1) {
6575                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6576                         ret = btrfs_next_leaf(root, path);
6577                         if (ret) {
6578                                 fprintf(stderr, "Couldn't find our ref, next\n");
6579                                 return -EINVAL;
6580                         }
6581                 }
6582                 leaf = path->nodes[0];
6583                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6584                 if (key.objectid != dback->owner ||
6585                     key.type != BTRFS_EXTENT_DATA_KEY) {
6586                         fprintf(stderr, "Couldn't find our ref, search\n");
6587                         return -EINVAL;
6588                 }
6589                 fi = btrfs_item_ptr(leaf, path->slots[0],
6590                                     struct btrfs_file_extent_item);
6591                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6592                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6593
6594                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6595                         break;
6596                 path->slots[0]++;
6597         }
6598
6599         btrfs_release_path(path);
6600
6601         trans = btrfs_start_transaction(root, 1);
6602         if (IS_ERR(trans))
6603                 return PTR_ERR(trans);
6604
6605         /*
6606          * Ok we have the key of the file extent we want to fix, now we can cow
6607          * down to the thing and fix it.
6608          */
6609         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6610         if (ret < 0) {
6611                 fprintf(stderr, "error cowing down to ref [%llu,%u,%llu]: %d\n",
6612                         key.objectid, key.type, key.offset, ret);
6613                 goto out;
6614         }
6615         if (ret > 0) {
6616                 fprintf(stderr,
6617                 "well that's odd, we just found this key [%llu,%u,%llu]\n",
6618                         key.objectid, key.type, key.offset);
6619                 ret = -EINVAL;
6620                 goto out;
6621         }
6622         leaf = path->nodes[0];
6623         fi = btrfs_item_ptr(leaf, path->slots[0],
6624                             struct btrfs_file_extent_item);
6625
6626         if (btrfs_file_extent_compression(leaf, fi) &&
6627             dback->disk_bytenr != entry->bytenr) {
6628                 fprintf(stderr,
6629 "ref doesn't match the record start and is compressed, please take a btrfs-image of this file system and send it to a btrfs developer so they can complete this functionality for bytenr %llu\n",
6630                         dback->disk_bytenr);
6631                 ret = -EINVAL;
6632                 goto out;
6633         }
6634
6635         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6636                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6637         } else if (dback->disk_bytenr > entry->bytenr) {
6638                 u64 off_diff, offset;
6639
6640                 off_diff = dback->disk_bytenr - entry->bytenr;
6641                 offset = btrfs_file_extent_offset(leaf, fi);
6642                 if (dback->disk_bytenr + offset +
6643                     btrfs_file_extent_num_bytes(leaf, fi) >
6644                     entry->bytenr + entry->bytes) {
6645                         fprintf(stderr,
6646 "ref is past the entry end, please take a btrfs-image of this file system and send it to a btrfs developer, ref %llu\n",
6647                                 dback->disk_bytenr);
6648                         ret = -EINVAL;
6649                         goto out;
6650                 }
6651                 offset += off_diff;
6652                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6653                 btrfs_set_file_extent_offset(leaf, fi, offset);
6654         } else if (dback->disk_bytenr < entry->bytenr) {
6655                 u64 offset;
6656
6657                 offset = btrfs_file_extent_offset(leaf, fi);
6658                 if (dback->disk_bytenr + offset < entry->bytenr) {
6659                         fprintf(stderr,
6660 "ref is before the entry start, please take a btrfs-image of this file system and send it to a btrfs developer, ref %llu\n",
6661                                 dback->disk_bytenr);
6662                         ret = -EINVAL;
6663                         goto out;
6664                 }
6665
6666                 offset += dback->disk_bytenr;
6667                 offset -= entry->bytenr;
6668                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6669                 btrfs_set_file_extent_offset(leaf, fi, offset);
6670         }
6671
6672         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6673
6674         /*
6675          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6676          * only do this if we aren't using compression, otherwise it's a
6677          * trickier case.
6678          */
6679         if (!btrfs_file_extent_compression(leaf, fi))
6680                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6681         else
6682                 printf("ram bytes may be wrong?\n");
6683         btrfs_mark_buffer_dirty(leaf);
6684 out:
6685         err = btrfs_commit_transaction(trans, root);
6686         btrfs_release_path(path);
6687         return ret ? ret : err;
6688 }
6689
6690 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6691                            struct extent_record *rec)
6692 {
6693         struct extent_backref *back, *tmp;
6694         struct data_backref *dback;
6695         struct extent_entry *entry, *best = NULL;
6696         LIST_HEAD(entries);
6697         int nr_entries = 0;
6698         int broken_entries = 0;
6699         int ret = 0;
6700         short mismatch = 0;
6701
6702         /*
6703          * Metadata is easy and the backrefs should always agree on bytenr and
6704          * size, if not we've got bigger issues.
6705          */
6706         if (rec->metadata)
6707                 return 0;
6708
6709         rbtree_postorder_for_each_entry_safe(back, tmp,
6710                                              &rec->backref_tree, node) {
6711                 if (back->full_backref || !back->is_data)
6712                         continue;
6713
6714                 dback = to_data_backref(back);
6715
6716                 /*
6717                  * We only pay attention to backrefs that we found a real
6718                  * backref for.
6719                  */
6720                 if (dback->found_ref == 0)
6721                         continue;
6722
6723                 /*
6724                  * For now we only catch when the bytes don't match, not the
6725                  * bytenr.  We can easily do this at the same time, but I want
6726                  * to have a fs image to test on before we just add repair
6727                  * functionality willy-nilly so we know we won't screw up the
6728                  * repair.
6729                  */
6730
6731                 entry = find_entry(&entries, dback->disk_bytenr,
6732                                    dback->bytes);
6733                 if (!entry) {
6734                         entry = malloc(sizeof(struct extent_entry));
6735                         if (!entry) {
6736                                 ret = -ENOMEM;
6737                                 goto out;
6738                         }
6739                         memset(entry, 0, sizeof(*entry));
6740                         entry->bytenr = dback->disk_bytenr;
6741                         entry->bytes = dback->bytes;
6742                         list_add_tail(&entry->list, &entries);
6743                         nr_entries++;
6744                 }
6745
6746                 /*
6747                  * If we only have one entry we may think the entries agree when
6748                  * in reality they don't so we have to do some extra checking.
6749                  */
6750                 if (dback->disk_bytenr != rec->start ||
6751                     dback->bytes != rec->nr || back->broken)
6752                         mismatch = 1;
6753
6754                 if (back->broken) {
6755                         entry->broken++;
6756                         broken_entries++;
6757                 }
6758
6759                 entry->count++;
6760         }
6761
6762         /* Yay all the backrefs agree, carry on good sir */
6763         if (nr_entries <= 1 && !mismatch)
6764                 goto out;
6765
6766         fprintf(stderr,
6767                 "attempting to repair backref discrepency for bytenr %llu\n",
6768                 rec->start);
6769
6770         /*
6771          * First we want to see if the backrefs can agree amongst themselves who
6772          * is right, so figure out which one of the entries has the highest
6773          * count.
6774          */
6775         best = find_most_right_entry(&entries);
6776
6777         /*
6778          * Ok so we may have an even split between what the backrefs think, so
6779          * this is where we use the extent ref to see what it thinks.
6780          */
6781         if (!best) {
6782                 entry = find_entry(&entries, rec->start, rec->nr);
6783                 if (!entry && (!broken_entries || !rec->found_rec)) {
6784                         fprintf(stderr,
6785 "backrefs don't agree with each other and extent record doesn't agree with anybody, so we can't fix bytenr %llu bytes %llu\n",
6786                                 rec->start, rec->nr);
6787                         ret = -EINVAL;
6788                         goto out;
6789                 } else if (!entry) {
6790                         /*
6791                          * Ok our backrefs were broken, we'll assume this is the
6792                          * correct value and add an entry for this range.
6793                          */
6794                         entry = malloc(sizeof(struct extent_entry));
6795                         if (!entry) {
6796                                 ret = -ENOMEM;
6797                                 goto out;
6798                         }
6799                         memset(entry, 0, sizeof(*entry));
6800                         entry->bytenr = rec->start;
6801                         entry->bytes = rec->nr;
6802                         list_add_tail(&entry->list, &entries);
6803                         nr_entries++;
6804                 }
6805                 entry->count++;
6806                 best = find_most_right_entry(&entries);
6807                 if (!best) {
6808                         fprintf(stderr,
6809 "backrefs and extent record evenly split on who is right, this is going to require user input to fix bytenr %llu bytes %llu\n",
6810                                 rec->start, rec->nr);
6811                         ret = -EINVAL;
6812                         goto out;
6813                 }
6814         }
6815
6816         /*
6817          * I don't think this can happen currently as we'll abort() if we catch
6818          * this case higher up, but in case somebody removes that we still can't
6819          * deal with it properly here yet, so just bail out if that's the case.
6820          */
6821         if (best->bytenr != rec->start) {
6822                 fprintf(stderr,
6823 "extent start and backref starts don't match, please use btrfs-image on this file system and send it to a btrfs developer so they can make fsck fix this particular case.  bytenr is %llu, bytes is %llu\n",
6824                         rec->start, rec->nr);
6825                 ret = -EINVAL;
6826                 goto out;
6827         }
6828
6829         /*
6830          * Ok great we all agreed on an extent record, let's go find the real
6831          * references and fix up the ones that don't match.
6832          */
6833         rbtree_postorder_for_each_entry_safe(back, tmp,
6834                                              &rec->backref_tree, node) {
6835                 if (back->full_backref || !back->is_data)
6836                         continue;
6837
6838                 dback = to_data_backref(back);
6839
6840                 /*
6841                  * Still ignoring backrefs that don't have a real ref attached
6842                  * to them.
6843                  */
6844                 if (dback->found_ref == 0)
6845                         continue;
6846
6847                 if (dback->bytes == best->bytes &&
6848                     dback->disk_bytenr == best->bytenr)
6849                         continue;
6850
6851                 ret = repair_ref(info, path, dback, best);
6852                 if (ret)
6853                         goto out;
6854         }
6855
6856         /*
6857          * Ok we messed with the actual refs, which means we need to drop our
6858          * entire cache and go back and rescan.  I know this is a huge pain and
6859          * adds a lot of extra work, but it's the only way to be safe.  Once all
6860          * the backrefs agree we may not need to do anything to the extent
6861          * record itself.
6862          */
6863         ret = -EAGAIN;
6864 out:
6865         while (!list_empty(&entries)) {
6866                 entry = list_entry(entries.next, struct extent_entry, list);
6867                 list_del_init(&entry->list);
6868                 free(entry);
6869         }
6870         return ret;
6871 }
6872
6873 static int process_duplicates(struct cache_tree *extent_cache,
6874                               struct extent_record *rec)
6875 {
6876         struct extent_record *good, *tmp;
6877         struct cache_extent *cache;
6878         int ret;
6879
6880         /*
6881          * If we found a extent record for this extent then return, or if we
6882          * have more than one duplicate we are likely going to need to delete
6883          * something.
6884          */
6885         if (rec->found_rec || rec->num_duplicates > 1)
6886                 return 0;
6887
6888         /* Shouldn't happen but just in case */
6889         BUG_ON(!rec->num_duplicates);
6890
6891         /*
6892          * So this happens if we end up with a backref that doesn't match the
6893          * actual extent entry.  So either the backref is bad or the extent
6894          * entry is bad.  Either way we want to have the extent_record actually
6895          * reflect what we found in the extent_tree, so we need to take the
6896          * duplicate out and use that as the extent_record since the only way we
6897          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6898          */
6899         remove_cache_extent(extent_cache, &rec->cache);
6900
6901         good = to_extent_record(rec->dups.next);
6902         list_del_init(&good->list);
6903         INIT_LIST_HEAD(&good->backrefs);
6904         INIT_LIST_HEAD(&good->dups);
6905         good->cache.start = good->start;
6906         good->cache.size = good->nr;
6907         good->content_checked = 0;
6908         good->owner_ref_checked = 0;
6909         good->num_duplicates = 0;
6910         good->refs = rec->refs;
6911         list_splice_init(&rec->backrefs, &good->backrefs);
6912         while (1) {
6913                 cache = lookup_cache_extent(extent_cache, good->start,
6914                                             good->nr);
6915                 if (!cache)
6916                         break;
6917                 tmp = container_of(cache, struct extent_record, cache);
6918
6919                 /*
6920                  * If we find another overlapping extent and it's found_rec is
6921                  * set then it's a duplicate and we need to try and delete
6922                  * something.
6923                  */
6924                 if (tmp->found_rec || tmp->num_duplicates > 0) {
6925                         if (list_empty(&good->list))
6926                                 list_add_tail(&good->list,
6927                                               &duplicate_extents);
6928                         good->num_duplicates += tmp->num_duplicates + 1;
6929                         list_splice_init(&tmp->dups, &good->dups);
6930                         list_del_init(&tmp->list);
6931                         list_add_tail(&tmp->list, &good->dups);
6932                         remove_cache_extent(extent_cache, &tmp->cache);
6933                         continue;
6934                 }
6935
6936                 /*
6937                  * Ok we have another non extent item backed extent rec, so lets
6938                  * just add it to this extent and carry on like we did above.
6939                  */
6940                 good->refs += tmp->refs;
6941                 list_splice_init(&tmp->backrefs, &good->backrefs);
6942                 remove_cache_extent(extent_cache, &tmp->cache);
6943                 free(tmp);
6944         }
6945         ret = insert_cache_extent(extent_cache, &good->cache);
6946         BUG_ON(ret);
6947         free(rec);
6948         return good->num_duplicates ? 0 : 1;
6949 }
6950
6951 static int delete_duplicate_records(struct btrfs_root *root,
6952                                     struct extent_record *rec)
6953 {
6954         struct btrfs_trans_handle *trans;
6955         LIST_HEAD(delete_list);
6956         struct btrfs_path path;
6957         struct extent_record *tmp, *good, *n;
6958         int nr_del = 0;
6959         int ret = 0, err;
6960         struct btrfs_key key;
6961
6962         btrfs_init_path(&path);
6963
6964         good = rec;
6965         /* Find the record that covers all of the duplicates. */
6966         list_for_each_entry(tmp, &rec->dups, list) {
6967                 if (good->start < tmp->start)
6968                         continue;
6969                 if (good->nr > tmp->nr)
6970                         continue;
6971
6972                 if (tmp->start + tmp->nr < good->start + good->nr) {
6973                         fprintf(stderr,
6974 "Ok we have overlapping extents that aren't completely covered by each other, this is going to require more careful thought. The extents are [%llu-%llu] and [%llu-%llu]\n",
6975                                 tmp->start, tmp->nr, good->start, good->nr);
6976                         abort();
6977                 }
6978                 good = tmp;
6979         }
6980
6981         if (good != rec)
6982                 list_add_tail(&rec->list, &delete_list);
6983
6984         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
6985                 if (tmp == good)
6986                         continue;
6987                 list_move_tail(&tmp->list, &delete_list);
6988         }
6989
6990         root = root->fs_info->extent_root;
6991         trans = btrfs_start_transaction(root, 1);
6992         if (IS_ERR(trans)) {
6993                 ret = PTR_ERR(trans);
6994                 goto out;
6995         }
6996
6997         list_for_each_entry(tmp, &delete_list, list) {
6998                 if (tmp->found_rec == 0)
6999                         continue;
7000                 key.objectid = tmp->start;
7001                 key.type = BTRFS_EXTENT_ITEM_KEY;
7002                 key.offset = tmp->nr;
7003
7004                 /* Shouldn't happen but just in case */
7005                 if (tmp->metadata) {
7006                         fprintf(stderr,
7007 "well this shouldn't happen, extent record overlaps but is metadata? [%llu, %llu]\n",
7008                                 tmp->start, tmp->nr);
7009                         abort();
7010                 }
7011
7012                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
7013                 if (ret) {
7014                         if (ret > 0)
7015                                 ret = -EINVAL;
7016                         break;
7017                 }
7018                 ret = btrfs_del_item(trans, root, &path);
7019                 if (ret)
7020                         break;
7021                 btrfs_release_path(&path);
7022                 nr_del++;
7023         }
7024         err = btrfs_commit_transaction(trans, root);
7025         if (err && !ret)
7026                 ret = err;
7027 out:
7028         while (!list_empty(&delete_list)) {
7029                 tmp = to_extent_record(delete_list.next);
7030                 list_del_init(&tmp->list);
7031                 if (tmp == rec)
7032                         continue;
7033                 free(tmp);
7034         }
7035
7036         while (!list_empty(&rec->dups)) {
7037                 tmp = to_extent_record(rec->dups.next);
7038                 list_del_init(&tmp->list);
7039                 free(tmp);
7040         }
7041
7042         btrfs_release_path(&path);
7043
7044         if (!ret && !nr_del)
7045                 rec->num_duplicates = 0;
7046
7047         return ret ? ret : nr_del;
7048 }
7049
7050 static int find_possible_backrefs(struct btrfs_fs_info *info,
7051                                   struct btrfs_path *path,
7052                                   struct cache_tree *extent_cache,
7053                                   struct extent_record *rec)
7054 {
7055         struct btrfs_root *root;
7056         struct extent_backref *back, *tmp;
7057         struct data_backref *dback;
7058         struct cache_extent *cache;
7059         struct btrfs_file_extent_item *fi;
7060         struct btrfs_key key;
7061         u64 bytenr, bytes;
7062         int ret;
7063
7064         rbtree_postorder_for_each_entry_safe(back, tmp,
7065                                              &rec->backref_tree, node) {
7066                 /* Don't care about full backrefs (poor unloved backrefs) */
7067                 if (back->full_backref || !back->is_data)
7068                         continue;
7069
7070                 dback = to_data_backref(back);
7071
7072                 /* We found this one, we don't need to do a lookup */
7073                 if (dback->found_ref)
7074                         continue;
7075
7076                 key.objectid = dback->root;
7077                 key.type = BTRFS_ROOT_ITEM_KEY;
7078                 key.offset = (u64)-1;
7079
7080                 root = btrfs_read_fs_root(info, &key);
7081
7082                 /* No root, definitely a bad ref, skip */
7083                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7084                         continue;
7085                 /* Other err, exit */
7086                 if (IS_ERR(root))
7087                         return PTR_ERR(root);
7088
7089                 key.objectid = dback->owner;
7090                 key.type = BTRFS_EXTENT_DATA_KEY;
7091                 key.offset = dback->offset;
7092                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7093                 if (ret) {
7094                         btrfs_release_path(path);
7095                         if (ret < 0)
7096                                 return ret;
7097                         /* Didn't find it, we can carry on */
7098                         ret = 0;
7099                         continue;
7100                 }
7101
7102                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7103                                     struct btrfs_file_extent_item);
7104                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7105                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7106                 btrfs_release_path(path);
7107                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7108                 if (cache) {
7109                         struct extent_record *tmp;
7110
7111                         tmp = container_of(cache, struct extent_record, cache);
7112
7113                         /*
7114                          * If we found an extent record for the bytenr for this
7115                          * particular backref then we can't add it to our
7116                          * current extent record.  We only want to add backrefs
7117                          * that don't have a corresponding extent item in the
7118                          * extent tree since they likely belong to this record
7119                          * and we need to fix it if it doesn't match bytenrs.
7120                          */
7121                         if  (tmp->found_rec)
7122                                 continue;
7123                 }
7124
7125                 dback->found_ref += 1;
7126                 dback->disk_bytenr = bytenr;
7127                 dback->bytes = bytes;
7128
7129                 /*
7130                  * Set this so the verify backref code knows not to trust the
7131                  * values in this backref.
7132                  */
7133                 back->broken = 1;
7134         }
7135
7136         return 0;
7137 }
7138
7139 /*
7140  * Record orphan data ref into corresponding root.
7141  *
7142  * Return 0 if the extent item contains data ref and recorded.
7143  * Return 1 if the extent item contains no useful data ref
7144  *   On that case, it may contains only shared_dataref or metadata backref
7145  *   or the file extent exists(this should be handled by the extent bytenr
7146  *   recovery routine)
7147  * Return <0 if something goes wrong.
7148  */
7149 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7150                                       struct extent_record *rec)
7151 {
7152         struct btrfs_key key;
7153         struct btrfs_root *dest_root;
7154         struct extent_backref *back, *tmp;
7155         struct data_backref *dback;
7156         struct orphan_data_extent *orphan;
7157         struct btrfs_path path;
7158         int recorded_data_ref = 0;
7159         int ret = 0;
7160
7161         if (rec->metadata)
7162                 return 1;
7163         btrfs_init_path(&path);
7164         rbtree_postorder_for_each_entry_safe(back, tmp,
7165                                              &rec->backref_tree, node) {
7166                 if (back->full_backref || !back->is_data ||
7167                     !back->found_extent_tree)
7168                         continue;
7169                 dback = to_data_backref(back);
7170                 if (dback->found_ref)
7171                         continue;
7172                 key.objectid = dback->root;
7173                 key.type = BTRFS_ROOT_ITEM_KEY;
7174                 key.offset = (u64)-1;
7175
7176                 dest_root = btrfs_read_fs_root(fs_info, &key);
7177
7178                 /* For non-exist root we just skip it */
7179                 if (IS_ERR(dest_root) || !dest_root)
7180                         continue;
7181
7182                 key.objectid = dback->owner;
7183                 key.type = BTRFS_EXTENT_DATA_KEY;
7184                 key.offset = dback->offset;
7185
7186                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
7187                 btrfs_release_path(&path);
7188                 /*
7189                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7190                  * we need to record it for inode/file extent rebuild.
7191                  * For ret > 0, we record it only for file extent rebuild.
7192                  * For ret == 0, the file extent exists but only bytenr
7193                  * mismatch, let the original bytenr fix routine to handle,
7194                  * don't record it.
7195                  */
7196                 if (ret == 0)
7197                         continue;
7198                 ret = 0;
7199                 orphan = malloc(sizeof(*orphan));
7200                 if (!orphan) {
7201                         ret = -ENOMEM;
7202                         goto out;
7203                 }
7204                 INIT_LIST_HEAD(&orphan->list);
7205                 orphan->root = dback->root;
7206                 orphan->objectid = dback->owner;
7207                 orphan->offset = dback->offset;
7208                 orphan->disk_bytenr = rec->cache.start;
7209                 orphan->disk_len = rec->cache.size;
7210                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7211                 recorded_data_ref = 1;
7212         }
7213 out:
7214         btrfs_release_path(&path);
7215         if (!ret)
7216                 return !recorded_data_ref;
7217         else
7218                 return ret;
7219 }
7220
7221 /*
7222  * when an incorrect extent item is found, this will delete
7223  * all of the existing entries for it and recreate them
7224  * based on what the tree scan found.
7225  */
7226 static int fixup_extent_refs(struct btrfs_fs_info *info,
7227                              struct cache_tree *extent_cache,
7228                              struct extent_record *rec)
7229 {
7230         struct btrfs_trans_handle *trans = NULL;
7231         int ret;
7232         struct btrfs_path path;
7233         struct cache_extent *cache;
7234         struct extent_backref *back, *tmp;
7235         int allocated = 0;
7236         u64 flags = 0;
7237
7238         if (rec->flag_block_full_backref)
7239                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7240
7241         btrfs_init_path(&path);
7242         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7243                 /*
7244                  * Sometimes the backrefs themselves are so broken they don't
7245                  * get attached to any meaningful rec, so first go back and
7246                  * check any of our backrefs that we couldn't find and throw
7247                  * them into the list if we find the backref so that
7248                  * verify_backrefs can figure out what to do.
7249                  */
7250                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
7251                 if (ret < 0)
7252                         goto out;
7253         }
7254
7255         /* step one, make sure all of the backrefs agree */
7256         ret = verify_backrefs(info, &path, rec);
7257         if (ret < 0)
7258                 goto out;
7259
7260         trans = btrfs_start_transaction(info->extent_root, 1);
7261         if (IS_ERR(trans)) {
7262                 ret = PTR_ERR(trans);
7263                 goto out;
7264         }
7265
7266         /* step two, delete all the existing records */
7267         ret = delete_extent_records(trans, info->extent_root, &path,
7268                                     rec->start);
7269
7270         if (ret < 0)
7271                 goto out;
7272
7273         /* was this block corrupt?  If so, don't add references to it */
7274         cache = lookup_cache_extent(info->corrupt_blocks,
7275                                     rec->start, rec->max_size);
7276         if (cache) {
7277                 ret = 0;
7278                 goto out;
7279         }
7280
7281         /* step three, recreate all the refs we did find */
7282         rbtree_postorder_for_each_entry_safe(back, tmp,
7283                                              &rec->backref_tree, node) {
7284                 /*
7285                  * if we didn't find any references, don't create a
7286                  * new extent record
7287                  */
7288                 if (!back->found_ref)
7289                         continue;
7290
7291                 rec->bad_full_backref = 0;
7292                 ret = record_extent(trans, info, &path, rec, back, allocated,
7293                                     flags);
7294                 allocated = 1;
7295
7296                 if (ret)
7297                         goto out;
7298         }
7299 out:
7300         if (trans) {
7301                 int err = btrfs_commit_transaction(trans, info->extent_root);
7302
7303                 if (!ret)
7304                         ret = err;
7305         }
7306
7307         if (!ret)
7308                 fprintf(stderr, "Repaired extent references for %llu\n",
7309                                 (unsigned long long)rec->start);
7310
7311         btrfs_release_path(&path);
7312         return ret;
7313 }
7314
7315 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7316                               struct extent_record *rec)
7317 {
7318         struct btrfs_trans_handle *trans;
7319         struct btrfs_root *root = fs_info->extent_root;
7320         struct btrfs_path path;
7321         struct btrfs_extent_item *ei;
7322         struct btrfs_key key;
7323         u64 flags;
7324         int ret = 0;
7325
7326         key.objectid = rec->start;
7327         if (rec->metadata) {
7328                 key.type = BTRFS_METADATA_ITEM_KEY;
7329                 key.offset = rec->info_level;
7330         } else {
7331                 key.type = BTRFS_EXTENT_ITEM_KEY;
7332                 key.offset = rec->max_size;
7333         }
7334
7335         trans = btrfs_start_transaction(root, 0);
7336         if (IS_ERR(trans))
7337                 return PTR_ERR(trans);
7338
7339         btrfs_init_path(&path);
7340         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
7341         if (ret < 0) {
7342                 btrfs_release_path(&path);
7343                 btrfs_commit_transaction(trans, root);
7344                 return ret;
7345         } else if (ret) {
7346                 fprintf(stderr, "Didn't find extent for %llu\n",
7347                         (unsigned long long)rec->start);
7348                 btrfs_release_path(&path);
7349                 btrfs_commit_transaction(trans, root);
7350                 return -ENOENT;
7351         }
7352
7353         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
7354                             struct btrfs_extent_item);
7355         flags = btrfs_extent_flags(path.nodes[0], ei);
7356         if (rec->flag_block_full_backref) {
7357                 fprintf(stderr, "setting full backref on %llu\n",
7358                         (unsigned long long)key.objectid);
7359                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7360         } else {
7361                 fprintf(stderr, "clearing full backref on %llu\n",
7362                         (unsigned long long)key.objectid);
7363                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7364         }
7365         btrfs_set_extent_flags(path.nodes[0], ei, flags);
7366         btrfs_mark_buffer_dirty(path.nodes[0]);
7367         btrfs_release_path(&path);
7368         ret = btrfs_commit_transaction(trans, root);
7369         if (!ret)
7370                 fprintf(stderr, "Repaired extent flags for %llu\n",
7371                                 (unsigned long long)rec->start);
7372
7373         return ret;
7374 }
7375
7376 /* right now we only prune from the extent allocation tree */
7377 static int prune_one_block(struct btrfs_trans_handle *trans,
7378                            struct btrfs_fs_info *info,
7379                            struct btrfs_corrupt_block *corrupt)
7380 {
7381         int ret;
7382         struct btrfs_path path;
7383         struct extent_buffer *eb;
7384         u64 found;
7385         int slot;
7386         int nritems;
7387         int level = corrupt->level + 1;
7388
7389         btrfs_init_path(&path);
7390 again:
7391         /* we want to stop at the parent to our busted block */
7392         path.lowest_level = level;
7393
7394         ret = btrfs_search_slot(trans, info->extent_root,
7395                                 &corrupt->key, &path, -1, 1);
7396
7397         if (ret < 0)
7398                 goto out;
7399
7400         eb = path.nodes[level];
7401         if (!eb) {
7402                 ret = -ENOENT;
7403                 goto out;
7404         }
7405
7406         /*
7407          * hopefully the search gave us the block we want to prune,
7408          * lets try that first
7409          */
7410         slot = path.slots[level];
7411         found =  btrfs_node_blockptr(eb, slot);
7412         if (found == corrupt->cache.start)
7413                 goto del_ptr;
7414
7415         nritems = btrfs_header_nritems(eb);
7416
7417         /* the search failed, lets scan this node and hope we find it */
7418         for (slot = 0; slot < nritems; slot++) {
7419                 found =  btrfs_node_blockptr(eb, slot);
7420                 if (found == corrupt->cache.start)
7421                         goto del_ptr;
7422         }
7423         /*
7424          * We couldn't find the bad block.
7425          * TODO: search all the nodes for pointers to this block
7426          */
7427         if (eb == info->extent_root->node) {
7428                 ret = -ENOENT;
7429                 goto out;
7430         } else {
7431                 level++;
7432                 btrfs_release_path(&path);
7433                 goto again;
7434         }
7435
7436 del_ptr:
7437         printk("deleting pointer to block %llu\n", corrupt->cache.start);
7438         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
7439
7440 out:
7441         btrfs_release_path(&path);
7442         return ret;
7443 }
7444
7445 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7446 {
7447         struct btrfs_trans_handle *trans = NULL;
7448         struct cache_extent *cache;
7449         struct btrfs_corrupt_block *corrupt;
7450
7451         while (1) {
7452                 cache = search_cache_extent(info->corrupt_blocks, 0);
7453                 if (!cache)
7454                         break;
7455                 if (!trans) {
7456                         trans = btrfs_start_transaction(info->extent_root, 1);
7457                         if (IS_ERR(trans))
7458                                 return PTR_ERR(trans);
7459                 }
7460                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7461                 prune_one_block(trans, info, corrupt);
7462                 remove_cache_extent(info->corrupt_blocks, cache);
7463         }
7464         if (trans)
7465                 return btrfs_commit_transaction(trans, info->extent_root);
7466         return 0;
7467 }
7468
7469 static int check_extent_refs(struct btrfs_root *root,
7470                              struct cache_tree *extent_cache)
7471 {
7472         struct extent_record *rec;
7473         struct cache_extent *cache;
7474         int ret = 0;
7475         int had_dups = 0;
7476         int err = 0;
7477
7478         if (repair) {
7479                 /*
7480                  * if we're doing a repair, we have to make sure
7481                  * we don't allocate from the problem extents.
7482                  * In the worst case, this will be all the
7483                  * extents in the FS
7484                  */
7485                 cache = search_cache_extent(extent_cache, 0);
7486                 while (cache) {
7487                         rec = container_of(cache, struct extent_record, cache);
7488                         set_extent_dirty(root->fs_info->excluded_extents,
7489                                          rec->start,
7490                                          rec->start + rec->max_size - 1);
7491                         cache = next_cache_extent(cache);
7492                 }
7493
7494                 /* pin down all the corrupted blocks too */
7495                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7496                 while (cache) {
7497                         set_extent_dirty(root->fs_info->excluded_extents,
7498                                          cache->start,
7499                                          cache->start + cache->size - 1);
7500                         cache = next_cache_extent(cache);
7501                 }
7502                 prune_corrupt_blocks(root->fs_info);
7503                 reset_cached_block_groups(root->fs_info);
7504         }
7505
7506         reset_cached_block_groups(root->fs_info);
7507
7508         /*
7509          * We need to delete any duplicate entries we find first otherwise we
7510          * could mess up the extent tree when we have backrefs that actually
7511          * belong to a different extent item and not the weird duplicate one.
7512          */
7513         while (repair && !list_empty(&duplicate_extents)) {
7514                 rec = to_extent_record(duplicate_extents.next);
7515                 list_del_init(&rec->list);
7516
7517                 /* Sometimes we can find a backref before we find an actual
7518                  * extent, so we need to process it a little bit to see if there
7519                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7520                  * if this is a backref screwup.  If we need to delete stuff
7521                  * process_duplicates() will return 0, otherwise it will return
7522                  * 1 and we
7523                  */
7524                 if (process_duplicates(extent_cache, rec))
7525                         continue;
7526                 ret = delete_duplicate_records(root, rec);
7527                 if (ret < 0)
7528                         return ret;
7529                 /*
7530                  * delete_duplicate_records will return the number of entries
7531                  * deleted, so if it's greater than 0 then we know we actually
7532                  * did something and we need to remove.
7533                  */
7534                 if (ret)
7535                         had_dups = 1;
7536         }
7537
7538         if (had_dups)
7539                 return -EAGAIN;
7540
7541         while (1) {
7542                 int cur_err = 0;
7543                 int fix = 0;
7544
7545                 cache = search_cache_extent(extent_cache, 0);
7546                 if (!cache)
7547                         break;
7548                 rec = container_of(cache, struct extent_record, cache);
7549                 if (rec->num_duplicates) {
7550                         fprintf(stderr,
7551                                 "extent item %llu has multiple extent items\n",
7552                                 (unsigned long long)rec->start);
7553                         cur_err = 1;
7554                 }
7555
7556                 if (rec->refs != rec->extent_item_refs) {
7557                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7558                                 (unsigned long long)rec->start,
7559                                 (unsigned long long)rec->nr);
7560                         fprintf(stderr, "extent item %llu, found %llu\n",
7561                                 (unsigned long long)rec->extent_item_refs,
7562                                 (unsigned long long)rec->refs);
7563                         ret = record_orphan_data_extents(root->fs_info, rec);
7564                         if (ret < 0)
7565                                 goto repair_abort;
7566                         fix = ret;
7567                         cur_err = 1;
7568                 }
7569                 if (all_backpointers_checked(rec, 1)) {
7570                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7571                                 (unsigned long long)rec->start,
7572                                 (unsigned long long)rec->nr);
7573                         fix = 1;
7574                         cur_err = 1;
7575                 }
7576                 if (!rec->owner_ref_checked) {
7577                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7578                                 (unsigned long long)rec->start,
7579                                 (unsigned long long)rec->nr);
7580                         fix = 1;
7581                         cur_err = 1;
7582                 }
7583
7584                 if (repair && fix) {
7585                         ret = fixup_extent_refs(root->fs_info, extent_cache,
7586                                                 rec);
7587                         if (ret)
7588                                 goto repair_abort;
7589                 }
7590
7591
7592                 if (rec->bad_full_backref) {
7593                         fprintf(stderr, "bad full backref, on [%llu]\n",
7594                                 (unsigned long long)rec->start);
7595                         if (repair) {
7596                                 ret = fixup_extent_flags(root->fs_info, rec);
7597                                 if (ret)
7598                                         goto repair_abort;
7599                                 fix = 1;
7600                         }
7601                         cur_err = 1;
7602                 }
7603                 /*
7604                  * Although it's not a extent ref's problem, we reuse this
7605                  * routine for error reporting.
7606                  * No repair function yet.
7607                  */
7608                 if (rec->crossing_stripes) {
7609                         fprintf(stderr,
7610                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7611                                 rec->start, rec->start + rec->max_size);
7612                         cur_err = 1;
7613                 }
7614
7615                 if (rec->wrong_chunk_type) {
7616                         fprintf(stderr,
7617                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7618                                 rec->start, rec->start + rec->max_size);
7619                         cur_err = 1;
7620                 }
7621
7622                 err = cur_err;
7623                 remove_cache_extent(extent_cache, cache);
7624                 free_all_extent_backrefs(rec);
7625                 if (!init_extent_tree && repair && (!cur_err || fix))
7626                         clear_extent_dirty(root->fs_info->excluded_extents,
7627                                            rec->start,
7628                                            rec->start + rec->max_size - 1);
7629                 free(rec);
7630         }
7631 repair_abort:
7632         if (repair) {
7633                 if (ret && ret != -EAGAIN) {
7634                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7635                         exit(1);
7636                 } else if (!ret) {
7637                         struct btrfs_trans_handle *trans;
7638
7639                         root = root->fs_info->extent_root;
7640                         trans = btrfs_start_transaction(root, 1);
7641                         if (IS_ERR(trans)) {
7642                                 ret = PTR_ERR(trans);
7643                                 goto repair_abort;
7644                         }
7645
7646                         ret = btrfs_fix_block_accounting(trans, root);
7647                         if (ret)
7648                                 goto repair_abort;
7649                         ret = btrfs_commit_transaction(trans, root);
7650                         if (ret)
7651                                 goto repair_abort;
7652                 }
7653                 return ret;
7654         }
7655
7656         if (err)
7657                 err = -EIO;
7658         return err;
7659 }
7660
7661 /*
7662  * Check the chunk with its block group/dev list ref:
7663  * Return 0 if all refs seems valid.
7664  * Return 1 if part of refs seems valid, need later check for rebuild ref
7665  * like missing block group and needs to search extent tree to rebuild them.
7666  * Return -1 if essential refs are missing and unable to rebuild.
7667  */
7668 static int check_chunk_refs(struct chunk_record *chunk_rec,
7669                             struct block_group_tree *block_group_cache,
7670                             struct device_extent_tree *dev_extent_cache,
7671                             int silent)
7672 {
7673         struct cache_extent *block_group_item;
7674         struct block_group_record *block_group_rec;
7675         struct cache_extent *dev_extent_item;
7676         struct device_extent_record *dev_extent_rec;
7677         u64 devid;
7678         u64 offset;
7679         u64 length;
7680         int metadump_v2 = 0;
7681         int i;
7682         int ret = 0;
7683
7684         block_group_item = lookup_cache_extent(&block_group_cache->tree,
7685                                                chunk_rec->offset,
7686                                                chunk_rec->length);
7687         if (block_group_item) {
7688                 block_group_rec = container_of(block_group_item,
7689                                                struct block_group_record,
7690                                                cache);
7691                 if (chunk_rec->length != block_group_rec->offset ||
7692                     chunk_rec->offset != block_group_rec->objectid ||
7693                     (!metadump_v2 &&
7694                      chunk_rec->type_flags != block_group_rec->flags)) {
7695                         if (!silent)
7696                                 fprintf(stderr,
7697                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7698                                         chunk_rec->objectid,
7699                                         chunk_rec->type,
7700                                         chunk_rec->offset,
7701                                         chunk_rec->length,
7702                                         chunk_rec->offset,
7703                                         chunk_rec->type_flags,
7704                                         block_group_rec->objectid,
7705                                         block_group_rec->type,
7706                                         block_group_rec->offset,
7707                                         block_group_rec->offset,
7708                                         block_group_rec->objectid,
7709                                         block_group_rec->flags);
7710                         ret = -1;
7711                 } else {
7712                         list_del_init(&block_group_rec->list);
7713                         chunk_rec->bg_rec = block_group_rec;
7714                 }
7715         } else {
7716                 if (!silent)
7717                         fprintf(stderr,
7718                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7719                                 chunk_rec->objectid,
7720                                 chunk_rec->type,
7721                                 chunk_rec->offset,
7722                                 chunk_rec->length,
7723                                 chunk_rec->offset,
7724                                 chunk_rec->type_flags);
7725                 ret = 1;
7726         }
7727
7728         if (metadump_v2)
7729                 return ret;
7730
7731         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7732                                     chunk_rec->num_stripes);
7733         for (i = 0; i < chunk_rec->num_stripes; ++i) {
7734                 devid = chunk_rec->stripes[i].devid;
7735                 offset = chunk_rec->stripes[i].offset;
7736                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7737                                                        devid, offset, length);
7738                 if (dev_extent_item) {
7739                         dev_extent_rec = container_of(dev_extent_item,
7740                                                 struct device_extent_record,
7741                                                 cache);
7742                         if (dev_extent_rec->objectid != devid ||
7743                             dev_extent_rec->offset != offset ||
7744                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
7745                             dev_extent_rec->length != length) {
7746                                 if (!silent)
7747                                         fprintf(stderr,
7748                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7749                                                 chunk_rec->objectid,
7750                                                 chunk_rec->type,
7751                                                 chunk_rec->offset,
7752                                                 chunk_rec->stripes[i].devid,
7753                                                 chunk_rec->stripes[i].offset,
7754                                                 dev_extent_rec->objectid,
7755                                                 dev_extent_rec->offset,
7756                                                 dev_extent_rec->length);
7757                                 ret = -1;
7758                         } else {
7759                                 list_move(&dev_extent_rec->chunk_list,
7760                                           &chunk_rec->dextents);
7761                         }
7762                 } else {
7763                         if (!silent)
7764                                 fprintf(stderr,
7765                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7766                                         chunk_rec->objectid,
7767                                         chunk_rec->type,
7768                                         chunk_rec->offset,
7769                                         chunk_rec->stripes[i].devid,
7770                                         chunk_rec->stripes[i].offset);
7771                         ret = -1;
7772                 }
7773         }
7774         return ret;
7775 }
7776
7777 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7778 int check_chunks(struct cache_tree *chunk_cache,
7779                  struct block_group_tree *block_group_cache,
7780                  struct device_extent_tree *dev_extent_cache,
7781                  struct list_head *good, struct list_head *bad,
7782                  struct list_head *rebuild, int silent)
7783 {
7784         struct cache_extent *chunk_item;
7785         struct chunk_record *chunk_rec;
7786         struct block_group_record *bg_rec;
7787         struct device_extent_record *dext_rec;
7788         int err;
7789         int ret = 0;
7790
7791         chunk_item = first_cache_extent(chunk_cache);
7792         while (chunk_item) {
7793                 chunk_rec = container_of(chunk_item, struct chunk_record,
7794                                          cache);
7795                 err = check_chunk_refs(chunk_rec, block_group_cache,
7796                                        dev_extent_cache, silent);
7797                 if (err < 0)
7798                         ret = err;
7799                 if (err == 0 && good)
7800                         list_add_tail(&chunk_rec->list, good);
7801                 if (err > 0 && rebuild)
7802                         list_add_tail(&chunk_rec->list, rebuild);
7803                 if (err < 0 && bad)
7804                         list_add_tail(&chunk_rec->list, bad);
7805                 chunk_item = next_cache_extent(chunk_item);
7806         }
7807
7808         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7809                 if (!silent)
7810                         fprintf(stderr,
7811                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7812                                 bg_rec->objectid,
7813                                 bg_rec->offset,
7814                                 bg_rec->flags);
7815                 if (!ret)
7816                         ret = 1;
7817         }
7818
7819         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7820                             chunk_list) {
7821                 if (!silent)
7822                         fprintf(stderr,
7823                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7824                                 dext_rec->objectid,
7825                                 dext_rec->offset,
7826                                 dext_rec->length);
7827                 if (!ret)
7828                         ret = 1;
7829         }
7830         return ret;
7831 }
7832
7833
7834 static int check_device_used(struct device_record *dev_rec,
7835                              struct device_extent_tree *dext_cache)
7836 {
7837         struct cache_extent *cache;
7838         struct device_extent_record *dev_extent_rec;
7839         u64 total_byte = 0;
7840
7841         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7842         while (cache) {
7843                 dev_extent_rec = container_of(cache,
7844                                               struct device_extent_record,
7845                                               cache);
7846                 if (dev_extent_rec->objectid != dev_rec->devid)
7847                         break;
7848
7849                 list_del_init(&dev_extent_rec->device_list);
7850                 total_byte += dev_extent_rec->length;
7851                 cache = next_cache_extent(cache);
7852         }
7853
7854         if (total_byte != dev_rec->byte_used) {
7855                 fprintf(stderr,
7856                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7857                         total_byte, dev_rec->byte_used, dev_rec->objectid,
7858                         dev_rec->type, dev_rec->offset);
7859                 return -1;
7860         } else {
7861                 return 0;
7862         }
7863 }
7864
7865 /*
7866  * Unlike device size alignment check above, some super total_bytes check
7867  * failure can lead to mount failure for newer kernel.
7868  *
7869  * So this function will return the error for a fatal super total_bytes problem.
7870  */
7871 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
7872 {
7873         struct btrfs_device *dev;
7874         struct list_head *dev_list = &fs_info->fs_devices->devices;
7875         u64 total_bytes = 0;
7876         u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
7877
7878         list_for_each_entry(dev, dev_list, dev_list)
7879                 total_bytes += dev->total_bytes;
7880
7881         /* Important check, which can cause unmountable fs */
7882         if (super_bytes < total_bytes) {
7883                 error("super total bytes %llu smaller than real device(s) size %llu",
7884                         super_bytes, total_bytes);
7885                 error("mounting this fs may fail for newer kernels");
7886                 error("this can be fixed by 'btrfs rescue fix-device-size'");
7887                 return false;
7888         }
7889
7890         /*
7891          * Optional check, just to make everything aligned and match with each
7892          * other.
7893          *
7894          * For a btrfs-image restored fs, we don't need to check it anyway.
7895          */
7896         if (btrfs_super_flags(fs_info->super_copy) &
7897             (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
7898                 return true;
7899         if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
7900             !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
7901             super_bytes != total_bytes) {
7902                 warning("minor unaligned/mismatch device size detected");
7903                 warning(
7904                 "recommended to use 'btrfs rescue fix-device-size' to fix it");
7905         }
7906         return true;
7907 }
7908
7909 /* check btrfs_dev_item -> btrfs_dev_extent */
7910 static int check_devices(struct rb_root *dev_cache,
7911                          struct device_extent_tree *dev_extent_cache)
7912 {
7913         struct rb_node *dev_node;
7914         struct device_record *dev_rec;
7915         struct device_extent_record *dext_rec;
7916         int err;
7917         int ret = 0;
7918
7919         dev_node = rb_first(dev_cache);
7920         while (dev_node) {
7921                 dev_rec = container_of(dev_node, struct device_record, node);
7922                 err = check_device_used(dev_rec, dev_extent_cache);
7923                 if (err)
7924                         ret = err;
7925
7926                 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
7927                                          global_info->sectorsize);
7928                 dev_node = rb_next(dev_node);
7929         }
7930         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7931                             device_list) {
7932                 fprintf(stderr,
7933                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7934                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
7935                 if (!ret)
7936                         ret = 1;
7937         }
7938         return ret;
7939 }
7940
7941 static int add_root_item_to_list(struct list_head *head,
7942                                   u64 objectid, u64 bytenr, u64 last_snapshot,
7943                                   u8 level, u8 drop_level,
7944                                   struct btrfs_key *drop_key)
7945 {
7946         struct root_item_record *ri_rec;
7947
7948         ri_rec = malloc(sizeof(*ri_rec));
7949         if (!ri_rec)
7950                 return -ENOMEM;
7951         ri_rec->bytenr = bytenr;
7952         ri_rec->objectid = objectid;
7953         ri_rec->level = level;
7954         ri_rec->drop_level = drop_level;
7955         ri_rec->last_snapshot = last_snapshot;
7956         if (drop_key)
7957                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
7958         list_add_tail(&ri_rec->list, head);
7959
7960         return 0;
7961 }
7962
7963 static void free_root_item_list(struct list_head *list)
7964 {
7965         struct root_item_record *ri_rec;
7966
7967         while (!list_empty(list)) {
7968                 ri_rec = list_first_entry(list, struct root_item_record,
7969                                           list);
7970                 list_del_init(&ri_rec->list);
7971                 free(ri_rec);
7972         }
7973 }
7974
7975 static int deal_root_from_list(struct list_head *list,
7976                                struct btrfs_root *root,
7977                                struct block_info *bits,
7978                                int bits_nr,
7979                                struct cache_tree *pending,
7980                                struct cache_tree *seen,
7981                                struct cache_tree *reada,
7982                                struct cache_tree *nodes,
7983                                struct cache_tree *extent_cache,
7984                                struct cache_tree *chunk_cache,
7985                                struct rb_root *dev_cache,
7986                                struct block_group_tree *block_group_cache,
7987                                struct device_extent_tree *dev_extent_cache)
7988 {
7989         int ret = 0;
7990         u64 last;
7991
7992         while (!list_empty(list)) {
7993                 struct root_item_record *rec;
7994                 struct extent_buffer *buf;
7995
7996                 rec = list_entry(list->next,
7997                                  struct root_item_record, list);
7998                 last = 0;
7999                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
8000                 if (!extent_buffer_uptodate(buf)) {
8001                         free_extent_buffer(buf);
8002                         ret = -EIO;
8003                         break;
8004                 }
8005                 ret = add_root_to_pending(buf, extent_cache, pending,
8006                                     seen, nodes, rec->objectid);
8007                 if (ret < 0)
8008                         break;
8009                 /*
8010                  * To rebuild extent tree, we need deal with snapshot
8011                  * one by one, otherwise we deal with node firstly which
8012                  * can maximize readahead.
8013                  */
8014                 while (1) {
8015                         ret = run_next_block(root, bits, bits_nr, &last,
8016                                              pending, seen, reada, nodes,
8017                                              extent_cache, chunk_cache,
8018                                              dev_cache, block_group_cache,
8019                                              dev_extent_cache, rec);
8020                         if (ret != 0)
8021                                 break;
8022                 }
8023                 free_extent_buffer(buf);
8024                 list_del(&rec->list);
8025                 free(rec);
8026                 if (ret < 0)
8027                         break;
8028         }
8029         while (ret >= 0) {
8030                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8031                                      reada, nodes, extent_cache, chunk_cache,
8032                                      dev_cache, block_group_cache,
8033                                      dev_extent_cache, NULL);
8034                 if (ret != 0) {
8035                         if (ret > 0)
8036                                 ret = 0;
8037                         break;
8038                 }
8039         }
8040         return ret;
8041 }
8042
8043 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
8044 {
8045         struct rb_root dev_cache;
8046         struct cache_tree chunk_cache;
8047         struct block_group_tree block_group_cache;
8048         struct device_extent_tree dev_extent_cache;
8049         struct cache_tree extent_cache;
8050         struct cache_tree seen;
8051         struct cache_tree pending;
8052         struct cache_tree reada;
8053         struct cache_tree nodes;
8054         struct extent_io_tree excluded_extents;
8055         struct cache_tree corrupt_blocks;
8056         struct btrfs_path path;
8057         struct btrfs_key key;
8058         struct btrfs_key found_key;
8059         int ret, err = 0;
8060         struct block_info *bits;
8061         int bits_nr;
8062         struct extent_buffer *leaf;
8063         int slot;
8064         struct btrfs_root_item ri;
8065         struct list_head dropping_trees;
8066         struct list_head normal_trees;
8067         struct btrfs_root *root1;
8068         struct btrfs_root *root;
8069         u64 objectid;
8070         u8 level;
8071
8072         root = fs_info->fs_root;
8073         dev_cache = RB_ROOT;
8074         cache_tree_init(&chunk_cache);
8075         block_group_tree_init(&block_group_cache);
8076         device_extent_tree_init(&dev_extent_cache);
8077
8078         cache_tree_init(&extent_cache);
8079         cache_tree_init(&seen);
8080         cache_tree_init(&pending);
8081         cache_tree_init(&nodes);
8082         cache_tree_init(&reada);
8083         cache_tree_init(&corrupt_blocks);
8084         extent_io_tree_init(&excluded_extents);
8085         INIT_LIST_HEAD(&dropping_trees);
8086         INIT_LIST_HEAD(&normal_trees);
8087
8088         if (repair) {
8089                 fs_info->excluded_extents = &excluded_extents;
8090                 fs_info->fsck_extent_cache = &extent_cache;
8091                 fs_info->free_extent_hook = free_extent_hook;
8092                 fs_info->corrupt_blocks = &corrupt_blocks;
8093         }
8094
8095         bits_nr = 1024;
8096         bits = malloc(bits_nr * sizeof(struct block_info));
8097         if (!bits) {
8098                 perror("malloc");
8099                 exit(1);
8100         }
8101
8102         if (ctx.progress_enabled) {
8103                 ctx.tp = TASK_EXTENTS;
8104                 task_start(ctx.info);
8105         }
8106
8107 again:
8108         root1 = fs_info->tree_root;
8109         level = btrfs_header_level(root1->node);
8110         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8111                                     root1->node->start, 0, level, 0, NULL);
8112         if (ret < 0)
8113                 goto out;
8114         root1 = fs_info->chunk_root;
8115         level = btrfs_header_level(root1->node);
8116         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8117                                     root1->node->start, 0, level, 0, NULL);
8118         if (ret < 0)
8119                 goto out;
8120         btrfs_init_path(&path);
8121         key.offset = 0;
8122         key.objectid = 0;
8123         key.type = BTRFS_ROOT_ITEM_KEY;
8124         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
8125         if (ret < 0)
8126                 goto out;
8127         while (1) {
8128                 leaf = path.nodes[0];
8129                 slot = path.slots[0];
8130                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8131                         ret = btrfs_next_leaf(root, &path);
8132                         if (ret != 0)
8133                                 break;
8134                         leaf = path.nodes[0];
8135                         slot = path.slots[0];
8136                 }
8137                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8138                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8139                         unsigned long offset;
8140                         u64 last_snapshot;
8141
8142                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8143                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8144                         last_snapshot = btrfs_root_last_snapshot(&ri);
8145                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8146                                 level = btrfs_root_level(&ri);
8147                                 ret = add_root_item_to_list(&normal_trees,
8148                                                 found_key.objectid,
8149                                                 btrfs_root_bytenr(&ri),
8150                                                 last_snapshot, level,
8151                                                 0, NULL);
8152                                 if (ret < 0)
8153                                         goto out;
8154                         } else {
8155                                 level = btrfs_root_level(&ri);
8156                                 objectid = found_key.objectid;
8157                                 btrfs_disk_key_to_cpu(&found_key,
8158                                                       &ri.drop_progress);
8159                                 ret = add_root_item_to_list(&dropping_trees,
8160                                                 objectid,
8161                                                 btrfs_root_bytenr(&ri),
8162                                                 last_snapshot, level,
8163                                                 ri.drop_level, &found_key);
8164                                 if (ret < 0)
8165                                         goto out;
8166                         }
8167                 }
8168                 path.slots[0]++;
8169         }
8170         btrfs_release_path(&path);
8171
8172         /*
8173          * check_block can return -EAGAIN if it fixes something, please keep
8174          * this in mind when dealing with return values from these functions, if
8175          * we get -EAGAIN we want to fall through and restart the loop.
8176          */
8177         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8178                                   &seen, &reada, &nodes, &extent_cache,
8179                                   &chunk_cache, &dev_cache, &block_group_cache,
8180                                   &dev_extent_cache);
8181         if (ret < 0) {
8182                 if (ret == -EAGAIN)
8183                         goto loop;
8184                 goto out;
8185         }
8186         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8187                                   &pending, &seen, &reada, &nodes,
8188                                   &extent_cache, &chunk_cache, &dev_cache,
8189                                   &block_group_cache, &dev_extent_cache);
8190         if (ret < 0) {
8191                 if (ret == -EAGAIN)
8192                         goto loop;
8193                 goto out;
8194         }
8195
8196         ret = check_chunks(&chunk_cache, &block_group_cache,
8197                            &dev_extent_cache, NULL, NULL, NULL, 0);
8198         if (ret) {
8199                 if (ret == -EAGAIN)
8200                         goto loop;
8201                 err = ret;
8202         }
8203
8204         ret = check_extent_refs(root, &extent_cache);
8205         if (ret < 0) {
8206                 if (ret == -EAGAIN)
8207                         goto loop;
8208                 goto out;
8209         }
8210
8211         ret = check_devices(&dev_cache, &dev_extent_cache);
8212         if (ret && err)
8213                 ret = err;
8214
8215 out:
8216         task_stop(ctx.info);
8217         if (repair) {
8218                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
8219                 extent_io_tree_cleanup(&excluded_extents);
8220                 fs_info->fsck_extent_cache = NULL;
8221                 fs_info->free_extent_hook = NULL;
8222                 fs_info->corrupt_blocks = NULL;
8223                 fs_info->excluded_extents = NULL;
8224         }
8225         free(bits);
8226         free_chunk_cache_tree(&chunk_cache);
8227         free_device_cache_tree(&dev_cache);
8228         free_block_group_tree(&block_group_cache);
8229         free_device_extent_tree(&dev_extent_cache);
8230         free_extent_cache_tree(&seen);
8231         free_extent_cache_tree(&pending);
8232         free_extent_cache_tree(&reada);
8233         free_extent_cache_tree(&nodes);
8234         free_root_item_list(&normal_trees);
8235         free_root_item_list(&dropping_trees);
8236         return ret;
8237 loop:
8238         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
8239         free_extent_cache_tree(&seen);
8240         free_extent_cache_tree(&pending);
8241         free_extent_cache_tree(&reada);
8242         free_extent_cache_tree(&nodes);
8243         free_chunk_cache_tree(&chunk_cache);
8244         free_block_group_tree(&block_group_cache);
8245         free_device_cache_tree(&dev_cache);
8246         free_device_extent_tree(&dev_extent_cache);
8247         free_extent_record_cache(&extent_cache);
8248         free_root_item_list(&normal_trees);
8249         free_root_item_list(&dropping_trees);
8250         extent_io_tree_cleanup(&excluded_extents);
8251         goto again;
8252 }
8253
8254 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
8255 {
8256         int ret;
8257
8258         if (!ctx.progress_enabled)
8259                 fprintf(stderr, "checking extents\n");
8260         if (check_mode == CHECK_MODE_LOWMEM)
8261                 ret = check_chunks_and_extents_lowmem(fs_info);
8262         else
8263                 ret = check_chunks_and_extents(fs_info);
8264
8265         /* Also repair device size related problems */
8266         if (repair && !ret) {
8267                 ret = btrfs_fix_device_and_super_size(fs_info);
8268                 if (ret > 0)
8269                         ret = 0;
8270         }
8271         return ret;
8272 }
8273
8274 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8275                            struct btrfs_root *root, int overwrite)
8276 {
8277         struct extent_buffer *c;
8278         struct extent_buffer *old = root->node;
8279         int level;
8280         int ret;
8281         struct btrfs_disk_key disk_key = {0,0,0};
8282
8283         level = 0;
8284
8285         if (overwrite) {
8286                 c = old;
8287                 extent_buffer_get(c);
8288                 goto init;
8289         }
8290         c = btrfs_alloc_free_block(trans, root,
8291                                    root->fs_info->nodesize,
8292                                    root->root_key.objectid,
8293                                    &disk_key, level, 0, 0);
8294         if (IS_ERR(c)) {
8295                 c = old;
8296                 extent_buffer_get(c);
8297                 overwrite = 1;
8298         }
8299 init:
8300         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8301         btrfs_set_header_level(c, level);
8302         btrfs_set_header_bytenr(c, c->start);
8303         btrfs_set_header_generation(c, trans->transid);
8304         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8305         btrfs_set_header_owner(c, root->root_key.objectid);
8306
8307         write_extent_buffer(c, root->fs_info->fsid,
8308                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8309
8310         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8311                             btrfs_header_chunk_tree_uuid(c),
8312                             BTRFS_UUID_SIZE);
8313
8314         btrfs_mark_buffer_dirty(c);
8315         /*
8316          * this case can happen in the following case:
8317          *
8318          * 1.overwrite previous root.
8319          *
8320          * 2.reinit reloc data root, this is because we skip pin
8321          * down reloc data tree before which means we can allocate
8322          * same block bytenr here.
8323          */
8324         if (old->start == c->start) {
8325                 btrfs_set_root_generation(&root->root_item,
8326                                           trans->transid);
8327                 root->root_item.level = btrfs_header_level(root->node);
8328                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8329                                         &root->root_key, &root->root_item);
8330                 if (ret) {
8331                         free_extent_buffer(c);
8332                         return ret;
8333                 }
8334         }
8335         free_extent_buffer(old);
8336         root->node = c;
8337         add_root_to_dirty_list(root);
8338         return 0;
8339 }
8340
8341 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8342                                 struct extent_buffer *eb, int tree_root)
8343 {
8344         struct extent_buffer *tmp;
8345         struct btrfs_root_item *ri;
8346         struct btrfs_key key;
8347         u64 bytenr;
8348         int level = btrfs_header_level(eb);
8349         int nritems;
8350         int ret;
8351         int i;
8352
8353         /*
8354          * If we have pinned this block before, don't pin it again.
8355          * This can not only avoid forever loop with broken filesystem
8356          * but also give us some speedups.
8357          */
8358         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8359                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8360                 return 0;
8361
8362         btrfs_pin_extent(fs_info, eb->start, eb->len);
8363
8364         nritems = btrfs_header_nritems(eb);
8365         for (i = 0; i < nritems; i++) {
8366                 if (level == 0) {
8367                         btrfs_item_key_to_cpu(eb, &key, i);
8368                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8369                                 continue;
8370                         /* Skip the extent root and reloc roots */
8371                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8372                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8373                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8374                                 continue;
8375                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8376                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8377
8378                         /*
8379                          * If at any point we start needing the real root we
8380                          * will have to build a stump root for the root we are
8381                          * in, but for now this doesn't actually use the root so
8382                          * just pass in extent_root.
8383                          */
8384                         tmp = read_tree_block(fs_info, bytenr, 0);
8385                         if (!extent_buffer_uptodate(tmp)) {
8386                                 fprintf(stderr, "Error reading root block\n");
8387                                 return -EIO;
8388                         }
8389                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8390                         free_extent_buffer(tmp);
8391                         if (ret)
8392                                 return ret;
8393                 } else {
8394                         bytenr = btrfs_node_blockptr(eb, i);
8395
8396                         /* If we aren't the tree root don't read the block */
8397                         if (level == 1 && !tree_root) {
8398                                 btrfs_pin_extent(fs_info, bytenr,
8399                                                 fs_info->nodesize);
8400                                 continue;
8401                         }
8402
8403                         tmp = read_tree_block(fs_info, bytenr, 0);
8404                         if (!extent_buffer_uptodate(tmp)) {
8405                                 fprintf(stderr, "Error reading tree block\n");
8406                                 return -EIO;
8407                         }
8408                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8409                         free_extent_buffer(tmp);
8410                         if (ret)
8411                                 return ret;
8412                 }
8413         }
8414
8415         return 0;
8416 }
8417
8418 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8419 {
8420         int ret;
8421
8422         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8423         if (ret)
8424                 return ret;
8425
8426         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8427 }
8428
8429 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8430 {
8431         struct btrfs_block_group_cache *cache;
8432         struct btrfs_path path;
8433         struct extent_buffer *leaf;
8434         struct btrfs_chunk *chunk;
8435         struct btrfs_key key;
8436         int ret;
8437         u64 start;
8438
8439         btrfs_init_path(&path);
8440         key.objectid = 0;
8441         key.type = BTRFS_CHUNK_ITEM_KEY;
8442         key.offset = 0;
8443         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
8444         if (ret < 0) {
8445                 btrfs_release_path(&path);
8446                 return ret;
8447         }
8448
8449         /*
8450          * We do this in case the block groups were screwed up and had alloc
8451          * bits that aren't actually set on the chunks.  This happens with
8452          * restored images every time and could happen in real life I guess.
8453          */
8454         fs_info->avail_data_alloc_bits = 0;
8455         fs_info->avail_metadata_alloc_bits = 0;
8456         fs_info->avail_system_alloc_bits = 0;
8457
8458         /* First we need to create the in-memory block groups */
8459         while (1) {
8460                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8461                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
8462                         if (ret < 0) {
8463                                 btrfs_release_path(&path);
8464                                 return ret;
8465                         }
8466                         if (ret) {
8467                                 ret = 0;
8468                                 break;
8469                         }
8470                 }
8471                 leaf = path.nodes[0];
8472                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8473                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8474                         path.slots[0]++;
8475                         continue;
8476                 }
8477
8478                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
8479                 btrfs_add_block_group(fs_info, 0,
8480                                       btrfs_chunk_type(leaf, chunk), key.offset,
8481                                       btrfs_chunk_length(leaf, chunk));
8482                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8483                                  key.offset + btrfs_chunk_length(leaf, chunk));
8484                 path.slots[0]++;
8485         }
8486         start = 0;
8487         while (1) {
8488                 cache = btrfs_lookup_first_block_group(fs_info, start);
8489                 if (!cache)
8490                         break;
8491                 cache->cached = 1;
8492                 start = cache->key.objectid + cache->key.offset;
8493         }
8494
8495         btrfs_release_path(&path);
8496         return 0;
8497 }
8498
8499 static int reset_balance(struct btrfs_trans_handle *trans,
8500                          struct btrfs_fs_info *fs_info)
8501 {
8502         struct btrfs_root *root = fs_info->tree_root;
8503         struct btrfs_path path;
8504         struct extent_buffer *leaf;
8505         struct btrfs_key key;
8506         int del_slot, del_nr = 0;
8507         int ret;
8508         int found = 0;
8509
8510         btrfs_init_path(&path);
8511         key.objectid = BTRFS_BALANCE_OBJECTID;
8512         key.type = BTRFS_BALANCE_ITEM_KEY;
8513         key.offset = 0;
8514         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8515         if (ret) {
8516                 if (ret > 0)
8517                         ret = 0;
8518                 if (!ret)
8519                         goto reinit_data_reloc;
8520                 else
8521                         goto out;
8522         }
8523
8524         ret = btrfs_del_item(trans, root, &path);
8525         if (ret)
8526                 goto out;
8527         btrfs_release_path(&path);
8528
8529         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8530         key.type = BTRFS_ROOT_ITEM_KEY;
8531         key.offset = 0;
8532         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8533         if (ret < 0)
8534                 goto out;
8535         while (1) {
8536                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8537                         if (!found)
8538                                 break;
8539
8540                         if (del_nr) {
8541                                 ret = btrfs_del_items(trans, root, &path,
8542                                                       del_slot, del_nr);
8543                                 del_nr = 0;
8544                                 if (ret)
8545                                         goto out;
8546                         }
8547                         key.offset++;
8548                         btrfs_release_path(&path);
8549
8550                         found = 0;
8551                         ret = btrfs_search_slot(trans, root, &key, &path,
8552                                                 -1, 1);
8553                         if (ret < 0)
8554                                 goto out;
8555                         continue;
8556                 }
8557                 found = 1;
8558                 leaf = path.nodes[0];
8559                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8560                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8561                         break;
8562                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8563                         path.slots[0]++;
8564                         continue;
8565                 }
8566                 if (!del_nr) {
8567                         del_slot = path.slots[0];
8568                         del_nr = 1;
8569                 } else {
8570                         del_nr++;
8571                 }
8572                 path.slots[0]++;
8573         }
8574
8575         if (del_nr) {
8576                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
8577                 if (ret)
8578                         goto out;
8579         }
8580         btrfs_release_path(&path);
8581
8582 reinit_data_reloc:
8583         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8584         key.type = BTRFS_ROOT_ITEM_KEY;
8585         key.offset = (u64)-1;
8586         root = btrfs_read_fs_root(fs_info, &key);
8587         if (IS_ERR(root)) {
8588                 fprintf(stderr, "Error reading data reloc tree\n");
8589                 ret = PTR_ERR(root);
8590                 goto out;
8591         }
8592         record_root_in_trans(trans, root);
8593         ret = btrfs_fsck_reinit_root(trans, root, 0);
8594         if (ret)
8595                 goto out;
8596         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8597 out:
8598         btrfs_release_path(&path);
8599         return ret;
8600 }
8601
8602 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8603                               struct btrfs_fs_info *fs_info)
8604 {
8605         u64 start = 0;
8606         int ret;
8607
8608         /*
8609          * The only reason we don't do this is because right now we're just
8610          * walking the trees we find and pinning down their bytes, we don't look
8611          * at any of the leaves.  In order to do mixed groups we'd have to check
8612          * the leaves of any fs roots and pin down the bytes for any file
8613          * extents we find.  Not hard but why do it if we don't have to?
8614          */
8615         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
8616                 fprintf(stderr, "We don't support re-initing the extent tree "
8617                         "for mixed block groups yet, please notify a btrfs "
8618                         "developer you want to do this so they can add this "
8619                         "functionality.\n");
8620                 return -EINVAL;
8621         }
8622
8623         /*
8624          * first we need to walk all of the trees except the extent tree and pin
8625          * down the bytes that are in use so we don't overwrite any existing
8626          * metadata.
8627          */
8628         ret = pin_metadata_blocks(fs_info);
8629         if (ret) {
8630                 fprintf(stderr, "error pinning down used bytes\n");
8631                 return ret;
8632         }
8633
8634         /*
8635          * Need to drop all the block groups since we're going to recreate all
8636          * of them again.
8637          */
8638         btrfs_free_block_groups(fs_info);
8639         ret = reset_block_groups(fs_info);
8640         if (ret) {
8641                 fprintf(stderr, "error resetting the block groups\n");
8642                 return ret;
8643         }
8644
8645         /* Ok we can allocate now, reinit the extent root */
8646         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8647         if (ret) {
8648                 fprintf(stderr, "extent root initialization failed\n");
8649                 /*
8650                  * When the transaction code is updated we should end the
8651                  * transaction, but for now progs only knows about commit so
8652                  * just return an error.
8653                  */
8654                 return ret;
8655         }
8656
8657         /*
8658          * Now we have all the in-memory block groups setup so we can make
8659          * allocations properly, and the metadata we care about is safe since we
8660          * pinned all of it above.
8661          */
8662         while (1) {
8663                 struct btrfs_block_group_cache *cache;
8664
8665                 cache = btrfs_lookup_first_block_group(fs_info, start);
8666                 if (!cache)
8667                         break;
8668                 start = cache->key.objectid + cache->key.offset;
8669                 ret = btrfs_insert_item(trans, fs_info->extent_root,
8670                                         &cache->key, &cache->item,
8671                                         sizeof(cache->item));
8672                 if (ret) {
8673                         fprintf(stderr, "Error adding block group\n");
8674                         return ret;
8675                 }
8676                 btrfs_extent_post_op(trans, fs_info->extent_root);
8677         }
8678
8679         ret = reset_balance(trans, fs_info);
8680         if (ret)
8681                 fprintf(stderr, "error resetting the pending balance\n");
8682
8683         return ret;
8684 }
8685
8686 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8687 {
8688         struct btrfs_path path;
8689         struct btrfs_trans_handle *trans;
8690         struct btrfs_key key;
8691         int ret;
8692
8693         printf("Recowing metadata block %llu\n", eb->start);
8694         key.objectid = btrfs_header_owner(eb);
8695         key.type = BTRFS_ROOT_ITEM_KEY;
8696         key.offset = (u64)-1;
8697
8698         root = btrfs_read_fs_root(root->fs_info, &key);
8699         if (IS_ERR(root)) {
8700                 fprintf(stderr, "Couldn't find owner root %llu\n",
8701                         key.objectid);
8702                 return PTR_ERR(root);
8703         }
8704
8705         trans = btrfs_start_transaction(root, 1);
8706         if (IS_ERR(trans))
8707                 return PTR_ERR(trans);
8708
8709         btrfs_init_path(&path);
8710         path.lowest_level = btrfs_header_level(eb);
8711         if (path.lowest_level)
8712                 btrfs_node_key_to_cpu(eb, &key, 0);
8713         else
8714                 btrfs_item_key_to_cpu(eb, &key, 0);
8715
8716         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
8717         btrfs_commit_transaction(trans, root);
8718         btrfs_release_path(&path);
8719         return ret;
8720 }
8721
8722 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8723 {
8724         struct btrfs_path path;
8725         struct btrfs_trans_handle *trans;
8726         struct btrfs_key key;
8727         int ret;
8728
8729         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8730                bad->key.type, bad->key.offset);
8731         key.objectid = bad->root_id;
8732         key.type = BTRFS_ROOT_ITEM_KEY;
8733         key.offset = (u64)-1;
8734
8735         root = btrfs_read_fs_root(root->fs_info, &key);
8736         if (IS_ERR(root)) {
8737                 fprintf(stderr, "Couldn't find owner root %llu\n",
8738                         key.objectid);
8739                 return PTR_ERR(root);
8740         }
8741
8742         trans = btrfs_start_transaction(root, 1);
8743         if (IS_ERR(trans))
8744                 return PTR_ERR(trans);
8745
8746         btrfs_init_path(&path);
8747         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
8748         if (ret) {
8749                 if (ret > 0)
8750                         ret = 0;
8751                 goto out;
8752         }
8753         ret = btrfs_del_item(trans, root, &path);
8754 out:
8755         btrfs_commit_transaction(trans, root);
8756         btrfs_release_path(&path);
8757         return ret;
8758 }
8759
8760 static int zero_log_tree(struct btrfs_root *root)
8761 {
8762         struct btrfs_trans_handle *trans;
8763         int ret;
8764
8765         trans = btrfs_start_transaction(root, 1);
8766         if (IS_ERR(trans)) {
8767                 ret = PTR_ERR(trans);
8768                 return ret;
8769         }
8770         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8771         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8772         ret = btrfs_commit_transaction(trans, root);
8773         return ret;
8774 }
8775
8776 static int populate_csum(struct btrfs_trans_handle *trans,
8777                          struct btrfs_root *csum_root, char *buf, u64 start,
8778                          u64 len)
8779 {
8780         struct btrfs_fs_info *fs_info = csum_root->fs_info;
8781         u64 offset = 0;
8782         u64 sectorsize;
8783         int ret = 0;
8784
8785         while (offset < len) {
8786                 sectorsize = fs_info->sectorsize;
8787                 ret = read_extent_data(fs_info, buf, start + offset,
8788                                        &sectorsize, 0);
8789                 if (ret)
8790                         break;
8791                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8792                                             start + offset, buf, sectorsize);
8793                 if (ret)
8794                         break;
8795                 offset += sectorsize;
8796         }
8797         return ret;
8798 }
8799
8800 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8801                                       struct btrfs_root *csum_root,
8802                                       struct btrfs_root *cur_root)
8803 {
8804         struct btrfs_path path;
8805         struct btrfs_key key;
8806         struct extent_buffer *node;
8807         struct btrfs_file_extent_item *fi;
8808         char *buf = NULL;
8809         u64 start = 0;
8810         u64 len = 0;
8811         int slot = 0;
8812         int ret = 0;
8813
8814         buf = malloc(cur_root->fs_info->sectorsize);
8815         if (!buf)
8816                 return -ENOMEM;
8817
8818         btrfs_init_path(&path);
8819         key.objectid = 0;
8820         key.offset = 0;
8821         key.type = 0;
8822         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
8823         if (ret < 0)
8824                 goto out;
8825         /* Iterate all regular file extents and fill its csum */
8826         while (1) {
8827                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8828
8829                 if (key.type != BTRFS_EXTENT_DATA_KEY)
8830                         goto next;
8831                 node = path.nodes[0];
8832                 slot = path.slots[0];
8833                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8834                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8835                         goto next;
8836                 start = btrfs_file_extent_disk_bytenr(node, fi);
8837                 len = btrfs_file_extent_disk_num_bytes(node, fi);
8838
8839                 ret = populate_csum(trans, csum_root, buf, start, len);
8840                 if (ret == -EEXIST)
8841                         ret = 0;
8842                 if (ret < 0)
8843                         goto out;
8844 next:
8845                 /*
8846                  * TODO: if next leaf is corrupted, jump to nearest next valid
8847                  * leaf.
8848                  */
8849                 ret = btrfs_next_item(cur_root, &path);
8850                 if (ret < 0)
8851                         goto out;
8852                 if (ret > 0) {
8853                         ret = 0;
8854                         goto out;
8855                 }
8856         }
8857
8858 out:
8859         btrfs_release_path(&path);
8860         free(buf);
8861         return ret;
8862 }
8863
8864 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8865                                   struct btrfs_root *csum_root)
8866 {
8867         struct btrfs_fs_info *fs_info = csum_root->fs_info;
8868         struct btrfs_path path;
8869         struct btrfs_root *tree_root = fs_info->tree_root;
8870         struct btrfs_root *cur_root;
8871         struct extent_buffer *node;
8872         struct btrfs_key key;
8873         int slot = 0;
8874         int ret = 0;
8875
8876         btrfs_init_path(&path);
8877         key.objectid = BTRFS_FS_TREE_OBJECTID;
8878         key.offset = 0;
8879         key.type = BTRFS_ROOT_ITEM_KEY;
8880         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
8881         if (ret < 0)
8882                 goto out;
8883         if (ret > 0) {
8884                 ret = -ENOENT;
8885                 goto out;
8886         }
8887
8888         while (1) {
8889                 node = path.nodes[0];
8890                 slot = path.slots[0];
8891                 btrfs_item_key_to_cpu(node, &key, slot);
8892                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8893                         goto out;
8894                 if (key.type != BTRFS_ROOT_ITEM_KEY)
8895                         goto next;
8896                 if (!is_fstree(key.objectid))
8897                         goto next;
8898                 key.offset = (u64)-1;
8899
8900                 cur_root = btrfs_read_fs_root(fs_info, &key);
8901                 if (IS_ERR(cur_root) || !cur_root) {
8902                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8903                                 key.objectid);
8904                         goto out;
8905                 }
8906                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8907                                 cur_root);
8908                 if (ret < 0)
8909                         goto out;
8910 next:
8911                 ret = btrfs_next_item(tree_root, &path);
8912                 if (ret > 0) {
8913                         ret = 0;
8914                         goto out;
8915                 }
8916                 if (ret < 0)
8917                         goto out;
8918         }
8919
8920 out:
8921         btrfs_release_path(&path);
8922         return ret;
8923 }
8924
8925 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
8926                                       struct btrfs_root *csum_root)
8927 {
8928         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
8929         struct btrfs_path path;
8930         struct btrfs_extent_item *ei;
8931         struct extent_buffer *leaf;
8932         char *buf;
8933         struct btrfs_key key;
8934         int ret;
8935
8936         btrfs_init_path(&path);
8937         key.objectid = 0;
8938         key.type = BTRFS_EXTENT_ITEM_KEY;
8939         key.offset = 0;
8940         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8941         if (ret < 0) {
8942                 btrfs_release_path(&path);
8943                 return ret;
8944         }
8945
8946         buf = malloc(csum_root->fs_info->sectorsize);
8947         if (!buf) {
8948                 btrfs_release_path(&path);
8949                 return -ENOMEM;
8950         }
8951
8952         while (1) {
8953                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8954                         ret = btrfs_next_leaf(extent_root, &path);
8955                         if (ret < 0)
8956                                 break;
8957                         if (ret) {
8958                                 ret = 0;
8959                                 break;
8960                         }
8961                 }
8962                 leaf = path.nodes[0];
8963
8964                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8965                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8966                         path.slots[0]++;
8967                         continue;
8968                 }
8969
8970                 ei = btrfs_item_ptr(leaf, path.slots[0],
8971                                     struct btrfs_extent_item);
8972                 if (!(btrfs_extent_flags(leaf, ei) &
8973                       BTRFS_EXTENT_FLAG_DATA)) {
8974                         path.slots[0]++;
8975                         continue;
8976                 }
8977
8978                 ret = populate_csum(trans, csum_root, buf, key.objectid,
8979                                     key.offset);
8980                 if (ret)
8981                         break;
8982                 path.slots[0]++;
8983         }
8984
8985         btrfs_release_path(&path);
8986         free(buf);
8987         return ret;
8988 }
8989
/*
 * Recompute data checksums and insert them into the csum tree.
 *
 * @trans:          transaction handle handed to the selected worker
 * @csum_root:      root of the csum tree that receives the checksums
 * @search_fs_tree: non-zero to collect extents from the fs/subvolume trees,
 *                  zero to collect them from the extent tree
 *
 * An extent tree init wipes out all the extent info, so in that case the
 * extent tree cannot be used as the source and the caller must ask for the
 * fs/subvolume trees instead.
 *
 * Returns whatever the selected worker returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        if (!search_fs_tree)
                return fill_csum_tree_from_extent(trans, csum_root);

        return fill_csum_tree_from_fs(trans, csum_root);
}
9006
9007 static void free_roots_info_cache(void)
9008 {
9009         if (!roots_info_cache)
9010                 return;
9011
9012         while (!cache_tree_empty(roots_info_cache)) {
9013                 struct cache_extent *entry;
9014                 struct root_item_info *rii;
9015
9016                 entry = first_cache_extent(roots_info_cache);
9017                 if (!entry)
9018                         break;
9019                 remove_cache_extent(roots_info_cache, entry);
9020                 rii = container_of(entry, struct root_item_info, cache_extent);
9021                 free(rii);
9022         }
9023
9024         free(roots_info_cache);
9025         roots_info_cache = NULL;
9026 }
9027
9028 static int build_roots_info_cache(struct btrfs_fs_info *info)
9029 {
9030         int ret = 0;
9031         struct btrfs_key key;
9032         struct extent_buffer *leaf;
9033         struct btrfs_path path;
9034
9035         if (!roots_info_cache) {
9036                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9037                 if (!roots_info_cache)
9038                         return -ENOMEM;
9039                 cache_tree_init(roots_info_cache);
9040         }
9041
9042         btrfs_init_path(&path);
9043         key.objectid = 0;
9044         key.type = BTRFS_EXTENT_ITEM_KEY;
9045         key.offset = 0;
9046         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
9047         if (ret < 0)
9048                 goto out;
9049         leaf = path.nodes[0];
9050
9051         while (1) {
9052                 struct btrfs_key found_key;
9053                 struct btrfs_extent_item *ei;
9054                 struct btrfs_extent_inline_ref *iref;
9055                 unsigned long item_end;
9056                 int slot = path.slots[0];
9057                 int type;
9058                 u64 flags;
9059                 u64 root_id;
9060                 u8 level;
9061                 struct cache_extent *entry;
9062                 struct root_item_info *rii;
9063
9064                 if (slot >= btrfs_header_nritems(leaf)) {
9065                         ret = btrfs_next_leaf(info->extent_root, &path);
9066                         if (ret < 0) {
9067                                 break;
9068                         } else if (ret) {
9069                                 ret = 0;
9070                                 break;
9071                         }
9072                         leaf = path.nodes[0];
9073                         slot = path.slots[0];
9074                 }
9075
9076                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9077
9078                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9079                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9080                         goto next;
9081
9082                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9083                 flags = btrfs_extent_flags(leaf, ei);
9084                 item_end = (unsigned long)ei + btrfs_item_size_nr(leaf, slot);
9085
9086                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9087                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9088                         goto next;
9089
9090                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9091                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9092                         level = found_key.offset;
9093                 } else {
9094                         struct btrfs_tree_block_info *binfo;
9095
9096                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
9097                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
9098                         level = btrfs_tree_block_level(leaf, binfo);
9099                 }
9100
9101                 /*
9102                  * It's a valid extent/metadata item that has no inline ref,
9103                  * but SHARED_BLOCK_REF or other shared references.
9104                  * So we need to do extra check to avoid reading beyond leaf
9105                  * boudnary.
9106                  */
9107                 if ((unsigned long)iref >= item_end)
9108                         goto next;
9109
9110                 /*
9111                  * For a root extent, it must be of the following type and the
9112                  * first (and only one) iref in the item.
9113                  */
9114                 type = btrfs_extent_inline_ref_type(leaf, iref);
9115                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9116                         goto next;
9117
9118                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9119                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9120                 if (!entry) {
9121                         rii = malloc(sizeof(struct root_item_info));
9122                         if (!rii) {
9123                                 ret = -ENOMEM;
9124                                 goto out;
9125                         }
9126                         rii->cache_extent.start = root_id;
9127                         rii->cache_extent.size = 1;
9128                         rii->level = (u8)-1;
9129                         entry = &rii->cache_extent;
9130                         ret = insert_cache_extent(roots_info_cache, entry);
9131                         ASSERT(ret == 0);
9132                 } else {
9133                         rii = container_of(entry, struct root_item_info,
9134                                            cache_extent);
9135                 }
9136
9137                 ASSERT(rii->cache_extent.start == root_id);
9138                 ASSERT(rii->cache_extent.size == 1);
9139
9140                 if (level > rii->level || rii->level == (u8)-1) {
9141                         rii->level = level;
9142                         rii->bytenr = found_key.objectid;
9143                         rii->gen = btrfs_extent_generation(leaf, ei);
9144                         rii->node_count = 1;
9145                 } else if (level == rii->level) {
9146                         rii->node_count++;
9147                 }
9148 next:
9149                 path.slots[0]++;
9150         }
9151
9152 out:
9153         btrfs_release_path(&path);
9154
9155         return ret;
9156 }
9157
9158 static int maybe_repair_root_item(struct btrfs_path *path,
9159                                   const struct btrfs_key *root_key,
9160                                   const int read_only_mode)
9161 {
9162         const u64 root_id = root_key->objectid;
9163         struct cache_extent *entry;
9164         struct root_item_info *rii;
9165         struct btrfs_root_item ri;
9166         unsigned long offset;
9167
9168         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9169         if (!entry) {
9170                 fprintf(stderr,
9171                         "Error: could not find extent items for root %llu\n",
9172                         root_key->objectid);
9173                 return -ENOENT;
9174         }
9175
9176         rii = container_of(entry, struct root_item_info, cache_extent);
9177         ASSERT(rii->cache_extent.start == root_id);
9178         ASSERT(rii->cache_extent.size == 1);
9179
9180         if (rii->node_count != 1) {
9181                 fprintf(stderr,
9182                         "Error: could not find btree root extent for root %llu\n",
9183                         root_id);
9184                 return -ENOENT;
9185         }
9186
9187         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9188         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9189
9190         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9191             btrfs_root_level(&ri) != rii->level ||
9192             btrfs_root_generation(&ri) != rii->gen) {
9193
9194                 /*
9195                  * If we're in repair mode but our caller told us to not update
9196                  * the root item, i.e. just check if it needs to be updated, don't
9197                  * print this message, since the caller will call us again shortly
9198                  * for the same root item without read only mode (the caller will
9199                  * open a transaction first).
9200                  */
9201                 if (!(read_only_mode && repair))
9202                         fprintf(stderr,
9203                                 "%sroot item for root %llu,"
9204                                 " current bytenr %llu, current gen %llu, current level %u,"
9205                                 " new bytenr %llu, new gen %llu, new level %u\n",
9206                                 (read_only_mode ? "" : "fixing "),
9207                                 root_id,
9208                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9209                                 btrfs_root_level(&ri),
9210                                 rii->bytenr, rii->gen, rii->level);
9211
9212                 if (btrfs_root_generation(&ri) > rii->gen) {
9213                         fprintf(stderr,
9214                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9215                                 root_id, btrfs_root_generation(&ri), rii->gen);
9216                         return -EINVAL;
9217                 }
9218
9219                 if (!read_only_mode) {
9220                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9221                         btrfs_set_root_level(&ri, rii->level);
9222                         btrfs_set_root_generation(&ri, rii->gen);
9223                         write_extent_buffer(path->nodes[0], &ri,
9224                                             offset, sizeof(ri));
9225                 }
9226
9227                 return 1;
9228         }
9229
9230         return 0;
9231 }
9232
9233 /*
9234  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9235  * caused read-only snapshots to be corrupted if they were created at a moment
9236  * when the source subvolume/snapshot had orphan items. The issue was that the
9237  * on-disk root items became incorrect, referring to the pre orphan cleanup root
9238  * node instead of the post orphan cleanup root node.
9239  * So this function, and its callees, just detects and fixes those cases. Even
9240  * though the regression was for read-only snapshots, this function applies to
9241  * any snapshot/subvolume root.
9242  * This must be run before any other repair code - not doing it so, makes other
9243  * repair code delete or modify backrefs in the extent tree for example, which
9244  * will result in an inconsistent fs after repairing the root items.
9245  */
9246 static int repair_root_items(struct btrfs_fs_info *info)
9247 {
9248         struct btrfs_path path;
9249         struct btrfs_key key;
9250         struct extent_buffer *leaf;
9251         struct btrfs_trans_handle *trans = NULL;
9252         int ret = 0;
9253         int bad_roots = 0;
9254         int need_trans = 0;
9255
9256         btrfs_init_path(&path);
9257
9258         ret = build_roots_info_cache(info);
9259         if (ret)
9260                 goto out;
9261
9262         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9263         key.type = BTRFS_ROOT_ITEM_KEY;
9264         key.offset = 0;
9265
9266 again:
9267         /*
9268          * Avoid opening and committing transactions if a leaf doesn't have
9269          * any root items that need to be fixed, so that we avoid rotating
9270          * backup roots unnecessarily.
9271          */
9272         if (need_trans) {
9273                 trans = btrfs_start_transaction(info->tree_root, 1);
9274                 if (IS_ERR(trans)) {
9275                         ret = PTR_ERR(trans);
9276                         goto out;
9277                 }
9278         }
9279
9280         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
9281                                 0, trans ? 1 : 0);
9282         if (ret < 0)
9283                 goto out;
9284         leaf = path.nodes[0];
9285
9286         while (1) {
9287                 struct btrfs_key found_key;
9288
9289                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
9290                         int no_more_keys = find_next_key(&path, &key);
9291
9292                         btrfs_release_path(&path);
9293                         if (trans) {
9294                                 ret = btrfs_commit_transaction(trans,
9295                                                                info->tree_root);
9296                                 trans = NULL;
9297                                 if (ret < 0)
9298                                         goto out;
9299                         }
9300                         need_trans = 0;
9301                         if (no_more_keys)
9302                                 break;
9303                         goto again;
9304                 }
9305
9306                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9307
9308                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9309                         goto next;
9310                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9311                         goto next;
9312
9313                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
9314                 if (ret < 0)
9315                         goto out;
9316                 if (ret) {
9317                         if (!trans && repair) {
9318                                 need_trans = 1;
9319                                 key = found_key;
9320                                 btrfs_release_path(&path);
9321                                 goto again;
9322                         }
9323                         bad_roots++;
9324                 }
9325 next:
9326                 path.slots[0]++;
9327         }
9328         ret = 0;
9329 out:
9330         free_roots_info_cache();
9331         btrfs_release_path(&path);
9332         if (trans)
9333                 btrfs_commit_transaction(trans, info->tree_root);
9334         if (ret < 0)
9335                 return ret;
9336
9337         return bad_roots;
9338 }
9339
9340 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
9341 {
9342         struct btrfs_trans_handle *trans;
9343         struct btrfs_block_group_cache *bg_cache;
9344         u64 current = 0;
9345         int ret = 0;
9346
9347         /* Clear all free space cache inodes and its extent data */
9348         while (1) {
9349                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
9350                 if (!bg_cache)
9351                         break;
9352                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
9353                 if (ret < 0)
9354                         return ret;
9355                 current = bg_cache->key.objectid + bg_cache->key.offset;
9356         }
9357
9358         /* Don't forget to set cache_generation to -1 */
9359         trans = btrfs_start_transaction(fs_info->tree_root, 0);
9360         if (IS_ERR(trans)) {
9361                 error("failed to update super block cache generation");
9362                 return PTR_ERR(trans);
9363         }
9364         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
9365         btrfs_commit_transaction(trans, fs_info->tree_root);
9366
9367         return ret;
9368 }
9369
9370 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
9371                 int clear_version)
9372 {
9373         int ret = 0;
9374
9375         if (clear_version == 1) {
9376                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
9377                         error(
9378                 "free space cache v2 detected, use --clear-space-cache v2");
9379                         ret = 1;
9380                         goto close_out;
9381                 }
9382                 printf("Clearing free space cache\n");
9383                 ret = clear_free_space_cache(fs_info);
9384                 if (ret) {
9385                         error("failed to clear free space cache");
9386                         ret = 1;
9387                 } else {
9388                         printf("Free space cache cleared\n");
9389                 }
9390         } else if (clear_version == 2) {
9391                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
9392                         printf("no free space cache v2 to clear\n");
9393                         ret = 0;
9394                         goto close_out;
9395                 }
9396                 printf("Clear free space cache v2\n");
9397                 ret = btrfs_clear_free_space_tree(fs_info);
9398                 if (ret) {
9399                         error("failed to clear free space cache v2: %d", ret);
9400                         ret = 1;
9401                 } else {
9402                         printf("free space cache v2 cleared\n");
9403                 }
9404         }
9405 close_out:
9406         return ret;
9407 }
9408
/*
 * Help text for "btrfs check", handed to usage() by cmd_check().
 *
 * The array is NULL-terminated.  The leading entries hold the synopsis and
 * the description; the entries after the empty string list the options.
 * Keep the option list in sync with the getopt table in cmd_check().
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--force                     skip mount checks, repair is not possible",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
9440
9441 int cmd_check(int argc, char **argv)
9442 {
9443         struct cache_tree root_cache;
9444         struct btrfs_root *root;
9445         struct btrfs_fs_info *info;
9446         u64 bytenr = 0;
9447         u64 subvolid = 0;
9448         u64 tree_root_bytenr = 0;
9449         u64 chunk_root_bytenr = 0;
9450         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9451         int ret = 0;
9452         int err = 0;
9453         u64 num;
9454         int init_csum_tree = 0;
9455         int readonly = 0;
9456         int clear_space_cache = 0;
9457         int qgroup_report = 0;
9458         int qgroups_repaired = 0;
9459         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
9460         int force = 0;
9461
9462         while(1) {
9463                 int c;
9464                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
9465                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
9466                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
9467                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
9468                         GETOPT_VAL_FORCE };
9469                 static const struct option long_options[] = {
9470                         { "super", required_argument, NULL, 's' },
9471                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
9472                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
9473                         { "init-csum-tree", no_argument, NULL,
9474                                 GETOPT_VAL_INIT_CSUM },
9475                         { "init-extent-tree", no_argument, NULL,
9476                                 GETOPT_VAL_INIT_EXTENT },
9477                         { "check-data-csum", no_argument, NULL,
9478                                 GETOPT_VAL_CHECK_CSUM },
9479                         { "backup", no_argument, NULL, 'b' },
9480                         { "subvol-extents", required_argument, NULL, 'E' },
9481                         { "qgroup-report", no_argument, NULL, 'Q' },
9482                         { "tree-root", required_argument, NULL, 'r' },
9483                         { "chunk-root", required_argument, NULL,
9484                                 GETOPT_VAL_CHUNK_TREE },
9485                         { "progress", no_argument, NULL, 'p' },
9486                         { "mode", required_argument, NULL,
9487                                 GETOPT_VAL_MODE },
9488                         { "clear-space-cache", required_argument, NULL,
9489                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
9490                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
9491                         { NULL, 0, NULL, 0}
9492                 };
9493
9494                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
9495                 if (c < 0)
9496                         break;
9497                 switch(c) {
9498                         case 'a': /* ignored */ break;
9499                         case 'b':
9500                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9501                                 break;
9502                         case 's':
9503                                 num = arg_strtou64(optarg);
9504                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9505                                         error(
9506                                         "super mirror should be less than %d",
9507                                                 BTRFS_SUPER_MIRROR_MAX);
9508                                         exit(1);
9509                                 }
9510                                 bytenr = btrfs_sb_offset(((int)num));
9511                                 printf("using SB copy %llu, bytenr %llu\n", num,
9512                                        (unsigned long long)bytenr);
9513                                 break;
9514                         case 'Q':
9515                                 qgroup_report = 1;
9516                                 break;
9517                         case 'E':
9518                                 subvolid = arg_strtou64(optarg);
9519                                 break;
9520                         case 'r':
9521                                 tree_root_bytenr = arg_strtou64(optarg);
9522                                 break;
9523                         case GETOPT_VAL_CHUNK_TREE:
9524                                 chunk_root_bytenr = arg_strtou64(optarg);
9525                                 break;
9526                         case 'p':
9527                                 ctx.progress_enabled = true;
9528                                 break;
9529                         case '?':
9530                         case 'h':
9531                                 usage(cmd_check_usage);
9532                         case GETOPT_VAL_REPAIR:
9533                                 printf("enabling repair mode\n");
9534                                 repair = 1;
9535                                 ctree_flags |= OPEN_CTREE_WRITES;
9536                                 break;
9537                         case GETOPT_VAL_READONLY:
9538                                 readonly = 1;
9539                                 break;
9540                         case GETOPT_VAL_INIT_CSUM:
9541                                 printf("Creating a new CRC tree\n");
9542                                 init_csum_tree = 1;
9543                                 repair = 1;
9544                                 ctree_flags |= OPEN_CTREE_WRITES;
9545                                 break;
9546                         case GETOPT_VAL_INIT_EXTENT:
9547                                 init_extent_tree = 1;
9548                                 ctree_flags |= (OPEN_CTREE_WRITES |
9549                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9550                                 repair = 1;
9551                                 break;
9552                         case GETOPT_VAL_CHECK_CSUM:
9553                                 check_data_csum = 1;
9554                                 break;
9555                         case GETOPT_VAL_MODE:
9556                                 check_mode = parse_check_mode(optarg);
9557                                 if (check_mode == CHECK_MODE_UNKNOWN) {
9558                                         error("unknown mode: %s", optarg);
9559                                         exit(1);
9560                                 }
9561                                 break;
9562                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
9563                                 if (strcmp(optarg, "v1") == 0) {
9564                                         clear_space_cache = 1;
9565                                 } else if (strcmp(optarg, "v2") == 0) {
9566                                         clear_space_cache = 2;
9567                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
9568                                 } else {
9569                                         error(
9570                 "invalid argument to --clear-space-cache, must be v1 or v2");
9571                                         exit(1);
9572                                 }
9573                                 ctree_flags |= OPEN_CTREE_WRITES;
9574                                 break;
9575                         case GETOPT_VAL_FORCE:
9576                                 force = 1;
9577                                 break;
9578                 }
9579         }
9580
9581         if (check_argc_exact(argc - optind, 1))
9582                 usage(cmd_check_usage);
9583
9584         if (ctx.progress_enabled) {
9585                 ctx.tp = TASK_NOTHING;
9586                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9587         }
9588
9589         /* This check is the only reason for --readonly to exist */
9590         if (readonly && repair) {
9591                 error("repair options are not compatible with --readonly");
9592                 exit(1);
9593         }
9594
9595         /*
9596          * experimental and dangerous
9597          */
9598         if (repair && check_mode == CHECK_MODE_LOWMEM)
9599                 warning("low-memory mode repair support is only partial");
9600
9601         radix_tree_init();
9602         cache_tree_init(&root_cache);
9603
9604         ret = check_mounted(argv[optind]);
9605         if (!force) {
9606                 if (ret < 0) {
9607                         error("could not check mount status: %s",
9608                                         strerror(-ret));
9609                         err |= !!ret;
9610                         goto err_out;
9611                 } else if (ret) {
9612                         error(
9613 "%s is currently mounted, use --force if you really intend to check the filesystem",
9614                                 argv[optind]);
9615                         ret = -EBUSY;
9616                         err |= !!ret;
9617                         goto err_out;
9618                 }
9619         } else {
9620                 if (repair) {
9621                         error("repair and --force is not yet supported");
9622                         ret = 1;
9623                         err |= !!ret;
9624                         goto err_out;
9625                 }
9626                 if (ret < 0) {
9627                         warning(
9628 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
9629                                 argv[optind]);
9630                 } else if (ret) {
9631                         warning(
9632                         "filesystem mounted, continuing because of --force");
9633                 }
9634                 /* A block device is mounted in exclusive mode by kernel */
9635                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
9636         }
9637
9638         /* only allow partial opening under repair mode */
9639         if (repair)
9640                 ctree_flags |= OPEN_CTREE_PARTIAL;
9641
9642         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9643                                   chunk_root_bytenr, ctree_flags);
9644         if (!info) {
9645                 error("cannot open file system");
9646                 ret = -EIO;
9647                 err |= !!ret;
9648                 goto err_out;
9649         }
9650
9651         global_info = info;
9652         root = info->fs_root;
9653         uuid_unparse(info->super_copy->fsid, uuidbuf);
9654
9655         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9656
9657         /*
9658          * Check the bare minimum before starting anything else that could rely
9659          * on it, namely the tree roots, any local consistency checks
9660          */
9661         if (!extent_buffer_uptodate(info->tree_root->node) ||
9662             !extent_buffer_uptodate(info->dev_root->node) ||
9663             !extent_buffer_uptodate(info->chunk_root->node)) {
9664                 error("critical roots corrupted, unable to check the filesystem");
9665                 err |= !!ret;
9666                 ret = -EIO;
9667                 goto close_out;
9668         }
9669
9670         if (clear_space_cache) {
9671                 ret = do_clear_free_space_cache(info, clear_space_cache);
9672                 err |= !!ret;
9673                 goto close_out;
9674         }
9675
9676         /*
9677          * repair mode will force us to commit transaction which
9678          * will make us fail to load log tree when mounting.
9679          */
9680         if (repair && btrfs_super_log_root(info->super_copy)) {
9681                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
9682                 if (!ret) {
9683                         ret = 1;
9684                         err |= !!ret;
9685                         goto close_out;
9686                 }
9687                 ret = zero_log_tree(root);
9688                 err |= !!ret;
9689                 if (ret) {
9690                         error("failed to zero log tree: %d", ret);
9691                         goto close_out;
9692                 }
9693         }
9694
9695         if (qgroup_report) {
9696                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9697                        uuidbuf);
9698                 ret = qgroup_verify_all(info);
9699                 err |= !!ret;
9700                 if (ret == 0)
9701                         report_qgroups(1);
9702                 goto close_out;
9703         }
9704         if (subvolid) {
9705                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9706                        subvolid, argv[optind], uuidbuf);
9707                 ret = print_extent_state(info, subvolid);
9708                 err |= !!ret;
9709                 goto close_out;
9710         }
9711
9712         if (init_extent_tree || init_csum_tree) {
9713                 struct btrfs_trans_handle *trans;
9714
9715                 trans = btrfs_start_transaction(info->extent_root, 0);
9716                 if (IS_ERR(trans)) {
9717                         error("error starting transaction");
9718                         ret = PTR_ERR(trans);
9719                         err |= !!ret;
9720                         goto close_out;
9721                 }
9722
9723                 if (init_extent_tree) {
9724                         printf("Creating a new extent tree\n");
9725                         ret = reinit_extent_tree(trans, info);
9726                         err |= !!ret;
9727                         if (ret)
9728                                 goto close_out;
9729                 }
9730
9731                 if (init_csum_tree) {
9732                         printf("Reinitialize checksum tree\n");
9733                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9734                         if (ret) {
9735                                 error("checksum tree initialization failed: %d",
9736                                                 ret);
9737                                 ret = -EIO;
9738                                 err |= !!ret;
9739                                 goto close_out;
9740                         }
9741
9742                         ret = fill_csum_tree(trans, info->csum_root,
9743                                              init_extent_tree);
9744                         err |= !!ret;
9745                         if (ret) {
9746                                 error("checksum tree refilling failed: %d", ret);
9747                                 return -EIO;
9748                         }
9749                 }
9750                 /*
9751                  * Ok now we commit and run the normal fsck, which will add
9752                  * extent entries for all of the items it finds.
9753                  */
9754                 ret = btrfs_commit_transaction(trans, info->extent_root);
9755                 err |= !!ret;
9756                 if (ret)
9757                         goto close_out;
9758         }
9759         if (!extent_buffer_uptodate(info->extent_root->node)) {
9760                 error("critical: extent_root, unable to check the filesystem");
9761                 ret = -EIO;
9762                 err |= !!ret;
9763                 goto close_out;
9764         }
9765         if (!extent_buffer_uptodate(info->csum_root->node)) {
9766                 error("critical: csum_root, unable to check the filesystem");
9767                 ret = -EIO;
9768                 err |= !!ret;
9769                 goto close_out;
9770         }
9771
9772         if (!init_extent_tree) {
9773                 ret = repair_root_items(info);
9774                 if (ret < 0) {
9775                         err = !!ret;
9776                         error("failed to repair root items: %s", strerror(-ret));
9777                         goto close_out;
9778                 }
9779                 if (repair) {
9780                         fprintf(stderr, "Fixed %d roots.\n", ret);
9781                         ret = 0;
9782                 } else if (ret > 0) {
9783                         fprintf(stderr,
9784                                 "Found %d roots with an outdated root item.\n",
9785                                 ret);
9786                         fprintf(stderr,
9787         "Please run a filesystem check with the option --repair to fix them.\n");
9788                         ret = 1;
9789                         err |= ret;
9790                         goto close_out;
9791                 }
9792         }
9793
9794         ret = do_check_chunks_and_extents(info);
9795         err |= !!ret;
9796         if (ret)
9797                 error(
9798                 "errors found in extent allocation tree or chunk allocation");
9799
9800         /* Only re-check super size after we checked and repaired the fs */
9801         err |= !is_super_size_valid(info);
9802
9803         if (!ctx.progress_enabled) {
9804                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
9805                         fprintf(stderr, "checking free space tree\n");
9806                 else
9807                         fprintf(stderr, "checking free space cache\n");
9808         }
9809         ret = check_space_cache(root);
9810         err |= !!ret;
9811         if (ret) {
9812                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
9813                         error("errors found in free space tree");
9814                 else
9815                         error("errors found in free space cache");
9816                 goto out;
9817         }
9818
9819         /*
9820          * We used to have to have these hole extents in between our real
9821          * extents so if we don't have this flag set we need to make sure there
9822          * are no gaps in the file extents for inodes, otherwise we can just
9823          * ignore it when this happens.
9824          */
9825         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
9826         ret = do_check_fs_roots(info, &root_cache);
9827         err |= !!ret;
9828         if (ret) {
9829                 error("errors found in fs roots");
9830                 goto out;
9831         }
9832
9833         fprintf(stderr, "checking csums\n");
9834         ret = check_csums(root);
9835         err |= !!ret;
9836         if (ret) {
9837                 error("errors found in csum tree");
9838                 goto out;
9839         }
9840
9841         fprintf(stderr, "checking root refs\n");
9842         /* For low memory mode, check_fs_roots_v2 handles root refs */
9843         if (check_mode != CHECK_MODE_LOWMEM) {
9844                 ret = check_root_refs(root, &root_cache);
9845                 err |= !!ret;
9846                 if (ret) {
9847                         error("errors found in root refs");
9848                         goto out;
9849                 }
9850         }
9851
9852         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9853                 struct extent_buffer *eb;
9854
9855                 eb = list_first_entry(&root->fs_info->recow_ebs,
9856                                       struct extent_buffer, recow);
9857                 list_del_init(&eb->recow);
9858                 ret = recow_extent_buffer(root, eb);
9859                 err |= !!ret;
9860                 if (ret) {
9861                         error("fails to fix transid errors");
9862                         break;
9863                 }
9864         }
9865
9866         while (!list_empty(&delete_items)) {
9867                 struct bad_item *bad;
9868
9869                 bad = list_first_entry(&delete_items, struct bad_item, list);
9870                 list_del_init(&bad->list);
9871                 if (repair) {
9872                         ret = delete_bad_item(root, bad);
9873                         err |= !!ret;
9874                 }
9875                 free(bad);
9876         }
9877
9878         if (info->quota_enabled) {
9879                 fprintf(stderr, "checking quota groups\n");
9880                 ret = qgroup_verify_all(info);
9881                 err |= !!ret;
9882                 if (ret) {
9883                         error("failed to check quota groups");
9884                         goto out;
9885                 }
9886                 report_qgroups(0);
9887                 ret = repair_qgroups(info, &qgroups_repaired);
9888                 err |= !!ret;
9889                 if (err) {
9890                         error("failed to repair quota groups");
9891                         goto out;
9892                 }
9893                 ret = 0;
9894         }
9895
9896         if (!list_empty(&root->fs_info->recow_ebs)) {
9897                 error("transid errors in file system");
9898                 ret = 1;
9899                 err |= !!ret;
9900         }
9901 out:
9902         printf("found %llu bytes used, ",
9903                (unsigned long long)bytes_used);
9904         if (err)
9905                 printf("error(s) found\n");
9906         else
9907                 printf("no error found\n");
9908         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9909         printf("total tree bytes: %llu\n",
9910                (unsigned long long)total_btree_bytes);
9911         printf("total fs tree bytes: %llu\n",
9912                (unsigned long long)total_fs_tree_bytes);
9913         printf("total extent tree bytes: %llu\n",
9914                (unsigned long long)total_extent_tree_bytes);
9915         printf("btree space waste bytes: %llu\n",
9916                (unsigned long long)btree_space_waste);
9917         printf("file data blocks allocated: %llu\n referenced %llu\n",
9918                 (unsigned long long)data_bytes_allocated,
9919                 (unsigned long long)data_bytes_referenced);
9920
9921         free_qgroup_counts();
9922         free_root_recs_tree(&root_cache);
9923 close_out:
9924         close_ctree(root);
9925 err_out:
9926         if (ctx.progress_enabled)
9927                 task_deinit(ctx.info);
9928
9929         return err;
9930 }