btrfs-progs: check: Move check_child_node to check/common.c
[platform/upstream/btrfs-progs.git] / check / main.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46 #include "check/original.h"
47 #include "check/lowmem.h"
48 #include "check/common.h"
49
/*
 * Phases reported by the progress indicator.  The value is used directly as
 * an index into task_position_string[] in print_status_check(), so the order
 * of the first three entries must match the order of the strings there.
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING, /* have to be the last element */
};
56
/* State handed to the progress callbacks (see print_status_check()). */
struct task_ctx {
	int progress_enabled;
	/* Current phase; selects the message printed by print_status_check() */
	enum task_position tp;

	/* Handle passed to task_period_start() and task_period_wait() */
	struct task_info *info;
};
63
/*
 * File-scope state of the checker.  None of these are declared static, so
 * they have external linkage.
 * NOTE(review): the u64 counters look like running totals accumulated while
 * the trees are walked -- confirm against the code that updates them.
 */
u64 bytes_used = 0;
u64 total_csum_bytes = 0;
u64 total_btree_bytes = 0;
u64 total_fs_tree_bytes = 0;
u64 total_extent_tree_bytes = 0;
u64 btree_space_waste = 0;
u64 data_bytes_allocated = 0;
u64 data_bytes_referenced = 0;
LIST_HEAD(duplicate_extents);
LIST_HEAD(delete_items);
/*
 * When non-zero, maybe_free_inode_rec() does not flag
 * I_ERR_FILE_EXTENT_DISCOUNT for files whose extents do not cover isize.
 */
int no_holes = 0;
int init_extent_tree = 0;
int check_data_csum = 0;
struct btrfs_fs_info *global_info;
/* Progress-reporting context, zero-initialized */
struct task_ctx ctx = { 0 };
struct cache_tree *roots_info_cache = NULL;
80
/*
 * Check implementations that can be selected by name via parse_check_mode().
 * CHECK_MODE_UNKNOWN is what parse_check_mode() returns for a string it does
 * not recognize; CHECK_MODE_DEFAULT is an alias for the original mode.
 */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};
87
/* Check mode in effect for this file; starts out as CHECK_MODE_DEFAULT. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
89
90 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
91 {
92         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
93         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
94         struct data_backref *back1 = to_data_backref(ext1);
95         struct data_backref *back2 = to_data_backref(ext2);
96
97         WARN_ON(!ext1->is_data);
98         WARN_ON(!ext2->is_data);
99
100         /* parent and root are a union, so this covers both */
101         if (back1->parent > back2->parent)
102                 return 1;
103         if (back1->parent < back2->parent)
104                 return -1;
105
106         /* This is a full backref and the parents match. */
107         if (back1->node.full_backref)
108                 return 0;
109
110         if (back1->owner > back2->owner)
111                 return 1;
112         if (back1->owner < back2->owner)
113                 return -1;
114
115         if (back1->offset > back2->offset)
116                 return 1;
117         if (back1->offset < back2->offset)
118                 return -1;
119
120         if (back1->found_ref && back2->found_ref) {
121                 if (back1->disk_bytenr > back2->disk_bytenr)
122                         return 1;
123                 if (back1->disk_bytenr < back2->disk_bytenr)
124                         return -1;
125
126                 if (back1->bytes > back2->bytes)
127                         return 1;
128                 if (back1->bytes < back2->bytes)
129                         return -1;
130         }
131
132         return 0;
133 }
134
135 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
136 {
137         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
138         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
139         struct tree_backref *back1 = to_tree_backref(ext1);
140         struct tree_backref *back2 = to_tree_backref(ext2);
141
142         WARN_ON(ext1->is_data);
143         WARN_ON(ext2->is_data);
144
145         /* parent and root are a union, so this covers both */
146         if (back1->parent > back2->parent)
147                 return 1;
148         if (back1->parent < back2->parent)
149                 return -1;
150
151         return 0;
152 }
153
154 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
155 {
156         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
157         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
158
159         if (ext1->is_data > ext2->is_data)
160                 return 1;
161
162         if (ext1->is_data < ext2->is_data)
163                 return -1;
164
165         if (ext1->full_backref > ext2->full_backref)
166                 return 1;
167         if (ext1->full_backref < ext2->full_backref)
168                 return -1;
169
170         if (ext1->is_data)
171                 return compare_data_backref(node1, node2);
172         else
173                 return compare_tree_backref(node1, node2);
174 }
175
176
177 static void *print_status_check(void *p)
178 {
179         struct task_ctx *priv = p;
180         const char work_indicator[] = { '.', 'o', 'O', 'o' };
181         uint32_t count = 0;
182         static char *task_position_string[] = {
183                 "checking extents",
184                 "checking free space cache",
185                 "checking fs roots",
186         };
187
188         task_period_start(priv->info, 1000 /* 1s */);
189
190         if (priv->tp == TASK_NOTHING)
191                 return NULL;
192
193         while (1) {
194                 printf("%s [%c]\r", task_position_string[priv->tp],
195                                 work_indicator[count % 4]);
196                 count++;
197                 fflush(stdout);
198                 task_period_wait(priv->info);
199         }
200         return NULL;
201 }
202
/*
 * Progress callback run when the status task finishes: terminate the
 * in-place status line with a newline and flush it.  @p is unused.
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
210
211 static enum btrfs_check_mode parse_check_mode(const char *str)
212 {
213         if (strcmp(str, "lowmem") == 0)
214                 return CHECK_MODE_LOWMEM;
215         if (strcmp(str, "orig") == 0)
216                 return CHECK_MODE_ORIGINAL;
217         if (strcmp(str, "original") == 0)
218                 return CHECK_MODE_ORIGINAL;
219
220         return CHECK_MODE_UNKNOWN;
221 }
222
223 /* Compatible function to allow reuse of old codes */
224 static u64 first_extent_gap(struct rb_root *holes)
225 {
226         struct file_extent_hole *hole;
227
228         if (RB_EMPTY_ROOT(holes))
229                 return (u64)-1;
230
231         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
232         return hole->start;
233 }
234
235 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
236 {
237         struct file_extent_hole *hole1;
238         struct file_extent_hole *hole2;
239
240         hole1 = rb_entry(node1, struct file_extent_hole, node);
241         hole2 = rb_entry(node2, struct file_extent_hole, node);
242
243         if (hole1->start > hole2->start)
244                 return -1;
245         if (hole1->start < hole2->start)
246                 return 1;
247         /* Now hole1->start == hole2->start */
248         if (hole1->len >= hole2->len)
249                 /*
250                  * Hole 1 will be merge center
251                  * Same hole will be merged later
252                  */
253                 return -1;
254         /* Hole 2 will be merge center */
255         return 1;
256 }
257
258 /*
259  * Add a hole to the record
260  *
261  * This will do hole merge for copy_file_extent_holes(),
262  * which will ensure there won't be continuous holes.
263  */
264 static int add_file_extent_hole(struct rb_root *holes,
265                                 u64 start, u64 len)
266 {
267         struct file_extent_hole *hole;
268         struct file_extent_hole *prev = NULL;
269         struct file_extent_hole *next = NULL;
270
271         hole = malloc(sizeof(*hole));
272         if (!hole)
273                 return -ENOMEM;
274         hole->start = start;
275         hole->len = len;
276         /* Since compare will not return 0, no -EEXIST will happen */
277         rb_insert(holes, &hole->node, compare_hole);
278
279         /* simple merge with previous hole */
280         if (rb_prev(&hole->node))
281                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
282                                 node);
283         if (prev && prev->start + prev->len >= hole->start) {
284                 hole->len = hole->start + hole->len - prev->start;
285                 hole->start = prev->start;
286                 rb_erase(&prev->node, holes);
287                 free(prev);
288                 prev = NULL;
289         }
290
291         /* iterate merge with next holes */
292         while (1) {
293                 if (!rb_next(&hole->node))
294                         break;
295                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
296                                         node);
297                 if (hole->start + hole->len >= next->start) {
298                         if (hole->start + hole->len <= next->start + next->len)
299                                 hole->len = next->start + next->len -
300                                             hole->start;
301                         rb_erase(&next->node, holes);
302                         free(next);
303                         next = NULL;
304                 } else
305                         break;
306         }
307         return 0;
308 }
309
310 static int compare_hole_range(struct rb_node *node, void *data)
311 {
312         struct file_extent_hole *hole;
313         u64 start;
314
315         hole = (struct file_extent_hole *)data;
316         start = hole->start;
317
318         hole = rb_entry(node, struct file_extent_hole, node);
319         if (start < hole->start)
320                 return -1;
321         if (start >= hole->start && start < hole->start + hole->len)
322                 return 0;
323         return 1;
324 }
325
326 /*
327  * Delete a hole in the record
328  *
329  * This will do the hole split and is much restrict than add.
330  */
331 static int del_file_extent_hole(struct rb_root *holes,
332                                 u64 start, u64 len)
333 {
334         struct file_extent_hole *hole;
335         struct file_extent_hole tmp;
336         u64 prev_start = 0;
337         u64 prev_len = 0;
338         u64 next_start = 0;
339         u64 next_len = 0;
340         struct rb_node *node;
341         int have_prev = 0;
342         int have_next = 0;
343         int ret = 0;
344
345         tmp.start = start;
346         tmp.len = len;
347         node = rb_search(holes, &tmp, compare_hole_range, NULL);
348         if (!node)
349                 return -EEXIST;
350         hole = rb_entry(node, struct file_extent_hole, node);
351         if (start + len > hole->start + hole->len)
352                 return -EEXIST;
353
354         /*
355          * Now there will be no overlap, delete the hole and re-add the
356          * split(s) if they exists.
357          */
358         if (start > hole->start) {
359                 prev_start = hole->start;
360                 prev_len = start - hole->start;
361                 have_prev = 1;
362         }
363         if (hole->start + hole->len > start + len) {
364                 next_start = start + len;
365                 next_len = hole->start + hole->len - start - len;
366                 have_next = 1;
367         }
368         rb_erase(node, holes);
369         free(hole);
370         if (have_prev) {
371                 ret = add_file_extent_hole(holes, prev_start, prev_len);
372                 if (ret < 0)
373                         return ret;
374         }
375         if (have_next) {
376                 ret = add_file_extent_hole(holes, next_start, next_len);
377                 if (ret < 0)
378                         return ret;
379         }
380         return 0;
381 }
382
383 static int copy_file_extent_holes(struct rb_root *dst,
384                                   struct rb_root *src)
385 {
386         struct file_extent_hole *hole;
387         struct rb_node *node;
388         int ret = 0;
389
390         node = rb_first(src);
391         while (node) {
392                 hole = rb_entry(node, struct file_extent_hole, node);
393                 ret = add_file_extent_hole(dst, hole->start, hole->len);
394                 if (ret)
395                         break;
396                 node = rb_next(node);
397         }
398         return ret;
399 }
400
401 static void free_file_extent_holes(struct rb_root *holes)
402 {
403         struct rb_node *node;
404         struct file_extent_hole *hole;
405
406         node = rb_first(holes);
407         while (node) {
408                 hole = rb_entry(node, struct file_extent_hole, node);
409                 rb_erase(node, holes);
410                 free(hole);
411                 node = rb_first(holes);
412         }
413 }
414
/*
 * Forward declaration; as a static function its definition has to live
 * elsewhere in this translation unit.
 */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
416
417 static void record_root_in_trans(struct btrfs_trans_handle *trans,
418                                  struct btrfs_root *root)
419 {
420         if (root->last_trans != trans->transid) {
421                 root->track_dirty = 1;
422                 root->last_trans = trans->transid;
423                 root->commit_root = root->node;
424                 extent_buffer_get(root->node);
425         }
426 }
427
428 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
429 {
430         struct device_record *rec1;
431         struct device_record *rec2;
432
433         rec1 = rb_entry(node1, struct device_record, node);
434         rec2 = rb_entry(node2, struct device_record, node);
435         if (rec1->devid > rec2->devid)
436                 return -1;
437         else if (rec1->devid < rec2->devid)
438                 return 1;
439         else
440                 return 0;
441 }
442
443 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
444 {
445         struct inode_record *rec;
446         struct inode_backref *backref;
447         struct inode_backref *orig;
448         struct inode_backref *tmp;
449         struct orphan_data_extent *src_orphan;
450         struct orphan_data_extent *dst_orphan;
451         struct rb_node *rb;
452         size_t size;
453         int ret;
454
455         rec = malloc(sizeof(*rec));
456         if (!rec)
457                 return ERR_PTR(-ENOMEM);
458         memcpy(rec, orig_rec, sizeof(*rec));
459         rec->refs = 1;
460         INIT_LIST_HEAD(&rec->backrefs);
461         INIT_LIST_HEAD(&rec->orphan_extents);
462         rec->holes = RB_ROOT;
463
464         list_for_each_entry(orig, &orig_rec->backrefs, list) {
465                 size = sizeof(*orig) + orig->namelen + 1;
466                 backref = malloc(size);
467                 if (!backref) {
468                         ret = -ENOMEM;
469                         goto cleanup;
470                 }
471                 memcpy(backref, orig, size);
472                 list_add_tail(&backref->list, &rec->backrefs);
473         }
474         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
475                 dst_orphan = malloc(sizeof(*dst_orphan));
476                 if (!dst_orphan) {
477                         ret = -ENOMEM;
478                         goto cleanup;
479                 }
480                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
481                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
482         }
483         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
484         if (ret < 0)
485                 goto cleanup_rb;
486
487         return rec;
488
489 cleanup_rb:
490         rb = rb_first(&rec->holes);
491         while (rb) {
492                 struct file_extent_hole *hole;
493
494                 hole = rb_entry(rb, struct file_extent_hole, node);
495                 rb = rb_next(rb);
496                 free(hole);
497         }
498
499 cleanup:
500         if (!list_empty(&rec->backrefs))
501                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
502                         list_del(&orig->list);
503                         free(orig);
504                 }
505
506         if (!list_empty(&rec->orphan_extents))
507                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
508                         list_del(&orig->list);
509                         free(orig);
510                 }
511
512         free(rec);
513
514         return ERR_PTR(ret);
515 }
516
517 static void print_orphan_data_extents(struct list_head *orphan_extents,
518                                       u64 objectid)
519 {
520         struct orphan_data_extent *orphan;
521
522         if (list_empty(orphan_extents))
523                 return;
524         printf("The following data extent is lost in tree %llu:\n",
525                objectid);
526         list_for_each_entry(orphan, orphan_extents, list) {
527                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
528                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
529                        orphan->disk_len);
530         }
531 }
532
533 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
534 {
535         u64 root_objectid = root->root_key.objectid;
536         int errors = rec->errors;
537
538         if (!errors)
539                 return;
540         /* reloc root errors, we print its corresponding fs root objectid*/
541         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
542                 root_objectid = root->root_key.offset;
543                 fprintf(stderr, "reloc");
544         }
545         fprintf(stderr, "root %llu inode %llu errors %x",
546                 (unsigned long long) root_objectid,
547                 (unsigned long long) rec->ino, rec->errors);
548
549         if (errors & I_ERR_NO_INODE_ITEM)
550                 fprintf(stderr, ", no inode item");
551         if (errors & I_ERR_NO_ORPHAN_ITEM)
552                 fprintf(stderr, ", no orphan item");
553         if (errors & I_ERR_DUP_INODE_ITEM)
554                 fprintf(stderr, ", dup inode item");
555         if (errors & I_ERR_DUP_DIR_INDEX)
556                 fprintf(stderr, ", dup dir index");
557         if (errors & I_ERR_ODD_DIR_ITEM)
558                 fprintf(stderr, ", odd dir item");
559         if (errors & I_ERR_ODD_FILE_EXTENT)
560                 fprintf(stderr, ", odd file extent");
561         if (errors & I_ERR_BAD_FILE_EXTENT)
562                 fprintf(stderr, ", bad file extent");
563         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
564                 fprintf(stderr, ", file extent overlap");
565         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
566                 fprintf(stderr, ", file extent discount");
567         if (errors & I_ERR_DIR_ISIZE_WRONG)
568                 fprintf(stderr, ", dir isize wrong");
569         if (errors & I_ERR_FILE_NBYTES_WRONG)
570                 fprintf(stderr, ", nbytes wrong");
571         if (errors & I_ERR_ODD_CSUM_ITEM)
572                 fprintf(stderr, ", odd csum item");
573         if (errors & I_ERR_SOME_CSUM_MISSING)
574                 fprintf(stderr, ", some csum missing");
575         if (errors & I_ERR_LINK_COUNT_WRONG)
576                 fprintf(stderr, ", link count wrong");
577         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
578                 fprintf(stderr, ", orphan file extent");
579         fprintf(stderr, "\n");
580         /* Print the orphan extents if needed */
581         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
582                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
583
584         /* Print the holes if needed */
585         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
586                 struct file_extent_hole *hole;
587                 struct rb_node *node;
588                 int found = 0;
589
590                 node = rb_first(&rec->holes);
591                 fprintf(stderr, "Found file extent holes:\n");
592                 while (node) {
593                         found = 1;
594                         hole = rb_entry(node, struct file_extent_hole, node);
595                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
596                                 hole->start, hole->len);
597                         node = rb_next(node);
598                 }
599                 if (!found)
600                         fprintf(stderr, "\tstart: 0, len: %llu\n",
601                                 round_up(rec->isize,
602                                          root->fs_info->sectorsize));
603         }
604 }
605
606 static void print_ref_error(int errors)
607 {
608         if (errors & REF_ERR_NO_DIR_ITEM)
609                 fprintf(stderr, ", no dir item");
610         if (errors & REF_ERR_NO_DIR_INDEX)
611                 fprintf(stderr, ", no dir index");
612         if (errors & REF_ERR_NO_INODE_REF)
613                 fprintf(stderr, ", no inode ref");
614         if (errors & REF_ERR_DUP_DIR_ITEM)
615                 fprintf(stderr, ", dup dir item");
616         if (errors & REF_ERR_DUP_DIR_INDEX)
617                 fprintf(stderr, ", dup dir index");
618         if (errors & REF_ERR_DUP_INODE_REF)
619                 fprintf(stderr, ", dup inode ref");
620         if (errors & REF_ERR_INDEX_UNMATCH)
621                 fprintf(stderr, ", index mismatch");
622         if (errors & REF_ERR_FILETYPE_UNMATCH)
623                 fprintf(stderr, ", filetype mismatch");
624         if (errors & REF_ERR_NAME_TOO_LONG)
625                 fprintf(stderr, ", name too long");
626         if (errors & REF_ERR_NO_ROOT_REF)
627                 fprintf(stderr, ", no root ref");
628         if (errors & REF_ERR_NO_ROOT_BACKREF)
629                 fprintf(stderr, ", no root backref");
630         if (errors & REF_ERR_DUP_ROOT_REF)
631                 fprintf(stderr, ", dup root ref");
632         if (errors & REF_ERR_DUP_ROOT_BACKREF)
633                 fprintf(stderr, ", dup root backref");
634         fprintf(stderr, "\n");
635 }
636
637 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
638                                           u64 ino, int mod)
639 {
640         struct ptr_node *node;
641         struct cache_extent *cache;
642         struct inode_record *rec = NULL;
643         int ret;
644
645         cache = lookup_cache_extent(inode_cache, ino, 1);
646         if (cache) {
647                 node = container_of(cache, struct ptr_node, cache);
648                 rec = node->data;
649                 if (mod && rec->refs > 1) {
650                         node->data = clone_inode_rec(rec);
651                         if (IS_ERR(node->data))
652                                 return node->data;
653                         rec->refs--;
654                         rec = node->data;
655                 }
656         } else if (mod) {
657                 rec = calloc(1, sizeof(*rec));
658                 if (!rec)
659                         return ERR_PTR(-ENOMEM);
660                 rec->ino = ino;
661                 rec->extent_start = (u64)-1;
662                 rec->refs = 1;
663                 INIT_LIST_HEAD(&rec->backrefs);
664                 INIT_LIST_HEAD(&rec->orphan_extents);
665                 rec->holes = RB_ROOT;
666
667                 node = malloc(sizeof(*node));
668                 if (!node) {
669                         free(rec);
670                         return ERR_PTR(-ENOMEM);
671                 }
672                 node->cache.start = ino;
673                 node->cache.size = 1;
674                 node->data = rec;
675
676                 if (ino == BTRFS_FREE_INO_OBJECTID)
677                         rec->found_link = 1;
678
679                 ret = insert_cache_extent(inode_cache, &node->cache);
680                 if (ret)
681                         return ERR_PTR(-EEXIST);
682         }
683         return rec;
684 }
685
686 static void free_orphan_data_extents(struct list_head *orphan_extents)
687 {
688         struct orphan_data_extent *orphan;
689
690         while (!list_empty(orphan_extents)) {
691                 orphan = list_entry(orphan_extents->next,
692                                     struct orphan_data_extent, list);
693                 list_del(&orphan->list);
694                 free(orphan);
695         }
696 }
697
698 static void free_inode_rec(struct inode_record *rec)
699 {
700         struct inode_backref *backref;
701
702         if (--rec->refs > 0)
703                 return;
704
705         while (!list_empty(&rec->backrefs)) {
706                 backref = to_inode_backref(rec->backrefs.next);
707                 list_del(&backref->list);
708                 free(backref);
709         }
710         free_orphan_data_extents(&rec->orphan_extents);
711         free_file_extent_holes(&rec->holes);
712         free(rec);
713 }
714
715 static int can_free_inode_rec(struct inode_record *rec)
716 {
717         if (!rec->errors && rec->checked && rec->found_inode_item &&
718             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
719                 return 1;
720         return 0;
721 }
722
723 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
724                                  struct inode_record *rec)
725 {
726         struct cache_extent *cache;
727         struct inode_backref *tmp, *backref;
728         struct ptr_node *node;
729         u8 filetype;
730
731         if (!rec->found_inode_item)
732                 return;
733
734         filetype = imode_to_type(rec->imode);
735         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
736                 if (backref->found_dir_item && backref->found_dir_index) {
737                         if (backref->filetype != filetype)
738                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
739                         if (!backref->errors && backref->found_inode_ref &&
740                             rec->nlink == rec->found_link) {
741                                 list_del(&backref->list);
742                                 free(backref);
743                         }
744                 }
745         }
746
747         if (!rec->checked || rec->merging)
748                 return;
749
750         if (S_ISDIR(rec->imode)) {
751                 if (rec->found_size != rec->isize)
752                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
753                 if (rec->found_file_extent)
754                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
755         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
756                 if (rec->found_dir_item)
757                         rec->errors |= I_ERR_ODD_DIR_ITEM;
758                 if (rec->found_size != rec->nbytes)
759                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
760                 if (rec->nlink > 0 && !no_holes &&
761                     (rec->extent_end < rec->isize ||
762                      first_extent_gap(&rec->holes) < rec->isize))
763                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
764         }
765
766         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
767                 if (rec->found_csum_item && rec->nodatasum)
768                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
769                 if (rec->some_csum_missing && !rec->nodatasum)
770                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
771         }
772
773         BUG_ON(rec->refs != 1);
774         if (can_free_inode_rec(rec)) {
775                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
776                 node = container_of(cache, struct ptr_node, cache);
777                 BUG_ON(node->data != rec);
778                 remove_cache_extent(inode_cache, &node->cache);
779                 free(node);
780                 free_inode_rec(rec);
781         }
782 }
783
784 static int check_orphan_item(struct btrfs_root *root, u64 ino)
785 {
786         struct btrfs_path path;
787         struct btrfs_key key;
788         int ret;
789
790         key.objectid = BTRFS_ORPHAN_OBJECTID;
791         key.type = BTRFS_ORPHAN_ITEM_KEY;
792         key.offset = ino;
793
794         btrfs_init_path(&path);
795         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
796         btrfs_release_path(&path);
797         if (ret > 0)
798                 ret = -ENOENT;
799         return ret;
800 }
801
802 static int process_inode_item(struct extent_buffer *eb,
803                               int slot, struct btrfs_key *key,
804                               struct shared_node *active_node)
805 {
806         struct inode_record *rec;
807         struct btrfs_inode_item *item;
808
809         rec = active_node->current;
810         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
811         if (rec->found_inode_item) {
812                 rec->errors |= I_ERR_DUP_INODE_ITEM;
813                 return 1;
814         }
815         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
816         rec->nlink = btrfs_inode_nlink(eb, item);
817         rec->isize = btrfs_inode_size(eb, item);
818         rec->nbytes = btrfs_inode_nbytes(eb, item);
819         rec->imode = btrfs_inode_mode(eb, item);
820         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
821                 rec->nodatasum = 1;
822         rec->found_inode_item = 1;
823         if (rec->nlink == 0)
824                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
825         maybe_free_inode_rec(&active_node->inode_cache, rec);
826         return 0;
827 }
828
829 static struct inode_backref *get_inode_backref(struct inode_record *rec,
830                                                 const char *name,
831                                                 int namelen, u64 dir)
832 {
833         struct inode_backref *backref;
834
835         list_for_each_entry(backref, &rec->backrefs, list) {
836                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
837                         break;
838                 if (backref->dir != dir || backref->namelen != namelen)
839                         continue;
840                 if (memcmp(name, backref->name, namelen))
841                         continue;
842                 return backref;
843         }
844
845         backref = malloc(sizeof(*backref) + namelen + 1);
846         if (!backref)
847                 return NULL;
848         memset(backref, 0, sizeof(*backref));
849         backref->dir = dir;
850         backref->namelen = namelen;
851         memcpy(backref->name, name, namelen);
852         backref->name[namelen] = '\0';
853         list_add_tail(&backref->list, &rec->backrefs);
854         return backref;
855 }
856
857 static int add_inode_backref(struct cache_tree *inode_cache,
858                              u64 ino, u64 dir, u64 index,
859                              const char *name, int namelen,
860                              u8 filetype, u8 itemtype, int errors)
861 {
862         struct inode_record *rec;
863         struct inode_backref *backref;
864
865         rec = get_inode_rec(inode_cache, ino, 1);
866         BUG_ON(IS_ERR(rec));
867         backref = get_inode_backref(rec, name, namelen, dir);
868         BUG_ON(!backref);
869         if (errors)
870                 backref->errors |= errors;
871         if (itemtype == BTRFS_DIR_INDEX_KEY) {
872                 if (backref->found_dir_index)
873                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
874                 if (backref->found_inode_ref && backref->index != index)
875                         backref->errors |= REF_ERR_INDEX_UNMATCH;
876                 if (backref->found_dir_item && backref->filetype != filetype)
877                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
878
879                 backref->index = index;
880                 backref->filetype = filetype;
881                 backref->found_dir_index = 1;
882         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
883                 rec->found_link++;
884                 if (backref->found_dir_item)
885                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
886                 if (backref->found_dir_index && backref->filetype != filetype)
887                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
888
889                 backref->filetype = filetype;
890                 backref->found_dir_item = 1;
891         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
892                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
893                 if (backref->found_inode_ref)
894                         backref->errors |= REF_ERR_DUP_INODE_REF;
895                 if (backref->found_dir_index && backref->index != index)
896                         backref->errors |= REF_ERR_INDEX_UNMATCH;
897                 else
898                         backref->index = index;
899
900                 backref->ref_type = itemtype;
901                 backref->found_inode_ref = 1;
902         } else {
903                 BUG_ON(1);
904         }
905
906         maybe_free_inode_rec(inode_cache, rec);
907         return 0;
908 }
909
910 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
911                             struct cache_tree *dst_cache)
912 {
913         struct inode_backref *backref;
914         u32 dir_count = 0;
915         int ret = 0;
916
917         dst->merging = 1;
918         list_for_each_entry(backref, &src->backrefs, list) {
919                 if (backref->found_dir_index) {
920                         add_inode_backref(dst_cache, dst->ino, backref->dir,
921                                         backref->index, backref->name,
922                                         backref->namelen, backref->filetype,
923                                         BTRFS_DIR_INDEX_KEY, backref->errors);
924                 }
925                 if (backref->found_dir_item) {
926                         dir_count++;
927                         add_inode_backref(dst_cache, dst->ino,
928                                         backref->dir, 0, backref->name,
929                                         backref->namelen, backref->filetype,
930                                         BTRFS_DIR_ITEM_KEY, backref->errors);
931                 }
932                 if (backref->found_inode_ref) {
933                         add_inode_backref(dst_cache, dst->ino,
934                                         backref->dir, backref->index,
935                                         backref->name, backref->namelen, 0,
936                                         backref->ref_type, backref->errors);
937                 }
938         }
939
940         if (src->found_dir_item)
941                 dst->found_dir_item = 1;
942         if (src->found_file_extent)
943                 dst->found_file_extent = 1;
944         if (src->found_csum_item)
945                 dst->found_csum_item = 1;
946         if (src->some_csum_missing)
947                 dst->some_csum_missing = 1;
948         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
949                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
950                 if (ret < 0)
951                         return ret;
952         }
953
954         BUG_ON(src->found_link < dir_count);
955         dst->found_link += src->found_link - dir_count;
956         dst->found_size += src->found_size;
957         if (src->extent_start != (u64)-1) {
958                 if (dst->extent_start == (u64)-1) {
959                         dst->extent_start = src->extent_start;
960                         dst->extent_end = src->extent_end;
961                 } else {
962                         if (dst->extent_end > src->extent_start)
963                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
964                         else if (dst->extent_end < src->extent_start) {
965                                 ret = add_file_extent_hole(&dst->holes,
966                                         dst->extent_end,
967                                         src->extent_start - dst->extent_end);
968                         }
969                         if (dst->extent_end < src->extent_end)
970                                 dst->extent_end = src->extent_end;
971                 }
972         }
973
974         dst->errors |= src->errors;
975         if (src->found_inode_item) {
976                 if (!dst->found_inode_item) {
977                         dst->nlink = src->nlink;
978                         dst->isize = src->isize;
979                         dst->nbytes = src->nbytes;
980                         dst->imode = src->imode;
981                         dst->nodatasum = src->nodatasum;
982                         dst->found_inode_item = 1;
983                 } else {
984                         dst->errors |= I_ERR_DUP_INODE_ITEM;
985                 }
986         }
987         dst->merging = 0;
988
989         return 0;
990 }
991
992 static int splice_shared_node(struct shared_node *src_node,
993                               struct shared_node *dst_node)
994 {
995         struct cache_extent *cache;
996         struct ptr_node *node, *ins;
997         struct cache_tree *src, *dst;
998         struct inode_record *rec, *conflict;
999         u64 current_ino = 0;
1000         int splice = 0;
1001         int ret;
1002
1003         if (--src_node->refs == 0)
1004                 splice = 1;
1005         if (src_node->current)
1006                 current_ino = src_node->current->ino;
1007
1008         src = &src_node->root_cache;
1009         dst = &dst_node->root_cache;
1010 again:
1011         cache = search_cache_extent(src, 0);
1012         while (cache) {
1013                 node = container_of(cache, struct ptr_node, cache);
1014                 rec = node->data;
1015                 cache = next_cache_extent(cache);
1016
1017                 if (splice) {
1018                         remove_cache_extent(src, &node->cache);
1019                         ins = node;
1020                 } else {
1021                         ins = malloc(sizeof(*ins));
1022                         BUG_ON(!ins);
1023                         ins->cache.start = node->cache.start;
1024                         ins->cache.size = node->cache.size;
1025                         ins->data = rec;
1026                         rec->refs++;
1027                 }
1028                 ret = insert_cache_extent(dst, &ins->cache);
1029                 if (ret == -EEXIST) {
1030                         conflict = get_inode_rec(dst, rec->ino, 1);
1031                         BUG_ON(IS_ERR(conflict));
1032                         merge_inode_recs(rec, conflict, dst);
1033                         if (rec->checked) {
1034                                 conflict->checked = 1;
1035                                 if (dst_node->current == conflict)
1036                                         dst_node->current = NULL;
1037                         }
1038                         maybe_free_inode_rec(dst, conflict);
1039                         free_inode_rec(rec);
1040                         free(ins);
1041                 } else {
1042                         BUG_ON(ret);
1043                 }
1044         }
1045
1046         if (src == &src_node->root_cache) {
1047                 src = &src_node->inode_cache;
1048                 dst = &dst_node->inode_cache;
1049                 goto again;
1050         }
1051
1052         if (current_ino > 0 && (!dst_node->current ||
1053             current_ino > dst_node->current->ino)) {
1054                 if (dst_node->current) {
1055                         dst_node->current->checked = 1;
1056                         maybe_free_inode_rec(dst, dst_node->current);
1057                 }
1058                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1059                 BUG_ON(IS_ERR(dst_node->current));
1060         }
1061         return 0;
1062 }
1063
1064 static void free_inode_ptr(struct cache_extent *cache)
1065 {
1066         struct ptr_node *node;
1067         struct inode_record *rec;
1068
1069         node = container_of(cache, struct ptr_node, cache);
1070         rec = node->data;
1071         free_inode_rec(rec);
1072         free(node);
1073 }
1074
1075 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1076
1077 static struct shared_node *find_shared_node(struct cache_tree *shared,
1078                                             u64 bytenr)
1079 {
1080         struct cache_extent *cache;
1081         struct shared_node *node;
1082
1083         cache = lookup_cache_extent(shared, bytenr, 1);
1084         if (cache) {
1085                 node = container_of(cache, struct shared_node, cache);
1086                 return node;
1087         }
1088         return NULL;
1089 }
1090
1091 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1092 {
1093         int ret;
1094         struct shared_node *node;
1095
1096         node = calloc(1, sizeof(*node));
1097         if (!node)
1098                 return -ENOMEM;
1099         node->cache.start = bytenr;
1100         node->cache.size = 1;
1101         cache_tree_init(&node->root_cache);
1102         cache_tree_init(&node->inode_cache);
1103         node->refs = refs;
1104
1105         ret = insert_cache_extent(shared, &node->cache);
1106
1107         return ret;
1108 }
1109
1110 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1111                              struct walk_control *wc, int level)
1112 {
1113         struct shared_node *node;
1114         struct shared_node *dest;
1115         int ret;
1116
1117         if (level == wc->active_node)
1118                 return 0;
1119
1120         BUG_ON(wc->active_node <= level);
1121         node = find_shared_node(&wc->shared, bytenr);
1122         if (!node) {
1123                 ret = add_shared_node(&wc->shared, bytenr, refs);
1124                 BUG_ON(ret);
1125                 node = find_shared_node(&wc->shared, bytenr);
1126                 wc->nodes[level] = node;
1127                 wc->active_node = level;
1128                 return 0;
1129         }
1130
1131         if (wc->root_level == wc->active_node &&
1132             btrfs_root_refs(&root->root_item) == 0) {
1133                 if (--node->refs == 0) {
1134                         free_inode_recs_tree(&node->root_cache);
1135                         free_inode_recs_tree(&node->inode_cache);
1136                         remove_cache_extent(&wc->shared, &node->cache);
1137                         free(node);
1138                 }
1139                 return 1;
1140         }
1141
1142         dest = wc->nodes[wc->active_node];
1143         splice_shared_node(node, dest);
1144         if (node->refs == 0) {
1145                 remove_cache_extent(&wc->shared, &node->cache);
1146                 free(node);
1147         }
1148         return 1;
1149 }
1150
1151 static int leave_shared_node(struct btrfs_root *root,
1152                              struct walk_control *wc, int level)
1153 {
1154         struct shared_node *node;
1155         struct shared_node *dest;
1156         int i;
1157
1158         if (level == wc->root_level)
1159                 return 0;
1160
1161         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1162                 if (wc->nodes[i])
1163                         break;
1164         }
1165         BUG_ON(i >= BTRFS_MAX_LEVEL);
1166
1167         node = wc->nodes[wc->active_node];
1168         wc->nodes[wc->active_node] = NULL;
1169         wc->active_node = i;
1170
1171         dest = wc->nodes[wc->active_node];
1172         if (wc->active_node < wc->root_level ||
1173             btrfs_root_refs(&root->root_item) > 0) {
1174                 BUG_ON(node->refs <= 1);
1175                 splice_shared_node(node, dest);
1176         } else {
1177                 BUG_ON(node->refs < 2);
1178                 node->refs--;
1179         }
1180         return 0;
1181 }
1182
1183 /*
1184  * Returns:
1185  * < 0 - on error
1186  * 1   - if the root with id child_root_id is a child of root parent_root_id
1187  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1188  *       has other root(s) as parent(s)
1189  * 2   - if the root child_root_id doesn't have any parent roots
1190  */
1191 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1192                          u64 child_root_id)
1193 {
1194         struct btrfs_path path;
1195         struct btrfs_key key;
1196         struct extent_buffer *leaf;
1197         int has_parent = 0;
1198         int ret;
1199
1200         btrfs_init_path(&path);
1201
1202         key.objectid = parent_root_id;
1203         key.type = BTRFS_ROOT_REF_KEY;
1204         key.offset = child_root_id;
1205         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1206                                 0, 0);
1207         if (ret < 0)
1208                 return ret;
1209         btrfs_release_path(&path);
1210         if (!ret)
1211                 return 1;
1212
1213         key.objectid = child_root_id;
1214         key.type = BTRFS_ROOT_BACKREF_KEY;
1215         key.offset = 0;
1216         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1217                                 0, 0);
1218         if (ret < 0)
1219                 goto out;
1220
1221         while (1) {
1222                 leaf = path.nodes[0];
1223                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1224                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1225                         if (ret)
1226                                 break;
1227                         leaf = path.nodes[0];
1228                 }
1229
1230                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1231                 if (key.objectid != child_root_id ||
1232                     key.type != BTRFS_ROOT_BACKREF_KEY)
1233                         break;
1234
1235                 has_parent = 1;
1236
1237                 if (key.offset == parent_root_id) {
1238                         btrfs_release_path(&path);
1239                         return 1;
1240                 }
1241
1242                 path.slots[0]++;
1243         }
1244 out:
1245         btrfs_release_path(&path);
1246         if (ret < 0)
1247                 return ret;
1248         return has_parent ? 0 : 2;
1249 }
1250
1251 static int process_dir_item(struct extent_buffer *eb,
1252                             int slot, struct btrfs_key *key,
1253                             struct shared_node *active_node)
1254 {
1255         u32 total;
1256         u32 cur = 0;
1257         u32 len;
1258         u32 name_len;
1259         u32 data_len;
1260         int error;
1261         int nritems = 0;
1262         u8 filetype;
1263         struct btrfs_dir_item *di;
1264         struct inode_record *rec;
1265         struct cache_tree *root_cache;
1266         struct cache_tree *inode_cache;
1267         struct btrfs_key location;
1268         char namebuf[BTRFS_NAME_LEN];
1269
1270         root_cache = &active_node->root_cache;
1271         inode_cache = &active_node->inode_cache;
1272         rec = active_node->current;
1273         rec->found_dir_item = 1;
1274
1275         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1276         total = btrfs_item_size_nr(eb, slot);
1277         while (cur < total) {
1278                 nritems++;
1279                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1280                 name_len = btrfs_dir_name_len(eb, di);
1281                 data_len = btrfs_dir_data_len(eb, di);
1282                 filetype = btrfs_dir_type(eb, di);
1283
1284                 rec->found_size += name_len;
1285                 if (cur + sizeof(*di) + name_len > total ||
1286                     name_len > BTRFS_NAME_LEN) {
1287                         error = REF_ERR_NAME_TOO_LONG;
1288
1289                         if (cur + sizeof(*di) > total)
1290                                 break;
1291                         len = min_t(u32, total - cur - sizeof(*di),
1292                                     BTRFS_NAME_LEN);
1293                 } else {
1294                         len = name_len;
1295                         error = 0;
1296                 }
1297
1298                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1299
1300                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1301                     key->offset != btrfs_name_hash(namebuf, len)) {
1302                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1303                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1304                         key->objectid, key->offset, namebuf, len, filetype,
1305                         key->offset, btrfs_name_hash(namebuf, len));
1306                 }
1307
1308                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1309                         add_inode_backref(inode_cache, location.objectid,
1310                                           key->objectid, key->offset, namebuf,
1311                                           len, filetype, key->type, error);
1312                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1313                         add_inode_backref(root_cache, location.objectid,
1314                                           key->objectid, key->offset,
1315                                           namebuf, len, filetype,
1316                                           key->type, error);
1317                 } else {
1318                         fprintf(stderr,
1319                                 "unknown location type %d in DIR_ITEM[%llu %llu]\n",
1320                                 location.type, key->objectid, key->offset);
1321                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1322                                           key->objectid, key->offset, namebuf,
1323                                           len, filetype, key->type, error);
1324                 }
1325
1326                 len = sizeof(*di) + name_len + data_len;
1327                 di = (struct btrfs_dir_item *)((char *)di + len);
1328                 cur += len;
1329         }
1330         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1331                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1332
1333         return 0;
1334 }
1335
1336 static int process_inode_ref(struct extent_buffer *eb,
1337                              int slot, struct btrfs_key *key,
1338                              struct shared_node *active_node)
1339 {
1340         u32 total;
1341         u32 cur = 0;
1342         u32 len;
1343         u32 name_len;
1344         u64 index;
1345         int error;
1346         struct cache_tree *inode_cache;
1347         struct btrfs_inode_ref *ref;
1348         char namebuf[BTRFS_NAME_LEN];
1349
1350         inode_cache = &active_node->inode_cache;
1351
1352         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1353         total = btrfs_item_size_nr(eb, slot);
1354         while (cur < total) {
1355                 name_len = btrfs_inode_ref_name_len(eb, ref);
1356                 index = btrfs_inode_ref_index(eb, ref);
1357
1358                 /* inode_ref + namelen should not cross item boundary */
1359                 if (cur + sizeof(*ref) + name_len > total ||
1360                     name_len > BTRFS_NAME_LEN) {
1361                         if (total < cur + sizeof(*ref))
1362                                 break;
1363
1364                         /* Still try to read out the remaining part */
1365                         len = min_t(u32, total - cur - sizeof(*ref),
1366                                     BTRFS_NAME_LEN);
1367                         error = REF_ERR_NAME_TOO_LONG;
1368                 } else {
1369                         len = name_len;
1370                         error = 0;
1371                 }
1372
1373                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1374                 add_inode_backref(inode_cache, key->objectid, key->offset,
1375                                   index, namebuf, len, 0, key->type, error);
1376
1377                 len = sizeof(*ref) + name_len;
1378                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1379                 cur += len;
1380         }
1381         return 0;
1382 }
1383
1384 static int process_inode_extref(struct extent_buffer *eb,
1385                                 int slot, struct btrfs_key *key,
1386                                 struct shared_node *active_node)
1387 {
1388         u32 total;
1389         u32 cur = 0;
1390         u32 len;
1391         u32 name_len;
1392         u64 index;
1393         u64 parent;
1394         int error;
1395         struct cache_tree *inode_cache;
1396         struct btrfs_inode_extref *extref;
1397         char namebuf[BTRFS_NAME_LEN];
1398
1399         inode_cache = &active_node->inode_cache;
1400
1401         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1402         total = btrfs_item_size_nr(eb, slot);
1403         while (cur < total) {
1404                 name_len = btrfs_inode_extref_name_len(eb, extref);
1405                 index = btrfs_inode_extref_index(eb, extref);
1406                 parent = btrfs_inode_extref_parent(eb, extref);
1407                 if (name_len <= BTRFS_NAME_LEN) {
1408                         len = name_len;
1409                         error = 0;
1410                 } else {
1411                         len = BTRFS_NAME_LEN;
1412                         error = REF_ERR_NAME_TOO_LONG;
1413                 }
1414                 read_extent_buffer(eb, namebuf,
1415                                    (unsigned long)(extref + 1), len);
1416                 add_inode_backref(inode_cache, key->objectid, parent,
1417                                   index, namebuf, len, 0, key->type, error);
1418
1419                 len = sizeof(*extref) + name_len;
1420                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1421                 cur += len;
1422         }
1423         return 0;
1424
1425 }
1426
1427 static int process_file_extent(struct btrfs_root *root,
1428                                 struct extent_buffer *eb,
1429                                 int slot, struct btrfs_key *key,
1430                                 struct shared_node *active_node)
1431 {
1432         struct inode_record *rec;
1433         struct btrfs_file_extent_item *fi;
1434         u64 num_bytes = 0;
1435         u64 disk_bytenr = 0;
1436         u64 extent_offset = 0;
1437         u64 mask = root->fs_info->sectorsize - 1;
1438         int extent_type;
1439         int ret;
1440
1441         rec = active_node->current;
1442         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1443         rec->found_file_extent = 1;
1444
1445         if (rec->extent_start == (u64)-1) {
1446                 rec->extent_start = key->offset;
1447                 rec->extent_end = key->offset;
1448         }
1449
1450         if (rec->extent_end > key->offset)
1451                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1452         else if (rec->extent_end < key->offset) {
1453                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1454                                            key->offset - rec->extent_end);
1455                 if (ret < 0)
1456                         return ret;
1457         }
1458
1459         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1460         extent_type = btrfs_file_extent_type(eb, fi);
1461
1462         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1463                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1464                 if (num_bytes == 0)
1465                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1466                 rec->found_size += num_bytes;
1467                 num_bytes = (num_bytes + mask) & ~mask;
1468         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1469                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1470                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1471                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1472                 extent_offset = btrfs_file_extent_offset(eb, fi);
1473                 if (num_bytes == 0 || (num_bytes & mask))
1474                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1475                 if (num_bytes + extent_offset >
1476                     btrfs_file_extent_ram_bytes(eb, fi))
1477                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1478                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1479                     (btrfs_file_extent_compression(eb, fi) ||
1480                      btrfs_file_extent_encryption(eb, fi) ||
1481                      btrfs_file_extent_other_encoding(eb, fi)))
1482                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1483                 if (disk_bytenr > 0)
1484                         rec->found_size += num_bytes;
1485         } else {
1486                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1487         }
1488         rec->extent_end = key->offset + num_bytes;
1489
1490         /*
1491          * The data reloc tree will copy full extents into its inode and then
1492          * copy the corresponding csums.  Because the extent it copied could be
1493          * a preallocated extent that hasn't been written to yet there may be no
1494          * csums to copy, ergo we won't have csums for our file extent.  This is
1495          * ok so just don't bother checking csums if the inode belongs to the
1496          * data reloc tree.
1497          */
1498         if (disk_bytenr > 0 &&
1499             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1500                 u64 found;
1501                 if (btrfs_file_extent_compression(eb, fi))
1502                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1503                 else
1504                         disk_bytenr += extent_offset;
1505
1506                 ret = count_csum_range(root->fs_info, disk_bytenr, num_bytes,
1507                                        &found);
1508                 if (ret < 0)
1509                         return ret;
1510                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1511                         if (found > 0)
1512                                 rec->found_csum_item = 1;
1513                         if (found < num_bytes)
1514                                 rec->some_csum_missing = 1;
1515                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1516                         if (found > 0)
1517                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1518                 }
1519         }
1520         return 0;
1521 }
1522
1523 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1524                             struct walk_control *wc)
1525 {
1526         struct btrfs_key key;
1527         u32 nritems;
1528         int i;
1529         int ret = 0;
1530         struct cache_tree *inode_cache;
1531         struct shared_node *active_node;
1532
1533         if (wc->root_level == wc->active_node &&
1534             btrfs_root_refs(&root->root_item) == 0)
1535                 return 0;
1536
1537         active_node = wc->nodes[wc->active_node];
1538         inode_cache = &active_node->inode_cache;
1539         nritems = btrfs_header_nritems(eb);
1540         for (i = 0; i < nritems; i++) {
1541                 btrfs_item_key_to_cpu(eb, &key, i);
1542
1543                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1544                         continue;
1545                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1546                         continue;
1547
1548                 if (active_node->current == NULL ||
1549                     active_node->current->ino < key.objectid) {
1550                         if (active_node->current) {
1551                                 active_node->current->checked = 1;
1552                                 maybe_free_inode_rec(inode_cache,
1553                                                      active_node->current);
1554                         }
1555                         active_node->current = get_inode_rec(inode_cache,
1556                                                              key.objectid, 1);
1557                         BUG_ON(IS_ERR(active_node->current));
1558                 }
1559                 switch (key.type) {
1560                 case BTRFS_DIR_ITEM_KEY:
1561                 case BTRFS_DIR_INDEX_KEY:
1562                         ret = process_dir_item(eb, i, &key, active_node);
1563                         break;
1564                 case BTRFS_INODE_REF_KEY:
1565                         ret = process_inode_ref(eb, i, &key, active_node);
1566                         break;
1567                 case BTRFS_INODE_EXTREF_KEY:
1568                         ret = process_inode_extref(eb, i, &key, active_node);
1569                         break;
1570                 case BTRFS_INODE_ITEM_KEY:
1571                         ret = process_inode_item(eb, i, &key, active_node);
1572                         break;
1573                 case BTRFS_EXTENT_DATA_KEY:
1574                         ret = process_file_extent(root, eb, i, &key,
1575                                                   active_node);
1576                         break;
1577                 default:
1578                         break;
1579                 };
1580         }
1581         return ret;
1582 }
1583
1584 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1585                              struct extent_buffer *eb, struct node_refs *nrefs,
1586                              u64 level, int check_all);
1587 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1588                             unsigned int ext_ref);
1589
1590 /*
1591  * Returns >0  Found error, not fatal, should continue
1592  * Returns <0  Fatal error, must exit the whole check
1593  * Returns 0   No errors found
1594  */
1595 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1596                                struct node_refs *nrefs, int *level, int ext_ref)
1597 {
1598         struct extent_buffer *cur = path->nodes[0];
1599         struct btrfs_key key;
1600         u64 cur_bytenr;
1601         u32 nritems;
1602         u64 first_ino = 0;
1603         int root_level = btrfs_header_level(root->node);
1604         int i;
1605         int ret = 0; /* Final return value */
1606         int err = 0; /* Positive error bitmap */
1607
1608         cur_bytenr = cur->start;
1609
1610         /* skip to first inode item or the first inode number change */
1611         nritems = btrfs_header_nritems(cur);
1612         for (i = 0; i < nritems; i++) {
1613                 btrfs_item_key_to_cpu(cur, &key, i);
1614                 if (i == 0)
1615                         first_ino = key.objectid;
1616                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1617                     (first_ino && first_ino != key.objectid))
1618                         break;
1619         }
1620         if (i == nritems) {
1621                 path->slots[0] = nritems;
1622                 return 0;
1623         }
1624         path->slots[0] = i;
1625
1626 again:
1627         err |= check_inode_item(root, path, ext_ref);
1628
1629         /* modify cur since check_inode_item may change path */
1630         cur = path->nodes[0];
1631
1632         if (err & LAST_ITEM)
1633                 goto out;
1634
1635         /* still have inode items in thie leaf */
1636         if (cur->start == cur_bytenr)
1637                 goto again;
1638
1639         /*
1640          * we have switched to another leaf, above nodes may
1641          * have changed, here walk down the path, if a node
1642          * or leaf is shared, check whether we can skip this
1643          * node or leaf.
1644          */
1645         for (i = root_level; i >= 0; i--) {
1646                 if (path->nodes[i]->start == nrefs->bytenr[i])
1647                         continue;
1648
1649                 ret = update_nodes_refs(root, path->nodes[i]->start,
1650                                 path->nodes[i], nrefs, i, 0);
1651                 if (ret)
1652                         goto out;
1653
1654                 if (!nrefs->need_check[i]) {
1655                         *level += 1;
1656                         break;
1657                 }
1658         }
1659
1660         for (i = 0; i < *level; i++) {
1661                 free_extent_buffer(path->nodes[i]);
1662                 path->nodes[i] = NULL;
1663         }
1664 out:
1665         err &= ~LAST_ITEM;
1666         if (err && !ret)
1667                 ret = err;
1668         return ret;
1669 }
1670
1671 /*
1672  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
1673  * in every fs or file tree check. Here we find its all root ids, and only check
1674  * it in the fs or file tree which has the smallest root id.
1675  */
1676 static int need_check(struct btrfs_root *root, struct ulist *roots)
1677 {
1678         struct rb_node *node;
1679         struct ulist_node *u;
1680
1681         /*
1682          * @roots can be empty if it belongs to tree reloc tree
1683          * In that case, we should always check the leaf, as we can't use
1684          * the tree owner to ensure some other root will check it.
1685          */
1686         if (roots->nnodes == 1 || roots->nnodes == 0)
1687                 return 1;
1688
1689         node = rb_first(&roots->root);
1690         u = rb_entry(node, struct ulist_node, rb_node);
1691         /*
1692          * current root id is not smallest, we skip it and let it be checked
1693          * in the fs or file tree who hash the smallest root id.
1694          */
1695         if (root->objectid != u->val)
1696                 return 0;
1697
1698         return 1;
1699 }
1700
1701 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
1702                                u64 *flags_ret)
1703 {
1704         struct btrfs_root *extent_root = root->fs_info->extent_root;
1705         struct btrfs_root_item *ri = &root->root_item;
1706         struct btrfs_extent_inline_ref *iref;
1707         struct btrfs_extent_item *ei;
1708         struct btrfs_key key;
1709         struct btrfs_path *path = NULL;
1710         unsigned long ptr;
1711         unsigned long end;
1712         u64 flags;
1713         u64 owner = 0;
1714         u64 offset;
1715         int slot;
1716         int type;
1717         int ret = 0;
1718
1719         /*
1720          * Except file/reloc tree, we can not have FULL BACKREF MODE
1721          */
1722         if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
1723                 goto normal;
1724
1725         /* root node */
1726         if (eb->start == btrfs_root_bytenr(ri))
1727                 goto normal;
1728
1729         if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
1730                 goto full_backref;
1731
1732         owner = btrfs_header_owner(eb);
1733         if (owner == root->objectid)
1734                 goto normal;
1735
1736         path = btrfs_alloc_path();
1737         if (!path)
1738                 return -ENOMEM;
1739
1740         key.objectid = btrfs_header_bytenr(eb);
1741         key.type = (u8)-1;
1742         key.offset = (u64)-1;
1743
1744         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
1745         if (ret <= 0) {
1746                 ret = -EIO;
1747                 goto out;
1748         }
1749
1750         if (ret > 0) {
1751                 ret = btrfs_previous_extent_item(extent_root, path,
1752                                                  key.objectid);
1753                 if (ret)
1754                         goto full_backref;
1755
1756         }
1757         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1758
1759         eb = path->nodes[0];
1760         slot = path->slots[0];
1761         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
1762
1763         flags = btrfs_extent_flags(eb, ei);
1764         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
1765                 goto full_backref;
1766
1767         ptr = (unsigned long)(ei + 1);
1768         end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
1769
1770         if (key.type == BTRFS_EXTENT_ITEM_KEY)
1771                 ptr += sizeof(struct btrfs_tree_block_info);
1772
1773 next:
1774         /* Reached extent item ends normally */
1775         if (ptr == end)
1776                 goto full_backref;
1777
1778         /* Beyond extent item end, wrong item size */
1779         if (ptr > end) {
1780                 error("extent item at bytenr %llu slot %d has wrong size",
1781                         eb->start, slot);
1782                 goto full_backref;
1783         }
1784
1785         iref = (struct btrfs_extent_inline_ref *)ptr;
1786         offset = btrfs_extent_inline_ref_offset(eb, iref);
1787         type = btrfs_extent_inline_ref_type(eb, iref);
1788
1789         if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
1790                 goto normal;
1791         ptr += btrfs_extent_inline_ref_size(type);
1792         goto next;
1793
1794 normal:
1795         *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
1796         goto out;
1797
1798 full_backref:
1799         *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
1800 out:
1801         btrfs_free_path(path);
1802         return ret;
1803 }
1804
1805 /*
1806  * for a tree node or leaf, we record its reference count, so later if we still
1807  * process this node or leaf, don't need to compute its reference count again.
1808  *
1809  * @bytenr  if @bytenr == (u64)-1, only update nrefs->full_backref[level]
1810  */
1811 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1812                              struct extent_buffer *eb, struct node_refs *nrefs,
1813                              u64 level, int check_all)
1814 {
1815         struct ulist *roots;
1816         u64 refs = 0;
1817         u64 flags = 0;
1818         int root_level = btrfs_header_level(root->node);
1819         int check;
1820         int ret;
1821
1822         if (nrefs->bytenr[level] == bytenr)
1823                 return 0;
1824
1825         if (bytenr != (u64)-1) {
1826                 /* the return value of this function seems a mistake */
1827                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1828                                        level, 1, &refs, &flags);
1829                 /* temporary fix */
1830                 if (ret < 0 && !check_all)
1831                         return ret;
1832
1833                 nrefs->bytenr[level] = bytenr;
1834                 nrefs->refs[level] = refs;
1835                 nrefs->full_backref[level] = 0;
1836                 nrefs->checked[level] = 0;
1837
1838                 if (refs > 1) {
1839                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
1840                                                    0, &roots);
1841                         if (ret)
1842                                 return -EIO;
1843
1844                         check = need_check(root, roots);
1845                         ulist_free(roots);
1846                         nrefs->need_check[level] = check;
1847                 } else {
1848                         if (!check_all) {
1849                                 nrefs->need_check[level] = 1;
1850                         } else {
1851                                 if (level == root_level) {
1852                                         nrefs->need_check[level] = 1;
1853                                 } else {
1854                                         /*
1855                                          * The node refs may have not been
1856                                          * updated if upper needs checking (the
1857                                          * lowest root_objectid) the node can
1858                                          * be checked.
1859                                          */
1860                                         nrefs->need_check[level] =
1861                                                 nrefs->need_check[level + 1];
1862                                 }
1863                         }
1864                 }
1865         }
1866
1867         if (check_all && eb) {
1868                 calc_extent_flag_v2(root, eb, &flags);
1869                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
1870                         nrefs->full_backref[level] = 1;
1871         }
1872
1873         return 0;
1874 }
1875
1876 /*
1877  * @level           if @level == -1 means extent data item
1878  *                  else normal treeblocl.
1879  */
1880 static int should_check_extent_strictly(struct btrfs_root *root,
1881                                         struct node_refs *nrefs, int level)
1882 {
1883         int root_level = btrfs_header_level(root->node);
1884
1885         if (level > root_level || level < -1)
1886                 return 1;
1887         if (level == root_level)
1888                 return 1;
1889         /*
1890          * if the upper node is marked full backref, it should contain shared
1891          * backref of the parent (except owner == root->objectid).
1892          */
1893         while (++level <= root_level)
1894                 if (nrefs->refs[level] > 1)
1895                         return 0;
1896
1897         return 1;
1898 }
1899
1900 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1901                           struct walk_control *wc, int *level,
1902                           struct node_refs *nrefs)
1903 {
1904         enum btrfs_tree_block_status status;
1905         u64 bytenr;
1906         u64 ptr_gen;
1907         struct btrfs_fs_info *fs_info = root->fs_info;
1908         struct extent_buffer *next;
1909         struct extent_buffer *cur;
1910         int ret, err = 0;
1911         u64 refs;
1912
1913         WARN_ON(*level < 0);
1914         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1915
1916         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1917                 refs = nrefs->refs[*level];
1918                 ret = 0;
1919         } else {
1920                 ret = btrfs_lookup_extent_info(NULL, root,
1921                                        path->nodes[*level]->start,
1922                                        *level, 1, &refs, NULL);
1923                 if (ret < 0) {
1924                         err = ret;
1925                         goto out;
1926                 }
1927                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1928                 nrefs->refs[*level] = refs;
1929         }
1930
1931         if (refs > 1) {
1932                 ret = enter_shared_node(root, path->nodes[*level]->start,
1933                                         refs, wc, *level);
1934                 if (ret > 0) {
1935                         err = ret;
1936                         goto out;
1937                 }
1938         }
1939
1940         while (*level >= 0) {
1941                 WARN_ON(*level < 0);
1942                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1943                 cur = path->nodes[*level];
1944
1945                 if (btrfs_header_level(cur) != *level)
1946                         WARN_ON(1);
1947
1948                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1949                         break;
1950                 if (*level == 0) {
1951                         ret = process_one_leaf(root, cur, wc);
1952                         if (ret < 0)
1953                                 err = ret;
1954                         break;
1955                 }
1956                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1957                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1958
1959                 if (bytenr == nrefs->bytenr[*level - 1]) {
1960                         refs = nrefs->refs[*level - 1];
1961                 } else {
1962                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1963                                         *level - 1, 1, &refs, NULL);
1964                         if (ret < 0) {
1965                                 refs = 0;
1966                         } else {
1967                                 nrefs->bytenr[*level - 1] = bytenr;
1968                                 nrefs->refs[*level - 1] = refs;
1969                         }
1970                 }
1971
1972                 if (refs > 1) {
1973                         ret = enter_shared_node(root, bytenr, refs,
1974                                                 wc, *level - 1);
1975                         if (ret > 0) {
1976                                 path->slots[*level]++;
1977                                 continue;
1978                         }
1979                 }
1980
1981                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
1982                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1983                         free_extent_buffer(next);
1984                         reada_walk_down(root, cur, path->slots[*level]);
1985                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
1986                         if (!extent_buffer_uptodate(next)) {
1987                                 struct btrfs_key node_key;
1988
1989                                 btrfs_node_key_to_cpu(path->nodes[*level],
1990                                                       &node_key,
1991                                                       path->slots[*level]);
1992                                 btrfs_add_corrupt_extent_record(root->fs_info,
1993                                                 &node_key,
1994                                                 path->nodes[*level]->start,
1995                                                 root->fs_info->nodesize,
1996                                                 *level);
1997                                 err = -EIO;
1998                                 goto out;
1999                         }
2000                 }
2001
2002                 ret = check_child_node(cur, path->slots[*level], next);
2003                 if (ret) {
2004                         free_extent_buffer(next);
2005                         err = ret;
2006                         goto out;
2007                 }
2008
2009                 if (btrfs_is_leaf(next))
2010                         status = btrfs_check_leaf(root, NULL, next);
2011                 else
2012                         status = btrfs_check_node(root, NULL, next);
2013                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2014                         free_extent_buffer(next);
2015                         err = -EIO;
2016                         goto out;
2017                 }
2018
2019                 *level = *level - 1;
2020                 free_extent_buffer(path->nodes[*level]);
2021                 path->nodes[*level] = next;
2022                 path->slots[*level] = 0;
2023         }
2024 out:
2025         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2026         return err;
2027 }
2028
2029 /*
2030  * Update global fs information.
2031  */
2032 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2033                          int level)
2034 {
2035         u32 free_nrs;
2036         struct extent_buffer *eb = path->nodes[level];
2037
2038         total_btree_bytes += eb->len;
2039         if (fs_root_objectid(root->objectid))
2040                 total_fs_tree_bytes += eb->len;
2041         if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2042                 total_extent_tree_bytes += eb->len;
2043
2044         if (level == 0) {
2045                 btree_space_waste += btrfs_leaf_free_space(root, eb);
2046         } else {
2047                 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root->fs_info) -
2048                             btrfs_header_nritems(eb));
2049                 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2050         }
2051 }
2052
2053 /*
2054  * This function only handles BACKREF_MISSING,
2055  * If corresponding extent item exists, increase the ref, else insert an extent
2056  * item and backref.
2057  *
2058  * Returns error bits after repair.
2059  */
2060 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2061                                  struct btrfs_root *root,
2062                                  struct extent_buffer *node,
2063                                  struct node_refs *nrefs, int level, int err)
2064 {
2065         struct btrfs_fs_info *fs_info = root->fs_info;
2066         struct btrfs_root *extent_root = fs_info->extent_root;
2067         struct btrfs_path path;
2068         struct btrfs_extent_item *ei;
2069         struct btrfs_tree_block_info *bi;
2070         struct btrfs_key key;
2071         struct extent_buffer *eb;
2072         u32 size = sizeof(*ei);
2073         u32 node_size = root->fs_info->nodesize;
2074         int insert_extent = 0;
2075         int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2076         int root_level = btrfs_header_level(root->node);
2077         int generation;
2078         int ret;
2079         u64 owner;
2080         u64 bytenr;
2081         u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
2082         u64 parent = 0;
2083
2084         if ((err & BACKREF_MISSING) == 0)
2085                 return err;
2086
2087         WARN_ON(level > BTRFS_MAX_LEVEL);
2088         WARN_ON(level < 0);
2089
2090         btrfs_init_path(&path);
2091         bytenr = btrfs_header_bytenr(node);
2092         owner = btrfs_header_owner(node);
2093         generation = btrfs_header_generation(node);
2094
2095         key.objectid = bytenr;
2096         key.type = (u8)-1;
2097         key.offset = (u64)-1;
2098
2099         /* Search for the extent item */
2100         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2101         if (ret <= 0) {
2102                 ret = -EIO;
2103                 goto out;
2104         }
2105
2106         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2107         if (ret)
2108                 insert_extent = 1;
2109
2110         /* calculate if the extent item flag is full backref or not */
2111         if (nrefs->full_backref[level] != 0)
2112                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2113
2114         /* insert an extent item */
2115         if (insert_extent) {
2116                 struct btrfs_disk_key copy_key;
2117
2118                 generation = btrfs_header_generation(node);
2119
2120                 if (level < root_level && nrefs->full_backref[level + 1] &&
2121                     owner != root->objectid) {
2122                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2123                 }
2124
2125                 key.objectid = bytenr;
2126                 if (!skinny_metadata) {
2127                         key.type = BTRFS_EXTENT_ITEM_KEY;
2128                         key.offset = node_size;
2129                         size += sizeof(*bi);
2130                 } else {
2131                         key.type = BTRFS_METADATA_ITEM_KEY;
2132                         key.offset = level;
2133                 }
2134
2135                 btrfs_release_path(&path);
2136                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2137                                               size);
2138                 if (ret)
2139                         goto out;
2140
2141                 eb = path.nodes[0];
2142                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
2143
2144                 btrfs_set_extent_refs(eb, ei, 0);
2145                 btrfs_set_extent_generation(eb, ei, generation);
2146                 btrfs_set_extent_flags(eb, ei, flags);
2147
2148                 if (!skinny_metadata) {
2149                         bi = (struct btrfs_tree_block_info *)(ei + 1);
2150                         memset_extent_buffer(eb, 0, (unsigned long)bi,
2151                                              sizeof(*bi));
2152                         btrfs_set_disk_key_objectid(&copy_key, root->objectid);
2153                         btrfs_set_disk_key_type(&copy_key, 0);
2154                         btrfs_set_disk_key_offset(&copy_key, 0);
2155
2156                         btrfs_set_tree_block_level(eb, bi, level);
2157                         btrfs_set_tree_block_key(eb, bi, &copy_key);
2158                 }
2159                 btrfs_mark_buffer_dirty(eb);
2160                 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2161                 btrfs_update_block_group(extent_root, bytenr, node_size, 1, 0);
2162
2163                 nrefs->refs[level] = 0;
2164                 nrefs->full_backref[level] =
2165                         flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2166                 btrfs_release_path(&path);
2167         }
2168
2169         if (level < root_level && nrefs->full_backref[level + 1] &&
2170             owner != root->objectid)
2171                 parent = nrefs->bytenr[level + 1];
2172
2173         /* increase the ref */
2174         ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2175                         parent, root->objectid, level, 0);
2176
2177         nrefs->refs[level]++;
2178 out:
2179         btrfs_release_path(&path);
2180         if (ret) {
2181                 error(
2182         "failed to repair tree block ref start %llu root %llu due to %s",
2183                       bytenr, root->objectid, strerror(-ret));
2184         } else {
2185                 printf("Added one tree block ref start %llu %s %llu\n",
2186                        bytenr, parent ? "parent" : "root",
2187                        parent ? parent : root->objectid);
2188                 err &= ~BACKREF_MISSING;
2189         }
2190
2191         return err;
2192 }
2193
2194 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2195                             unsigned int ext_ref);
2196 static int check_tree_block_ref(struct btrfs_root *root,
2197                                 struct extent_buffer *eb, u64 bytenr,
2198                                 int level, u64 owner, struct node_refs *nrefs);
2199 static int check_leaf_items(struct btrfs_trans_handle *trans,
2200                             struct btrfs_root *root, struct btrfs_path *path,
2201                             struct node_refs *nrefs, int account_bytes);
2202
2203 /*
2204  * @trans      just for lowmem repair mode
2205  * @check all  if not 0 then check all tree block backrefs and items
2206  *             0 then just check relationship of items in fs tree(s)
2207  *
2208  * Returns >0  Found error, should continue
2209  * Returns <0  Fatal error, must exit the whole check
2210  * Returns 0   No errors found
2211  */
2212 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2213                              struct btrfs_root *root, struct btrfs_path *path,
2214                              int *level, struct node_refs *nrefs, int ext_ref,
2215                              int check_all)
2216
2217 {
2218         enum btrfs_tree_block_status status;
2219         u64 bytenr;
2220         u64 ptr_gen;
2221         struct btrfs_fs_info *fs_info = root->fs_info;
2222         struct extent_buffer *next;
2223         struct extent_buffer *cur;
2224         int ret;
2225         int err = 0;
2226         int check;
2227         int account_file_data = 0;
2228
2229         WARN_ON(*level < 0);
2230         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2231
2232         ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2233                                 path->nodes[*level], nrefs, *level, check_all);
2234         if (ret < 0)
2235                 return ret;
2236
2237         while (*level >= 0) {
2238                 WARN_ON(*level < 0);
2239                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2240                 cur = path->nodes[*level];
2241                 bytenr = btrfs_header_bytenr(cur);
2242                 check = nrefs->need_check[*level];
2243
2244                 if (btrfs_header_level(cur) != *level)
2245                         WARN_ON(1);
2246                /*
2247                 * Update bytes accounting and check tree block ref
2248                 * NOTE: Doing accounting and check before checking nritems
2249                 * is necessary because of empty node/leaf.
2250                 */
2251                 if ((check_all && !nrefs->checked[*level]) ||
2252                     (!check_all && nrefs->need_check[*level])) {
2253                         ret = check_tree_block_ref(root, cur,
2254                            btrfs_header_bytenr(cur), btrfs_header_level(cur),
2255                            btrfs_header_owner(cur), nrefs);
2256
2257                         if (repair && ret)
2258                                 ret = repair_tree_block_ref(trans, root,
2259                                     path->nodes[*level], nrefs, *level, ret);
2260                         err |= ret;
2261
2262                         if (check_all && nrefs->need_check[*level] &&
2263                                 nrefs->refs[*level]) {
2264                                 account_bytes(root, path, *level);
2265                                 account_file_data = 1;
2266                         }
2267                         nrefs->checked[*level] = 1;
2268                 }
2269
2270                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2271                         break;
2272
2273                 /* Don't forgot to check leaf/node validation */
2274                 if (*level == 0) {
2275                         /* skip duplicate check */
2276                         if (check || !check_all) {
2277                                 ret = btrfs_check_leaf(root, NULL, cur);
2278                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2279                                         err |= -EIO;
2280                                         break;
2281                                 }
2282                         }
2283
2284                         ret = 0;
2285                         if (!check_all)
2286                                 ret = process_one_leaf_v2(root, path, nrefs,
2287                                                           level, ext_ref);
2288                         else
2289                                 ret = check_leaf_items(trans, root, path,
2290                                                nrefs, account_file_data);
2291                         err |= ret;
2292                         break;
2293                 } else {
2294                         if (check || !check_all) {
2295                                 ret = btrfs_check_node(root, NULL, cur);
2296                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2297                                         err |= -EIO;
2298                                         break;
2299                                 }
2300                         }
2301                 }
2302
2303                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2304                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2305
2306                 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2307                                         check_all);
2308                 if (ret < 0)
2309                         break;
2310                 /*
2311                  * check all trees in check_chunks_and_extent_v2
2312                  * check shared node once in check_fs_roots
2313                  */
2314                 if (!check_all && !nrefs->need_check[*level - 1]) {
2315                         path->slots[*level]++;
2316                         continue;
2317                 }
2318
2319                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2320                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2321                         free_extent_buffer(next);
2322                         reada_walk_down(root, cur, path->slots[*level]);
2323                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2324                         if (!extent_buffer_uptodate(next)) {
2325                                 struct btrfs_key node_key;
2326
2327                                 btrfs_node_key_to_cpu(path->nodes[*level],
2328                                                       &node_key,
2329                                                       path->slots[*level]);
2330                                 btrfs_add_corrupt_extent_record(fs_info,
2331                                         &node_key, path->nodes[*level]->start,
2332                                         fs_info->nodesize, *level);
2333                                 err |= -EIO;
2334                                 break;
2335                         }
2336                 }
2337
2338                 ret = check_child_node(cur, path->slots[*level], next);
2339                 err |= ret;
2340                 if (ret < 0) 
2341                         break;
2342
2343                 if (btrfs_is_leaf(next))
2344                         status = btrfs_check_leaf(root, NULL, next);
2345                 else
2346                         status = btrfs_check_node(root, NULL, next);
2347                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2348                         free_extent_buffer(next);
2349                         err |= -EIO;
2350                         break;
2351                 }
2352
2353                 *level = *level - 1;
2354                 free_extent_buffer(path->nodes[*level]);
2355                 path->nodes[*level] = next;
2356                 path->slots[*level] = 0;
2357                 account_file_data = 0;
2358
2359                 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
2360         }
2361         return err;
2362 }
2363
2364 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2365                         struct walk_control *wc, int *level)
2366 {
2367         int i;
2368         struct extent_buffer *leaf;
2369
2370         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2371                 leaf = path->nodes[i];
2372                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2373                         path->slots[i]++;
2374                         *level = i;
2375                         return 0;
2376                 } else {
2377                         free_extent_buffer(path->nodes[*level]);
2378                         path->nodes[*level] = NULL;
2379                         BUG_ON(*level > wc->active_node);
2380                         if (*level == wc->active_node)
2381                                 leave_shared_node(root, wc, *level);
2382                         *level = i + 1;
2383                 }
2384         }
2385         return 1;
2386 }
2387
2388 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2389                            int *level)
2390 {
2391         int i;
2392         struct extent_buffer *leaf;
2393
2394         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2395                 leaf = path->nodes[i];
2396                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2397                         path->slots[i]++;
2398                         *level = i;
2399                         return 0;
2400                 } else {
2401                         free_extent_buffer(path->nodes[*level]);
2402                         path->nodes[*level] = NULL;
2403                         *level = i + 1;
2404                 }
2405         }
2406         return 1;
2407 }
2408
2409 static int check_root_dir(struct inode_record *rec)
2410 {
2411         struct inode_backref *backref;
2412         int ret = -1;
2413
2414         if (!rec->found_inode_item || rec->errors)
2415                 goto out;
2416         if (rec->nlink != 1 || rec->found_link != 0)
2417                 goto out;
2418         if (list_empty(&rec->backrefs))
2419                 goto out;
2420         backref = to_inode_backref(rec->backrefs.next);
2421         if (!backref->found_inode_ref)
2422                 goto out;
2423         if (backref->index != 0 || backref->namelen != 2 ||
2424             memcmp(backref->name, "..", 2))
2425                 goto out;
2426         if (backref->found_dir_index || backref->found_dir_item)
2427                 goto out;
2428         ret = 0;
2429 out:
2430         return ret;
2431 }
2432
2433 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2434                               struct btrfs_root *root, struct btrfs_path *path,
2435                               struct inode_record *rec)
2436 {
2437         struct btrfs_inode_item *ei;
2438         struct btrfs_key key;
2439         int ret;
2440
2441         key.objectid = rec->ino;
2442         key.type = BTRFS_INODE_ITEM_KEY;
2443         key.offset = (u64)-1;
2444
2445         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2446         if (ret < 0)
2447                 goto out;
2448         if (ret) {
2449                 if (!path->slots[0]) {
2450                         ret = -ENOENT;
2451                         goto out;
2452                 }
2453                 path->slots[0]--;
2454                 ret = 0;
2455         }
2456         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2457         if (key.objectid != rec->ino) {
2458                 ret = -ENOENT;
2459                 goto out;
2460         }
2461
2462         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2463                             struct btrfs_inode_item);
2464         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2465         btrfs_mark_buffer_dirty(path->nodes[0]);
2466         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2467         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2468                root->root_key.objectid);
2469 out:
2470         btrfs_release_path(path);
2471         return ret;
2472 }
2473
2474 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2475                                     struct btrfs_root *root,
2476                                     struct btrfs_path *path,
2477                                     struct inode_record *rec)
2478 {
2479         int ret;
2480
2481         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2482         btrfs_release_path(path);
2483         if (!ret)
2484                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2485         return ret;
2486 }
2487
2488 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2489                                struct btrfs_root *root,
2490                                struct btrfs_path *path,
2491                                struct inode_record *rec)
2492 {
2493         struct btrfs_inode_item *ei;
2494         struct btrfs_key key;
2495         int ret = 0;
2496
2497         key.objectid = rec->ino;
2498         key.type = BTRFS_INODE_ITEM_KEY;
2499         key.offset = 0;
2500
2501         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2502         if (ret) {
2503                 if (ret > 0)
2504                         ret = -ENOENT;
2505                 goto out;
2506         }
2507
2508         /* Since ret == 0, no need to check anything */
2509         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2510                             struct btrfs_inode_item);
2511         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2512         btrfs_mark_buffer_dirty(path->nodes[0]);
2513         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2514         printf("reset nbytes for ino %llu root %llu\n",
2515                rec->ino, root->root_key.objectid);
2516 out:
2517         btrfs_release_path(path);
2518         return ret;
2519 }
2520
2521 static int add_missing_dir_index(struct btrfs_root *root,
2522                                  struct cache_tree *inode_cache,
2523                                  struct inode_record *rec,
2524                                  struct inode_backref *backref)
2525 {
2526         struct btrfs_path path;
2527         struct btrfs_trans_handle *trans;
2528         struct btrfs_dir_item *dir_item;
2529         struct extent_buffer *leaf;
2530         struct btrfs_key key;
2531         struct btrfs_disk_key disk_key;
2532         struct inode_record *dir_rec;
2533         unsigned long name_ptr;
2534         u32 data_size = sizeof(*dir_item) + backref->namelen;
2535         int ret;
2536
2537         trans = btrfs_start_transaction(root, 1);
2538         if (IS_ERR(trans))
2539                 return PTR_ERR(trans);
2540
2541         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2542                 (unsigned long long)rec->ino);
2543
2544         btrfs_init_path(&path);
2545         key.objectid = backref->dir;
2546         key.type = BTRFS_DIR_INDEX_KEY;
2547         key.offset = backref->index;
2548         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2549         BUG_ON(ret);
2550
2551         leaf = path.nodes[0];
2552         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2553
2554         disk_key.objectid = cpu_to_le64(rec->ino);
2555         disk_key.type = BTRFS_INODE_ITEM_KEY;
2556         disk_key.offset = 0;
2557
2558         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2559         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2560         btrfs_set_dir_data_len(leaf, dir_item, 0);
2561         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2562         name_ptr = (unsigned long)(dir_item + 1);
2563         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2564         btrfs_mark_buffer_dirty(leaf);
2565         btrfs_release_path(&path);
2566         btrfs_commit_transaction(trans, root);
2567
2568         backref->found_dir_index = 1;
2569         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2570         BUG_ON(IS_ERR(dir_rec));
2571         if (!dir_rec)
2572                 return 0;
2573         dir_rec->found_size += backref->namelen;
2574         if (dir_rec->found_size == dir_rec->isize &&
2575             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2576                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2577         if (dir_rec->found_size != dir_rec->isize)
2578                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2579
2580         return 0;
2581 }
2582
2583 static int delete_dir_index(struct btrfs_root *root,
2584                             struct inode_backref *backref)
2585 {
2586         struct btrfs_trans_handle *trans;
2587         struct btrfs_dir_item *di;
2588         struct btrfs_path path;
2589         int ret = 0;
2590
2591         trans = btrfs_start_transaction(root, 1);
2592         if (IS_ERR(trans))
2593                 return PTR_ERR(trans);
2594
2595         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2596                 (unsigned long long)backref->dir,
2597                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2598                 (unsigned long long)root->objectid);
2599
2600         btrfs_init_path(&path);
2601         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2602                                     backref->name, backref->namelen,
2603                                     backref->index, -1);
2604         if (IS_ERR(di)) {
2605                 ret = PTR_ERR(di);
2606                 btrfs_release_path(&path);
2607                 btrfs_commit_transaction(trans, root);
2608                 if (ret == -ENOENT)
2609                         return 0;
2610                 return ret;
2611         }
2612
2613         if (!di)
2614                 ret = btrfs_del_item(trans, root, &path);
2615         else
2616                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2617         BUG_ON(ret);
2618         btrfs_release_path(&path);
2619         btrfs_commit_transaction(trans, root);
2620         return ret;
2621 }
2622
2623 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2624                                     struct btrfs_root *root, u64 ino,
2625                                     u8 filetype)
2626 {
2627         u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2628
2629         return insert_inode_item(trans, root, ino, 0, 0, 0, mode);
2630 }
2631
2632 static int create_inode_item(struct btrfs_root *root,
2633                              struct inode_record *rec, int root_dir)
2634 {
2635         struct btrfs_trans_handle *trans;
2636         u64 nlink = 0;
2637         u32 mode = 0;
2638         u64 size = 0;
2639         int ret;
2640
2641         trans = btrfs_start_transaction(root, 1);
2642         if (IS_ERR(trans)) {
2643                 ret = PTR_ERR(trans);
2644                 return ret;
2645         }
2646
2647         nlink = root_dir ? 1 : rec->found_link;
2648         if (rec->found_dir_item) {
2649                 if (rec->found_file_extent)
2650                         fprintf(stderr, "root %llu inode %llu has both a dir "
2651                                 "item and extents, unsure if it is a dir or a "
2652                                 "regular file so setting it as a directory\n",
2653                                 (unsigned long long)root->objectid,
2654                                 (unsigned long long)rec->ino);
2655                 mode = S_IFDIR | 0755;
2656                 size = rec->found_size;
2657         } else if (!rec->found_dir_item) {
2658                 size = rec->extent_end;
2659                 mode =  S_IFREG | 0755;
2660         }
2661
2662         ret = insert_inode_item(trans, root, rec->ino, size, rec->nbytes,
2663                                   nlink, mode);
2664         btrfs_commit_transaction(trans, root);
2665         return 0;
2666 }
2667
2668 static int repair_inode_backrefs(struct btrfs_root *root,
2669                                  struct inode_record *rec,
2670                                  struct cache_tree *inode_cache,
2671                                  int delete)
2672 {
2673         struct inode_backref *tmp, *backref;
2674         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2675         int ret = 0;
2676         int repaired = 0;
2677
2678         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2679                 if (!delete && rec->ino == root_dirid) {
2680                         if (!rec->found_inode_item) {
2681                                 ret = create_inode_item(root, rec, 1);
2682                                 if (ret)
2683                                         break;
2684                                 repaired++;
2685                         }
2686                 }
2687
2688                 /* Index 0 for root dir's are special, don't mess with it */
2689                 if (rec->ino == root_dirid && backref->index == 0)
2690                         continue;
2691
2692                 if (delete &&
2693                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2694                      (backref->found_dir_index && backref->found_inode_ref &&
2695                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2696                         ret = delete_dir_index(root, backref);
2697                         if (ret)
2698                                 break;
2699                         repaired++;
2700                         list_del(&backref->list);
2701                         free(backref);
2702                         continue;
2703                 }
2704
2705                 if (!delete && !backref->found_dir_index &&
2706                     backref->found_dir_item && backref->found_inode_ref) {
2707                         ret = add_missing_dir_index(root, inode_cache, rec,
2708                                                     backref);
2709                         if (ret)
2710                                 break;
2711                         repaired++;
2712                         if (backref->found_dir_item &&
2713                             backref->found_dir_index) {
2714                                 if (!backref->errors &&
2715                                     backref->found_inode_ref) {
2716                                         list_del(&backref->list);
2717                                         free(backref);
2718                                         continue;
2719                                 }
2720                         }
2721                 }
2722
2723                 if (!delete && (!backref->found_dir_index &&
2724                                 !backref->found_dir_item &&
2725                                 backref->found_inode_ref)) {
2726                         struct btrfs_trans_handle *trans;
2727                         struct btrfs_key location;
2728
2729                         ret = check_dir_conflict(root, backref->name,
2730                                                  backref->namelen,
2731                                                  backref->dir,
2732                                                  backref->index);
2733                         if (ret) {
2734                                 /*
2735                                  * let nlink fixing routine to handle it,
2736                                  * which can do it better.
2737                                  */
2738                                 ret = 0;
2739                                 break;
2740                         }
2741                         location.objectid = rec->ino;
2742                         location.type = BTRFS_INODE_ITEM_KEY;
2743                         location.offset = 0;
2744
2745                         trans = btrfs_start_transaction(root, 1);
2746                         if (IS_ERR(trans)) {
2747                                 ret = PTR_ERR(trans);
2748                                 break;
2749                         }
2750                         fprintf(stderr, "adding missing dir index/item pair "
2751                                 "for inode %llu\n",
2752                                 (unsigned long long)rec->ino);
2753                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2754                                                     backref->namelen,
2755                                                     backref->dir, &location,
2756                                                     imode_to_type(rec->imode),
2757                                                     backref->index);
2758                         BUG_ON(ret);
2759                         btrfs_commit_transaction(trans, root);
2760                         repaired++;
2761                 }
2762
2763                 if (!delete && (backref->found_inode_ref &&
2764                                 backref->found_dir_index &&
2765                                 backref->found_dir_item &&
2766                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2767                                 !rec->found_inode_item)) {
2768                         ret = create_inode_item(root, rec, 0);
2769                         if (ret)
2770                                 break;
2771                         repaired++;
2772                 }
2773
2774         }
2775         return ret ? ret : repaired;
2776 }
2777
2778 /*
2779  * To determine the file type for nlink/inode_item repair
2780  *
2781  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2782  * Return -ENOENT if file type is not found.
2783  */
2784 static int find_file_type(struct inode_record *rec, u8 *type)
2785 {
2786         struct inode_backref *backref;
2787
2788         /* For inode item recovered case */
2789         if (rec->found_inode_item) {
2790                 *type = imode_to_type(rec->imode);
2791                 return 0;
2792         }
2793
2794         list_for_each_entry(backref, &rec->backrefs, list) {
2795                 if (backref->found_dir_index || backref->found_dir_item) {
2796                         *type = backref->filetype;
2797                         return 0;
2798                 }
2799         }
2800         return -ENOENT;
2801 }
2802
2803 /*
2804  * To determine the file name for nlink repair
2805  *
2806  * Return 0 if file name is found, set name and namelen.
2807  * Return -ENOENT if file name is not found.
2808  */
2809 static int find_file_name(struct inode_record *rec,
2810                           char *name, int *namelen)
2811 {
2812         struct inode_backref *backref;
2813
2814         list_for_each_entry(backref, &rec->backrefs, list) {
2815                 if (backref->found_dir_index || backref->found_dir_item ||
2816                     backref->found_inode_ref) {
2817                         memcpy(name, backref->name, backref->namelen);
2818                         *namelen = backref->namelen;
2819                         return 0;
2820                 }
2821         }
2822         return -ENOENT;
2823 }
2824
2825 /* Reset the nlink of the inode to the correct one */
2826 static int reset_nlink(struct btrfs_trans_handle *trans,
2827                        struct btrfs_root *root,
2828                        struct btrfs_path *path,
2829                        struct inode_record *rec)
2830 {
2831         struct inode_backref *backref;
2832         struct inode_backref *tmp;
2833         struct btrfs_key key;
2834         struct btrfs_inode_item *inode_item;
2835         int ret = 0;
2836
2837         /* We don't believe this either, reset it and iterate backref */
2838         rec->found_link = 0;
2839
2840         /* Remove all backref including the valid ones */
2841         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2842                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2843                                    backref->index, backref->name,
2844                                    backref->namelen, 0);
2845                 if (ret < 0)
2846                         goto out;
2847
2848                 /* remove invalid backref, so it won't be added back */
2849                 if (!(backref->found_dir_index &&
2850                       backref->found_dir_item &&
2851                       backref->found_inode_ref)) {
2852                         list_del(&backref->list);
2853                         free(backref);
2854                 } else {
2855                         rec->found_link++;
2856                 }
2857         }
2858
2859         /* Set nlink to 0 */
2860         key.objectid = rec->ino;
2861         key.type = BTRFS_INODE_ITEM_KEY;
2862         key.offset = 0;
2863         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2864         if (ret < 0)
2865                 goto out;
2866         if (ret > 0) {
2867                 ret = -ENOENT;
2868                 goto out;
2869         }
2870         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2871                                     struct btrfs_inode_item);
2872         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2873         btrfs_mark_buffer_dirty(path->nodes[0]);
2874         btrfs_release_path(path);
2875
2876         /*
2877          * Add back valid inode_ref/dir_item/dir_index,
2878          * add_link() will handle the nlink inc, so new nlink must be correct
2879          */
2880         list_for_each_entry(backref, &rec->backrefs, list) {
2881                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2882                                      backref->name, backref->namelen,
2883                                      backref->filetype, &backref->index, 1, 0);
2884                 if (ret < 0)
2885                         goto out;
2886         }
2887 out:
2888         btrfs_release_path(path);
2889         return ret;
2890 }
2891
2892 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2893                                struct btrfs_root *root,
2894                                struct btrfs_path *path,
2895                                struct inode_record *rec)
2896 {
2897         char namebuf[BTRFS_NAME_LEN] = {0};
2898         u8 type = 0;
2899         int namelen = 0;
2900         int name_recovered = 0;
2901         int type_recovered = 0;
2902         int ret = 0;
2903
2904         /*
2905          * Get file name and type first before these invalid inode ref
2906          * are deleted by remove_all_invalid_backref()
2907          */
2908         name_recovered = !find_file_name(rec, namebuf, &namelen);
2909         type_recovered = !find_file_type(rec, &type);
2910
2911         if (!name_recovered) {
2912                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2913                        rec->ino, rec->ino);
2914                 namelen = count_digits(rec->ino);
2915                 sprintf(namebuf, "%llu", rec->ino);
2916                 name_recovered = 1;
2917         }
2918         if (!type_recovered) {
2919                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2920                        rec->ino);
2921                 type = BTRFS_FT_REG_FILE;
2922                 type_recovered = 1;
2923         }
2924
2925         ret = reset_nlink(trans, root, path, rec);
2926         if (ret < 0) {
2927                 fprintf(stderr,
2928                         "Failed to reset nlink for inode %llu: %s\n",
2929                         rec->ino, strerror(-ret));
2930                 goto out;
2931         }
2932
2933         if (rec->found_link == 0) {
2934                 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
2935                                               namebuf, namelen, type,
2936                                               (u64 *)&rec->found_link);
2937                 if (ret)
2938                         goto out;
2939         }
2940         printf("Fixed the nlink of inode %llu\n", rec->ino);
2941 out:
2942         /*
2943          * Clear the flag anyway, or we will loop forever for the same inode
2944          * as it will not be removed from the bad inode list and the dead loop
2945          * happens.
2946          */
2947         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2948         btrfs_release_path(path);
2949         return ret;
2950 }
2951
2952 /*
2953  * Check if there is any normal(reg or prealloc) file extent for given
2954  * ino.
2955  * This is used to determine the file type when neither its dir_index/item or
2956  * inode_item exists.
2957  *
2958  * This will *NOT* report error, if any error happens, just consider it does
2959  * not have any normal file extent.
2960  */
2961 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2962 {
2963         struct btrfs_path path;
2964         struct btrfs_key key;
2965         struct btrfs_key found_key;
2966         struct btrfs_file_extent_item *fi;
2967         u8 type;
2968         int ret = 0;
2969
2970         btrfs_init_path(&path);
2971         key.objectid = ino;
2972         key.type = BTRFS_EXTENT_DATA_KEY;
2973         key.offset = 0;
2974
2975         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2976         if (ret < 0) {
2977                 ret = 0;
2978                 goto out;
2979         }
2980         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2981                 ret = btrfs_next_leaf(root, &path);
2982                 if (ret) {
2983                         ret = 0;
2984                         goto out;
2985                 }
2986         }
2987         while (1) {
2988                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2989                                       path.slots[0]);
2990                 if (found_key.objectid != ino ||
2991                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2992                         break;
2993                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2994                                     struct btrfs_file_extent_item);
2995                 type = btrfs_file_extent_type(path.nodes[0], fi);
2996                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2997                         ret = 1;
2998                         goto out;
2999                 }
3000         }
3001 out:
3002         btrfs_release_path(&path);
3003         return ret;
3004 }
3005
3006 static u32 btrfs_type_to_imode(u8 type)
3007 {
3008         static u32 imode_by_btrfs_type[] = {
3009                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3010                 [BTRFS_FT_DIR]          = S_IFDIR,
3011                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3012                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3013                 [BTRFS_FT_FIFO]         = S_IFIFO,
3014                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3015                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3016         };
3017
3018         return imode_by_btrfs_type[(type)];
3019 }
3020
3021 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3022                                 struct btrfs_root *root,
3023                                 struct btrfs_path *path,
3024                                 struct inode_record *rec)
3025 {
3026         u8 filetype;
3027         u32 mode = 0700;
3028         int type_recovered = 0;
3029         int ret = 0;
3030
3031         printf("Trying to rebuild inode:%llu\n", rec->ino);
3032
3033         type_recovered = !find_file_type(rec, &filetype);
3034
3035         /*
3036          * Try to determine inode type if type not found.
3037          *
3038          * For found regular file extent, it must be FILE.
3039          * For found dir_item/index, it must be DIR.
3040          *
3041          * For undetermined one, use FILE as fallback.
3042          *
3043          * TODO:
3044          * 1. If found backref(inode_index/item is already handled) to it,
3045          *    it must be DIR.
3046          *    Need new inode-inode ref structure to allow search for that.
3047          */
3048         if (!type_recovered) {
3049                 if (rec->found_file_extent &&
3050                     find_normal_file_extent(root, rec->ino)) {
3051                         type_recovered = 1;
3052                         filetype = BTRFS_FT_REG_FILE;
3053                 } else if (rec->found_dir_item) {
3054                         type_recovered = 1;
3055                         filetype = BTRFS_FT_DIR;
3056                 } else if (!list_empty(&rec->orphan_extents)) {
3057                         type_recovered = 1;
3058                         filetype = BTRFS_FT_REG_FILE;
3059                 } else{
3060                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3061                                rec->ino);
3062                         type_recovered = 1;
3063                         filetype = BTRFS_FT_REG_FILE;
3064                 }
3065         }
3066
3067         ret = btrfs_new_inode(trans, root, rec->ino,
3068                               mode | btrfs_type_to_imode(filetype));
3069         if (ret < 0)
3070                 goto out;
3071
3072         /*
3073          * Here inode rebuild is done, we only rebuild the inode item,
3074          * don't repair the nlink(like move to lost+found).
3075          * That is the job of nlink repair.
3076          *
3077          * We just fill the record and return
3078          */
3079         rec->found_dir_item = 1;
3080         rec->imode = mode | btrfs_type_to_imode(filetype);
3081         rec->nlink = 0;
3082         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3083         /* Ensure the inode_nlinks repair function will be called */
3084         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3085 out:
3086         return ret;
3087 }
3088
3089 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3090                                       struct btrfs_root *root,
3091                                       struct btrfs_path *path,
3092                                       struct inode_record *rec)
3093 {
3094         struct orphan_data_extent *orphan;
3095         struct orphan_data_extent *tmp;
3096         int ret = 0;
3097
3098         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3099                 /*
3100                  * Check for conflicting file extents
3101                  *
3102                  * Here we don't know whether the extents is compressed or not,
3103                  * so we can only assume it not compressed nor data offset,
3104                  * and use its disk_len as extent length.
3105                  */
3106                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3107                                        orphan->offset, orphan->disk_len, 0);
3108                 btrfs_release_path(path);
3109                 if (ret < 0)
3110                         goto out;
3111                 if (!ret) {
3112                         fprintf(stderr,
3113                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3114                                 orphan->disk_bytenr, orphan->disk_len);
3115                         ret = btrfs_free_extent(trans,
3116                                         root->fs_info->extent_root,
3117                                         orphan->disk_bytenr, orphan->disk_len,
3118                                         0, root->objectid, orphan->objectid,
3119                                         orphan->offset);
3120                         if (ret < 0)
3121                                 goto out;
3122                 }
3123                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3124                                 orphan->offset, orphan->disk_bytenr,
3125                                 orphan->disk_len, orphan->disk_len);
3126                 if (ret < 0)
3127                         goto out;
3128
3129                 /* Update file size info */
3130                 rec->found_size += orphan->disk_len;
3131                 if (rec->found_size == rec->nbytes)
3132                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3133
3134                 /* Update the file extent hole info too */
3135                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3136                                            orphan->disk_len);
3137                 if (ret < 0)
3138                         goto out;
3139                 if (RB_EMPTY_ROOT(&rec->holes))
3140                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3141
3142                 list_del(&orphan->list);
3143                 free(orphan);
3144         }
3145         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3146 out:
3147         return ret;
3148 }
3149
3150 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3151                                         struct btrfs_root *root,
3152                                         struct btrfs_path *path,
3153                                         struct inode_record *rec)
3154 {
3155         struct rb_node *node;
3156         struct file_extent_hole *hole;
3157         int found = 0;
3158         int ret = 0;
3159
3160         node = rb_first(&rec->holes);
3161
3162         while (node) {
3163                 found = 1;
3164                 hole = rb_entry(node, struct file_extent_hole, node);
3165                 ret = btrfs_punch_hole(trans, root, rec->ino,
3166                                        hole->start, hole->len);
3167                 if (ret < 0)
3168                         goto out;
3169                 ret = del_file_extent_hole(&rec->holes, hole->start,
3170                                            hole->len);
3171                 if (ret < 0)
3172                         goto out;
3173                 if (RB_EMPTY_ROOT(&rec->holes))
3174                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3175                 node = rb_first(&rec->holes);
3176         }
3177         /* special case for a file losing all its file extent */
3178         if (!found) {
3179                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3180                                        round_up(rec->isize,
3181                                                 root->fs_info->sectorsize));
3182                 if (ret < 0)
3183                         goto out;
3184         }
3185         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3186                rec->ino, root->objectid);
3187 out:
3188         return ret;
3189 }
3190
3191 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3192 {
3193         struct btrfs_trans_handle *trans;
3194         struct btrfs_path path;
3195         int ret = 0;
3196
3197         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3198                              I_ERR_NO_ORPHAN_ITEM |
3199                              I_ERR_LINK_COUNT_WRONG |
3200                              I_ERR_NO_INODE_ITEM |
3201                              I_ERR_FILE_EXTENT_ORPHAN |
3202                              I_ERR_FILE_EXTENT_DISCOUNT|
3203                              I_ERR_FILE_NBYTES_WRONG)))
3204                 return rec->errors;
3205
3206         /*
3207          * For nlink repair, it may create a dir and add link, so
3208          * 2 for parent(256)'s dir_index and dir_item
3209          * 2 for lost+found dir's inode_item and inode_ref
3210          * 1 for the new inode_ref of the file
3211          * 2 for lost+found dir's dir_index and dir_item for the file
3212          */
3213         trans = btrfs_start_transaction(root, 7);
3214         if (IS_ERR(trans))
3215                 return PTR_ERR(trans);
3216
3217         btrfs_init_path(&path);
3218         if (rec->errors & I_ERR_NO_INODE_ITEM)
3219                 ret = repair_inode_no_item(trans, root, &path, rec);
3220         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3221                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3222         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3223                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3224         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3225                 ret = repair_inode_isize(trans, root, &path, rec);
3226         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3227                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3228         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3229                 ret = repair_inode_nlinks(trans, root, &path, rec);
3230         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3231                 ret = repair_inode_nbytes(trans, root, &path, rec);
3232         btrfs_commit_transaction(trans, root);
3233         btrfs_release_path(&path);
3234         return ret;
3235 }
3236
3237 static int check_inode_recs(struct btrfs_root *root,
3238                             struct cache_tree *inode_cache)
3239 {
3240         struct cache_extent *cache;
3241         struct ptr_node *node;
3242         struct inode_record *rec;
3243         struct inode_backref *backref;
3244         int stage = 0;
3245         int ret = 0;
3246         int err = 0;
3247         u64 error = 0;
3248         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3249
3250         if (btrfs_root_refs(&root->root_item) == 0) {
3251                 if (!cache_tree_empty(inode_cache))
3252                         fprintf(stderr, "warning line %d\n", __LINE__);
3253                 return 0;
3254         }
3255
3256         /*
3257          * We need to repair backrefs first because we could change some of the
3258          * errors in the inode recs.
3259          *
3260          * We also need to go through and delete invalid backrefs first and then
3261          * add the correct ones second.  We do this because we may get EEXIST
3262          * when adding back the correct index because we hadn't yet deleted the
3263          * invalid index.
3264          *
3265          * For example, if we were missing a dir index then the directories
3266          * isize would be wrong, so if we fixed the isize to what we thought it
3267          * would be and then fixed the backref we'd still have a invalid fs, so
3268          * we need to add back the dir index and then check to see if the isize
3269          * is still wrong.
3270          */
3271         while (stage < 3) {
3272                 stage++;
3273                 if (stage == 3 && !err)
3274                         break;
3275
3276                 cache = search_cache_extent(inode_cache, 0);
3277                 while (repair && cache) {
3278                         node = container_of(cache, struct ptr_node, cache);
3279                         rec = node->data;
3280                         cache = next_cache_extent(cache);
3281
3282                         /* Need to free everything up and rescan */
3283                         if (stage == 3) {
3284                                 remove_cache_extent(inode_cache, &node->cache);
3285                                 free(node);
3286                                 free_inode_rec(rec);
3287                                 continue;
3288                         }
3289
3290                         if (list_empty(&rec->backrefs))
3291                                 continue;
3292
3293                         ret = repair_inode_backrefs(root, rec, inode_cache,
3294                                                     stage == 1);
3295                         if (ret < 0) {
3296                                 err = ret;
3297                                 stage = 2;
3298                                 break;
3299                         } if (ret > 0) {
3300                                 err = -EAGAIN;
3301                         }
3302                 }
3303         }
3304         if (err)
3305                 return err;
3306
3307         rec = get_inode_rec(inode_cache, root_dirid, 0);
3308         BUG_ON(IS_ERR(rec));
3309         if (rec) {
3310                 ret = check_root_dir(rec);
3311                 if (ret) {
3312                         fprintf(stderr, "root %llu root dir %llu error\n",
3313                                 (unsigned long long)root->root_key.objectid,
3314                                 (unsigned long long)root_dirid);
3315                         print_inode_error(root, rec);
3316                         error++;
3317                 }
3318         } else {
3319                 if (repair) {
3320                         struct btrfs_trans_handle *trans;
3321
3322                         trans = btrfs_start_transaction(root, 1);
3323                         if (IS_ERR(trans)) {
3324                                 err = PTR_ERR(trans);
3325                                 return err;
3326                         }
3327
3328                         fprintf(stderr,
3329                                 "root %llu missing its root dir, recreating\n",
3330                                 (unsigned long long)root->objectid);
3331
3332                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3333                         BUG_ON(ret);
3334
3335                         btrfs_commit_transaction(trans, root);
3336                         return -EAGAIN;
3337                 }
3338
3339                 fprintf(stderr, "root %llu root dir %llu not found\n",
3340                         (unsigned long long)root->root_key.objectid,
3341                         (unsigned long long)root_dirid);
3342         }
3343
3344         while (1) {
3345                 cache = search_cache_extent(inode_cache, 0);
3346                 if (!cache)
3347                         break;
3348                 node = container_of(cache, struct ptr_node, cache);
3349                 rec = node->data;
3350                 remove_cache_extent(inode_cache, &node->cache);
3351                 free(node);
3352                 if (rec->ino == root_dirid ||
3353                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3354                         free_inode_rec(rec);
3355                         continue;
3356                 }
3357
3358                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3359                         ret = check_orphan_item(root, rec->ino);
3360                         if (ret == 0)
3361                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3362                         if (can_free_inode_rec(rec)) {
3363                                 free_inode_rec(rec);
3364                                 continue;
3365                         }
3366                 }
3367
3368                 if (!rec->found_inode_item)
3369                         rec->errors |= I_ERR_NO_INODE_ITEM;
3370                 if (rec->found_link != rec->nlink)
3371                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3372                 if (repair) {
3373                         ret = try_repair_inode(root, rec);
3374                         if (ret == 0 && can_free_inode_rec(rec)) {
3375                                 free_inode_rec(rec);
3376                                 continue;
3377                         }
3378                         ret = 0;
3379                 }
3380
3381                 if (!(repair && ret == 0))
3382                         error++;
3383                 print_inode_error(root, rec);
3384                 list_for_each_entry(backref, &rec->backrefs, list) {
3385                         if (!backref->found_dir_item)
3386                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3387                         if (!backref->found_dir_index)
3388                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3389                         if (!backref->found_inode_ref)
3390                                 backref->errors |= REF_ERR_NO_INODE_REF;
3391                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3392                                 " namelen %u name %s filetype %d errors %x",
3393                                 (unsigned long long)backref->dir,
3394                                 (unsigned long long)backref->index,
3395                                 backref->namelen, backref->name,
3396                                 backref->filetype, backref->errors);
3397                         print_ref_error(backref->errors);
3398                 }
3399                 free_inode_rec(rec);
3400         }
3401         return (error > 0) ? -1 : 0;
3402 }
3403
3404 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3405                                         u64 objectid)
3406 {
3407         struct cache_extent *cache;
3408         struct root_record *rec = NULL;
3409         int ret;
3410
3411         cache = lookup_cache_extent(root_cache, objectid, 1);
3412         if (cache) {
3413                 rec = container_of(cache, struct root_record, cache);
3414         } else {
3415                 rec = calloc(1, sizeof(*rec));
3416                 if (!rec)
3417                         return ERR_PTR(-ENOMEM);
3418                 rec->objectid = objectid;
3419                 INIT_LIST_HEAD(&rec->backrefs);
3420                 rec->cache.start = objectid;
3421                 rec->cache.size = 1;
3422
3423                 ret = insert_cache_extent(root_cache, &rec->cache);
3424                 if (ret)
3425                         return ERR_PTR(-EEXIST);
3426         }
3427         return rec;
3428 }
3429
3430 static struct root_backref *get_root_backref(struct root_record *rec,
3431                                              u64 ref_root, u64 dir, u64 index,
3432                                              const char *name, int namelen)
3433 {
3434         struct root_backref *backref;
3435
3436         list_for_each_entry(backref, &rec->backrefs, list) {
3437                 if (backref->ref_root != ref_root || backref->dir != dir ||
3438                     backref->namelen != namelen)
3439                         continue;
3440                 if (memcmp(name, backref->name, namelen))
3441                         continue;
3442                 return backref;
3443         }
3444
3445         backref = calloc(1, sizeof(*backref) + namelen + 1);
3446         if (!backref)
3447                 return NULL;
3448         backref->ref_root = ref_root;
3449         backref->dir = dir;
3450         backref->index = index;
3451         backref->namelen = namelen;
3452         memcpy(backref->name, name, namelen);
3453         backref->name[namelen] = '\0';
3454         list_add_tail(&backref->list, &rec->backrefs);
3455         return backref;
3456 }
3457
3458 static void free_root_record(struct cache_extent *cache)
3459 {
3460         struct root_record *rec;
3461         struct root_backref *backref;
3462
3463         rec = container_of(cache, struct root_record, cache);
3464         while (!list_empty(&rec->backrefs)) {
3465                 backref = to_root_backref(rec->backrefs.next);
3466                 list_del(&backref->list);
3467                 free(backref);
3468         }
3469
3470         free(rec);
3471 }
3472
3473 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3474
3475 static int add_root_backref(struct cache_tree *root_cache,
3476                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3477                             const char *name, int namelen,
3478                             int item_type, int errors)
3479 {
3480         struct root_record *rec;
3481         struct root_backref *backref;
3482
3483         rec = get_root_rec(root_cache, root_id);
3484         BUG_ON(IS_ERR(rec));
3485         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3486         BUG_ON(!backref);
3487
3488         backref->errors |= errors;
3489
3490         if (item_type != BTRFS_DIR_ITEM_KEY) {
3491                 if (backref->found_dir_index || backref->found_back_ref ||
3492                     backref->found_forward_ref) {
3493                         if (backref->index != index)
3494                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3495                 } else {
3496                         backref->index = index;
3497                 }
3498         }
3499
3500         if (item_type == BTRFS_DIR_ITEM_KEY) {
3501                 if (backref->found_forward_ref)
3502                         rec->found_ref++;
3503                 backref->found_dir_item = 1;
3504         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3505                 backref->found_dir_index = 1;
3506         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3507                 if (backref->found_forward_ref)
3508                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3509                 else if (backref->found_dir_item)
3510                         rec->found_ref++;
3511                 backref->found_forward_ref = 1;
3512         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3513                 if (backref->found_back_ref)
3514                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3515                 backref->found_back_ref = 1;
3516         } else {
3517                 BUG_ON(1);
3518         }
3519
3520         if (backref->found_forward_ref && backref->found_dir_item)
3521                 backref->reachable = 1;
3522         return 0;
3523 }
3524
3525 static int merge_root_recs(struct btrfs_root *root,
3526                            struct cache_tree *src_cache,
3527                            struct cache_tree *dst_cache)
3528 {
3529         struct cache_extent *cache;
3530         struct ptr_node *node;
3531         struct inode_record *rec;
3532         struct inode_backref *backref;
3533         int ret = 0;
3534
3535         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3536                 free_inode_recs_tree(src_cache);
3537                 return 0;
3538         }
3539
3540         while (1) {
3541                 cache = search_cache_extent(src_cache, 0);
3542                 if (!cache)
3543                         break;
3544                 node = container_of(cache, struct ptr_node, cache);
3545                 rec = node->data;
3546                 remove_cache_extent(src_cache, &node->cache);
3547                 free(node);
3548
3549                 ret = is_child_root(root, root->objectid, rec->ino);
3550                 if (ret < 0)
3551                         break;
3552                 else if (ret == 0)
3553                         goto skip;
3554
3555                 list_for_each_entry(backref, &rec->backrefs, list) {
3556                         BUG_ON(backref->found_inode_ref);
3557                         if (backref->found_dir_item)
3558                                 add_root_backref(dst_cache, rec->ino,
3559                                         root->root_key.objectid, backref->dir,
3560                                         backref->index, backref->name,
3561                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3562                                         backref->errors);
3563                         if (backref->found_dir_index)
3564                                 add_root_backref(dst_cache, rec->ino,
3565                                         root->root_key.objectid, backref->dir,
3566                                         backref->index, backref->name,
3567                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3568                                         backref->errors);
3569                 }
3570 skip:
3571                 free_inode_rec(rec);
3572         }
3573         if (ret < 0)
3574                 return ret;
3575         return 0;
3576 }
3577
3578 static int check_root_refs(struct btrfs_root *root,
3579                            struct cache_tree *root_cache)
3580 {
3581         struct root_record *rec;
3582         struct root_record *ref_root;
3583         struct root_backref *backref;
3584         struct cache_extent *cache;
3585         int loop = 1;
3586         int ret;
3587         int error;
3588         int errors = 0;
3589
3590         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3591         BUG_ON(IS_ERR(rec));
3592         rec->found_ref = 1;
3593
3594         /* fixme: this can not detect circular references */
3595         while (loop) {
3596                 loop = 0;
3597                 cache = search_cache_extent(root_cache, 0);
3598                 while (1) {
3599                         if (!cache)
3600                                 break;
3601                         rec = container_of(cache, struct root_record, cache);
3602                         cache = next_cache_extent(cache);
3603
3604                         if (rec->found_ref == 0)
3605                                 continue;
3606
3607                         list_for_each_entry(backref, &rec->backrefs, list) {
3608                                 if (!backref->reachable)
3609                                         continue;
3610
3611                                 ref_root = get_root_rec(root_cache,
3612                                                         backref->ref_root);
3613                                 BUG_ON(IS_ERR(ref_root));
3614                                 if (ref_root->found_ref > 0)
3615                                         continue;
3616
3617                                 backref->reachable = 0;
3618                                 rec->found_ref--;
3619                                 if (rec->found_ref == 0)
3620                                         loop = 1;
3621                         }
3622                 }
3623         }
3624
3625         cache = search_cache_extent(root_cache, 0);
3626         while (1) {
3627                 if (!cache)
3628                         break;
3629                 rec = container_of(cache, struct root_record, cache);
3630                 cache = next_cache_extent(cache);
3631
3632                 if (rec->found_ref == 0 &&
3633                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3634                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3635                         ret = check_orphan_item(root->fs_info->tree_root,
3636                                                 rec->objectid);
3637                         if (ret == 0)
3638                                 continue;
3639
3640                         /*
3641                          * If we don't have a root item then we likely just have
3642                          * a dir item in a snapshot for this root but no actual
3643                          * ref key or anything so it's meaningless.
3644                          */
3645                         if (!rec->found_root_item)
3646                                 continue;
3647                         errors++;
3648                         fprintf(stderr, "fs tree %llu not referenced\n",
3649                                 (unsigned long long)rec->objectid);
3650                 }
3651
3652                 error = 0;
3653                 if (rec->found_ref > 0 && !rec->found_root_item)
3654                         error = 1;
3655                 list_for_each_entry(backref, &rec->backrefs, list) {
3656                         if (!backref->found_dir_item)
3657                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3658                         if (!backref->found_dir_index)
3659                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3660                         if (!backref->found_back_ref)
3661                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3662                         if (!backref->found_forward_ref)
3663                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3664                         if (backref->reachable && backref->errors)
3665                                 error = 1;
3666                 }
3667                 if (!error)
3668                         continue;
3669
3670                 errors++;
3671                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3672                         (unsigned long long)rec->objectid, rec->found_ref,
3673                          rec->found_root_item ? "" : "not found");
3674
3675                 list_for_each_entry(backref, &rec->backrefs, list) {
3676                         if (!backref->reachable)
3677                                 continue;
3678                         if (!backref->errors && rec->found_root_item)
3679                                 continue;
3680                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3681                                 " index %llu namelen %u name %s errors %x\n",
3682                                 (unsigned long long)backref->ref_root,
3683                                 (unsigned long long)backref->dir,
3684                                 (unsigned long long)backref->index,
3685                                 backref->namelen, backref->name,
3686                                 backref->errors);
3687                         print_ref_error(backref->errors);
3688                 }
3689         }
3690         return errors > 0 ? 1 : 0;
3691 }
3692
3693 static int process_root_ref(struct extent_buffer *eb, int slot,
3694                             struct btrfs_key *key,
3695                             struct cache_tree *root_cache)
3696 {
3697         u64 dirid;
3698         u64 index;
3699         u32 len;
3700         u32 name_len;
3701         struct btrfs_root_ref *ref;
3702         char namebuf[BTRFS_NAME_LEN];
3703         int error;
3704
3705         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3706
3707         dirid = btrfs_root_ref_dirid(eb, ref);
3708         index = btrfs_root_ref_sequence(eb, ref);
3709         name_len = btrfs_root_ref_name_len(eb, ref);
3710
3711         if (name_len <= BTRFS_NAME_LEN) {
3712                 len = name_len;
3713                 error = 0;
3714         } else {
3715                 len = BTRFS_NAME_LEN;
3716                 error = REF_ERR_NAME_TOO_LONG;
3717         }
3718         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3719
3720         if (key->type == BTRFS_ROOT_REF_KEY) {
3721                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3722                                  index, namebuf, len, key->type, error);
3723         } else {
3724                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3725                                  index, namebuf, len, key->type, error);
3726         }
3727         return 0;
3728 }
3729
3730 static void free_corrupt_block(struct cache_extent *cache)
3731 {
3732         struct btrfs_corrupt_block *corrupt;
3733
3734         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3735         free(corrupt);
3736 }
3737
3738 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3739
3740 /*
3741  * Repair the btree of the given root.
3742  *
3743  * The fix is to remove the node key in corrupt_blocks cache_tree.
3744  * and rebalance the tree.
3745  * After the fix, the btree should be writeable.
3746  */
3747 static int repair_btree(struct btrfs_root *root,
3748                         struct cache_tree *corrupt_blocks)
3749 {
3750         struct btrfs_trans_handle *trans;
3751         struct btrfs_path path;
3752         struct btrfs_corrupt_block *corrupt;
3753         struct cache_extent *cache;
3754         struct btrfs_key key;
3755         u64 offset;
3756         int level;
3757         int ret = 0;
3758
3759         if (cache_tree_empty(corrupt_blocks))
3760                 return 0;
3761
3762         trans = btrfs_start_transaction(root, 1);
3763         if (IS_ERR(trans)) {
3764                 ret = PTR_ERR(trans);
3765                 fprintf(stderr, "Error starting transaction: %s\n",
3766                         strerror(-ret));
3767                 return ret;
3768         }
3769         btrfs_init_path(&path);
3770         cache = first_cache_extent(corrupt_blocks);
3771         while (cache) {
3772                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3773                                        cache);
3774                 level = corrupt->level;
3775                 path.lowest_level = level;
3776                 key.objectid = corrupt->key.objectid;
3777                 key.type = corrupt->key.type;
3778                 key.offset = corrupt->key.offset;
3779
3780                 /*
3781                  * Here we don't want to do any tree balance, since it may
3782                  * cause a balance with corrupted brother leaf/node,
3783                  * so ins_len set to 0 here.
3784                  * Balance will be done after all corrupt node/leaf is deleted.
3785                  */
3786                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3787                 if (ret < 0)
3788                         goto out;
3789                 offset = btrfs_node_blockptr(path.nodes[level],
3790                                              path.slots[level]);
3791
3792                 /* Remove the ptr */
3793                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3794                 if (ret < 0)
3795                         goto out;
3796                 /*
3797                  * Remove the corresponding extent
3798                  * return value is not concerned.
3799                  */
3800                 btrfs_release_path(&path);
3801                 ret = btrfs_free_extent(trans, root, offset,
3802                                 root->fs_info->nodesize, 0,
3803                                 root->root_key.objectid, level - 1, 0);
3804                 cache = next_cache_extent(cache);
3805         }
3806
3807         /* Balance the btree using btrfs_search_slot() */
3808         cache = first_cache_extent(corrupt_blocks);
3809         while (cache) {
3810                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3811                                        cache);
3812                 memcpy(&key, &corrupt->key, sizeof(key));
3813                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3814                 if (ret < 0)
3815                         goto out;
3816                 /* return will always >0 since it won't find the item */
3817                 ret = 0;
3818                 btrfs_release_path(&path);
3819                 cache = next_cache_extent(cache);
3820         }
3821 out:
3822         btrfs_commit_transaction(trans, root);
3823         btrfs_release_path(&path);
3824         return ret;
3825 }
3826
3827 static int check_fs_root(struct btrfs_root *root,
3828                          struct cache_tree *root_cache,
3829                          struct walk_control *wc)
3830 {
3831         int ret = 0;
3832         int err = 0;
3833         int wret;
3834         int level;
3835         struct btrfs_path path;
3836         struct shared_node root_node;
3837         struct root_record *rec;
3838         struct btrfs_root_item *root_item = &root->root_item;
3839         struct cache_tree corrupt_blocks;
3840         struct orphan_data_extent *orphan;
3841         struct orphan_data_extent *tmp;
3842         enum btrfs_tree_block_status status;
3843         struct node_refs nrefs;
3844
3845         /*
3846          * Reuse the corrupt_block cache tree to record corrupted tree block
3847          *
3848          * Unlike the usage in extent tree check, here we do it in a per
3849          * fs/subvol tree base.
3850          */
3851         cache_tree_init(&corrupt_blocks);
3852         root->fs_info->corrupt_blocks = &corrupt_blocks;
3853
3854         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3855                 rec = get_root_rec(root_cache, root->root_key.objectid);
3856                 BUG_ON(IS_ERR(rec));
3857                 if (btrfs_root_refs(root_item) > 0)
3858                         rec->found_root_item = 1;
3859         }
3860
3861         btrfs_init_path(&path);
3862         memset(&root_node, 0, sizeof(root_node));
3863         cache_tree_init(&root_node.root_cache);
3864         cache_tree_init(&root_node.inode_cache);
3865         memset(&nrefs, 0, sizeof(nrefs));
3866
3867         /* Move the orphan extent record to corresponding inode_record */
3868         list_for_each_entry_safe(orphan, tmp,
3869                                  &root->orphan_data_extents, list) {
3870                 struct inode_record *inode;
3871
3872                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3873                                       1);
3874                 BUG_ON(IS_ERR(inode));
3875                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3876                 list_move(&orphan->list, &inode->orphan_extents);
3877         }
3878
3879         level = btrfs_header_level(root->node);
3880         memset(wc->nodes, 0, sizeof(wc->nodes));
3881         wc->nodes[level] = &root_node;
3882         wc->active_node = level;
3883         wc->root_level = level;
3884
3885         /* We may not have checked the root block, lets do that now */
3886         if (btrfs_is_leaf(root->node))
3887                 status = btrfs_check_leaf(root, NULL, root->node);
3888         else
3889                 status = btrfs_check_node(root, NULL, root->node);
3890         if (status != BTRFS_TREE_BLOCK_CLEAN)
3891                 return -EIO;
3892
3893         if (btrfs_root_refs(root_item) > 0 ||
3894             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3895                 path.nodes[level] = root->node;
3896                 extent_buffer_get(root->node);
3897                 path.slots[level] = 0;
3898         } else {
3899                 struct btrfs_key key;
3900                 struct btrfs_disk_key found_key;
3901
3902                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3903                 level = root_item->drop_level;
3904                 path.lowest_level = level;
3905                 if (level > btrfs_header_level(root->node) ||
3906                     level >= BTRFS_MAX_LEVEL) {
3907                         error("ignoring invalid drop level: %u", level);
3908                         goto skip_walking;
3909                 }
3910                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3911                 if (wret < 0)
3912                         goto skip_walking;
3913                 btrfs_node_key(path.nodes[level], &found_key,
3914                                 path.slots[level]);
3915                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3916                                         sizeof(found_key)));
3917         }
3918
3919         while (1) {
3920                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3921                 if (wret < 0)
3922                         ret = wret;
3923                 if (wret != 0)
3924                         break;
3925
3926                 wret = walk_up_tree(root, &path, wc, &level);
3927                 if (wret < 0)
3928                         ret = wret;
3929                 if (wret != 0)
3930                         break;
3931         }
3932 skip_walking:
3933         btrfs_release_path(&path);
3934
3935         if (!cache_tree_empty(&corrupt_blocks)) {
3936                 struct cache_extent *cache;
3937                 struct btrfs_corrupt_block *corrupt;
3938
3939                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3940                        root->root_key.objectid);
3941                 cache = first_cache_extent(&corrupt_blocks);
3942                 while (cache) {
3943                         corrupt = container_of(cache,
3944                                                struct btrfs_corrupt_block,
3945                                                cache);
3946                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3947                                cache->start, corrupt->level,
3948                                corrupt->key.objectid, corrupt->key.type,
3949                                corrupt->key.offset);
3950                         cache = next_cache_extent(cache);
3951                 }
3952                 if (repair) {
3953                         printf("Try to repair the btree for root %llu\n",
3954                                root->root_key.objectid);
3955                         ret = repair_btree(root, &corrupt_blocks);
3956                         if (ret < 0)
3957                                 fprintf(stderr, "Failed to repair btree: %s\n",
3958                                         strerror(-ret));
3959                         if (!ret)
3960                                 printf("Btree for root %llu is fixed\n",
3961                                        root->root_key.objectid);
3962                 }
3963         }
3964
3965         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3966         if (err < 0)
3967                 ret = err;
3968
3969         if (root_node.current) {
3970                 root_node.current->checked = 1;
3971                 maybe_free_inode_rec(&root_node.inode_cache,
3972                                 root_node.current);
3973         }
3974
3975         err = check_inode_recs(root, &root_node.inode_cache);
3976         if (!ret)
3977                 ret = err;
3978
3979         free_corrupt_blocks_tree(&corrupt_blocks);
3980         root->fs_info->corrupt_blocks = NULL;
3981         free_orphan_data_extents(&root->orphan_data_extents);
3982         return ret;
3983 }
3984
3985 static int check_fs_roots(struct btrfs_fs_info *fs_info,
3986                           struct cache_tree *root_cache)
3987 {
3988         struct btrfs_path path;
3989         struct btrfs_key key;
3990         struct walk_control wc;
3991         struct extent_buffer *leaf, *tree_node;
3992         struct btrfs_root *tmp_root;
3993         struct btrfs_root *tree_root = fs_info->tree_root;
3994         int ret;
3995         int err = 0;
3996
3997         if (ctx.progress_enabled) {
3998                 ctx.tp = TASK_FS_ROOTS;
3999                 task_start(ctx.info);
4000         }
4001
4002         /*
4003          * Just in case we made any changes to the extent tree that weren't
4004          * reflected into the free space cache yet.
4005          */
4006         if (repair)
4007                 reset_cached_block_groups(fs_info);
4008         memset(&wc, 0, sizeof(wc));
4009         cache_tree_init(&wc.shared);
4010         btrfs_init_path(&path);
4011
4012 again:
4013         key.offset = 0;
4014         key.objectid = 0;
4015         key.type = BTRFS_ROOT_ITEM_KEY;
4016         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4017         if (ret < 0) {
4018                 err = 1;
4019                 goto out;
4020         }
4021         tree_node = tree_root->node;
4022         while (1) {
4023                 if (tree_node != tree_root->node) {
4024                         free_root_recs_tree(root_cache);
4025                         btrfs_release_path(&path);
4026                         goto again;
4027                 }
4028                 leaf = path.nodes[0];
4029                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4030                         ret = btrfs_next_leaf(tree_root, &path);
4031                         if (ret) {
4032                                 if (ret < 0)
4033                                         err = 1;
4034                                 break;
4035                         }
4036                         leaf = path.nodes[0];
4037                 }
4038                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4039                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4040                     fs_root_objectid(key.objectid)) {
4041                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4042                                 tmp_root = btrfs_read_fs_root_no_cache(
4043                                                 fs_info, &key);
4044                         } else {
4045                                 key.offset = (u64)-1;
4046                                 tmp_root = btrfs_read_fs_root(
4047                                                 fs_info, &key);
4048                         }
4049                         if (IS_ERR(tmp_root)) {
4050                                 err = 1;
4051                                 goto next;
4052                         }
4053                         ret = check_fs_root(tmp_root, root_cache, &wc);
4054                         if (ret == -EAGAIN) {
4055                                 free_root_recs_tree(root_cache);
4056                                 btrfs_release_path(&path);
4057                                 goto again;
4058                         }
4059                         if (ret)
4060                                 err = 1;
4061                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4062                                 btrfs_free_fs_root(tmp_root);
4063                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4064                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4065                         process_root_ref(leaf, path.slots[0], &key,
4066                                          root_cache);
4067                 }
4068 next:
4069                 path.slots[0]++;
4070         }
4071 out:
4072         btrfs_release_path(&path);
4073         if (err)
4074                 free_extent_cache_tree(&wc.shared);
4075         if (!cache_tree_empty(&wc.shared))
4076                 fprintf(stderr, "warning line %d\n", __LINE__);
4077
4078         task_stop(ctx.info);
4079
4080         return err;
4081 }
4082
4083 /*
4084  * Find the @index according by @ino and name.
4085  * Notice:time efficiency is O(N)
4086  *
4087  * @root:       the root of the fs/file tree
4088  * @index_ret:  the index as return value
4089  * @namebuf:    the name to match
4090  * @name_len:   the length of name to match
4091  * @file_type:  the file_type of INODE_ITEM to match
4092  *
4093  * Returns 0 if found and *@index_ret will be modified with right value
4094  * Returns< 0 not found and *@index_ret will be (u64)-1
4095  */
4096 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4097                           u64 *index_ret, char *namebuf, u32 name_len,
4098                           u8 file_type)
4099 {
4100         struct btrfs_path path;
4101         struct extent_buffer *node;
4102         struct btrfs_dir_item *di;
4103         struct btrfs_key key;
4104         struct btrfs_key location;
4105         char name[BTRFS_NAME_LEN] = {0};
4106
4107         u32 total;
4108         u32 cur = 0;
4109         u32 len;
4110         u32 data_len;
4111         u8 filetype;
4112         int slot;
4113         int ret;
4114
4115         ASSERT(index_ret);
4116
4117         /* search from the last index */
4118         key.objectid = dirid;
4119         key.offset = (u64)-1;
4120         key.type = BTRFS_DIR_INDEX_KEY;
4121
4122         btrfs_init_path(&path);
4123         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4124         if (ret < 0)
4125                 return ret;
4126
4127 loop:
4128         ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4129         if (ret) {
4130                 ret = -ENOENT;
4131                 *index_ret = (64)-1;
4132                 goto out;
4133         }
4134         /* Check whether inode_id/filetype/name match */
4135         node = path.nodes[0];
4136         slot = path.slots[0];
4137         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4138         total = btrfs_item_size_nr(node, slot);
4139         while (cur < total) {
4140                 ret = -ENOENT;
4141                 len = btrfs_dir_name_len(node, di);
4142                 data_len = btrfs_dir_data_len(node, di);
4143
4144                 btrfs_dir_item_key_to_cpu(node, di, &location);
4145                 if (location.objectid != location_id ||
4146                     location.type != BTRFS_INODE_ITEM_KEY ||
4147                     location.offset != 0)
4148                         goto next;
4149
4150                 filetype = btrfs_dir_type(node, di);
4151                 if (file_type != filetype)
4152                         goto next;
4153
4154                 if (len > BTRFS_NAME_LEN)
4155                         len = BTRFS_NAME_LEN;
4156
4157                 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4158                 if (len != name_len || strncmp(namebuf, name, len))
4159                         goto next;
4160
4161                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4162                 *index_ret = key.offset;
4163                 ret = 0;
4164                 goto out;
4165 next:
4166                 len += sizeof(*di) + data_len;
4167                 di = (struct btrfs_dir_item *)((char *)di + len);
4168                 cur += len;
4169         }
4170         goto loop;
4171
4172 out:
4173         btrfs_release_path(&path);
4174         return ret;
4175 }
4176
4177 /*
4178  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4179  * INODE_REF/INODE_EXTREF match.
4180  *
4181  * @root:       the root of the fs/file tree
4182  * @key:        the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4183  *              value while find index
4184  * @location_key: location key of the struct btrfs_dir_item to match
4185  * @name:       the name to match
4186  * @namelen:    the length of name
4187  * @file_type:  the type of file to math
4188  *
4189  * Return 0 if no error occurred.
4190  * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4191  * DIR_ITEM/DIR_INDEX
4192  * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4193  * and DIR_ITEM/DIR_INDEX mismatch
4194  */
4195 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4196                          struct btrfs_key *location_key, char *name,
4197                          u32 namelen, u8 file_type)
4198 {
4199         struct btrfs_path path;
4200         struct extent_buffer *node;
4201         struct btrfs_dir_item *di;
4202         struct btrfs_key location;
4203         char namebuf[BTRFS_NAME_LEN] = {0};
4204         u32 total;
4205         u32 cur = 0;
4206         u32 len;
4207         u32 data_len;
4208         u8 filetype;
4209         int slot;
4210         int ret;
4211
4212         /* get the index by traversing all index */
4213         if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4214                 ret = find_dir_index(root, key->objectid,
4215                                      location_key->objectid, &key->offset,
4216                                      name, namelen, file_type);
4217                 if (ret)
4218                         ret = DIR_INDEX_MISSING;
4219                 return ret;
4220         }
4221
4222         btrfs_init_path(&path);
4223         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4224         if (ret) {
4225                 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4226                         DIR_INDEX_MISSING;
4227                 goto out;
4228         }
4229
4230         /* Check whether inode_id/filetype/name match */
4231         node = path.nodes[0];
4232         slot = path.slots[0];
4233         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4234         total = btrfs_item_size_nr(node, slot);
4235         while (cur < total) {
4236                 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4237                         DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4238
4239                 len = btrfs_dir_name_len(node, di);
4240                 data_len = btrfs_dir_data_len(node, di);
4241
4242                 btrfs_dir_item_key_to_cpu(node, di, &location);
4243                 if (location.objectid != location_key->objectid ||
4244                     location.type != location_key->type ||
4245                     location.offset != location_key->offset)
4246                         goto next;
4247
4248                 filetype = btrfs_dir_type(node, di);
4249                 if (file_type != filetype)
4250                         goto next;
4251
4252                 if (len > BTRFS_NAME_LEN) {
4253                         len = BTRFS_NAME_LEN;
4254                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4255                         root->objectid,
4256                         key->type == BTRFS_DIR_ITEM_KEY ?
4257                         "DIR_ITEM" : "DIR_INDEX",
4258                         key->objectid, key->offset, len);
4259                 }
4260                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4261                                    len);
4262                 if (len != namelen || strncmp(namebuf, name, len))
4263                         goto next;
4264
4265                 ret = 0;
4266                 goto out;
4267 next:
4268                 len += sizeof(*di) + data_len;
4269                 di = (struct btrfs_dir_item *)((char *)di + len);
4270                 cur += len;
4271         }
4272
4273 out:
4274         btrfs_release_path(&path);
4275         return ret;
4276 }
4277
4278 /*
4279  * Prints inode ref error message
4280  */
4281 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4282                                 u64 index, const char *namebuf, int name_len,
4283                                 u8 filetype, int err)
4284 {
4285         if (!err)
4286                 return;
4287
4288         /* root dir error */
4289         if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4290                 error(
4291         "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4292                       root->objectid, key->objectid, key->offset, namebuf);
4293                 return;
4294         }
4295
4296         /* normal error */
4297         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4298                 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4299                       root->objectid, key->offset,
4300                       btrfs_name_hash(namebuf, name_len),
4301                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4302                       namebuf, filetype);
4303         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4304                 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4305                       root->objectid, key->offset, index,
4306                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4307                       namebuf, filetype);
4308 }
4309
4310 /*
4311  * Insert the missing inode item.
4312  *
4313  * Returns 0 means success.
4314  * Returns <0 means error.
4315  */
4316 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4317                                      u8 filetype)
4318 {
4319         struct btrfs_key key;
4320         struct btrfs_trans_handle *trans;
4321         struct btrfs_path path;
4322         int ret;
4323
4324         key.objectid = ino;
4325         key.type = BTRFS_INODE_ITEM_KEY;
4326         key.offset = 0;
4327
4328         btrfs_init_path(&path);
4329         trans = btrfs_start_transaction(root, 1);
4330         if (IS_ERR(trans)) {
4331                 ret = -EIO;
4332                 goto out;
4333         }
4334
4335         ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4336         if (ret < 0 || !ret)
4337                 goto fail;
4338
4339         /* insert inode item */
4340         create_inode_item_lowmem(trans, root, ino, filetype);
4341         ret = 0;
4342 fail:
4343         btrfs_commit_transaction(trans, root);
4344 out:
4345         if (ret)
4346                 error("failed to repair root %llu INODE ITEM[%llu] missing",
4347                       root->objectid, ino);
4348         btrfs_release_path(&path);
4349         return ret;
4350 }
4351
4352 /*
4353  * The ternary means dir item, dir index and relative inode ref.
4354  * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4355  * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4356  * strategy:
4357  * If two of three is missing or mismatched, delete the existing one.
4358  * If one of three is missing or mismatched, add the missing one.
4359  *
4360  * returns 0 means success.
4361  * returns not 0 means on error;
4362  */
4363 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4364                           u64 index, char *name, int name_len, u8 filetype,
4365                           int err)
4366 {
4367         struct btrfs_trans_handle *trans;
4368         int stage = 0;
4369         int ret = 0;
4370
4371         /*
4372          * stage shall be one of following valild values:
4373          *      0: Fine, nothing to do.
4374          *      1: One of three is wrong, so add missing one.
4375          *      2: Two of three is wrong, so delete existed one.
4376          */
4377         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4378                 stage++;
4379         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4380                 stage++;
4381         if (err & (INODE_REF_MISSING))
4382                 stage++;
4383
4384         /* stage must be smllarer than 3 */
4385         ASSERT(stage < 3);
4386
4387         trans = btrfs_start_transaction(root, 1);
4388         if (stage == 2) {
4389                 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4390                                    name_len, 0);
4391                 goto out;
4392         }
4393         if (stage == 1) {
4394                 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4395                                filetype, &index, 1, 1);
4396                 goto out;
4397         }
4398 out:
4399         btrfs_commit_transaction(trans, root);
4400
4401         if (ret)
4402                 error("fail to repair inode %llu name %s filetype %u",
4403                       ino, name, filetype);
4404         else
4405                 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4406                        stage == 2 ? "Delete" : "Add",
4407                        ino, name, filetype);
4408
4409         return ret;
4410 }
4411
4412 /*
4413  * Traverse the given INODE_REF and call find_dir_item() to find related
4414  * DIR_ITEM/DIR_INDEX.
4415  *
4416  * @root:       the root of the fs/file tree
4417  * @ref_key:    the key of the INODE_REF
4418  * @path        the path provides node and slot
4419  * @refs:       the count of INODE_REF
4420  * @mode:       the st_mode of INODE_ITEM
4421  * @name_ret:   returns with the first ref's name
4422  * @name_len_ret:    len of the name_ret
4423  *
4424  * Return 0 if no error occurred.
4425  */
4426 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4427                            struct btrfs_path *path, char *name_ret,
4428                            u32 *namelen_ret, u64 *refs_ret, int mode)
4429 {
4430         struct btrfs_key key;
4431         struct btrfs_key location;
4432         struct btrfs_inode_ref *ref;
4433         struct extent_buffer *node;
4434         char namebuf[BTRFS_NAME_LEN] = {0};
4435         u32 total;
4436         u32 cur = 0;
4437         u32 len;
4438         u32 name_len;
4439         u64 index;
4440         int ret;
4441         int err = 0;
4442         int tmp_err;
4443         int slot;
4444         int need_research = 0;
4445         u64 refs;
4446
4447 begin:
4448         err = 0;
4449         cur = 0;
4450         refs = *refs_ret;
4451
4452         /* since after repair, path and the dir item may be changed */
4453         if (need_research) {
4454                 need_research = 0;
4455                 btrfs_release_path(path);
4456                 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4457                 /* the item was deleted, let path point to the last checked item */
4458                 if (ret > 0) {
4459                         if (path->slots[0] == 0)
4460                                 btrfs_prev_leaf(root, path);
4461                         else
4462                                 path->slots[0]--;
4463                 }
4464                 if (ret)
4465                         goto out;
4466         }
4467
4468         location.objectid = ref_key->objectid;
4469         location.type = BTRFS_INODE_ITEM_KEY;
4470         location.offset = 0;
4471         node = path->nodes[0];
4472         slot = path->slots[0];
4473
4474         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4475         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4476         total = btrfs_item_size_nr(node, slot);
4477
4478 next:
4479         /* Update inode ref count */
4480         refs++;
4481         tmp_err = 0;
4482         index = btrfs_inode_ref_index(node, ref);
4483         name_len = btrfs_inode_ref_name_len(node, ref);
4484
4485         if (name_len <= BTRFS_NAME_LEN) {
4486                 len = name_len;
4487         } else {
4488                 len = BTRFS_NAME_LEN;
4489                 warning("root %llu INODE_REF[%llu %llu] name too long",
4490                         root->objectid, ref_key->objectid, ref_key->offset);
4491         }
4492
4493         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4494
4495         /* copy the first name found to name_ret */
4496         if (refs == 1 && name_ret) {
4497                 memcpy(name_ret, namebuf, len);
4498                 *namelen_ret = len;
4499         }
4500
4501         /* Check root dir ref */
4502         if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4503                 if (index != 0 || len != strlen("..") ||
4504                     strncmp("..", namebuf, len) ||
4505                     ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4506                         /* set err bits then repair will delete the ref */
4507                         err |= DIR_INDEX_MISSING;
4508                         err |= DIR_ITEM_MISSING;
4509                 }
4510                 goto end;
4511         }
4512
4513         /* Find related DIR_INDEX */
4514         key.objectid = ref_key->offset;
4515         key.type = BTRFS_DIR_INDEX_KEY;
4516         key.offset = index;
4517         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4518                             imode_to_type(mode));
4519
4520         /* Find related dir_item */
4521         key.objectid = ref_key->offset;
4522         key.type = BTRFS_DIR_ITEM_KEY;
4523         key.offset = btrfs_name_hash(namebuf, len);
4524         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4525                             imode_to_type(mode));
4526 end:
4527         if (tmp_err && repair) {
4528                 ret = repair_ternary_lowmem(root, ref_key->offset,
4529                                             ref_key->objectid, index, namebuf,
4530                                             name_len, imode_to_type(mode),
4531                                             tmp_err);
4532                 if (!ret) {
4533                         need_research = 1;
4534                         goto begin;
4535                 }
4536         }
4537         print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4538                             imode_to_type(mode), tmp_err);
4539         err |= tmp_err;
4540         len = sizeof(*ref) + name_len;
4541         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4542         cur += len;
4543         if (cur < total)
4544                 goto next;
4545
4546 out:
4547         *refs_ret = refs;
4548         return err;
4549 }
4550
4551 /*
4552  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4553  * DIR_ITEM/DIR_INDEX.
4554  *
4555  * @root:       the root of the fs/file tree
4556  * @ref_key:    the key of the INODE_EXTREF
4557  * @refs:       the count of INODE_EXTREF
4558  * @mode:       the st_mode of INODE_ITEM
4559  *
4560  * Return 0 if no error occurred.
4561  */
4562 static int check_inode_extref(struct btrfs_root *root,
4563                               struct btrfs_key *ref_key,
4564                               struct extent_buffer *node, int slot, u64 *refs,
4565                               int mode)
4566 {
4567         struct btrfs_key key;
4568         struct btrfs_key location;
4569         struct btrfs_inode_extref *extref;
4570         char namebuf[BTRFS_NAME_LEN] = {0};
4571         u32 total;
4572         u32 cur = 0;
4573         u32 len;
4574         u32 name_len;
4575         u64 index;
4576         u64 parent;
4577         int ret;
4578         int err = 0;
4579
4580         location.objectid = ref_key->objectid;
4581         location.type = BTRFS_INODE_ITEM_KEY;
4582         location.offset = 0;
4583
4584         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4585         total = btrfs_item_size_nr(node, slot);
4586
4587 next:
4588         /* update inode ref count */
4589         (*refs)++;
4590         name_len = btrfs_inode_extref_name_len(node, extref);
4591         index = btrfs_inode_extref_index(node, extref);
4592         parent = btrfs_inode_extref_parent(node, extref);
4593         if (name_len <= BTRFS_NAME_LEN) {
4594                 len = name_len;
4595         } else {
4596                 len = BTRFS_NAME_LEN;
4597                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4598                         root->objectid, ref_key->objectid, ref_key->offset);
4599         }
4600         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4601
4602         /* Check root dir ref name */
4603         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4604                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4605                       root->objectid, ref_key->objectid, ref_key->offset,
4606                       namebuf);
4607                 err |= ROOT_DIR_ERROR;
4608         }
4609
4610         /* find related dir_index */
4611         key.objectid = parent;
4612         key.type = BTRFS_DIR_INDEX_KEY;
4613         key.offset = index;
4614         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4615         err |= ret;
4616
4617         /* find related dir_item */
4618         key.objectid = parent;
4619         key.type = BTRFS_DIR_ITEM_KEY;
4620         key.offset = btrfs_name_hash(namebuf, len);
4621         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4622         err |= ret;
4623
4624         len = sizeof(*extref) + name_len;
4625         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4626         cur += len;
4627
4628         if (cur < total)
4629                 goto next;
4630
4631         return err;
4632 }
4633
4634 /*
4635  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4636  * DIR_ITEM/DIR_INDEX match.
4637  * Return with @index_ret.
4638  *
4639  * @root:       the root of the fs/file tree
4640  * @key:        the key of the INODE_REF/INODE_EXTREF
4641  * @name:       the name in the INODE_REF/INODE_EXTREF
4642  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
 * @index_ret:  the index in the INODE_REF/INODE_EXTREF,
 *              value (u64)-1 means do not check index
4645  * @ext_ref:    the EXTENDED_IREF feature
4646  *
4647  * Return 0 if no error occurred.
4648  * Return >0 for error bitmap
4649  */
4650 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4651                           char *name, int namelen, u64 *index_ret,
4652                           unsigned int ext_ref)
4653 {
4654         struct btrfs_path path;
4655         struct btrfs_inode_ref *ref;
4656         struct btrfs_inode_extref *extref;
4657         struct extent_buffer *node;
4658         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4659         u32 total;
4660         u32 cur = 0;
4661         u32 len;
4662         u32 ref_namelen;
4663         u64 ref_index;
4664         u64 parent;
4665         u64 dir_id;
4666         int slot;
4667         int ret;
4668
4669         ASSERT(index_ret);
4670
4671         btrfs_init_path(&path);
4672         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4673         if (ret) {
4674                 ret = INODE_REF_MISSING;
4675                 goto extref;
4676         }
4677
4678         node = path.nodes[0];
4679         slot = path.slots[0];
4680
4681         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4682         total = btrfs_item_size_nr(node, slot);
4683
4684         /* Iterate all entry of INODE_REF */
4685         while (cur < total) {
4686                 ret = INODE_REF_MISSING;
4687
4688                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4689                 ref_index = btrfs_inode_ref_index(node, ref);
4690                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4691                         goto next_ref;
4692
4693                 if (cur + sizeof(*ref) + ref_namelen > total ||
4694                     ref_namelen > BTRFS_NAME_LEN) {
4695                         warning("root %llu INODE %s[%llu %llu] name too long",
4696                                 root->objectid,
4697                                 key->type == BTRFS_INODE_REF_KEY ?
4698                                         "REF" : "EXTREF",
4699                                 key->objectid, key->offset);
4700
4701                         if (cur + sizeof(*ref) > total)
4702                                 break;
4703                         len = min_t(u32, total - cur - sizeof(*ref),
4704                                     BTRFS_NAME_LEN);
4705                 } else {
4706                         len = ref_namelen;
4707                 }
4708
4709                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4710                                    len);
4711
4712                 if (len != namelen || strncmp(ref_namebuf, name, len))
4713                         goto next_ref;
4714
4715                 *index_ret = ref_index;
4716                 ret = 0;
4717                 goto out;
4718 next_ref:
4719                 len = sizeof(*ref) + ref_namelen;
4720                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4721                 cur += len;
4722         }
4723
4724 extref:
4725         /* Skip if not support EXTENDED_IREF feature */
4726         if (!ext_ref)
4727                 goto out;
4728
4729         btrfs_release_path(&path);
4730         btrfs_init_path(&path);
4731
4732         dir_id = key->offset;
4733         key->type = BTRFS_INODE_EXTREF_KEY;
4734         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4735
4736         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4737         if (ret) {
4738                 ret = INODE_REF_MISSING;
4739                 goto out;
4740         }
4741
4742         node = path.nodes[0];
4743         slot = path.slots[0];
4744
4745         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4746         cur = 0;
4747         total = btrfs_item_size_nr(node, slot);
4748
4749         /* Iterate all entry of INODE_EXTREF */
4750         while (cur < total) {
4751                 ret = INODE_REF_MISSING;
4752
4753                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4754                 ref_index = btrfs_inode_extref_index(node, extref);
4755                 parent = btrfs_inode_extref_parent(node, extref);
4756                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4757                         goto next_extref;
4758
4759                 if (parent != dir_id)
4760                         goto next_extref;
4761
4762                 if (ref_namelen <= BTRFS_NAME_LEN) {
4763                         len = ref_namelen;
4764                 } else {
4765                         len = BTRFS_NAME_LEN;
4766                         warning("root %llu INODE %s[%llu %llu] name too long",
4767                                 root->objectid,
4768                                 key->type == BTRFS_INODE_REF_KEY ?
4769                                         "REF" : "EXTREF",
4770                                 key->objectid, key->offset);
4771                 }
4772                 read_extent_buffer(node, ref_namebuf,
4773                                    (unsigned long)(extref + 1), len);
4774
4775                 if (len != namelen || strncmp(ref_namebuf, name, len))
4776                         goto next_extref;
4777
4778                 *index_ret = ref_index;
4779                 ret = 0;
4780                 goto out;
4781
4782 next_extref:
4783                 len = sizeof(*extref) + ref_namelen;
4784                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4785                 cur += len;
4786
4787         }
4788 out:
4789         btrfs_release_path(&path);
4790         return ret;
4791 }
4792
4793 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
4794                                u64 ino, u64 index, const char *namebuf,
4795                                int name_len, u8 filetype, int err)
4796 {
4797         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
4798                 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
4799                       root->objectid, key->objectid, key->offset, namebuf,
4800                       filetype,
4801                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
4802         }
4803
4804         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
4805                 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
4806                       root->objectid, key->objectid, index, namebuf, filetype,
4807                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
4808         }
4809
4810         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
4811                 error(
4812                 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
4813                       root->objectid, ino, index, namebuf, filetype,
4814                       err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
4815         }
4816
4817         if (err & INODE_REF_MISSING)
4818                 error(
4819                 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
4820                       root->objectid, ino, key->objectid, namebuf, filetype);
4821
4822 }
4823
4824 /*
4825  * Call repair_inode_item_missing and repair_ternary_lowmem to repair
4826  *
4827  * Returns error after repair
4828  */
4829 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
4830                            u64 index, u8 filetype, char *namebuf, u32 name_len,
4831                            int err)
4832 {
4833         int ret;
4834
4835         if (err & INODE_ITEM_MISSING) {
4836                 ret = repair_inode_item_missing(root, ino, filetype);
4837                 if (!ret)
4838                         err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
4839         }
4840
4841         if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
4842                 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
4843                                             name_len, filetype, err);
4844                 if (!ret) {
4845                         err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
4846                         err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
4847                         err &= ~(INODE_REF_MISSING);
4848                 }
4849         }
4850         return err;
4851 }
4852
4853 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
4854                 u64 *size_ret)
4855 {
4856         struct btrfs_key key;
4857         struct btrfs_path path;
4858         u32 len;
4859         struct btrfs_dir_item *di;
4860         int ret;
4861         int cur = 0;
4862         int total = 0;
4863
4864         ASSERT(size_ret);
4865         *size_ret = 0;
4866
4867         key.objectid = ino;
4868         key.type = type;
4869         key.offset = (u64)-1;
4870
4871         btrfs_init_path(&path);
4872         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4873         if (ret < 0) {
4874                 ret = -EIO;
4875                 goto out;
4876         }
4877         /* if found, go to spacial case */
4878         if (ret == 0)
4879                 goto special_case;
4880
4881 loop:
4882         ret = btrfs_previous_item(root, &path, ino, type);
4883
4884         if (ret) {
4885                 ret = 0;
4886                 goto out;
4887         }
4888
4889 special_case:
4890         di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
4891         cur = 0;
4892         total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
4893
4894         while (cur < total) {
4895                 len = btrfs_dir_name_len(path.nodes[0], di);
4896                 if (len > BTRFS_NAME_LEN)
4897                         len = BTRFS_NAME_LEN;
4898                 *size_ret += len;
4899
4900                 len += btrfs_dir_data_len(path.nodes[0], di);
4901                 len += sizeof(*di);
4902                 di = (struct btrfs_dir_item *)((char *)di + len);
4903                 cur += len;
4904         }
4905         goto loop;
4906
4907 out:
4908         btrfs_release_path(&path);
4909         return ret;
4910 }
4911
4912 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
4913 {
4914         u64 item_size;
4915         u64 index_size;
4916         int ret;
4917
4918         ASSERT(size);
4919         ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
4920         if (ret)
4921                 goto out;
4922
4923         ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
4924         if (ret)
4925                 goto out;
4926
4927         *size = item_size + index_size;
4928
4929 out:
4930         if (ret)
4931                 error("failed to count root %llu INODE[%llu] root size",
4932                       root->objectid, ino);
4933         return ret;
4934 }
4935
4936 /*
4937  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4938  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4939  *
4940  * @root:       the root of the fs/file tree
4941  * @key:        the key of the INODE_REF/INODE_EXTREF
4942  * @path:       the path
4943  * @size:       the st_size of the INODE_ITEM
4944  * @ext_ref:    the EXTENDED_IREF feature
4945  *
4946  * Return 0 if no error occurred.
4947  * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
4948  */
4949 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
4950                           struct btrfs_path *path, u64 *size,
4951                           unsigned int ext_ref)
4952 {
4953         struct btrfs_dir_item *di;
4954         struct btrfs_inode_item *ii;
4955         struct btrfs_key key;
4956         struct btrfs_key location;
4957         struct extent_buffer *node;
4958         int slot;
4959         char namebuf[BTRFS_NAME_LEN] = {0};
4960         u32 total;
4961         u32 cur = 0;
4962         u32 len;
4963         u32 name_len;
4964         u32 data_len;
4965         u8 filetype;
4966         u32 mode = 0;
4967         u64 index;
4968         int ret;
4969         int err;
4970         int tmp_err;
4971         int need_research = 0;
4972
4973         /*
4974          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4975          * ignore index check.
4976          */
4977         if (di_key->type == BTRFS_DIR_INDEX_KEY)
4978                 index = di_key->offset;
4979         else
4980                 index = (u64)-1;
4981 begin:
4982         err = 0;
4983         cur = 0;
4984
4985         /* since after repair, path and the dir item may be changed */
4986         if (need_research) {
4987                 need_research = 0;
4988                 err |= DIR_COUNT_AGAIN;
4989                 btrfs_release_path(path);
4990                 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
4991                 /* the item was deleted, let path point the last checked item */
4992                 if (ret > 0) {
4993                         if (path->slots[0] == 0)
4994                                 btrfs_prev_leaf(root, path);
4995                         else
4996                                 path->slots[0]--;
4997                 }
4998                 if (ret)
4999                         goto out;
5000         }
5001
5002         node = path->nodes[0];
5003         slot = path->slots[0];
5004
5005         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5006         total = btrfs_item_size_nr(node, slot);
5007         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5008
5009         while (cur < total) {
5010                 data_len = btrfs_dir_data_len(node, di);
5011                 tmp_err = 0;
5012                 if (data_len)
5013                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5014                               root->objectid,
5015               di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5016                               di_key->objectid, di_key->offset, data_len);
5017
5018                 name_len = btrfs_dir_name_len(node, di);
5019                 if (name_len <= BTRFS_NAME_LEN) {
5020                         len = name_len;
5021                 } else {
5022                         len = BTRFS_NAME_LEN;
5023                         warning("root %llu %s[%llu %llu] name too long",
5024                                 root->objectid,
5025                 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5026                                 di_key->objectid, di_key->offset);
5027                 }
5028                 (*size) += name_len;
5029                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5030                                    len);
5031                 filetype = btrfs_dir_type(node, di);
5032
5033                 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5034                     di_key->offset != btrfs_name_hash(namebuf, len)) {
5035                         err |= -EIO;
5036                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5037                         root->objectid, di_key->objectid, di_key->offset,
5038                         namebuf, len, filetype, di_key->offset,
5039                         btrfs_name_hash(namebuf, len));
5040                 }
5041
5042                 btrfs_dir_item_key_to_cpu(node, di, &location);
5043                 /* Ignore related ROOT_ITEM check */
5044                 if (location.type == BTRFS_ROOT_ITEM_KEY)
5045                         goto next;
5046
5047                 btrfs_release_path(path);
5048                 /* Check relative INODE_ITEM(existence/filetype) */
5049                 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5050                 if (ret) {
5051                         tmp_err |= INODE_ITEM_MISSING;
5052                         goto next;
5053                 }
5054
5055                 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5056                                     struct btrfs_inode_item);
5057                 mode = btrfs_inode_mode(path->nodes[0], ii);
5058                 if (imode_to_type(mode) != filetype) {
5059                         tmp_err |= INODE_ITEM_MISMATCH;
5060                         goto next;
5061                 }
5062
5063                 /* Check relative INODE_REF/INODE_EXTREF */
5064                 key.objectid = location.objectid;
5065                 key.type = BTRFS_INODE_REF_KEY;
5066                 key.offset = di_key->objectid;
5067                 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5068                                           &index, ext_ref);
5069
5070                 /* check relative INDEX/ITEM */
5071                 key.objectid = di_key->objectid;
5072                 if (key.type == BTRFS_DIR_ITEM_KEY) {
5073                         key.type = BTRFS_DIR_INDEX_KEY;
5074                         key.offset = index;
5075                 } else {
5076                         key.type = BTRFS_DIR_ITEM_KEY;
5077                         key.offset = btrfs_name_hash(namebuf, name_len);
5078                 }
5079
5080                 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5081                                          name_len, filetype);
5082                 /* find_dir_item may find index */
5083                 if (key.type == BTRFS_DIR_INDEX_KEY)
5084                         index = key.offset;
5085 next:
5086
5087                 if (tmp_err && repair) {
5088                         ret = repair_dir_item(root, di_key->objectid,
5089                                               location.objectid, index,
5090                                               imode_to_type(mode), namebuf,
5091                                               name_len, tmp_err);
5092                         if (ret != tmp_err) {
5093                                 need_research = 1;
5094                                 goto begin;
5095                         }
5096                 }
5097                 btrfs_release_path(path);
5098                 print_dir_item_err(root, di_key, location.objectid, index,
5099                                    namebuf, name_len, filetype, tmp_err);
5100                 err |= tmp_err;
5101                 len = sizeof(*di) + name_len + data_len;
5102                 di = (struct btrfs_dir_item *)((char *)di + len);
5103                 cur += len;
5104
5105                 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5106                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5107                               root->objectid, di_key->objectid,
5108                               di_key->offset);
5109                         break;
5110                 }
5111         }
5112 out:
5113         /* research path */
5114         btrfs_release_path(path);
5115         ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5116         if (ret)
5117                 err |= ret > 0 ? -ENOENT : ret;
5118         return err;
5119 }
5120
5121 /*
5122  * Wrapper function of btrfs_punch_hole.
5123  *
5124  * Returns 0 means success.
5125  * Returns not 0 means error.
5126  */
5127 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5128                              u64 len)
5129 {
5130         struct btrfs_trans_handle *trans;
5131         int ret = 0;
5132
5133         trans = btrfs_start_transaction(root, 1);
5134         if (IS_ERR(trans))
5135                 return PTR_ERR(trans);
5136
5137         ret = btrfs_punch_hole(trans, root, ino, start, len);
5138         if (ret)
5139                 error("failed to add hole [%llu, %llu] in inode [%llu]",
5140                       start, len, ino);
5141         else
5142                 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5143                        ino);
5144
5145         btrfs_commit_transaction(trans, root);
5146         return ret;
5147 }
5148
5149 /*
5150  * Check file extent datasum/hole, update the size of the file extents,
5151  * check and update the last offset of the file extent.
5152  *
5153  * @root:       the root of fs/file tree.
5154  * @fkey:       the key of the file extent.
5155  * @nodatasum:  INODE_NODATASUM feature.
5156  * @size:       the sum of all EXTENT_DATA items size for this inode.
5157  * @end:        the offset of the last extent.
5158  *
5159  * Return 0 if no error occurred.
5160  */
5161 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5162                              struct extent_buffer *node, int slot,
5163                              unsigned int nodatasum, u64 *size, u64 *end)
5164 {
5165         struct btrfs_file_extent_item *fi;
5166         u64 disk_bytenr;
5167         u64 disk_num_bytes;
5168         u64 extent_num_bytes;
5169         u64 extent_offset;
5170         u64 csum_found;         /* In byte size, sectorsize aligned */
5171         u64 search_start;       /* Logical range start we search for csum */
5172         u64 search_len;         /* Logical range len we search for csum */
5173         unsigned int extent_type;
5174         unsigned int is_hole;
5175         int compressed = 0;
5176         int ret;
5177         int err = 0;
5178
5179         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5180
5181         /* Check inline extent */
5182         extent_type = btrfs_file_extent_type(node, fi);
5183         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5184                 struct btrfs_item *e = btrfs_item_nr(slot);
5185                 u32 item_inline_len;
5186
5187                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5188                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5189                 compressed = btrfs_file_extent_compression(node, fi);
5190                 if (extent_num_bytes == 0) {
5191                         error(
5192                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5193                                 root->objectid, fkey->objectid, fkey->offset);
5194                         err |= FILE_EXTENT_ERROR;
5195                 }
5196                 if (!compressed && extent_num_bytes != item_inline_len) {
5197                         error(
5198                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5199                                 root->objectid, fkey->objectid, fkey->offset,
5200                                 extent_num_bytes, item_inline_len);
5201                         err |= FILE_EXTENT_ERROR;
5202                 }
5203                 *end += extent_num_bytes;
5204                 *size += extent_num_bytes;
5205                 return err;
5206         }
5207
5208         /* Check extent type */
5209         if (extent_type != BTRFS_FILE_EXTENT_REG &&
5210                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5211                 err |= FILE_EXTENT_ERROR;
5212                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5213                       root->objectid, fkey->objectid, fkey->offset);
5214                 return err;
5215         }
5216
5217         /* Check REG_EXTENT/PREALLOC_EXTENT */
5218         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5219         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5220         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5221         extent_offset = btrfs_file_extent_offset(node, fi);
5222         compressed = btrfs_file_extent_compression(node, fi);
5223         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5224
5225         /*
5226          * Check EXTENT_DATA csum
5227          *
5228          * For plain (uncompressed) extent, we should only check the range
5229          * we're referring to, as it's possible that part of prealloc extent
5230          * has been written, and has csum:
5231          *
5232          * |<--- Original large preallocated extent A ---->|
5233          * |<- Prealloc File Extent ->|<- Regular Extent ->|
5234          *      No csum                         Has csum
5235          *
5236          * For compressed extent, we should check the whole range.
5237          */
5238         if (!compressed) {
5239                 search_start = disk_bytenr + extent_offset;
5240                 search_len = extent_num_bytes;
5241         } else {
5242                 search_start = disk_bytenr;
5243                 search_len = disk_num_bytes;
5244         }
5245         ret = count_csum_range(root->fs_info, search_start, search_len, &csum_found);
5246         if (csum_found > 0 && nodatasum) {
5247                 err |= ODD_CSUM_ITEM;
5248                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5249                       root->objectid, fkey->objectid, fkey->offset);
5250         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5251                    !is_hole && (ret < 0 || csum_found < search_len)) {
5252                 err |= CSUM_ITEM_MISSING;
5253                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5254                       root->objectid, fkey->objectid, fkey->offset,
5255                       csum_found, search_len);
5256         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5257                 err |= ODD_CSUM_ITEM;
5258                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5259                       root->objectid, fkey->objectid, fkey->offset, csum_found);
5260         }
5261
5262         /* Check EXTENT_DATA hole */
5263         if (!no_holes && *end != fkey->offset) {
5264                 if (repair)
5265                         ret = punch_extent_hole(root, fkey->objectid,
5266                                                 *end, fkey->offset - *end);
5267                 if (!repair || ret) {
5268                         err |= FILE_EXTENT_ERROR;
5269                         error(
5270 "root %llu EXTENT_DATA[%llu %llu] gap exists, expected: EXTENT_DATA[%llu %llu]",
5271                                 root->objectid, fkey->objectid, fkey->offset,
5272                                 fkey->objectid, *end);
5273                 }
5274         }
5275
5276         *end += extent_num_bytes;
5277         if (!is_hole)
5278                 *size += extent_num_bytes;
5279
5280         return err;
5281 }
5282
5283 /*
5284  * Set inode item nbytes to @nbytes
5285  *
5286  * Returns  0     on success
5287  * Returns  != 0  on error
5288  */
5289 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5290                                       struct btrfs_path *path,
5291                                       u64 ino, u64 nbytes)
5292 {
5293         struct btrfs_trans_handle *trans;
5294         struct btrfs_inode_item *ii;
5295         struct btrfs_key key;
5296         struct btrfs_key research_key;
5297         int err = 0;
5298         int ret;
5299
5300         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5301
5302         key.objectid = ino;
5303         key.type = BTRFS_INODE_ITEM_KEY;
5304         key.offset = 0;
5305
5306         trans = btrfs_start_transaction(root, 1);
5307         if (IS_ERR(trans)) {
5308                 ret = PTR_ERR(trans);
5309                 err |= ret;
5310                 goto out;
5311         }
5312
5313         btrfs_release_path(path);
5314         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5315         if (ret > 0)
5316                 ret = -ENOENT;
5317         if (ret) {
5318                 err |= ret;
5319                 goto fail;
5320         }
5321
5322         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5323                             struct btrfs_inode_item);
5324         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5325         btrfs_mark_buffer_dirty(path->nodes[0]);
5326 fail:
5327         btrfs_commit_transaction(trans, root);
5328 out:
5329         if (ret)
5330                 error("failed to set nbytes in inode %llu root %llu",
5331                       ino, root->root_key.objectid);
5332         else
5333                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5334                        root->root_key.objectid, nbytes);
5335
5336         /* research path */
5337         btrfs_release_path(path);
5338         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5339         err |= ret;
5340
5341         return err;
5342 }
5343
5344 /*
5345  * Set directory inode isize to @isize.
5346  *
5347  * Returns 0     on success.
5348  * Returns != 0  on error.
5349  */
5350 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5351                                    struct btrfs_path *path,
5352                                    u64 ino, u64 isize)
5353 {
5354         struct btrfs_trans_handle *trans;
5355         struct btrfs_inode_item *ii;
5356         struct btrfs_key key;
5357         struct btrfs_key research_key;
5358         int ret;
5359         int err = 0;
5360
5361         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5362
5363         key.objectid = ino;
5364         key.type = BTRFS_INODE_ITEM_KEY;
5365         key.offset = 0;
5366
5367         trans = btrfs_start_transaction(root, 1);
5368         if (IS_ERR(trans)) {
5369                 ret = PTR_ERR(trans);
5370                 err |= ret;
5371                 goto out;
5372         }
5373
5374         btrfs_release_path(path);
5375         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5376         if (ret > 0)
5377                 ret = -ENOENT;
5378         if (ret) {
5379                 err |= ret;
5380                 goto fail;
5381         }
5382
5383         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5384                             struct btrfs_inode_item);
5385         btrfs_set_inode_size(path->nodes[0], ii, isize);
5386         btrfs_mark_buffer_dirty(path->nodes[0]);
5387 fail:
5388         btrfs_commit_transaction(trans, root);
5389 out:
5390         if (ret)
5391                 error("failed to set isize in inode %llu root %llu",
5392                       ino, root->root_key.objectid);
5393         else
5394                 printf("Set isize in inode %llu root %llu to %llu\n",
5395                        ino, root->root_key.objectid, isize);
5396
5397         btrfs_release_path(path);
5398         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5399         err |= ret;
5400
5401         return err;
5402 }
5403
5404 /*
5405  * Wrapper function for btrfs_add_orphan_item().
5406  *
5407  * Returns 0     on success.
5408  * Returns != 0  on error.
5409  */
5410 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5411                                            struct btrfs_path *path, u64 ino)
5412 {
5413         struct btrfs_trans_handle *trans;
5414         struct btrfs_key research_key;
5415         int ret;
5416         int err = 0;
5417
5418         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5419
5420         trans = btrfs_start_transaction(root, 1);
5421         if (IS_ERR(trans)) {
5422                 ret = PTR_ERR(trans);
5423                 err |= ret;
5424                 goto out;
5425         }
5426
5427         btrfs_release_path(path);
5428         ret = btrfs_add_orphan_item(trans, root, path, ino);
5429         err |= ret;
5430         btrfs_commit_transaction(trans, root);
5431 out:
5432         if (ret)
5433                 error("failed to add inode %llu as orphan item root %llu",
5434                       ino, root->root_key.objectid);
5435         else
5436                 printf("Added inode %llu as orphan item root %llu\n",
5437                        ino, root->root_key.objectid);
5438
5439         btrfs_release_path(path);
5440         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5441         err |= ret;
5442
5443         return err;
5444 }
5445
5446 /* Set inode_item nlink to @ref_count.
5447  * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
5448  *
5449  * Returns 0 on success
5450  */
5451 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
5452                                       struct btrfs_path *path, u64 ino,
5453                                       const char *name, u32 namelen,
5454                                       u64 ref_count, u8 filetype, u64 *nlink)
5455 {
5456         struct btrfs_trans_handle *trans;
5457         struct btrfs_inode_item *ii;
5458         struct btrfs_key key;
5459         struct btrfs_key old_key;
5460         char namebuf[BTRFS_NAME_LEN] = {0};
5461         int name_len;
5462         int ret;
5463         int ret2;
5464
5465         /* save the key */
5466         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
5467
5468         if (name && namelen) {
5469                 ASSERT(namelen <= BTRFS_NAME_LEN);
5470                 memcpy(namebuf, name, namelen);
5471                 name_len = namelen;
5472         } else {
5473                 sprintf(namebuf, "%llu", ino);
5474                 name_len = count_digits(ino);
5475                 printf("Can't find file name for inode %llu, use %s instead\n",
5476                        ino, namebuf);
5477         }
5478
5479         trans = btrfs_start_transaction(root, 1);
5480         if (IS_ERR(trans)) {
5481                 ret = PTR_ERR(trans);
5482                 goto out;
5483         }
5484
5485         btrfs_release_path(path);
5486         /* if refs is 0, put it into lostfound */
5487         if (ref_count == 0) {
5488                 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
5489                                               name_len, filetype, &ref_count);
5490                 if (ret)
5491                         goto fail;
5492         }
5493
5494         /* reset inode_item's nlink to ref_count */
5495         key.objectid = ino;
5496         key.type = BTRFS_INODE_ITEM_KEY;
5497         key.offset = 0;
5498
5499         btrfs_release_path(path);
5500         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5501         if (ret > 0)
5502                 ret = -ENOENT;
5503         if (ret)
5504                 goto fail;
5505
5506         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5507                             struct btrfs_inode_item);
5508         btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
5509         btrfs_mark_buffer_dirty(path->nodes[0]);
5510
5511         if (nlink)
5512                 *nlink = ref_count;
5513 fail:
5514         btrfs_commit_transaction(trans, root);
5515 out:
5516         if (ret)
5517                 error(
5518         "fail to repair nlink of inode %llu root %llu name %s filetype %u",
5519                        root->objectid, ino, namebuf, filetype);
5520         else
5521                 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
5522                        root->objectid, ino, namebuf, filetype);
5523
5524         /* research */
5525         btrfs_release_path(path);
5526         ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
5527         if (ret2 < 0)
5528                 return ret |= ret2;
5529         return ret;
5530 }
5531
5532 /*
5533  * Check INODE_ITEM and related ITEMs (the same inode number)
5534  * 1. check link count
5535  * 2. check inode ref/extref
5536  * 3. check dir item/index
5537  *
5538  * @ext_ref:    the EXTENDED_IREF feature
5539  *
5540  * Return 0 if no error occurred.
5541  * Return >0 for error or hit the traversal is done(by error bitmap)
5542  */
5543 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5544                             unsigned int ext_ref)
5545 {
5546         struct extent_buffer *node;
5547         struct btrfs_inode_item *ii;
5548         struct btrfs_key key;
5549         struct btrfs_key last_key;
5550         u64 inode_id;
5551         u32 mode;
5552         u64 nlink;
5553         u64 nbytes;
5554         u64 isize;
5555         u64 size = 0;
5556         u64 refs = 0;
5557         u64 extent_end = 0;
5558         u64 extent_size = 0;
5559         unsigned int dir;
5560         unsigned int nodatasum;
5561         int slot;
5562         int ret;
5563         int err = 0;
5564         char namebuf[BTRFS_NAME_LEN] = {0};
5565         u32 name_len = 0;
5566
5567         node = path->nodes[0];
5568         slot = path->slots[0];
5569
5570         btrfs_item_key_to_cpu(node, &key, slot);
5571         inode_id = key.objectid;
5572
5573         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5574                 ret = btrfs_next_item(root, path);
5575                 if (ret > 0)
5576                         err |= LAST_ITEM;
5577                 return err;
5578         }
5579
5580         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5581         isize = btrfs_inode_size(node, ii);
5582         nbytes = btrfs_inode_nbytes(node, ii);
5583         mode = btrfs_inode_mode(node, ii);
5584         dir = imode_to_type(mode) == BTRFS_FT_DIR;
5585         nlink = btrfs_inode_nlink(node, ii);
5586         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5587
5588         while (1) {
5589                 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
5590                 ret = btrfs_next_item(root, path);
5591                 if (ret < 0) {
5592                         /* out will fill 'err' rusing current statistics */
5593                         goto out;
5594                 } else if (ret > 0) {
5595                         err |= LAST_ITEM;
5596                         goto out;
5597                 }
5598
5599                 node = path->nodes[0];
5600                 slot = path->slots[0];
5601                 btrfs_item_key_to_cpu(node, &key, slot);
5602                 if (key.objectid != inode_id)
5603                         goto out;
5604
5605                 switch (key.type) {
5606                 case BTRFS_INODE_REF_KEY:
5607                         ret = check_inode_ref(root, &key, path, namebuf,
5608                                               &name_len, &refs, mode);
5609                         err |= ret;
5610                         break;
5611                 case BTRFS_INODE_EXTREF_KEY:
5612                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5613                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
5614                                         root->objectid, key.objectid,
5615                                         key.offset);
5616                         ret = check_inode_extref(root, &key, node, slot, &refs,
5617                                                  mode);
5618                         err |= ret;
5619                         break;
5620                 case BTRFS_DIR_ITEM_KEY:
5621                 case BTRFS_DIR_INDEX_KEY:
5622                         if (!dir) {
5623                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5624                                         root->objectid, inode_id,
5625                                         imode_to_type(mode), key.objectid,
5626                                         key.offset);
5627                         }
5628                         ret = check_dir_item(root, &key, path, &size, ext_ref);
5629                         err |= ret;
5630                         break;
5631                 case BTRFS_EXTENT_DATA_KEY:
5632                         if (dir) {
5633                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5634                                         root->objectid, inode_id, key.objectid,
5635                                         key.offset);
5636                         }
5637                         ret = check_file_extent(root, &key, node, slot,
5638                                                 nodatasum, &extent_size,
5639                                                 &extent_end);
5640                         err |= ret;
5641                         break;
5642                 case BTRFS_XATTR_ITEM_KEY:
5643                         break;
5644                 default:
5645                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5646                               key.objectid, key.type, key.offset);
5647                 }
5648         }
5649
5650 out:
5651         if (err & LAST_ITEM) {
5652                 btrfs_release_path(path);
5653                 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
5654                 if (ret)
5655                         return err;
5656         }
5657
5658         /* verify INODE_ITEM nlink/isize/nbytes */
5659         if (dir) {
5660                 if (repair && (err & DIR_COUNT_AGAIN)) {
5661                         err &= ~DIR_COUNT_AGAIN;
5662                         count_dir_isize(root, inode_id, &size);
5663                 }
5664
5665                 if ((nlink != 1 || refs != 1) && repair) {
5666                         ret = repair_inode_nlinks_lowmem(root, path, inode_id,
5667                                 namebuf, name_len, refs, imode_to_type(mode),
5668                                 &nlink);
5669                 }
5670
5671                 if (nlink != 1) {
5672                         err |= LINK_COUNT_ERROR;
5673                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5674                               root->objectid, inode_id, nlink);
5675                 }
5676
5677                 /*
5678                  * Just a warning, as dir inode nbytes is just an
5679                  * instructive value.
5680                  */
5681                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5682                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5683                                 root->objectid, inode_id,
5684                                 root->fs_info->nodesize);
5685                 }
5686
5687                 if (isize != size) {
5688                         if (repair)
5689                                 ret = repair_dir_isize_lowmem(root, path,
5690                                                               inode_id, size);
5691                         if (!repair || ret) {
5692                                 err |= ISIZE_ERROR;
5693                                 error(
5694                 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5695                                       root->objectid, inode_id, isize, size);
5696                         }
5697                 }
5698         } else {
5699                 if (nlink != refs) {
5700                         if (repair)
5701                                 ret = repair_inode_nlinks_lowmem(root, path,
5702                                          inode_id, namebuf, name_len, refs,
5703                                          imode_to_type(mode), &nlink);
5704                         if (!repair || ret) {
5705                                 err |= LINK_COUNT_ERROR;
5706                                 error(
5707                 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5708                                       root->objectid, inode_id, nlink, refs);
5709                         }
5710                 } else if (!nlink) {
5711                         if (repair)
5712                                 ret = repair_inode_orphan_item_lowmem(root,
5713                                                               path, inode_id);
5714                         if (!repair || ret) {
5715                                 err |= ORPHAN_ITEM;
5716                                 error("root %llu INODE[%llu] is orphan item",
5717                                       root->objectid, inode_id);
5718                         }
5719                 }
5720
5721                 if (!nbytes && !no_holes && extent_end < isize) {
5722                         if (repair)
5723                                 ret = punch_extent_hole(root, inode_id,
5724                                                 extent_end, isize - extent_end);
5725                         if (!repair || ret) {
5726                                 err |= NBYTES_ERROR;
5727                                 error(
5728         "root %llu INODE[%llu] size %llu should have a file extent hole",
5729                                       root->objectid, inode_id, isize);
5730                         }
5731                 }
5732
5733                 if (nbytes != extent_size) {
5734                         if (repair)
5735                                 ret = repair_inode_nbytes_lowmem(root, path,
5736                                                          inode_id, extent_size);
5737                         if (!repair || ret) {
5738                                 err |= NBYTES_ERROR;
5739                                 error(
5740         "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5741                                       root->objectid, inode_id, nbytes,
5742                                       extent_size);
5743                         }
5744                 }
5745         }
5746
5747         if (err & LAST_ITEM)
5748                 btrfs_next_item(root, path);
5749         return err;
5750 }
5751
5752 /*
5753  * Insert the missing inode item and inode ref.
5754  *
5755  * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
5756  * Root dir should be handled specially because root dir is the root of fs.
5757  *
5758  * returns err (>0 or 0) after repair
5759  */
5760 static int repair_fs_first_inode(struct btrfs_root *root, int err)
5761 {
5762         struct btrfs_trans_handle *trans;
5763         struct btrfs_key key;
5764         struct btrfs_path path;
5765         int filetype = BTRFS_FT_DIR;
5766         int ret = 0;
5767
5768         btrfs_init_path(&path);
5769
5770         if (err & INODE_REF_MISSING) {
5771                 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5772                 key.type = BTRFS_INODE_REF_KEY;
5773                 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5774
5775                 trans = btrfs_start_transaction(root, 1);
5776                 if (IS_ERR(trans)) {
5777                         ret = PTR_ERR(trans);
5778                         goto out;
5779                 }
5780
5781                 btrfs_release_path(&path);
5782                 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
5783                 if (ret)
5784                         goto trans_fail;
5785
5786                 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
5787                                              BTRFS_FIRST_FREE_OBJECTID,
5788                                              BTRFS_FIRST_FREE_OBJECTID, 0);
5789                 if (ret)
5790                         goto trans_fail;
5791
5792                 printf("Add INODE_REF[%llu %llu] name %s\n",
5793                        BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
5794                        "..");
5795                 err &= ~INODE_REF_MISSING;
5796 trans_fail:
5797                 if (ret)
5798                         error("fail to insert first inode's ref");
5799                 btrfs_commit_transaction(trans, root);
5800         }
5801
5802         if (err & INODE_ITEM_MISSING) {
5803                 ret = repair_inode_item_missing(root,
5804                                         BTRFS_FIRST_FREE_OBJECTID, filetype);
5805                 if (ret)
5806                         goto out;
5807                 err &= ~INODE_ITEM_MISSING;
5808         }
5809 out:
5810         if (ret)
5811                 error("fail to repair first inode");
5812         btrfs_release_path(&path);
5813         return err;
5814 }
5815
5816 /*
5817  * check first root dir's inode_item and inode_ref
5818  *
5819  * returns 0 means no error
5820  * returns >0 means error
5821  * returns <0 means fatal error
5822  */
5823 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5824 {
5825         struct btrfs_path path;
5826         struct btrfs_key key;
5827         struct btrfs_inode_item *ii;
5828         u64 index;
5829         u32 mode;
5830         int err = 0;
5831         int ret;
5832
5833         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5834         key.type = BTRFS_INODE_ITEM_KEY;
5835         key.offset = 0;
5836
5837         /* For root being dropped, we don't need to check first inode */
5838         if (btrfs_root_refs(&root->root_item) == 0 &&
5839             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5840             BTRFS_FIRST_FREE_OBJECTID)
5841                 return 0;
5842
5843         btrfs_init_path(&path);
5844         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5845         if (ret < 0)
5846                 goto out;
5847         if (ret > 0) {
5848                 ret = 0;
5849                 err |= INODE_ITEM_MISSING;
5850         } else {
5851                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
5852                                     struct btrfs_inode_item);
5853                 mode = btrfs_inode_mode(path.nodes[0], ii);
5854                 if (imode_to_type(mode) != BTRFS_FT_DIR)
5855                         err |= INODE_ITEM_MISMATCH;
5856         }
5857
5858         /* lookup first inode ref */
5859         key.offset = BTRFS_FIRST_FREE_OBJECTID;
5860         key.type = BTRFS_INODE_REF_KEY;
5861         /* special index value */
5862         index = 0;
5863
5864         ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
5865         if (ret < 0)
5866                 goto out;
5867         err |= ret;
5868
5869 out:
5870         btrfs_release_path(&path);
5871
5872         if (err && repair)
5873                 err = repair_fs_first_inode(root, err);
5874
5875         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
5876                 error("root dir INODE_ITEM is %s",
5877                       err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5878         if (err & INODE_REF_MISSING)
5879                 error("root dir INODE_REF is missing");
5880
5881         return ret < 0 ? ret : err;
5882 }
5883
5884 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5885                                                 u64 parent, u64 root)
5886 {
5887         struct rb_node *node;
5888         struct tree_backref *back = NULL;
5889         struct tree_backref match = {
5890                 .node = {
5891                         .is_data = 0,
5892                 },
5893         };
5894
5895         if (parent) {
5896                 match.parent = parent;
5897                 match.node.full_backref = 1;
5898         } else {
5899                 match.root = root;
5900         }
5901
5902         node = rb_search(&rec->backref_tree, &match.node.node,
5903                          (rb_compare_keys)compare_extent_backref, NULL);
5904         if (node)
5905                 back = to_tree_backref(rb_node_to_extent_backref(node));
5906
5907         return back;
5908 }
5909
5910 static struct data_backref *find_data_backref(struct extent_record *rec,
5911                                                 u64 parent, u64 root,
5912                                                 u64 owner, u64 offset,
5913                                                 int found_ref,
5914                                                 u64 disk_bytenr, u64 bytes)
5915 {
5916         struct rb_node *node;
5917         struct data_backref *back = NULL;
5918         struct data_backref match = {
5919                 .node = {
5920                         .is_data = 1,
5921                 },
5922                 .owner = owner,
5923                 .offset = offset,
5924                 .bytes = bytes,
5925                 .found_ref = found_ref,
5926                 .disk_bytenr = disk_bytenr,
5927         };
5928
5929         if (parent) {
5930                 match.parent = parent;
5931                 match.node.full_backref = 1;
5932         } else {
5933                 match.root = root;
5934         }
5935
5936         node = rb_search(&rec->backref_tree, &match.node.node,
5937                          (rb_compare_keys)compare_extent_backref, NULL);
5938         if (node)
5939                 back = to_data_backref(rb_node_to_extent_backref(node));
5940
5941         return back;
5942 }
5943 /*
5944  * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
5945  * blocks and integrity of fs tree items.
5946  *
5947  * @root:         the root of the tree to be checked.
5948  * @ext_ref       feature EXTENDED_IREF is enable or not.
5949  * @account       if NOT 0 means check the tree (including tree)'s treeblocks.
5950  *                otherwise means check fs tree(s) items relationship and
5951  *                @root MUST be a fs tree root.
5952  * Returns 0      represents OK.
5953  * Returns not 0  represents error.
5954  */
5955 static int check_btrfs_root(struct btrfs_trans_handle *trans,
5956                             struct btrfs_root *root, unsigned int ext_ref,
5957                             int check_all)
5958
5959 {
5960         struct btrfs_path path;
5961         struct node_refs nrefs;
5962         struct btrfs_root_item *root_item = &root->root_item;
5963         int ret;
5964         int level;
5965         int err = 0;
5966
5967         memset(&nrefs, 0, sizeof(nrefs));
5968         if (!check_all) {
5969                 /*
5970                  * We need to manually check the first inode item (256)
5971                  * As the following traversal function will only start from
5972                  * the first inode item in the leaf, if inode item (256) is
5973                  * missing we will skip it forever.
5974                  */
5975                 ret = check_fs_first_inode(root, ext_ref);
5976                 if (ret < 0)
5977                         return ret;
5978         }
5979
5980
5981         level = btrfs_header_level(root->node);
5982         btrfs_init_path(&path);
5983
5984         if (btrfs_root_refs(root_item) > 0 ||
5985             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5986                 path.nodes[level] = root->node;
5987                 path.slots[level] = 0;
5988                 extent_buffer_get(root->node);
5989         } else {
5990                 struct btrfs_key key;
5991
5992                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5993                 level = root_item->drop_level;
5994                 path.lowest_level = level;
5995                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5996                 if (ret < 0)
5997                         goto out;
5998                 ret = 0;
5999         }
6000
6001         while (1) {
6002                 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6003                                         ext_ref, check_all);
6004
6005                 err |= !!ret;
6006
6007                 /* if ret is negative, walk shall stop */
6008                 if (ret < 0) {
6009                         ret = err;
6010                         break;
6011                 }
6012
6013                 ret = walk_up_tree_v2(root, &path, &level);
6014                 if (ret != 0) {
6015                         /* Normal exit, reset ret to err */
6016                         ret = err;
6017                         break;
6018                 }
6019         }
6020
6021 out:
6022         btrfs_release_path(&path);
6023         return ret;
6024 }
6025
6026 /*
6027  * Iterate all items in the tree and call check_inode_item() to check.
6028  *
6029  * @root:       the root of the tree to be checked.
6030  * @ext_ref:    the EXTENDED_IREF feature
6031  *
6032  * Return 0 if no error found.
6033  * Return <0 for error.
6034  */
6035 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6036 {
6037         reset_cached_block_groups(root->fs_info);
6038         return check_btrfs_root(NULL, root, ext_ref, 0);
6039 }
6040
6041 /*
6042  * Find the relative ref for root_ref and root_backref.
6043  *
6044  * @root:       the root of the root tree.
6045  * @ref_key:    the key of the root ref.
6046  *
6047  * Return 0 if no error occurred.
6048  */
6049 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6050                           struct extent_buffer *node, int slot)
6051 {
6052         struct btrfs_path path;
6053         struct btrfs_key key;
6054         struct btrfs_root_ref *ref;
6055         struct btrfs_root_ref *backref;
6056         char ref_name[BTRFS_NAME_LEN] = {0};
6057         char backref_name[BTRFS_NAME_LEN] = {0};
6058         u64 ref_dirid;
6059         u64 ref_seq;
6060         u32 ref_namelen;
6061         u64 backref_dirid;
6062         u64 backref_seq;
6063         u32 backref_namelen;
6064         u32 len;
6065         int ret;
6066         int err = 0;
6067
6068         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6069         ref_dirid = btrfs_root_ref_dirid(node, ref);
6070         ref_seq = btrfs_root_ref_sequence(node, ref);
6071         ref_namelen = btrfs_root_ref_name_len(node, ref);
6072
6073         if (ref_namelen <= BTRFS_NAME_LEN) {
6074                 len = ref_namelen;
6075         } else {
6076                 len = BTRFS_NAME_LEN;
6077                 warning("%s[%llu %llu] ref_name too long",
6078                         ref_key->type == BTRFS_ROOT_REF_KEY ?
6079                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
6080                         ref_key->offset);
6081         }
6082         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6083
6084         /* Find relative root_ref */
6085         key.objectid = ref_key->offset;
6086         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6087         key.offset = ref_key->objectid;
6088
6089         btrfs_init_path(&path);
6090         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6091         if (ret) {
6092                 err |= ROOT_REF_MISSING;
6093                 error("%s[%llu %llu] couldn't find relative ref",
6094                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6095                       "ROOT_REF" : "ROOT_BACKREF",
6096                       ref_key->objectid, ref_key->offset);
6097                 goto out;
6098         }
6099
6100         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6101                                  struct btrfs_root_ref);
6102         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6103         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6104         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6105
6106         if (backref_namelen <= BTRFS_NAME_LEN) {
6107                 len = backref_namelen;
6108         } else {
6109                 len = BTRFS_NAME_LEN;
6110                 warning("%s[%llu %llu] ref_name too long",
6111                         key.type == BTRFS_ROOT_REF_KEY ?
6112                         "ROOT_REF" : "ROOT_BACKREF",
6113                         key.objectid, key.offset);
6114         }
6115         read_extent_buffer(path.nodes[0], backref_name,
6116                            (unsigned long)(backref + 1), len);
6117
6118         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6119             ref_namelen != backref_namelen ||
6120             strncmp(ref_name, backref_name, len)) {
6121                 err |= ROOT_REF_MISMATCH;
6122                 error("%s[%llu %llu] mismatch relative ref",
6123                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6124                       "ROOT_REF" : "ROOT_BACKREF",
6125                       ref_key->objectid, ref_key->offset);
6126         }
6127 out:
6128         btrfs_release_path(&path);
6129         return err;
6130 }
6131
6132 /*
6133  * Check all fs/file tree in low_memory mode.
6134  *
6135  * 1. for fs tree root item, call check_fs_root_v2()
6136  * 2. for fs tree root ref/backref, call check_root_ref()
6137  *
6138  * Return 0 if no error occurred.
6139  */
6140 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6141 {
6142         struct btrfs_root *tree_root = fs_info->tree_root;
6143         struct btrfs_root *cur_root = NULL;
6144         struct btrfs_path path;
6145         struct btrfs_key key;
6146         struct extent_buffer *node;
6147         unsigned int ext_ref;
6148         int slot;
6149         int ret;
6150         int err = 0;
6151
6152         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6153
6154         btrfs_init_path(&path);
6155         key.objectid = BTRFS_FS_TREE_OBJECTID;
6156         key.offset = 0;
6157         key.type = BTRFS_ROOT_ITEM_KEY;
6158
6159         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6160         if (ret < 0) {
6161                 err = ret;
6162                 goto out;
6163         } else if (ret > 0) {
6164                 err = -ENOENT;
6165                 goto out;
6166         }
6167
6168         while (1) {
6169                 node = path.nodes[0];
6170                 slot = path.slots[0];
6171                 btrfs_item_key_to_cpu(node, &key, slot);
6172                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6173                         goto out;
6174                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6175                     fs_root_objectid(key.objectid)) {
6176                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6177                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6178                                                                        &key);
6179                         } else {
6180                                 key.offset = (u64)-1;
6181                                 cur_root = btrfs_read_fs_root(fs_info, &key);
6182                         }
6183
6184                         if (IS_ERR(cur_root)) {
6185                                 error("Fail to read fs/subvol tree: %lld",
6186                                       key.objectid);
6187                                 err = -EIO;
6188                                 goto next;
6189                         }
6190
6191                         ret = check_fs_root_v2(cur_root, ext_ref);
6192                         err |= ret;
6193
6194                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6195                                 btrfs_free_fs_root(cur_root);
6196                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6197                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
6198                         ret = check_root_ref(tree_root, &key, node, slot);
6199                         err |= ret;
6200                 }
6201 next:
6202                 ret = btrfs_next_item(tree_root, &path);
6203                 if (ret > 0)
6204                         goto out;
6205                 if (ret < 0) {
6206                         err = ret;
6207                         goto out;
6208                 }
6209         }
6210
6211 out:
6212         btrfs_release_path(&path);
6213         return err;
6214 }
6215
6216 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6217                           struct cache_tree *root_cache)
6218 {
6219         int ret;
6220
6221         if (!ctx.progress_enabled)
6222                 fprintf(stderr, "checking fs roots\n");
6223         if (check_mode == CHECK_MODE_LOWMEM)
6224                 ret = check_fs_roots_v2(fs_info);
6225         else
6226                 ret = check_fs_roots(fs_info, root_cache);
6227
6228         return ret;
6229 }
6230
6231 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6232 {
6233         struct extent_backref *back, *tmp;
6234         struct tree_backref *tback;
6235         struct data_backref *dback;
6236         u64 found = 0;
6237         int err = 0;
6238
6239         rbtree_postorder_for_each_entry_safe(back, tmp,
6240                                              &rec->backref_tree, node) {
6241                 if (!back->found_extent_tree) {
6242                         err = 1;
6243                         if (!print_errs)
6244                                 goto out;
6245                         if (back->is_data) {
6246                                 dback = to_data_backref(back);
6247                                 fprintf(stderr, "Data backref %llu %s %llu"
6248                                         " owner %llu offset %llu num_refs %lu"
6249                                         " not found in extent tree\n",
6250                                         (unsigned long long)rec->start,
6251                                         back->full_backref ?
6252                                         "parent" : "root",
6253                                         back->full_backref ?
6254                                         (unsigned long long)dback->parent:
6255                                         (unsigned long long)dback->root,
6256                                         (unsigned long long)dback->owner,
6257                                         (unsigned long long)dback->offset,
6258                                         (unsigned long)dback->num_refs);
6259                         } else {
6260                                 tback = to_tree_backref(back);
6261                                 fprintf(stderr, "Tree backref %llu parent %llu"
6262                                         " root %llu not found in extent tree\n",
6263                                         (unsigned long long)rec->start,
6264                                         (unsigned long long)tback->parent,
6265                                         (unsigned long long)tback->root);
6266                         }
6267                 }
6268                 if (!back->is_data && !back->found_ref) {
6269                         err = 1;
6270                         if (!print_errs)
6271                                 goto out;
6272                         tback = to_tree_backref(back);
6273                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6274                                 (unsigned long long)rec->start,
6275                                 back->full_backref ? "parent" : "root",
6276                                 back->full_backref ?
6277                                 (unsigned long long)tback->parent :
6278                                 (unsigned long long)tback->root, back);
6279                 }
6280                 if (back->is_data) {
6281                         dback = to_data_backref(back);
6282                         if (dback->found_ref != dback->num_refs) {
6283                                 err = 1;
6284                                 if (!print_errs)
6285                                         goto out;
6286                                 fprintf(stderr, "Incorrect local backref count"
6287                                         " on %llu %s %llu owner %llu"
6288                                         " offset %llu found %u wanted %u back %p\n",
6289                                         (unsigned long long)rec->start,
6290                                         back->full_backref ?
6291                                         "parent" : "root",
6292                                         back->full_backref ?
6293                                         (unsigned long long)dback->parent:
6294                                         (unsigned long long)dback->root,
6295                                         (unsigned long long)dback->owner,
6296                                         (unsigned long long)dback->offset,
6297                                         dback->found_ref, dback->num_refs, back);
6298                         }
6299                         if (dback->disk_bytenr != rec->start) {
6300                                 err = 1;
6301                                 if (!print_errs)
6302                                         goto out;
6303                                 fprintf(stderr, "Backref disk bytenr does not"
6304                                         " match extent record, bytenr=%llu, "
6305                                         "ref bytenr=%llu\n",
6306                                         (unsigned long long)rec->start,
6307                                         (unsigned long long)dback->disk_bytenr);
6308                         }
6309
6310                         if (dback->bytes != rec->nr) {
6311                                 err = 1;
6312                                 if (!print_errs)
6313                                         goto out;
6314                                 fprintf(stderr, "Backref bytes do not match "
6315                                         "extent backref, bytenr=%llu, ref "
6316                                         "bytes=%llu, backref bytes=%llu\n",
6317                                         (unsigned long long)rec->start,
6318                                         (unsigned long long)rec->nr,
6319                                         (unsigned long long)dback->bytes);
6320                         }
6321                 }
6322                 if (!back->is_data) {
6323                         found += 1;
6324                 } else {
6325                         dback = to_data_backref(back);
6326                         found += dback->found_ref;
6327                 }
6328         }
6329         if (found != rec->refs) {
6330                 err = 1;
6331                 if (!print_errs)
6332                         goto out;
6333                 fprintf(stderr, "Incorrect global backref count "
6334                         "on %llu found %llu wanted %llu\n",
6335                         (unsigned long long)rec->start,
6336                         (unsigned long long)found,
6337                         (unsigned long long)rec->refs);
6338         }
6339 out:
6340         return err;
6341 }
6342
/* rb_free_nodes() callback: free the extent_backref that embeds @node. */
static void __free_one_backref(struct rb_node *node)
{
        free(rb_node_to_extent_backref(node));
}
6349
6350 static void free_all_extent_backrefs(struct extent_record *rec)
6351 {
6352         rb_free_nodes(&rec->backref_tree, __free_one_backref);
6353 }
6354
6355 static void free_extent_record_cache(struct cache_tree *extent_cache)
6356 {
6357         struct cache_extent *cache;
6358         struct extent_record *rec;
6359
6360         while (1) {
6361                 cache = first_cache_extent(extent_cache);
6362                 if (!cache)
6363                         break;
6364                 rec = container_of(cache, struct extent_record, cache);
6365                 remove_cache_extent(extent_cache, cache);
6366                 free_all_extent_backrefs(rec);
6367                 free(rec);
6368         }
6369 }
6370
6371 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6372                                  struct extent_record *rec)
6373 {
6374         if (rec->content_checked && rec->owner_ref_checked &&
6375             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6376             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6377             !rec->bad_full_backref && !rec->crossing_stripes &&
6378             !rec->wrong_chunk_type) {
6379                 remove_cache_extent(extent_cache, &rec->cache);
6380                 free_all_extent_backrefs(rec);
6381                 list_del_init(&rec->list);
6382                 free(rec);
6383         }
6384         return 0;
6385 }
6386
6387 static int check_owner_ref(struct btrfs_root *root,
6388                             struct extent_record *rec,
6389                             struct extent_buffer *buf)
6390 {
6391         struct extent_backref *node, *tmp;
6392         struct tree_backref *back;
6393         struct btrfs_root *ref_root;
6394         struct btrfs_key key;
6395         struct btrfs_path path;
6396         struct extent_buffer *parent;
6397         int level;
6398         int found = 0;
6399         int ret;
6400
6401         rbtree_postorder_for_each_entry_safe(node, tmp,
6402                                              &rec->backref_tree, node) {
6403                 if (node->is_data)
6404                         continue;
6405                 if (!node->found_ref)
6406                         continue;
6407                 if (node->full_backref)
6408                         continue;
6409                 back = to_tree_backref(node);
6410                 if (btrfs_header_owner(buf) == back->root)
6411                         return 0;
6412         }
6413         BUG_ON(rec->is_root);
6414
6415         /* try to find the block by search corresponding fs tree */
6416         key.objectid = btrfs_header_owner(buf);
6417         key.type = BTRFS_ROOT_ITEM_KEY;
6418         key.offset = (u64)-1;
6419
6420         ref_root = btrfs_read_fs_root(root->fs_info, &key);
6421         if (IS_ERR(ref_root))
6422                 return 1;
6423
6424         level = btrfs_header_level(buf);
6425         if (level == 0)
6426                 btrfs_item_key_to_cpu(buf, &key, 0);
6427         else
6428                 btrfs_node_key_to_cpu(buf, &key, 0);
6429
6430         btrfs_init_path(&path);
6431         path.lowest_level = level + 1;
6432         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6433         if (ret < 0)
6434                 return 0;
6435
6436         parent = path.nodes[level + 1];
6437         if (parent && buf->start == btrfs_node_blockptr(parent,
6438                                                         path.slots[level + 1]))
6439                 found = 1;
6440
6441         btrfs_release_path(&path);
6442         return found ? 0 : 1;
6443 }
6444
6445 static int is_extent_tree_record(struct extent_record *rec)
6446 {
6447         struct extent_backref *node, *tmp;
6448         struct tree_backref *back;
6449         int is_extent = 0;
6450
6451         rbtree_postorder_for_each_entry_safe(node, tmp,
6452                                              &rec->backref_tree, node) {
6453                 if (node->is_data)
6454                         return 0;
6455                 back = to_tree_backref(node);
6456                 if (node->full_backref)
6457                         return 0;
6458                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
6459                         is_extent = 1;
6460         }
6461         return is_extent;
6462 }
6463
6464
6465 static int record_bad_block_io(struct btrfs_fs_info *info,
6466                                struct cache_tree *extent_cache,
6467                                u64 start, u64 len)
6468 {
6469         struct extent_record *rec;
6470         struct cache_extent *cache;
6471         struct btrfs_key key;
6472
6473         cache = lookup_cache_extent(extent_cache, start, len);
6474         if (!cache)
6475                 return 0;
6476
6477         rec = container_of(cache, struct extent_record, cache);
6478         if (!is_extent_tree_record(rec))
6479                 return 0;
6480
6481         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6482         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
6483 }
6484
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * For a node (non-zero header level) the two btrfs_key_ptr entries are read
 * and written back in each other's place; when @slot is 0 the node's new
 * first key is pushed to the upper levels via btrfs_fixup_low_keys().
 *
 * For a leaf the item payloads are copied through temporary buffers, the
 * offset/size fields of the two item headers are exchanged and the keys are
 * rewritten with btrfs_set_item_key_unsafe().  path->slots[0] is left at
 * @slot + 1 afterwards.
 *
 * Returns 0 on success or -ENOMEM if a temporary buffer cannot be allocated.
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
                       struct extent_buffer *buf, int slot)
{
        if (btrfs_header_level(buf)) {
                struct btrfs_key_ptr ptr1, ptr2;

                read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
                                   sizeof(struct btrfs_key_ptr));
                read_extent_buffer(buf, &ptr2,
                                   btrfs_node_key_ptr_offset(slot + 1),
                                   sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr1,
                                    btrfs_node_key_ptr_offset(slot + 1),
                                    sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr2,
                                    btrfs_node_key_ptr_offset(slot),
                                    sizeof(struct btrfs_key_ptr));
                if (slot == 0) {
                        /* First key changed: propagate it to the parents */
                        struct btrfs_disk_key key;
                        btrfs_node_key(buf, &key, 0);
                        btrfs_fixup_low_keys(root, path, &key,
                                             btrfs_header_level(buf) + 1);
                }
        } else {
                struct btrfs_item *item1, *item2;
                struct btrfs_key k1, k2;
                char *item1_data, *item2_data;
                u32 item1_offset, item2_offset, item1_size, item2_size;

                item1 = btrfs_item_nr(slot);
                item2 = btrfs_item_nr(slot + 1);
                btrfs_item_key_to_cpu(buf, &k1, slot);
                btrfs_item_key_to_cpu(buf, &k2, slot + 1);
                item1_offset = btrfs_item_offset(buf, item1);
                item2_offset = btrfs_item_offset(buf, item2);
                item1_size = btrfs_item_size(buf, item1);
                item2_size = btrfs_item_size(buf, item2);

                item1_data = malloc(item1_size);
                if (!item1_data)
                        return -ENOMEM;
                item2_data = malloc(item2_size);
                if (!item2_data) {
                        free(item1_data);
                        return -ENOMEM;
                }

                read_extent_buffer(buf, item1_data, item1_offset, item1_size);
                read_extent_buffer(buf, item2_data, item2_offset, item2_size);

                /*
                 * NOTE(review): each write below uses the *other* item's
                 * size as its length, while the source buffer was allocated
                 * with its own item's size.  If the two sizes differ, one of
                 * these reads past the end of a temporary buffer - confirm
                 * whether callers guarantee equal sizes.
                 */
                write_extent_buffer(buf, item1_data, item2_offset, item2_size);
                write_extent_buffer(buf, item2_data, item1_offset, item1_size);
                free(item1_data);
                free(item2_data);

                /*
                 * NOTE(review): both the payloads and the header offsets are
                 * exchanged, so the header at @slot ends up pointing at
                 * item1's old payload again - verify this is intended.
                 */
                btrfs_set_item_offset(buf, item1, item2_offset);
                btrfs_set_item_offset(buf, item2, item1_offset);
                btrfs_set_item_size(buf, item1, item2_size);
                btrfs_set_item_size(buf, item2, item1_size);

                /* btrfs_set_item_key_unsafe() acts on path->slots[0] */
                path->slots[0] = slot;
                btrfs_set_item_key_unsafe(root, path, &k2);
                path->slots[0] = slot + 1;
                btrfs_set_item_key_unsafe(root, path, &k1);
        }
        return 0;
}
6552
6553 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6554 {
6555         struct extent_buffer *buf;
6556         struct btrfs_key k1, k2;
6557         int i;
6558         int level = path->lowest_level;
6559         int ret = -EIO;
6560
6561         buf = path->nodes[level];
6562         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6563                 if (level) {
6564                         btrfs_node_key_to_cpu(buf, &k1, i);
6565                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
6566                 } else {
6567                         btrfs_item_key_to_cpu(buf, &k1, i);
6568                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
6569                 }
6570                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6571                         continue;
6572                 ret = swap_values(root, path, buf, i);
6573                 if (ret)
6574                         break;
6575                 btrfs_mark_buffer_dirty(buf);
6576                 i = 0;
6577         }
6578         return ret;
6579 }
6580
6581 static int delete_bogus_item(struct btrfs_root *root,
6582                              struct btrfs_path *path,
6583                              struct extent_buffer *buf, int slot)
6584 {
6585         struct btrfs_key key;
6586         int nritems = btrfs_header_nritems(buf);
6587
6588         btrfs_item_key_to_cpu(buf, &key, slot);
6589
6590         /* These are all the keys we can deal with missing. */
6591         if (key.type != BTRFS_DIR_INDEX_KEY &&
6592             key.type != BTRFS_EXTENT_ITEM_KEY &&
6593             key.type != BTRFS_METADATA_ITEM_KEY &&
6594             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6595             key.type != BTRFS_EXTENT_DATA_REF_KEY)
6596                 return -1;
6597
6598         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6599                (unsigned long long)key.objectid, key.type,
6600                (unsigned long long)key.offset, slot, buf->start);
6601         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6602                               btrfs_item_nr_offset(slot + 1),
6603                               sizeof(struct btrfs_item) *
6604                               (nritems - slot - 1));
6605         btrfs_set_header_nritems(buf, nritems - 1);
6606         if (slot == 0) {
6607                 struct btrfs_disk_key disk_key;
6608
6609                 btrfs_item_key(buf, &disk_key, 0);
6610                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6611         }
6612         btrfs_mark_buffer_dirty(buf);
6613         return 0;
6614 }
6615
/*
 * Repair the item data layout of the leaf at path->nodes[0].
 *
 * The loop enforces that btrfs_item_end_nr() of item 0 equals
 * BTRFS_LEAF_DATA_SIZE() and that btrfs_item_end_nr() of every later item
 * equals btrfs_item_offset_nr() of its predecessor:
 *  - an item ending beyond that limit is handed to delete_bogus_item(); on
 *    success the whole scan restarts, on failure the loop stops with -EIO;
 *  - an item ending short of the limit has its data moved up by the size of
 *    the gap and its offset updated.
 *
 * Returns 0.  Any error trips the BUG_ON() at the end instead of being
 * returned.
 */
static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
{
        struct extent_buffer *buf;
        int i;
        int ret = 0;

        /* We should only get this for leaves */
        BUG_ON(path->lowest_level);
        buf = path->nodes[0];
again:
        for (i = 0; i < btrfs_header_nritems(buf); i++) {
                unsigned int shift = 0, offset;

                /* Item 0 is measured against the end of the leaf data area */
                if (i == 0 && btrfs_item_end_nr(buf, i) !=
                    BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
                        if (btrfs_item_end_nr(buf, i) >
                            BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
                                ret = delete_bogus_item(root, path, buf, i);
                                /* Item removed: indexes changed, rescan */
                                if (!ret)
                                        goto again;
                                fprintf(stderr, "item is off the end of the "
                                        "leaf, can't fix\n");
                                ret = -EIO;
                                break;
                        }
                        shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
                                btrfs_item_end_nr(buf, i);
                /* Later items are measured against the previous item's start */
                } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
                           btrfs_item_offset_nr(buf, i - 1)) {
                        if (btrfs_item_end_nr(buf, i) >
                            btrfs_item_offset_nr(buf, i - 1)) {
                                ret = delete_bogus_item(root, path, buf, i);
                                if (!ret)
                                        goto again;
                                fprintf(stderr, "items overlap, can't fix\n");
                                ret = -EIO;
                                break;
                        }
                        shift = btrfs_item_offset_nr(buf, i - 1) -
                                btrfs_item_end_nr(buf, i);
                }
                /* Already adjacent to its limit: nothing to move */
                if (!shift)
                        continue;

                printf("Shifting item nr %d by %u bytes in block %llu\n",
                       i, shift, (unsigned long long)buf->start);
                offset = btrfs_item_offset_nr(buf, i);
                /* Move the payload up by the gap, then record its new offset */
                memmove_extent_buffer(buf,
                                      btrfs_leaf_data(buf) + offset + shift,
                                      btrfs_leaf_data(buf) + offset,
                                      btrfs_item_size_nr(buf, i));
                btrfs_set_item_offset(buf, btrfs_item_nr(i),
                                      offset + shift);
                btrfs_mark_buffer_dirty(buf);
        }

        /*
         * We may have moved things, in which case we want to exit so we don't
         * write those changes out.  Once we have proper abort functionality in
         * progs this can be changed to something nicer.
         */
        BUG_ON(ret);
        return ret;
}
6680
6681 /*
6682  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
6683  * then just return -EIO.
6684  */
6685 static int try_to_fix_bad_block(struct btrfs_root *root,
6686                                 struct extent_buffer *buf,
6687                                 enum btrfs_tree_block_status status)
6688 {
6689         struct btrfs_trans_handle *trans;
6690         struct ulist *roots;
6691         struct ulist_node *node;
6692         struct btrfs_root *search_root;
6693         struct btrfs_path path;
6694         struct ulist_iterator iter;
6695         struct btrfs_key root_key, key;
6696         int ret;
6697
6698         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6699             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6700                 return -EIO;
6701
6702         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6703         if (ret)
6704                 return -EIO;
6705
6706         btrfs_init_path(&path);
6707         ULIST_ITER_INIT(&iter);
6708         while ((node = ulist_next(roots, &iter))) {
6709                 root_key.objectid = node->val;
6710                 root_key.type = BTRFS_ROOT_ITEM_KEY;
6711                 root_key.offset = (u64)-1;
6712
6713                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6714                 if (IS_ERR(root)) {
6715                         ret = -EIO;
6716                         break;
6717                 }
6718
6719
6720                 trans = btrfs_start_transaction(search_root, 0);
6721                 if (IS_ERR(trans)) {
6722                         ret = PTR_ERR(trans);
6723                         break;
6724                 }
6725
6726                 path.lowest_level = btrfs_header_level(buf);
6727                 path.skip_check_block = 1;
6728                 if (path.lowest_level)
6729                         btrfs_node_key_to_cpu(buf, &key, 0);
6730                 else
6731                         btrfs_item_key_to_cpu(buf, &key, 0);
6732                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6733                 if (ret) {
6734                         ret = -EIO;
6735                         btrfs_commit_transaction(trans, search_root);
6736                         break;
6737                 }
6738                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6739                         ret = fix_key_order(search_root, &path);
6740                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6741                         ret = fix_item_offset(search_root, &path);
6742                 if (ret) {
6743                         btrfs_commit_transaction(trans, search_root);
6744                         break;
6745                 }
6746                 btrfs_release_path(&path);
6747                 btrfs_commit_transaction(trans, search_root);
6748         }
6749         ulist_free(roots);
6750         btrfs_release_path(&path);
6751         return ret;
6752 }
6753
6754 static int check_block(struct btrfs_root *root,
6755                        struct cache_tree *extent_cache,
6756                        struct extent_buffer *buf, u64 flags)
6757 {
6758         struct extent_record *rec;
6759         struct cache_extent *cache;
6760         struct btrfs_key key;
6761         enum btrfs_tree_block_status status;
6762         int ret = 0;
6763         int level;
6764
6765         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6766         if (!cache)
6767                 return 1;
6768         rec = container_of(cache, struct extent_record, cache);
6769         rec->generation = btrfs_header_generation(buf);
6770
6771         level = btrfs_header_level(buf);
6772         if (btrfs_header_nritems(buf) > 0) {
6773
6774                 if (level == 0)
6775                         btrfs_item_key_to_cpu(buf, &key, 0);
6776                 else
6777                         btrfs_node_key_to_cpu(buf, &key, 0);
6778
6779                 rec->info_objectid = key.objectid;
6780         }
6781         rec->info_level = level;
6782
6783         if (btrfs_is_leaf(buf))
6784                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6785         else
6786                 status = btrfs_check_node(root, &rec->parent_key, buf);
6787
6788         if (status != BTRFS_TREE_BLOCK_CLEAN) {
6789                 if (repair)
6790                         status = try_to_fix_bad_block(root, buf, status);
6791                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6792                         ret = -EIO;
6793                         fprintf(stderr, "bad block %llu\n",
6794                                 (unsigned long long)buf->start);
6795                 } else {
6796                         /*
6797                          * Signal to callers we need to start the scan over
6798                          * again since we'll have cowed blocks.
6799                          */
6800                         ret = -EAGAIN;
6801                 }
6802         } else {
6803                 rec->content_checked = 1;
6804                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6805                         rec->owner_ref_checked = 1;
6806                 else {
6807                         ret = check_owner_ref(root, rec, buf);
6808                         if (!ret)
6809                                 rec->owner_ref_checked = 1;
6810                 }
6811         }
6812         if (!ret)
6813                 maybe_free_extent_rec(extent_cache, rec);
6814         return ret;
6815 }
6816
/*
 * Compiled out by the surrounding "#if 0".
 *
 * Linear lookup of a tree backref on rec->backrefs: with a non-zero @parent
 * it matches a full backref with that parent, otherwise a non-full backref
 * with the given @root.  Returns the match or NULL.
 *
 * NOTE(review): this walks rec->backrefs as a list, whereas the live code in
 * this file iterates rec->backref_tree - presumably this is a leftover kept
 * for reference only.
 */
#if 0
static struct tree_backref *find_tree_backref(struct extent_record *rec,
                                                u64 parent, u64 root)
{
        struct list_head *cur = rec->backrefs.next;
        struct extent_backref *node;
        struct tree_backref *back;

        while(cur != &rec->backrefs) {
                node = to_extent_backref(cur);
                cur = cur->next;
                if (node->is_data)
                        continue;
                back = to_tree_backref(node);
                if (parent > 0) {
                        if (!node->full_backref)
                                continue;
                        if (parent == back->parent)
                                return back;
                } else {
                        if (node->full_backref)
                                continue;
                        if (back->root == root)
                                return back;
                }
        }
        return NULL;
}
#endif
6846
6847 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6848                                                 u64 parent, u64 root)
6849 {
6850         struct tree_backref *ref = malloc(sizeof(*ref));
6851
6852         if (!ref)
6853                 return NULL;
6854         memset(&ref->node, 0, sizeof(ref->node));
6855         if (parent > 0) {
6856                 ref->parent = parent;
6857                 ref->node.full_backref = 1;
6858         } else {
6859                 ref->root = root;
6860                 ref->node.full_backref = 0;
6861         }
6862
6863         return ref;
6864 }
6865
/*
 * Compiled out by the surrounding "#if 0".
 *
 * Linear lookup of a data backref on rec->backrefs: with a non-zero @parent
 * it matches a full backref with that parent; otherwise a non-full backref
 * with matching @root, @owner and @offset.  In the latter case, when both
 * @found_ref and the candidate's found_ref are set, candidates whose bytes
 * or disk_bytenr differ from @bytes / @disk_bytenr are skipped.
 * Returns the match or NULL.
 *
 * NOTE(review): this walks rec->backrefs as a list, whereas the live code in
 * this file iterates rec->backref_tree - presumably this is a leftover kept
 * for reference only.
 */
#if 0
static struct data_backref *find_data_backref(struct extent_record *rec,
                                                u64 parent, u64 root,
                                                u64 owner, u64 offset,
                                                int found_ref,
                                                u64 disk_bytenr, u64 bytes)
{
        struct list_head *cur = rec->backrefs.next;
        struct extent_backref *node;
        struct data_backref *back;

        while(cur != &rec->backrefs) {
                node = to_extent_backref(cur);
                cur = cur->next;
                if (!node->is_data)
                        continue;
                back = to_data_backref(node);
                if (parent > 0) {
                        if (!node->full_backref)
                                continue;
                        if (parent == back->parent)
                                return back;
                } else {
                        if (node->full_backref)
                                continue;
                        if (back->root == root && back->owner == owner &&
                            back->offset == offset) {
                                if (found_ref && node->found_ref &&
                                    (back->bytes != bytes ||
                                    back->disk_bytenr != disk_bytenr))
                                        continue;
                                return back;
                        }
                }
        }
        return NULL;
}
#endif
6904
6905 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6906                                                 u64 parent, u64 root,
6907                                                 u64 owner, u64 offset,
6908                                                 u64 max_size)
6909 {
6910         struct data_backref *ref = malloc(sizeof(*ref));
6911
6912         if (!ref)
6913                 return NULL;
6914         memset(&ref->node, 0, sizeof(ref->node));
6915         ref->node.is_data = 1;
6916
6917         if (parent > 0) {
6918                 ref->parent = parent;
6919                 ref->owner = 0;
6920                 ref->offset = 0;
6921                 ref->node.full_backref = 1;
6922         } else {
6923                 ref->root = root;
6924                 ref->owner = owner;
6925                 ref->offset = offset;
6926                 ref->node.full_backref = 0;
6927         }
6928         ref->bytes = max_size;
6929         ref->found_ref = 0;
6930         ref->num_refs = 0;
6931         if (max_size > rec->max_size)
6932                 rec->max_size = max_size;
6933         return ref;
6934 }
6935
6936 /* Check if the type of extent matches with its chunk */
6937 static void check_extent_type(struct extent_record *rec)
6938 {
6939         struct btrfs_block_group_cache *bg_cache;
6940
6941         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6942         if (!bg_cache)
6943                 return;
6944
6945         /* data extent, check chunk directly*/
6946         if (!rec->metadata) {
6947                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6948                         rec->wrong_chunk_type = 1;
6949                 return;
6950         }
6951
6952         /* metadata extent, check the obvious case first */
6953         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6954                                  BTRFS_BLOCK_GROUP_METADATA))) {
6955                 rec->wrong_chunk_type = 1;
6956                 return;
6957         }
6958
6959         /*
6960          * Check SYSTEM extent, as it's also marked as metadata, we can only
6961          * make sure it's a SYSTEM extent by its backref
6962          */
6963         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
6964                 struct extent_backref *node;
6965                 struct tree_backref *tback;
6966                 u64 bg_type;
6967
6968                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
6969                 if (node->is_data) {
6970                         /* tree block shouldn't have data backref */
6971                         rec->wrong_chunk_type = 1;
6972                         return;
6973                 }
6974                 tback = container_of(node, struct tree_backref, node);
6975
6976                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6977                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6978                 else
6979                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6980                 if (!(bg_cache->flags & bg_type))
6981                         rec->wrong_chunk_type = 1;
6982         }
6983 }
6984
6985 /*
6986  * Allocate a new extent record, fill default values from @tmpl and insert int
6987  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6988  * the cache, otherwise it fails.
6989  */
6990 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6991                 struct extent_record *tmpl)
6992 {
6993         struct extent_record *rec;
6994         int ret = 0;
6995
6996         BUG_ON(tmpl->max_size == 0);
6997         rec = malloc(sizeof(*rec));
6998         if (!rec)
6999                 return -ENOMEM;
7000         rec->start = tmpl->start;
7001         rec->max_size = tmpl->max_size;
7002         rec->nr = max(tmpl->nr, tmpl->max_size);
7003         rec->found_rec = tmpl->found_rec;
7004         rec->content_checked = tmpl->content_checked;
7005         rec->owner_ref_checked = tmpl->owner_ref_checked;
7006         rec->num_duplicates = 0;
7007         rec->metadata = tmpl->metadata;
7008         rec->flag_block_full_backref = FLAG_UNSET;
7009         rec->bad_full_backref = 0;
7010         rec->crossing_stripes = 0;
7011         rec->wrong_chunk_type = 0;
7012         rec->is_root = tmpl->is_root;
7013         rec->refs = tmpl->refs;
7014         rec->extent_item_refs = tmpl->extent_item_refs;
7015         rec->parent_generation = tmpl->parent_generation;
7016         INIT_LIST_HEAD(&rec->backrefs);
7017         INIT_LIST_HEAD(&rec->dups);
7018         INIT_LIST_HEAD(&rec->list);
7019         rec->backref_tree = RB_ROOT;
7020         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7021         rec->cache.start = tmpl->start;
7022         rec->cache.size = tmpl->nr;
7023         ret = insert_cache_extent(extent_cache, &rec->cache);
7024         if (ret) {
7025                 free(rec);
7026                 return ret;
7027         }
7028         bytes_used += rec->nr;
7029
7030         if (tmpl->metadata)
7031                 rec->crossing_stripes = check_crossing_stripes(global_info,
7032                                 rec->start, global_info->nodesize);
7033         check_extent_type(rec);
7034         return ret;
7035 }
7036
7037 /*
7038  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7039  * some are hints:
7040  * - refs              - if found, increase refs
7041  * - is_root           - if found, set
7042  * - content_checked   - if found, set
7043  * - owner_ref_checked - if found, set
7044  *
7045  * If not found, create a new one, initialize and insert.
7046  */
7047 static int add_extent_rec(struct cache_tree *extent_cache,
7048                 struct extent_record *tmpl)
7049 {
7050         struct extent_record *rec;
7051         struct cache_extent *cache;
7052         int ret = 0;
7053         int dup = 0;
7054
7055         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7056         if (cache) {
7057                 rec = container_of(cache, struct extent_record, cache);
7058                 if (tmpl->refs)
7059                         rec->refs++;
7060                 if (rec->nr == 1)
7061                         rec->nr = max(tmpl->nr, tmpl->max_size);
7062
7063                 /*
7064                  * We need to make sure to reset nr to whatever the extent
7065                  * record says was the real size, this way we can compare it to
7066                  * the backrefs.
7067                  */
7068                 if (tmpl->found_rec) {
7069                         if (tmpl->start != rec->start || rec->found_rec) {
7070                                 struct extent_record *tmp;
7071
7072                                 dup = 1;
7073                                 if (list_empty(&rec->list))
7074                                         list_add_tail(&rec->list,
7075                                                       &duplicate_extents);
7076
7077                                 /*
7078                                  * We have to do this song and dance in case we
7079                                  * find an extent record that falls inside of
7080                                  * our current extent record but does not have
7081                                  * the same objectid.
7082                                  */
7083                                 tmp = malloc(sizeof(*tmp));
7084                                 if (!tmp)
7085                                         return -ENOMEM;
7086                                 tmp->start = tmpl->start;
7087                                 tmp->max_size = tmpl->max_size;
7088                                 tmp->nr = tmpl->nr;
7089                                 tmp->found_rec = 1;
7090                                 tmp->metadata = tmpl->metadata;
7091                                 tmp->extent_item_refs = tmpl->extent_item_refs;
7092                                 INIT_LIST_HEAD(&tmp->list);
7093                                 list_add_tail(&tmp->list, &rec->dups);
7094                                 rec->num_duplicates++;
7095                         } else {
7096                                 rec->nr = tmpl->nr;
7097                                 rec->found_rec = 1;
7098                         }
7099                 }
7100
7101                 if (tmpl->extent_item_refs && !dup) {
7102                         if (rec->extent_item_refs) {
7103                                 fprintf(stderr, "block %llu rec "
7104                                         "extent_item_refs %llu, passed %llu\n",
7105                                         (unsigned long long)tmpl->start,
7106                                         (unsigned long long)
7107                                                         rec->extent_item_refs,
7108                                         (unsigned long long)tmpl->extent_item_refs);
7109                         }
7110                         rec->extent_item_refs = tmpl->extent_item_refs;
7111                 }
7112                 if (tmpl->is_root)
7113                         rec->is_root = 1;
7114                 if (tmpl->content_checked)
7115                         rec->content_checked = 1;
7116                 if (tmpl->owner_ref_checked)
7117                         rec->owner_ref_checked = 1;
7118                 memcpy(&rec->parent_key, &tmpl->parent_key,
7119                                 sizeof(tmpl->parent_key));
7120                 if (tmpl->parent_generation)
7121                         rec->parent_generation = tmpl->parent_generation;
7122                 if (rec->max_size < tmpl->max_size)
7123                         rec->max_size = tmpl->max_size;
7124
7125                 /*
7126                  * A metadata extent can't cross stripe_len boundary, otherwise
7127                  * kernel scrub won't be able to handle it.
7128                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7129                  * it.
7130                  */
7131                 if (tmpl->metadata)
7132                         rec->crossing_stripes = check_crossing_stripes(
7133                                         global_info, rec->start,
7134                                         global_info->nodesize);
7135                 check_extent_type(rec);
7136                 maybe_free_extent_rec(extent_cache, rec);
7137                 return ret;
7138         }
7139
7140         ret = add_extent_rec_nolookup(extent_cache, tmpl);
7141
7142         return ret;
7143 }
7144
7145 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7146                             u64 parent, u64 root, int found_ref)
7147 {
7148         struct extent_record *rec;
7149         struct tree_backref *back;
7150         struct cache_extent *cache;
7151         int ret;
7152         bool insert = false;
7153
7154         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7155         if (!cache) {
7156                 struct extent_record tmpl;
7157
7158                 memset(&tmpl, 0, sizeof(tmpl));
7159                 tmpl.start = bytenr;
7160                 tmpl.nr = 1;
7161                 tmpl.metadata = 1;
7162                 tmpl.max_size = 1;
7163
7164                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7165                 if (ret)
7166                         return ret;
7167
7168                 /* really a bug in cache_extent implement now */
7169                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7170                 if (!cache)
7171                         return -ENOENT;
7172         }
7173
7174         rec = container_of(cache, struct extent_record, cache);
7175         if (rec->start != bytenr) {
7176                 /*
7177                  * Several cause, from unaligned bytenr to over lapping extents
7178                  */
7179                 return -EEXIST;
7180         }
7181
7182         back = find_tree_backref(rec, parent, root);
7183         if (!back) {
7184                 back = alloc_tree_backref(rec, parent, root);
7185                 if (!back)
7186                         return -ENOMEM;
7187                 insert = true;
7188         }
7189
7190         if (found_ref) {
7191                 if (back->node.found_ref) {
7192                         fprintf(stderr, "Extent back ref already exists "
7193                                 "for %llu parent %llu root %llu \n",
7194                                 (unsigned long long)bytenr,
7195                                 (unsigned long long)parent,
7196                                 (unsigned long long)root);
7197                 }
7198                 back->node.found_ref = 1;
7199         } else {
7200                 if (back->node.found_extent_tree) {
7201                         fprintf(stderr, "Extent back ref already exists "
7202                                 "for %llu parent %llu root %llu \n",
7203                                 (unsigned long long)bytenr,
7204                                 (unsigned long long)parent,
7205                                 (unsigned long long)root);
7206                 }
7207                 back->node.found_extent_tree = 1;
7208         }
7209         if (insert)
7210                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7211                         compare_extent_backref));
7212         check_extent_type(rec);
7213         maybe_free_extent_rec(extent_cache, rec);
7214         return 0;
7215 }
7216
7217 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7218                             u64 parent, u64 root, u64 owner, u64 offset,
7219                             u32 num_refs, int found_ref, u64 max_size)
7220 {
7221         struct extent_record *rec;
7222         struct data_backref *back;
7223         struct cache_extent *cache;
7224         int ret;
7225         bool insert = false;
7226
7227         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7228         if (!cache) {
7229                 struct extent_record tmpl;
7230
7231                 memset(&tmpl, 0, sizeof(tmpl));
7232                 tmpl.start = bytenr;
7233                 tmpl.nr = 1;
7234                 tmpl.max_size = max_size;
7235
7236                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7237                 if (ret)
7238                         return ret;
7239
7240                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7241                 if (!cache)
7242                         abort();
7243         }
7244
7245         rec = container_of(cache, struct extent_record, cache);
7246         if (rec->max_size < max_size)
7247                 rec->max_size = max_size;
7248
7249         /*
7250          * If found_ref is set then max_size is the real size and must match the
7251          * existing refs.  So if we have already found a ref then we need to
7252          * make sure that this ref matches the existing one, otherwise we need
7253          * to add a new backref so we can notice that the backrefs don't match
7254          * and we need to figure out who is telling the truth.  This is to
7255          * account for that awful fsync bug I introduced where we'd end up with
7256          * a btrfs_file_extent_item that would have its length include multiple
7257          * prealloc extents or point inside of a prealloc extent.
7258          */
7259         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7260                                  bytenr, max_size);
7261         if (!back) {
7262                 back = alloc_data_backref(rec, parent, root, owner, offset,
7263                                           max_size);
7264                 BUG_ON(!back);
7265                 insert = true;
7266         }
7267
7268         if (found_ref) {
7269                 BUG_ON(num_refs != 1);
7270                 if (back->node.found_ref)
7271                         BUG_ON(back->bytes != max_size);
7272                 back->node.found_ref = 1;
7273                 back->found_ref += 1;
7274                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7275                         back->bytes = max_size;
7276                         back->disk_bytenr = bytenr;
7277
7278                         /* Need to reinsert if not already in the tree */
7279                         if (!insert) {
7280                                 rb_erase(&back->node.node, &rec->backref_tree);
7281                                 insert = true;
7282                         }
7283                 }
7284                 rec->refs += 1;
7285                 rec->content_checked = 1;
7286                 rec->owner_ref_checked = 1;
7287         } else {
7288                 if (back->node.found_extent_tree) {
7289                         fprintf(stderr, "Extent back ref already exists "
7290                                 "for %llu parent %llu root %llu "
7291                                 "owner %llu offset %llu num_refs %lu\n",
7292                                 (unsigned long long)bytenr,
7293                                 (unsigned long long)parent,
7294                                 (unsigned long long)root,
7295                                 (unsigned long long)owner,
7296                                 (unsigned long long)offset,
7297                                 (unsigned long)num_refs);
7298                 }
7299                 back->num_refs = num_refs;
7300                 back->node.found_extent_tree = 1;
7301         }
7302         if (insert)
7303                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7304                         compare_extent_backref));
7305
7306         maybe_free_extent_rec(extent_cache, rec);
7307         return 0;
7308 }
7309
7310 static int add_pending(struct cache_tree *pending,
7311                        struct cache_tree *seen, u64 bytenr, u32 size)
7312 {
7313         int ret;
7314         ret = add_cache_extent(seen, bytenr, size);
7315         if (ret)
7316                 return ret;
7317         add_cache_extent(pending, bytenr, size);
7318         return 0;
7319 }
7320
7321 static int pick_next_pending(struct cache_tree *pending,
7322                         struct cache_tree *reada,
7323                         struct cache_tree *nodes,
7324                         u64 last, struct block_info *bits, int bits_nr,
7325                         int *reada_bits)
7326 {
7327         unsigned long node_start = last;
7328         struct cache_extent *cache;
7329         int ret;
7330
7331         cache = search_cache_extent(reada, 0);
7332         if (cache) {
7333                 bits[0].start = cache->start;
7334                 bits[0].size = cache->size;
7335                 *reada_bits = 1;
7336                 return 1;
7337         }
7338         *reada_bits = 0;
7339         if (node_start > 32768)
7340                 node_start -= 32768;
7341
7342         cache = search_cache_extent(nodes, node_start);
7343         if (!cache)
7344                 cache = search_cache_extent(nodes, 0);
7345
7346         if (!cache) {
7347                  cache = search_cache_extent(pending, 0);
7348                  if (!cache)
7349                          return 0;
7350                  ret = 0;
7351                  do {
7352                          bits[ret].start = cache->start;
7353                          bits[ret].size = cache->size;
7354                          cache = next_cache_extent(cache);
7355                          ret++;
7356                  } while (cache && ret < bits_nr);
7357                  return ret;
7358         }
7359
7360         ret = 0;
7361         do {
7362                 bits[ret].start = cache->start;
7363                 bits[ret].size = cache->size;
7364                 cache = next_cache_extent(cache);
7365                 ret++;
7366         } while (cache && ret < bits_nr);
7367
7368         if (bits_nr - ret > 8) {
7369                 u64 lookup = bits[0].start + bits[0].size;
7370                 struct cache_extent *next;
7371                 next = search_cache_extent(pending, lookup);
7372                 while(next) {
7373                         if (next->start - lookup > 32768)
7374                                 break;
7375                         bits[ret].start = next->start;
7376                         bits[ret].size = next->size;
7377                         lookup = next->start + next->size;
7378                         ret++;
7379                         if (ret == bits_nr)
7380                                 break;
7381                         next = next_cache_extent(next);
7382                         if (!next)
7383                                 break;
7384                 }
7385         }
7386         return ret;
7387 }
7388
7389 static void free_chunk_record(struct cache_extent *cache)
7390 {
7391         struct chunk_record *rec;
7392
7393         rec = container_of(cache, struct chunk_record, cache);
7394         list_del_init(&rec->list);
7395         list_del_init(&rec->dextents);
7396         free(rec);
7397 }
7398
7399 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7400 {
7401         cache_tree_free_extents(chunk_cache, free_chunk_record);
7402 }
7403
7404 static void free_device_record(struct rb_node *node)
7405 {
7406         struct device_record *rec;
7407
7408         rec = container_of(node, struct device_record, node);
7409         free(rec);
7410 }
7411
7412 FREE_RB_BASED_TREE(device_cache, free_device_record);
7413
7414 int insert_block_group_record(struct block_group_tree *tree,
7415                               struct block_group_record *bg_rec)
7416 {
7417         int ret;
7418
7419         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7420         if (ret)
7421                 return ret;
7422
7423         list_add_tail(&bg_rec->list, &tree->block_groups);
7424         return 0;
7425 }
7426
7427 static void free_block_group_record(struct cache_extent *cache)
7428 {
7429         struct block_group_record *rec;
7430
7431         rec = container_of(cache, struct block_group_record, cache);
7432         list_del_init(&rec->list);
7433         free(rec);
7434 }
7435
7436 void free_block_group_tree(struct block_group_tree *tree)
7437 {
7438         cache_tree_free_extents(&tree->tree, free_block_group_record);
7439 }
7440
7441 int insert_device_extent_record(struct device_extent_tree *tree,
7442                                 struct device_extent_record *de_rec)
7443 {
7444         int ret;
7445
7446         /*
7447          * Device extent is a bit different from the other extents, because
7448          * the extents which belong to the different devices may have the
7449          * same start and size, so we need use the special extent cache
7450          * search/insert functions.
7451          */
7452         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7453         if (ret)
7454                 return ret;
7455
7456         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7457         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
7458         return 0;
7459 }
7460
7461 static void free_device_extent_record(struct cache_extent *cache)
7462 {
7463         struct device_extent_record *rec;
7464
7465         rec = container_of(cache, struct device_extent_record, cache);
7466         if (!list_empty(&rec->chunk_list))
7467                 list_del_init(&rec->chunk_list);
7468         if (!list_empty(&rec->device_list))
7469                 list_del_init(&rec->device_list);
7470         free(rec);
7471 }
7472
7473 void free_device_extent_tree(struct device_extent_tree *tree)
7474 {
7475         cache_tree_free_extents(&tree->tree, free_device_extent_record);
7476 }
7477
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref in
 * @extent_cache. key.objectid is used as the extent bytenr and key.offset
 * as the parent. A ref objectid below BTRFS_FIRST_FREE_OBJECTID is recorded
 * as a tree backref, anything else as a data backref carrying the v0 ref
 * count.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
                return add_tree_backref(extent_cache, key.objectid,
                                        key.offset, 0, 0);

        return add_data_backref(extent_cache, key.objectid, key.offset,
                                0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
7498
7499 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7500                                             struct btrfs_key *key,
7501                                             int slot)
7502 {
7503         struct btrfs_chunk *ptr;
7504         struct chunk_record *rec;
7505         int num_stripes, i;
7506
7507         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7508         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7509
7510         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7511         if (!rec) {
7512                 fprintf(stderr, "memory allocation failed\n");
7513                 exit(-1);
7514         }
7515
7516         INIT_LIST_HEAD(&rec->list);
7517         INIT_LIST_HEAD(&rec->dextents);
7518         rec->bg_rec = NULL;
7519
7520         rec->cache.start = key->offset;
7521         rec->cache.size = btrfs_chunk_length(leaf, ptr);
7522
7523         rec->generation = btrfs_header_generation(leaf);
7524
7525         rec->objectid = key->objectid;
7526         rec->type = key->type;
7527         rec->offset = key->offset;
7528
7529         rec->length = rec->cache.size;
7530         rec->owner = btrfs_chunk_owner(leaf, ptr);
7531         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7532         rec->type_flags = btrfs_chunk_type(leaf, ptr);
7533         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7534         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7535         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7536         rec->num_stripes = num_stripes;
7537         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
7538
7539         for (i = 0; i < rec->num_stripes; ++i) {
7540                 rec->stripes[i].devid =
7541                         btrfs_stripe_devid_nr(leaf, ptr, i);
7542                 rec->stripes[i].offset =
7543                         btrfs_stripe_offset_nr(leaf, ptr, i);
7544                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7545                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
7546                                 BTRFS_UUID_SIZE);
7547         }
7548
7549         return rec;
7550 }
7551
7552 static int process_chunk_item(struct cache_tree *chunk_cache,
7553                               struct btrfs_key *key, struct extent_buffer *eb,
7554                               int slot)
7555 {
7556         struct chunk_record *rec;
7557         struct btrfs_chunk *chunk;
7558         int ret = 0;
7559
7560         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7561         /*
7562          * Do extra check for this chunk item,
7563          *
7564          * It's still possible one can craft a leaf with CHUNK_ITEM, with
7565          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7566          * and owner<->key_type check.
7567          */
7568         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7569                                       key->offset);
7570         if (ret < 0) {
7571                 error("chunk(%llu, %llu) is not valid, ignore it",
7572                       key->offset, btrfs_chunk_length(eb, chunk));
7573                 return 0;
7574         }
7575         rec = btrfs_new_chunk_record(eb, key, slot);
7576         ret = insert_cache_extent(chunk_cache, &rec->cache);
7577         if (ret) {
7578                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7579                         rec->offset, rec->length);
7580                 free(rec);
7581         }
7582
7583         return ret;
7584 }
7585
7586 static int process_device_item(struct rb_root *dev_cache,
7587                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7588 {
7589         struct btrfs_dev_item *ptr;
7590         struct device_record *rec;
7591         int ret = 0;
7592
7593         ptr = btrfs_item_ptr(eb,
7594                 slot, struct btrfs_dev_item);
7595
7596         rec = malloc(sizeof(*rec));
7597         if (!rec) {
7598                 fprintf(stderr, "memory allocation failed\n");
7599                 return -ENOMEM;
7600         }
7601
7602         rec->devid = key->offset;
7603         rec->generation = btrfs_header_generation(eb);
7604
7605         rec->objectid = key->objectid;
7606         rec->type = key->type;
7607         rec->offset = key->offset;
7608
7609         rec->devid = btrfs_device_id(eb, ptr);
7610         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7611         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7612
7613         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7614         if (ret) {
7615                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
7616                 free(rec);
7617         }
7618
7619         return ret;
7620 }
7621
7622 struct block_group_record *
7623 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7624                              int slot)
7625 {
7626         struct btrfs_block_group_item *ptr;
7627         struct block_group_record *rec;
7628
7629         rec = calloc(1, sizeof(*rec));
7630         if (!rec) {
7631                 fprintf(stderr, "memory allocation failed\n");
7632                 exit(-1);
7633         }
7634
7635         rec->cache.start = key->objectid;
7636         rec->cache.size = key->offset;
7637
7638         rec->generation = btrfs_header_generation(leaf);
7639
7640         rec->objectid = key->objectid;
7641         rec->type = key->type;
7642         rec->offset = key->offset;
7643
7644         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7645         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7646
7647         INIT_LIST_HEAD(&rec->list);
7648
7649         return rec;
7650 }
7651
7652 static int process_block_group_item(struct block_group_tree *block_group_cache,
7653                                     struct btrfs_key *key,
7654                                     struct extent_buffer *eb, int slot)
7655 {
7656         struct block_group_record *rec;
7657         int ret = 0;
7658
7659         rec = btrfs_new_block_group_record(eb, key, slot);
7660         ret = insert_block_group_record(block_group_cache, rec);
7661         if (ret) {
7662                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7663                         rec->objectid, rec->offset);
7664                 free(rec);
7665         }
7666
7667         return ret;
7668 }
7669
7670 struct device_extent_record *
7671 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7672                                struct btrfs_key *key, int slot)
7673 {
7674         struct device_extent_record *rec;
7675         struct btrfs_dev_extent *ptr;
7676
7677         rec = calloc(1, sizeof(*rec));
7678         if (!rec) {
7679                 fprintf(stderr, "memory allocation failed\n");
7680                 exit(-1);
7681         }
7682
7683         rec->cache.objectid = key->objectid;
7684         rec->cache.start = key->offset;
7685
7686         rec->generation = btrfs_header_generation(leaf);
7687
7688         rec->objectid = key->objectid;
7689         rec->type = key->type;
7690         rec->offset = key->offset;
7691
7692         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7693         rec->chunk_objecteid =
7694                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7695         rec->chunk_offset =
7696                 btrfs_dev_extent_chunk_offset(leaf, ptr);
7697         rec->length = btrfs_dev_extent_length(leaf, ptr);
7698         rec->cache.size = rec->length;
7699
7700         INIT_LIST_HEAD(&rec->chunk_list);
7701         INIT_LIST_HEAD(&rec->device_list);
7702
7703         return rec;
7704 }
7705
7706 static int
7707 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7708                            struct btrfs_key *key, struct extent_buffer *eb,
7709                            int slot)
7710 {
7711         struct device_extent_record *rec;
7712         int ret;
7713
7714         rec = btrfs_new_device_extent_record(eb, key, slot);
7715         ret = insert_device_extent_record(dev_extent_cache, rec);
7716         if (ret) {
7717                 fprintf(stderr,
7718                         "Device extent[%llu, %llu, %llu] existed.\n",
7719                         rec->objectid, rec->offset, rec->length);
7720                 free(rec);
7721         }
7722
7723         return ret;
7724 }
7725
7726 static int process_extent_item(struct btrfs_root *root,
7727                                struct cache_tree *extent_cache,
7728                                struct extent_buffer *eb, int slot)
7729 {
7730         struct btrfs_extent_item *ei;
7731         struct btrfs_extent_inline_ref *iref;
7732         struct btrfs_extent_data_ref *dref;
7733         struct btrfs_shared_data_ref *sref;
7734         struct btrfs_key key;
7735         struct extent_record tmpl;
7736         unsigned long end;
7737         unsigned long ptr;
7738         int ret;
7739         int type;
7740         u32 item_size = btrfs_item_size_nr(eb, slot);
7741         u64 refs = 0;
7742         u64 offset;
7743         u64 num_bytes;
7744         int metadata = 0;
7745
7746         btrfs_item_key_to_cpu(eb, &key, slot);
7747
7748         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7749                 metadata = 1;
7750                 num_bytes = root->fs_info->nodesize;
7751         } else {
7752                 num_bytes = key.offset;
7753         }
7754
7755         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7756                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7757                       key.objectid, root->fs_info->sectorsize);
7758                 return -EIO;
7759         }
7760         if (item_size < sizeof(*ei)) {
7761 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7762                 struct btrfs_extent_item_v0 *ei0;
7763                 if (item_size != sizeof(*ei0)) {
7764                         error(
7765         "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
7766                                 key.objectid, key.type, key.offset,
7767                                 btrfs_header_bytenr(eb), slot);
7768                         BUG();
7769                 }
7770                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7771                 refs = btrfs_extent_refs_v0(eb, ei0);
7772 #else
7773                 BUG();
7774 #endif
7775                 memset(&tmpl, 0, sizeof(tmpl));
7776                 tmpl.start = key.objectid;
7777                 tmpl.nr = num_bytes;
7778                 tmpl.extent_item_refs = refs;
7779                 tmpl.metadata = metadata;
7780                 tmpl.found_rec = 1;
7781                 tmpl.max_size = num_bytes;
7782
7783                 return add_extent_rec(extent_cache, &tmpl);
7784         }
7785
7786         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7787         refs = btrfs_extent_refs(eb, ei);
7788         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7789                 metadata = 1;
7790         else
7791                 metadata = 0;
7792         if (metadata && num_bytes != root->fs_info->nodesize) {
7793                 error("ignore invalid metadata extent, length %llu does not equal to %u",
7794                       num_bytes, root->fs_info->nodesize);
7795                 return -EIO;
7796         }
7797         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7798                 error("ignore invalid data extent, length %llu is not aligned to %u",
7799                       num_bytes, root->fs_info->sectorsize);
7800                 return -EIO;
7801         }
7802
7803         memset(&tmpl, 0, sizeof(tmpl));
7804         tmpl.start = key.objectid;
7805         tmpl.nr = num_bytes;
7806         tmpl.extent_item_refs = refs;
7807         tmpl.metadata = metadata;
7808         tmpl.found_rec = 1;
7809         tmpl.max_size = num_bytes;
7810         add_extent_rec(extent_cache, &tmpl);
7811
7812         ptr = (unsigned long)(ei + 1);
7813         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7814             key.type == BTRFS_EXTENT_ITEM_KEY)
7815                 ptr += sizeof(struct btrfs_tree_block_info);
7816
7817         end = (unsigned long)ei + item_size;
7818         while (ptr < end) {
7819                 iref = (struct btrfs_extent_inline_ref *)ptr;
7820                 type = btrfs_extent_inline_ref_type(eb, iref);
7821                 offset = btrfs_extent_inline_ref_offset(eb, iref);
7822                 switch (type) {
7823                 case BTRFS_TREE_BLOCK_REF_KEY:
7824                         ret = add_tree_backref(extent_cache, key.objectid,
7825                                         0, offset, 0);
7826                         if (ret < 0)
7827                                 error(
7828                         "add_tree_backref failed (extent items tree block): %s",
7829                                       strerror(-ret));
7830                         break;
7831                 case BTRFS_SHARED_BLOCK_REF_KEY:
7832                         ret = add_tree_backref(extent_cache, key.objectid,
7833                                         offset, 0, 0);
7834                         if (ret < 0)
7835                                 error(
7836                         "add_tree_backref failed (extent items shared block): %s",
7837                                       strerror(-ret));
7838                         break;
7839                 case BTRFS_EXTENT_DATA_REF_KEY:
7840                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7841                         add_data_backref(extent_cache, key.objectid, 0,
7842                                         btrfs_extent_data_ref_root(eb, dref),
7843                                         btrfs_extent_data_ref_objectid(eb,
7844                                                                        dref),
7845                                         btrfs_extent_data_ref_offset(eb, dref),
7846                                         btrfs_extent_data_ref_count(eb, dref),
7847                                         0, num_bytes);
7848                         break;
7849                 case BTRFS_SHARED_DATA_REF_KEY:
7850                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
7851                         add_data_backref(extent_cache, key.objectid, offset,
7852                                         0, 0, 0,
7853                                         btrfs_shared_data_ref_count(eb, sref),
7854                                         0, num_bytes);
7855                         break;
7856                 default:
7857                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7858                                 key.objectid, key.type, num_bytes);
7859                         goto out;
7860                 }
7861                 ptr += btrfs_extent_inline_ref_size(type);
7862         }
7863         WARN_ON(ptr > end);
7864 out:
7865         return 0;
7866 }
7867
7868 static int check_cache_range(struct btrfs_root *root,
7869                              struct btrfs_block_group_cache *cache,
7870                              u64 offset, u64 bytes)
7871 {
7872         struct btrfs_free_space *entry;
7873         u64 *logical;
7874         u64 bytenr;
7875         int stripe_len;
7876         int i, nr, ret;
7877
7878         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7879                 bytenr = btrfs_sb_offset(i);
7880                 ret = btrfs_rmap_block(root->fs_info,
7881                                        cache->key.objectid, bytenr, 0,
7882                                        &logical, &nr, &stripe_len);
7883                 if (ret)
7884                         return ret;
7885
7886                 while (nr--) {
7887                         if (logical[nr] + stripe_len <= offset)
7888                                 continue;
7889                         if (offset + bytes <= logical[nr])
7890                                 continue;
7891                         if (logical[nr] == offset) {
7892                                 if (stripe_len >= bytes) {
7893                                         free(logical);
7894                                         return 0;
7895                                 }
7896                                 bytes -= stripe_len;
7897                                 offset += stripe_len;
7898                         } else if (logical[nr] < offset) {
7899                                 if (logical[nr] + stripe_len >=
7900                                     offset + bytes) {
7901                                         free(logical);
7902                                         return 0;
7903                                 }
7904                                 bytes = (offset + bytes) -
7905                                         (logical[nr] + stripe_len);
7906                                 offset = logical[nr] + stripe_len;
7907                         } else {
7908                                 /*
7909                                  * Could be tricky, the super may land in the
7910                                  * middle of the area we're checking.  First
7911                                  * check the easiest case, it's at the end.
7912                                  */
7913                                 if (logical[nr] + stripe_len >=
7914                                     bytes + offset) {
7915                                         bytes = logical[nr] - offset;
7916                                         continue;
7917                                 }
7918
7919                                 /* Check the left side */
7920                                 ret = check_cache_range(root, cache,
7921                                                         offset,
7922                                                         logical[nr] - offset);
7923                                 if (ret) {
7924                                         free(logical);
7925                                         return ret;
7926                                 }
7927
7928                                 /* Now we continue with the right side */
7929                                 bytes = (offset + bytes) -
7930                                         (logical[nr] + stripe_len);
7931                                 offset = logical[nr] + stripe_len;
7932                         }
7933                 }
7934
7935                 free(logical);
7936         }
7937
7938         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7939         if (!entry) {
7940                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7941                         offset, offset+bytes);
7942                 return -EINVAL;
7943         }
7944
7945         if (entry->offset != offset) {
7946                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7947                         entry->offset);
7948                 return -EINVAL;
7949         }
7950
7951         if (entry->bytes != bytes) {
7952                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7953                         bytes, entry->bytes, offset);
7954                 return -EINVAL;
7955         }
7956
7957         unlink_free_space(cache->free_space_ctl, entry);
7958         free(entry);
7959         return 0;
7960 }
7961
7962 static int verify_space_cache(struct btrfs_root *root,
7963                               struct btrfs_block_group_cache *cache)
7964 {
7965         struct btrfs_path path;
7966         struct extent_buffer *leaf;
7967         struct btrfs_key key;
7968         u64 last;
7969         int ret = 0;
7970
7971         root = root->fs_info->extent_root;
7972
7973         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7974
7975         btrfs_init_path(&path);
7976         key.objectid = last;
7977         key.offset = 0;
7978         key.type = BTRFS_EXTENT_ITEM_KEY;
7979         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7980         if (ret < 0)
7981                 goto out;
7982         ret = 0;
7983         while (1) {
7984                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7985                         ret = btrfs_next_leaf(root, &path);
7986                         if (ret < 0)
7987                                 goto out;
7988                         if (ret > 0) {
7989                                 ret = 0;
7990                                 break;
7991                         }
7992                 }
7993                 leaf = path.nodes[0];
7994                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7995                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7996                         break;
7997                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7998                     key.type != BTRFS_METADATA_ITEM_KEY) {
7999                         path.slots[0]++;
8000                         continue;
8001                 }
8002
8003                 if (last == key.objectid) {
8004                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
8005                                 last = key.objectid + key.offset;
8006                         else
8007                                 last = key.objectid + root->fs_info->nodesize;
8008                         path.slots[0]++;
8009                         continue;
8010                 }
8011
8012                 ret = check_cache_range(root, cache, last,
8013                                         key.objectid - last);
8014                 if (ret)
8015                         break;
8016                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8017                         last = key.objectid + key.offset;
8018                 else
8019                         last = key.objectid + root->fs_info->nodesize;
8020                 path.slots[0]++;
8021         }
8022
8023         if (last < cache->key.objectid + cache->key.offset)
8024                 ret = check_cache_range(root, cache, last,
8025                                         cache->key.objectid +
8026                                         cache->key.offset - last);
8027
8028 out:
8029         btrfs_release_path(&path);
8030
8031         if (!ret &&
8032             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8033                 fprintf(stderr, "There are still entries left in the space "
8034                         "cache\n");
8035                 ret = -EINVAL;
8036         }
8037
8038         return ret;
8039 }
8040
8041 static int check_space_cache(struct btrfs_root *root)
8042 {
8043         struct btrfs_block_group_cache *cache;
8044         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8045         int ret;
8046         int error = 0;
8047
8048         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8049             btrfs_super_generation(root->fs_info->super_copy) !=
8050             btrfs_super_cache_generation(root->fs_info->super_copy)) {
8051                 printf("cache and super generation don't match, space cache "
8052                        "will be invalidated\n");
8053                 return 0;
8054         }
8055
8056         if (ctx.progress_enabled) {
8057                 ctx.tp = TASK_FREE_SPACE;
8058                 task_start(ctx.info);
8059         }
8060
8061         while (1) {
8062                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
8063                 if (!cache)
8064                         break;
8065
8066                 start = cache->key.objectid + cache->key.offset;
8067                 if (!cache->free_space_ctl) {
8068                         if (btrfs_init_free_space_ctl(cache,
8069                                                 root->fs_info->sectorsize)) {
8070                                 ret = -ENOMEM;
8071                                 break;
8072                         }
8073                 } else {
8074                         btrfs_remove_free_space_cache(cache);
8075                 }
8076
8077                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8078                         ret = exclude_super_stripes(root, cache);
8079                         if (ret) {
8080                                 fprintf(stderr, "could not exclude super stripes: %s\n",
8081                                         strerror(-ret));
8082                                 error++;
8083                                 continue;
8084                         }
8085                         ret = load_free_space_tree(root->fs_info, cache);
8086                         free_excluded_extents(root, cache);
8087                         if (ret < 0) {
8088                                 fprintf(stderr, "could not load free space tree: %s\n",
8089                                         strerror(-ret));
8090                                 error++;
8091                                 continue;
8092                         }
8093                         error += ret;
8094                 } else {
8095                         ret = load_free_space_cache(root->fs_info, cache);
8096                         if (!ret)
8097                                 continue;
8098                 }
8099
8100                 ret = verify_space_cache(root, cache);
8101                 if (ret) {
8102                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
8103                                 cache->key.objectid);
8104                         error++;
8105                 }
8106         }
8107
8108         task_stop(ctx.info);
8109
8110         return error ? -EINVAL : 0;
8111 }
8112
8113 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8114                         u64 num_bytes, unsigned long leaf_offset,
8115                         struct extent_buffer *eb) {
8116
8117         struct btrfs_fs_info *fs_info = root->fs_info;
8118         u64 offset = 0;
8119         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8120         char *data;
8121         unsigned long csum_offset;
8122         u32 csum;
8123         u32 csum_expected;
8124         u64 read_len;
8125         u64 data_checked = 0;
8126         u64 tmp;
8127         int ret = 0;
8128         int mirror;
8129         int num_copies;
8130
8131         if (num_bytes % fs_info->sectorsize)
8132                 return -EINVAL;
8133
8134         data = malloc(num_bytes);
8135         if (!data)
8136                 return -ENOMEM;
8137
8138         while (offset < num_bytes) {
8139                 mirror = 0;
8140 again:
8141                 read_len = num_bytes - offset;
8142                 /* read as much space once a time */
8143                 ret = read_extent_data(fs_info, data + offset,
8144                                 bytenr + offset, &read_len, mirror);
8145                 if (ret)
8146                         goto out;
8147                 data_checked = 0;
8148                 /* verify every 4k data's checksum */
8149                 while (data_checked < read_len) {
8150                         csum = ~(u32)0;
8151                         tmp = offset + data_checked;
8152
8153                         csum = btrfs_csum_data((char *)data + tmp,
8154                                                csum, fs_info->sectorsize);
8155                         btrfs_csum_final(csum, (u8 *)&csum);
8156
8157                         csum_offset = leaf_offset +
8158                                  tmp / fs_info->sectorsize * csum_size;
8159                         read_extent_buffer(eb, (char *)&csum_expected,
8160                                            csum_offset, csum_size);
8161                         /* try another mirror */
8162                         if (csum != csum_expected) {
8163                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8164                                                 mirror, bytenr + tmp,
8165                                                 csum, csum_expected);
8166                                 num_copies = btrfs_num_copies(root->fs_info,
8167                                                 bytenr, num_bytes);
8168                                 if (mirror < num_copies - 1) {
8169                                         mirror += 1;
8170                                         goto again;
8171                                 }
8172                         }
8173                         data_checked += fs_info->sectorsize;
8174                 }
8175                 offset += read_len;
8176         }
8177 out:
8178         free(data);
8179         return ret;
8180 }
8181
8182 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8183                                u64 num_bytes)
8184 {
8185         struct btrfs_path path;
8186         struct extent_buffer *leaf;
8187         struct btrfs_key key;
8188         int ret;
8189
8190         btrfs_init_path(&path);
8191         key.objectid = bytenr;
8192         key.type = BTRFS_EXTENT_ITEM_KEY;
8193         key.offset = (u64)-1;
8194
8195 again:
8196         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8197                                 0, 0);
8198         if (ret < 0) {
8199                 fprintf(stderr, "Error looking up extent record %d\n", ret);
8200                 btrfs_release_path(&path);
8201                 return ret;
8202         } else if (ret) {
8203                 if (path.slots[0] > 0) {
8204                         path.slots[0]--;
8205                 } else {
8206                         ret = btrfs_prev_leaf(root, &path);
8207                         if (ret < 0) {
8208                                 goto out;
8209                         } else if (ret > 0) {
8210                                 ret = 0;
8211                                 goto out;
8212                         }
8213                 }
8214         }
8215
8216         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8217
8218         /*
8219          * Block group items come before extent items if they have the same
8220          * bytenr, so walk back one more just in case.  Dear future traveller,
8221          * first congrats on mastering time travel.  Now if it's not too much
8222          * trouble could you go back to 2006 and tell Chris to make the
8223          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8224          * EXTENT_ITEM_KEY please?
8225          */
8226         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8227                 if (path.slots[0] > 0) {
8228                         path.slots[0]--;
8229                 } else {
8230                         ret = btrfs_prev_leaf(root, &path);
8231                         if (ret < 0) {
8232                                 goto out;
8233                         } else if (ret > 0) {
8234                                 ret = 0;
8235                                 goto out;
8236                         }
8237                 }
8238                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8239         }
8240
8241         while (num_bytes) {
8242                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8243                         ret = btrfs_next_leaf(root, &path);
8244                         if (ret < 0) {
8245                                 fprintf(stderr, "Error going to next leaf "
8246                                         "%d\n", ret);
8247                                 btrfs_release_path(&path);
8248                                 return ret;
8249                         } else if (ret) {
8250                                 break;
8251                         }
8252                 }
8253                 leaf = path.nodes[0];
8254                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8255                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8256                         path.slots[0]++;
8257                         continue;
8258                 }
8259                 if (key.objectid + key.offset < bytenr) {
8260                         path.slots[0]++;
8261                         continue;
8262                 }
8263                 if (key.objectid > bytenr + num_bytes)
8264                         break;
8265
8266                 if (key.objectid == bytenr) {
8267                         if (key.offset >= num_bytes) {
8268                                 num_bytes = 0;
8269                                 break;
8270                         }
8271                         num_bytes -= key.offset;
8272                         bytenr += key.offset;
8273                 } else if (key.objectid < bytenr) {
8274                         if (key.objectid + key.offset >= bytenr + num_bytes) {
8275                                 num_bytes = 0;
8276                                 break;
8277                         }
8278                         num_bytes = (bytenr + num_bytes) -
8279                                 (key.objectid + key.offset);
8280                         bytenr = key.objectid + key.offset;
8281                 } else {
8282                         if (key.objectid + key.offset < bytenr + num_bytes) {
8283                                 u64 new_start = key.objectid + key.offset;
8284                                 u64 new_bytes = bytenr + num_bytes - new_start;
8285
8286                                 /*
8287                                  * Weird case, the extent is in the middle of
8288                                  * our range, we'll have to search one side
8289                                  * and then the other.  Not sure if this happens
8290                                  * in real life, but no harm in coding it up
8291                                  * anyway just in case.
8292                                  */
8293                                 btrfs_release_path(&path);
8294                                 ret = check_extent_exists(root, new_start,
8295                                                           new_bytes);
8296                                 if (ret) {
8297                                         fprintf(stderr, "Right section didn't "
8298                                                 "have a record\n");
8299                                         break;
8300                                 }
8301                                 num_bytes = key.objectid - bytenr;
8302                                 goto again;
8303                         }
8304                         num_bytes = key.objectid - bytenr;
8305                 }
8306                 path.slots[0]++;
8307         }
8308         ret = 0;
8309
8310 out:
8311         if (num_bytes && !ret) {
8312                 fprintf(stderr, "There are no extents for csum range "
8313                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8314                 ret = 1;
8315         }
8316
8317         btrfs_release_path(&path);
8318         return ret;
8319 }
8320
8321 static int check_csums(struct btrfs_root *root)
8322 {
8323         struct btrfs_path path;
8324         struct extent_buffer *leaf;
8325         struct btrfs_key key;
8326         u64 offset = 0, num_bytes = 0;
8327         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8328         int errors = 0;
8329         int ret;
8330         u64 data_len;
8331         unsigned long leaf_offset;
8332
8333         root = root->fs_info->csum_root;
8334         if (!extent_buffer_uptodate(root->node)) {
8335                 fprintf(stderr, "No valid csum tree found\n");
8336                 return -ENOENT;
8337         }
8338
8339         btrfs_init_path(&path);
8340         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8341         key.type = BTRFS_EXTENT_CSUM_KEY;
8342         key.offset = 0;
8343         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8344         if (ret < 0) {
8345                 fprintf(stderr, "Error searching csum tree %d\n", ret);
8346                 btrfs_release_path(&path);
8347                 return ret;
8348         }
8349
8350         if (ret > 0 && path.slots[0])
8351                 path.slots[0]--;
8352         ret = 0;
8353
8354         while (1) {
8355                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8356                         ret = btrfs_next_leaf(root, &path);
8357                         if (ret < 0) {
8358                                 fprintf(stderr, "Error going to next leaf "
8359                                         "%d\n", ret);
8360                                 break;
8361                         }
8362                         if (ret)
8363                                 break;
8364                 }
8365                 leaf = path.nodes[0];
8366
8367                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8368                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
8369                         path.slots[0]++;
8370                         continue;
8371                 }
8372
8373                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8374                               csum_size) * root->fs_info->sectorsize;
8375                 if (!check_data_csum)
8376                         goto skip_csum_check;
8377                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8378                 ret = check_extent_csums(root, key.offset, data_len,
8379                                          leaf_offset, leaf);
8380                 if (ret)
8381                         break;
8382 skip_csum_check:
8383                 if (!num_bytes) {
8384                         offset = key.offset;
8385                 } else if (key.offset != offset + num_bytes) {
8386                         ret = check_extent_exists(root, offset, num_bytes);
8387                         if (ret) {
8388                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8389                                         "there is no extent record\n",
8390                                         offset, offset+num_bytes);
8391                                 errors++;
8392                         }
8393                         offset = key.offset;
8394                         num_bytes = 0;
8395                 }
8396                 num_bytes += data_len;
8397                 path.slots[0]++;
8398         }
8399
8400         btrfs_release_path(&path);
8401         return errors;
8402 }
8403
8404 static int is_dropped_key(struct btrfs_key *key,
8405                           struct btrfs_key *drop_key) {
8406         if (key->objectid < drop_key->objectid)
8407                 return 1;
8408         else if (key->objectid == drop_key->objectid) {
8409                 if (key->type < drop_key->type)
8410                         return 1;
8411                 else if (key->type == drop_key->type) {
8412                         if (key->offset < drop_key->offset)
8413                                 return 1;
8414                 }
8415         }
8416         return 0;
8417 }
8418
8419 /*
8420  * Here are the rules for FULL_BACKREF.
8421  *
8422  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8423  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8424  *      FULL_BACKREF set.
8425  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
8426  *    if it happened after the relocation occurred since we'll have dropped the
8427  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8428  *    have no real way to know for sure.
8429  *
8430  * We process the blocks one root at a time, and we start from the lowest root
8431  * objectid and go to the highest.  So we can just lookup the owner backref for
8432  * the record and if we don't find it then we know it doesn't exist and we have
8433  * a FULL BACKREF.
8434  *
8435  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
8436  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8437  * be set or not and then we can check later once we've gathered all the refs.
8438  */
8439 static int calc_extent_flag(struct cache_tree *extent_cache,
8440                            struct extent_buffer *buf,
8441                            struct root_item_record *ri,
8442                            u64 *flags)
8443 {
8444         struct extent_record *rec;
8445         struct cache_extent *cache;
8446         struct tree_backref *tback;
8447         u64 owner = 0;
8448
8449         cache = lookup_cache_extent(extent_cache, buf->start, 1);
8450         /* we have added this extent before */
8451         if (!cache)
8452                 return -ENOENT;
8453
8454         rec = container_of(cache, struct extent_record, cache);
8455
8456         /*
8457          * Except file/reloc tree, we can not have
8458          * FULL BACKREF MODE
8459          */
8460         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8461                 goto normal;
8462         /*
8463          * root node
8464          */
8465         if (buf->start == ri->bytenr)
8466                 goto normal;
8467
8468         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8469                 goto full_backref;
8470
8471         owner = btrfs_header_owner(buf);
8472         if (owner == ri->objectid)
8473                 goto normal;
8474
8475         tback = find_tree_backref(rec, 0, owner);
8476         if (!tback)
8477                 goto full_backref;
8478 normal:
8479         *flags = 0;
8480         if (rec->flag_block_full_backref != FLAG_UNSET &&
8481             rec->flag_block_full_backref != 0)
8482                 rec->bad_full_backref = 1;
8483         return 0;
8484 full_backref:
8485         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8486         if (rec->flag_block_full_backref != FLAG_UNSET &&
8487             rec->flag_block_full_backref != 1)
8488                 rec->bad_full_backref = 1;
8489         return 0;
8490 }
8491
8492 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8493 {
8494         fprintf(stderr, "Invalid key type(");
8495         print_key_type(stderr, 0, key_type);
8496         fprintf(stderr, ") found in root(");
8497         print_objectid(stderr, rootid, 0);
8498         fprintf(stderr, ")\n");
8499 }
8500
8501 /*
8502  * Check if the key is valid with its extent buffer.
8503  *
8504  * This is a early check in case invalid key exists in a extent buffer
8505  * This is not comprehensive yet, but should prevent wrong key/item passed
8506  * further
8507  */
8508 static int check_type_with_root(u64 rootid, u8 key_type)
8509 {
8510         switch (key_type) {
8511         /* Only valid in chunk tree */
8512         case BTRFS_DEV_ITEM_KEY:
8513         case BTRFS_CHUNK_ITEM_KEY:
8514                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8515                         goto err;
8516                 break;
8517         /* valid in csum and log tree */
8518         case BTRFS_CSUM_TREE_OBJECTID:
8519                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8520                       is_fstree(rootid)))
8521                         goto err;
8522                 break;
8523         case BTRFS_EXTENT_ITEM_KEY:
8524         case BTRFS_METADATA_ITEM_KEY:
8525         case BTRFS_BLOCK_GROUP_ITEM_KEY:
8526                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8527                         goto err;
8528                 break;
8529         case BTRFS_ROOT_ITEM_KEY:
8530                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8531                         goto err;
8532                 break;
8533         case BTRFS_DEV_EXTENT_KEY:
8534                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8535                         goto err;
8536                 break;
8537         }
8538         return 0;
8539 err:
8540         report_mismatch_key_root(key_type, rootid);
8541         return -EINVAL;
8542 }
8543
8544 static int run_next_block(struct btrfs_root *root,
8545                           struct block_info *bits,
8546                           int bits_nr,
8547                           u64 *last,
8548                           struct cache_tree *pending,
8549                           struct cache_tree *seen,
8550                           struct cache_tree *reada,
8551                           struct cache_tree *nodes,
8552                           struct cache_tree *extent_cache,
8553                           struct cache_tree *chunk_cache,
8554                           struct rb_root *dev_cache,
8555                           struct block_group_tree *block_group_cache,
8556                           struct device_extent_tree *dev_extent_cache,
8557                           struct root_item_record *ri)
8558 {
8559         struct btrfs_fs_info *fs_info = root->fs_info;
8560         struct extent_buffer *buf;
8561         struct extent_record *rec = NULL;
8562         u64 bytenr;
8563         u32 size;
8564         u64 parent;
8565         u64 owner;
8566         u64 flags;
8567         u64 ptr;
8568         u64 gen = 0;
8569         int ret = 0;
8570         int i;
8571         int nritems;
8572         struct btrfs_key key;
8573         struct cache_extent *cache;
8574         int reada_bits;
8575
8576         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8577                                     bits_nr, &reada_bits);
8578         if (nritems == 0)
8579                 return 1;
8580
8581         if (!reada_bits) {
8582                 for(i = 0; i < nritems; i++) {
8583                         ret = add_cache_extent(reada, bits[i].start,
8584                                                bits[i].size);
8585                         if (ret == -EEXIST)
8586                                 continue;
8587
8588                         /* fixme, get the parent transid */
8589                         readahead_tree_block(fs_info, bits[i].start, 0);
8590                 }
8591         }
8592         *last = bits[0].start;
8593         bytenr = bits[0].start;
8594         size = bits[0].size;
8595
8596         cache = lookup_cache_extent(pending, bytenr, size);
8597         if (cache) {
8598                 remove_cache_extent(pending, cache);
8599                 free(cache);
8600         }
8601         cache = lookup_cache_extent(reada, bytenr, size);
8602         if (cache) {
8603                 remove_cache_extent(reada, cache);
8604                 free(cache);
8605         }
8606         cache = lookup_cache_extent(nodes, bytenr, size);
8607         if (cache) {
8608                 remove_cache_extent(nodes, cache);
8609                 free(cache);
8610         }
8611         cache = lookup_cache_extent(extent_cache, bytenr, size);
8612         if (cache) {
8613                 rec = container_of(cache, struct extent_record, cache);
8614                 gen = rec->parent_generation;
8615         }
8616
8617         /* fixme, get the real parent transid */
8618         buf = read_tree_block(root->fs_info, bytenr, gen);
8619         if (!extent_buffer_uptodate(buf)) {
8620                 record_bad_block_io(root->fs_info,
8621                                     extent_cache, bytenr, size);
8622                 goto out;
8623         }
8624
8625         nritems = btrfs_header_nritems(buf);
8626
8627         flags = 0;
8628         if (!init_extent_tree) {
8629                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8630                                        btrfs_header_level(buf), 1, NULL,
8631                                        &flags);
8632                 if (ret < 0) {
8633                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8634                         if (ret < 0) {
8635                                 fprintf(stderr, "Couldn't calc extent flags\n");
8636                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8637                         }
8638                 }
8639         } else {
8640                 flags = 0;
8641                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8642                 if (ret < 0) {
8643                         fprintf(stderr, "Couldn't calc extent flags\n");
8644                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8645                 }
8646         }
8647
8648         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8649                 if (ri != NULL &&
8650                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8651                     ri->objectid == btrfs_header_owner(buf)) {
8652                         /*
8653                          * Ok we got to this block from it's original owner and
8654                          * we have FULL_BACKREF set.  Relocation can leave
8655                          * converted blocks over so this is altogether possible,
8656                          * however it's not possible if the generation > the
8657                          * last snapshot, so check for this case.
8658                          */
8659                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8660                             btrfs_header_generation(buf) > ri->last_snapshot) {
8661                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8662                                 rec->bad_full_backref = 1;
8663                         }
8664                 }
8665         } else {
8666                 if (ri != NULL &&
8667                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8668                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8669                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8670                         rec->bad_full_backref = 1;
8671                 }
8672         }
8673
8674         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8675                 rec->flag_block_full_backref = 1;
8676                 parent = bytenr;
8677                 owner = 0;
8678         } else {
8679                 rec->flag_block_full_backref = 0;
8680                 parent = 0;
8681                 owner = btrfs_header_owner(buf);
8682         }
8683
8684         ret = check_block(root, extent_cache, buf, flags);
8685         if (ret)
8686                 goto out;
8687
8688         if (btrfs_is_leaf(buf)) {
8689                 btree_space_waste += btrfs_leaf_free_space(root, buf);
8690                 for (i = 0; i < nritems; i++) {
8691                         struct btrfs_file_extent_item *fi;
8692                         btrfs_item_key_to_cpu(buf, &key, i);
8693                         /*
8694                          * Check key type against the leaf owner.
8695                          * Could filter quite a lot of early error if
8696                          * owner is correct
8697                          */
8698                         if (check_type_with_root(btrfs_header_owner(buf),
8699                                                  key.type)) {
8700                                 fprintf(stderr, "ignoring invalid key\n");
8701                                 continue;
8702                         }
8703                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8704                                 process_extent_item(root, extent_cache, buf,
8705                                                     i);
8706                                 continue;
8707                         }
8708                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8709                                 process_extent_item(root, extent_cache, buf,
8710                                                     i);
8711                                 continue;
8712                         }
8713                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8714                                 total_csum_bytes +=
8715                                         btrfs_item_size_nr(buf, i);
8716                                 continue;
8717                         }
8718                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8719                                 process_chunk_item(chunk_cache, &key, buf, i);
8720                                 continue;
8721                         }
8722                         if (key.type == BTRFS_DEV_ITEM_KEY) {
8723                                 process_device_item(dev_cache, &key, buf, i);
8724                                 continue;
8725                         }
8726                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8727                                 process_block_group_item(block_group_cache,
8728                                         &key, buf, i);
8729                                 continue;
8730                         }
8731                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
8732                                 process_device_extent_item(dev_extent_cache,
8733                                         &key, buf, i);
8734                                 continue;
8735
8736                         }
8737                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8738 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8739                                 process_extent_ref_v0(extent_cache, buf, i);
8740 #else
8741                                 BUG();
8742 #endif
8743                                 continue;
8744                         }
8745
8746                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8747                                 ret = add_tree_backref(extent_cache,
8748                                                 key.objectid, 0, key.offset, 0);
8749                                 if (ret < 0)
8750                                         error(
8751                                 "add_tree_backref failed (leaf tree block): %s",
8752                                               strerror(-ret));
8753                                 continue;
8754                         }
8755                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8756                                 ret = add_tree_backref(extent_cache,
8757                                                 key.objectid, key.offset, 0, 0);
8758                                 if (ret < 0)
8759                                         error(
8760                                 "add_tree_backref failed (leaf shared block): %s",
8761                                               strerror(-ret));
8762                                 continue;
8763                         }
8764                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8765                                 struct btrfs_extent_data_ref *ref;
8766                                 ref = btrfs_item_ptr(buf, i,
8767                                                 struct btrfs_extent_data_ref);
8768                                 add_data_backref(extent_cache,
8769                                         key.objectid, 0,
8770                                         btrfs_extent_data_ref_root(buf, ref),
8771                                         btrfs_extent_data_ref_objectid(buf,
8772                                                                        ref),
8773                                         btrfs_extent_data_ref_offset(buf, ref),
8774                                         btrfs_extent_data_ref_count(buf, ref),
8775                                         0, root->fs_info->sectorsize);
8776                                 continue;
8777                         }
8778                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8779                                 struct btrfs_shared_data_ref *ref;
8780                                 ref = btrfs_item_ptr(buf, i,
8781                                                 struct btrfs_shared_data_ref);
8782                                 add_data_backref(extent_cache,
8783                                         key.objectid, key.offset, 0, 0, 0,
8784                                         btrfs_shared_data_ref_count(buf, ref),
8785                                         0, root->fs_info->sectorsize);
8786                                 continue;
8787                         }
8788                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8789                                 struct bad_item *bad;
8790
8791                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8792                                         continue;
8793                                 if (!owner)
8794                                         continue;
8795                                 bad = malloc(sizeof(struct bad_item));
8796                                 if (!bad)
8797                                         continue;
8798                                 INIT_LIST_HEAD(&bad->list);
8799                                 memcpy(&bad->key, &key,
8800                                        sizeof(struct btrfs_key));
8801                                 bad->root_id = owner;
8802                                 list_add_tail(&bad->list, &delete_items);
8803                                 continue;
8804                         }
8805                         if (key.type != BTRFS_EXTENT_DATA_KEY)
8806                                 continue;
8807                         fi = btrfs_item_ptr(buf, i,
8808                                             struct btrfs_file_extent_item);
8809                         if (btrfs_file_extent_type(buf, fi) ==
8810                             BTRFS_FILE_EXTENT_INLINE)
8811                                 continue;
8812                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8813                                 continue;
8814
8815                         data_bytes_allocated +=
8816                                 btrfs_file_extent_disk_num_bytes(buf, fi);
8817                         if (data_bytes_allocated < root->fs_info->sectorsize) {
8818                                 abort();
8819                         }
8820                         data_bytes_referenced +=
8821                                 btrfs_file_extent_num_bytes(buf, fi);
8822                         add_data_backref(extent_cache,
8823                                 btrfs_file_extent_disk_bytenr(buf, fi),
8824                                 parent, owner, key.objectid, key.offset -
8825                                 btrfs_file_extent_offset(buf, fi), 1, 1,
8826                                 btrfs_file_extent_disk_num_bytes(buf, fi));
8827                 }
8828         } else {
8829                 int level;
8830                 struct btrfs_key first_key;
8831
8832                 first_key.objectid = 0;
8833
8834                 if (nritems > 0)
8835                         btrfs_item_key_to_cpu(buf, &first_key, 0);
8836                 level = btrfs_header_level(buf);
8837                 for (i = 0; i < nritems; i++) {
8838                         struct extent_record tmpl;
8839
8840                         ptr = btrfs_node_blockptr(buf, i);
8841                         size = root->fs_info->nodesize;
8842                         btrfs_node_key_to_cpu(buf, &key, i);
8843                         if (ri != NULL) {
8844                                 if ((level == ri->drop_level)
8845                                     && is_dropped_key(&key, &ri->drop_key)) {
8846                                         continue;
8847                                 }
8848                         }
8849
8850                         memset(&tmpl, 0, sizeof(tmpl));
8851                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8852                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8853                         tmpl.start = ptr;
8854                         tmpl.nr = size;
8855                         tmpl.refs = 1;
8856                         tmpl.metadata = 1;
8857                         tmpl.max_size = size;
8858                         ret = add_extent_rec(extent_cache, &tmpl);
8859                         if (ret < 0)
8860                                 goto out;
8861
8862                         ret = add_tree_backref(extent_cache, ptr, parent,
8863                                         owner, 1);
8864                         if (ret < 0) {
8865                                 error(
8866                                 "add_tree_backref failed (non-leaf block): %s",
8867                                       strerror(-ret));
8868                                 continue;
8869                         }
8870
8871                         if (level > 1) {
8872                                 add_pending(nodes, seen, ptr, size);
8873                         } else {
8874                                 add_pending(pending, seen, ptr, size);
8875                         }
8876                 }
8877                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
8878                                       nritems) * sizeof(struct btrfs_key_ptr);
8879         }
8880         total_btree_bytes += buf->len;
8881         if (fs_root_objectid(btrfs_header_owner(buf)))
8882                 total_fs_tree_bytes += buf->len;
8883         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8884                 total_extent_tree_bytes += buf->len;
8885 out:
8886         free_extent_buffer(buf);
8887         return ret;
8888 }
8889
8890 static int add_root_to_pending(struct extent_buffer *buf,
8891                                struct cache_tree *extent_cache,
8892                                struct cache_tree *pending,
8893                                struct cache_tree *seen,
8894                                struct cache_tree *nodes,
8895                                u64 objectid)
8896 {
8897         struct extent_record tmpl;
8898         int ret;
8899
8900         if (btrfs_header_level(buf) > 0)
8901                 add_pending(nodes, seen, buf->start, buf->len);
8902         else
8903                 add_pending(pending, seen, buf->start, buf->len);
8904
8905         memset(&tmpl, 0, sizeof(tmpl));
8906         tmpl.start = buf->start;
8907         tmpl.nr = buf->len;
8908         tmpl.is_root = 1;
8909         tmpl.refs = 1;
8910         tmpl.metadata = 1;
8911         tmpl.max_size = buf->len;
8912         add_extent_rec(extent_cache, &tmpl);
8913
8914         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8915             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8916                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8917                                 0, 1);
8918         else
8919                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8920                                 1);
8921         return ret;
8922 }
8923
8924 /* as we fix the tree, we might be deleting blocks that
8925  * we're tracking for repair.  This hook makes sure we
8926  * remove any backrefs for blocks as we are fixing them.
8927  */
8928 static int free_extent_hook(struct btrfs_trans_handle *trans,
8929                             struct btrfs_root *root,
8930                             u64 bytenr, u64 num_bytes, u64 parent,
8931                             u64 root_objectid, u64 owner, u64 offset,
8932                             int refs_to_drop)
8933 {
8934         struct extent_record *rec;
8935         struct cache_extent *cache;
8936         int is_data;
8937         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8938
8939         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8940         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8941         if (!cache)
8942                 return 0;
8943
8944         rec = container_of(cache, struct extent_record, cache);
8945         if (is_data) {
8946                 struct data_backref *back;
8947                 back = find_data_backref(rec, parent, root_objectid, owner,
8948                                          offset, 1, bytenr, num_bytes);
8949                 if (!back)
8950                         goto out;
8951                 if (back->node.found_ref) {
8952                         back->found_ref -= refs_to_drop;
8953                         if (rec->refs)
8954                                 rec->refs -= refs_to_drop;
8955                 }
8956                 if (back->node.found_extent_tree) {
8957                         back->num_refs -= refs_to_drop;
8958                         if (rec->extent_item_refs)
8959                                 rec->extent_item_refs -= refs_to_drop;
8960                 }
8961                 if (back->found_ref == 0)
8962                         back->node.found_ref = 0;
8963                 if (back->num_refs == 0)
8964                         back->node.found_extent_tree = 0;
8965
8966                 if (!back->node.found_extent_tree && back->node.found_ref) {
8967                         rb_erase(&back->node.node, &rec->backref_tree);
8968                         free(back);
8969                 }
8970         } else {
8971                 struct tree_backref *back;
8972                 back = find_tree_backref(rec, parent, root_objectid);
8973                 if (!back)
8974                         goto out;
8975                 if (back->node.found_ref) {
8976                         if (rec->refs)
8977                                 rec->refs--;
8978                         back->node.found_ref = 0;
8979                 }
8980                 if (back->node.found_extent_tree) {
8981                         if (rec->extent_item_refs)
8982                                 rec->extent_item_refs--;
8983                         back->node.found_extent_tree = 0;
8984                 }
8985                 if (!back->node.found_extent_tree && back->node.found_ref) {
8986                         rb_erase(&back->node.node, &rec->backref_tree);
8987                         free(back);
8988                 }
8989         }
8990         maybe_free_extent_rec(extent_cache, rec);
8991 out:
8992         return 0;
8993 }
8994
8995 static int delete_extent_records(struct btrfs_trans_handle *trans,
8996                                  struct btrfs_root *root,
8997                                  struct btrfs_path *path,
8998                                  u64 bytenr)
8999 {
9000         struct btrfs_key key;
9001         struct btrfs_key found_key;
9002         struct extent_buffer *leaf;
9003         int ret;
9004         int slot;
9005
9006
9007         key.objectid = bytenr;
9008         key.type = (u8)-1;
9009         key.offset = (u64)-1;
9010
9011         while(1) {
9012                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9013                                         &key, path, 0, 1);
9014                 if (ret < 0)
9015                         break;
9016
9017                 if (ret > 0) {
9018                         ret = 0;
9019                         if (path->slots[0] == 0)
9020                                 break;
9021                         path->slots[0]--;
9022                 }
9023                 ret = 0;
9024
9025                 leaf = path->nodes[0];
9026                 slot = path->slots[0];
9027
9028                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9029                 if (found_key.objectid != bytenr)
9030                         break;
9031
9032                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9033                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
9034                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9035                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9036                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9037                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9038                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9039                         btrfs_release_path(path);
9040                         if (found_key.type == 0) {
9041                                 if (found_key.offset == 0)
9042                                         break;
9043                                 key.offset = found_key.offset - 1;
9044                                 key.type = found_key.type;
9045                         }
9046                         key.type = found_key.type - 1;
9047                         key.offset = (u64)-1;
9048                         continue;
9049                 }
9050
9051                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9052                         found_key.objectid, found_key.type, found_key.offset);
9053
9054                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9055                 if (ret)
9056                         break;
9057                 btrfs_release_path(path);
9058
9059                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9060                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
9061                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9062                                 found_key.offset : root->fs_info->nodesize;
9063
9064                         ret = btrfs_update_block_group(root, bytenr,
9065                                                        bytes, 0, 0);
9066                         if (ret)
9067                                 break;
9068                 }
9069         }
9070
9071         btrfs_release_path(path);
9072         return ret;
9073 }
9074
9075 /*
9076  * for a single backref, this will allocate a new extent
9077  * and add the backref to it.
9078  */
9079 static int record_extent(struct btrfs_trans_handle *trans,
9080                          struct btrfs_fs_info *info,
9081                          struct btrfs_path *path,
9082                          struct extent_record *rec,
9083                          struct extent_backref *back,
9084                          int allocated, u64 flags)
9085 {
9086         int ret = 0;
9087         struct btrfs_root *extent_root = info->extent_root;
9088         struct extent_buffer *leaf;
9089         struct btrfs_key ins_key;
9090         struct btrfs_extent_item *ei;
9091         struct data_backref *dback;
9092         struct btrfs_tree_block_info *bi;
9093
9094         if (!back->is_data)
9095                 rec->max_size = max_t(u64, rec->max_size,
9096                                     info->nodesize);
9097
9098         if (!allocated) {
9099                 u32 item_size = sizeof(*ei);
9100
9101                 if (!back->is_data)
9102                         item_size += sizeof(*bi);
9103
9104                 ins_key.objectid = rec->start;
9105                 ins_key.offset = rec->max_size;
9106                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9107
9108                 ret = btrfs_insert_empty_item(trans, extent_root, path,
9109                                         &ins_key, item_size);
9110                 if (ret)
9111                         goto fail;
9112
9113                 leaf = path->nodes[0];
9114                 ei = btrfs_item_ptr(leaf, path->slots[0],
9115                                     struct btrfs_extent_item);
9116
9117                 btrfs_set_extent_refs(leaf, ei, 0);
9118                 btrfs_set_extent_generation(leaf, ei, rec->generation);
9119
9120                 if (back->is_data) {
9121                         btrfs_set_extent_flags(leaf, ei,
9122                                                BTRFS_EXTENT_FLAG_DATA);
9123                 } else {
9124                         struct btrfs_disk_key copy_key;;
9125
9126                         bi = (struct btrfs_tree_block_info *)(ei + 1);
9127                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
9128                                              sizeof(*bi));
9129
9130                         btrfs_set_disk_key_objectid(&copy_key,
9131                                                     rec->info_objectid);
9132                         btrfs_set_disk_key_type(&copy_key, 0);
9133                         btrfs_set_disk_key_offset(&copy_key, 0);
9134
9135                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9136                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
9137
9138                         btrfs_set_extent_flags(leaf, ei,
9139                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9140                 }
9141
9142                 btrfs_mark_buffer_dirty(leaf);
9143                 ret = btrfs_update_block_group(extent_root, rec->start,
9144                                                rec->max_size, 1, 0);
9145                 if (ret)
9146                         goto fail;
9147                 btrfs_release_path(path);
9148         }
9149
9150         if (back->is_data) {
9151                 u64 parent;
9152                 int i;
9153
9154                 dback = to_data_backref(back);
9155                 if (back->full_backref)
9156                         parent = dback->parent;
9157                 else
9158                         parent = 0;
9159
9160                 for (i = 0; i < dback->found_ref; i++) {
9161                         /* if parent != 0, we're doing a full backref
9162                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9163                          * just makes the backref allocator create a data
9164                          * backref
9165                          */
9166                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
9167                                                    rec->start, rec->max_size,
9168                                                    parent,
9169                                                    dback->root,
9170                                                    parent ?
9171                                                    BTRFS_FIRST_FREE_OBJECTID :
9172                                                    dback->owner,
9173                                                    dback->offset);
9174                         if (ret)
9175                                 break;
9176                 }
9177                 fprintf(stderr, "adding new data backref"
9178                                 " on %llu %s %llu owner %llu"
9179                                 " offset %llu found %d\n",
9180                                 (unsigned long long)rec->start,
9181                                 back->full_backref ?
9182                                 "parent" : "root",
9183                                 back->full_backref ?
9184                                 (unsigned long long)parent :
9185                                 (unsigned long long)dback->root,
9186                                 (unsigned long long)dback->owner,
9187                                 (unsigned long long)dback->offset,
9188                                 dback->found_ref);
9189         } else {
9190                 u64 parent;
9191                 struct tree_backref *tback;
9192
9193                 tback = to_tree_backref(back);
9194                 if (back->full_backref)
9195                         parent = tback->parent;
9196                 else
9197                         parent = 0;
9198
9199                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9200                                            rec->start, rec->max_size,
9201                                            parent, tback->root, 0, 0);
9202                 fprintf(stderr, "adding new tree backref on "
9203                         "start %llu len %llu parent %llu root %llu\n",
9204                         rec->start, rec->max_size, parent, tback->root);
9205         }
9206 fail:
9207         btrfs_release_path(path);
9208         return ret;
9209 }
9210
9211 static struct extent_entry *find_entry(struct list_head *entries,
9212                                        u64 bytenr, u64 bytes)
9213 {
9214         struct extent_entry *entry = NULL;
9215
9216         list_for_each_entry(entry, entries, list) {
9217                 if (entry->bytenr == bytenr && entry->bytes == bytes)
9218                         return entry;
9219         }
9220
9221         return NULL;
9222 }
9223
9224 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9225 {
9226         struct extent_entry *entry, *best = NULL, *prev = NULL;
9227
9228         list_for_each_entry(entry, entries, list) {
9229                 /*
9230                  * If there are as many broken entries as entries then we know
9231                  * not to trust this particular entry.
9232                  */
9233                 if (entry->broken == entry->count)
9234                         continue;
9235
9236                 /*
9237                  * Special case, when there are only two entries and 'best' is
9238                  * the first one
9239                  */
9240                 if (!prev) {
9241                         best = entry;
9242                         prev = entry;
9243                         continue;
9244                 }
9245
9246                 /*
9247                  * If our current entry == best then we can't be sure our best
9248                  * is really the best, so we need to keep searching.
9249                  */
9250                 if (best && best->count == entry->count) {
9251                         prev = entry;
9252                         best = NULL;
9253                         continue;
9254                 }
9255
9256                 /* Prev == entry, not good enough, have to keep searching */
9257                 if (!prev->broken && prev->count == entry->count)
9258                         continue;
9259
9260                 if (!best)
9261                         best = (prev->count > entry->count) ? prev : entry;
9262                 else if (best->count < entry->count)
9263                         best = entry;
9264                 prev = entry;
9265         }
9266
9267         return best;
9268 }
9269
9270 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9271                       struct data_backref *dback, struct extent_entry *entry)
9272 {
9273         struct btrfs_trans_handle *trans;
9274         struct btrfs_root *root;
9275         struct btrfs_file_extent_item *fi;
9276         struct extent_buffer *leaf;
9277         struct btrfs_key key;
9278         u64 bytenr, bytes;
9279         int ret, err;
9280
9281         key.objectid = dback->root;
9282         key.type = BTRFS_ROOT_ITEM_KEY;
9283         key.offset = (u64)-1;
9284         root = btrfs_read_fs_root(info, &key);
9285         if (IS_ERR(root)) {
9286                 fprintf(stderr, "Couldn't find root for our ref\n");
9287                 return -EINVAL;
9288         }
9289
9290         /*
9291          * The backref points to the original offset of the extent if it was
9292          * split, so we need to search down to the offset we have and then walk
9293          * forward until we find the backref we're looking for.
9294          */
9295         key.objectid = dback->owner;
9296         key.type = BTRFS_EXTENT_DATA_KEY;
9297         key.offset = dback->offset;
9298         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9299         if (ret < 0) {
9300                 fprintf(stderr, "Error looking up ref %d\n", ret);
9301                 return ret;
9302         }
9303
9304         while (1) {
9305                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9306                         ret = btrfs_next_leaf(root, path);
9307                         if (ret) {
9308                                 fprintf(stderr, "Couldn't find our ref, next\n");
9309                                 return -EINVAL;
9310                         }
9311                 }
9312                 leaf = path->nodes[0];
9313                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9314                 if (key.objectid != dback->owner ||
9315                     key.type != BTRFS_EXTENT_DATA_KEY) {
9316                         fprintf(stderr, "Couldn't find our ref, search\n");
9317                         return -EINVAL;
9318                 }
9319                 fi = btrfs_item_ptr(leaf, path->slots[0],
9320                                     struct btrfs_file_extent_item);
9321                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9322                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9323
9324                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9325                         break;
9326                 path->slots[0]++;
9327         }
9328
9329         btrfs_release_path(path);
9330
9331         trans = btrfs_start_transaction(root, 1);
9332         if (IS_ERR(trans))
9333                 return PTR_ERR(trans);
9334
9335         /*
9336          * Ok we have the key of the file extent we want to fix, now we can cow
9337          * down to the thing and fix it.
9338          */
9339         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9340         if (ret < 0) {
9341                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9342                         key.objectid, key.type, key.offset, ret);
9343                 goto out;
9344         }
9345         if (ret > 0) {
9346                 fprintf(stderr, "Well that's odd, we just found this key "
9347                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9348                         key.offset);
9349                 ret = -EINVAL;
9350                 goto out;
9351         }
9352         leaf = path->nodes[0];
9353         fi = btrfs_item_ptr(leaf, path->slots[0],
9354                             struct btrfs_file_extent_item);
9355
9356         if (btrfs_file_extent_compression(leaf, fi) &&
9357             dback->disk_bytenr != entry->bytenr) {
9358                 fprintf(stderr, "Ref doesn't match the record start and is "
9359                         "compressed, please take a btrfs-image of this file "
9360                         "system and send it to a btrfs developer so they can "
9361                         "complete this functionality for bytenr %Lu\n",
9362                         dback->disk_bytenr);
9363                 ret = -EINVAL;
9364                 goto out;
9365         }
9366
9367         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9368                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9369         } else if (dback->disk_bytenr > entry->bytenr) {
9370                 u64 off_diff, offset;
9371
9372                 off_diff = dback->disk_bytenr - entry->bytenr;
9373                 offset = btrfs_file_extent_offset(leaf, fi);
9374                 if (dback->disk_bytenr + offset +
9375                     btrfs_file_extent_num_bytes(leaf, fi) >
9376                     entry->bytenr + entry->bytes) {
9377                         fprintf(stderr, "Ref is past the entry end, please "
9378                                 "take a btrfs-image of this file system and "
9379                                 "send it to a btrfs developer, ref %Lu\n",
9380                                 dback->disk_bytenr);
9381                         ret = -EINVAL;
9382                         goto out;
9383                 }
9384                 offset += off_diff;
9385                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9386                 btrfs_set_file_extent_offset(leaf, fi, offset);
9387         } else if (dback->disk_bytenr < entry->bytenr) {
9388                 u64 offset;
9389
9390                 offset = btrfs_file_extent_offset(leaf, fi);
9391                 if (dback->disk_bytenr + offset < entry->bytenr) {
9392                         fprintf(stderr, "Ref is before the entry start, please"
9393                                 " take a btrfs-image of this file system and "
9394                                 "send it to a btrfs developer, ref %Lu\n",
9395                                 dback->disk_bytenr);
9396                         ret = -EINVAL;
9397                         goto out;
9398                 }
9399
9400                 offset += dback->disk_bytenr;
9401                 offset -= entry->bytenr;
9402                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9403                 btrfs_set_file_extent_offset(leaf, fi, offset);
9404         }
9405
9406         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9407
9408         /*
9409          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9410          * only do this if we aren't using compression, otherwise it's a
9411          * trickier case.
9412          */
9413         if (!btrfs_file_extent_compression(leaf, fi))
9414                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9415         else
9416                 printf("ram bytes may be wrong?\n");
9417         btrfs_mark_buffer_dirty(leaf);
9418 out:
9419         err = btrfs_commit_transaction(trans, root);
9420         btrfs_release_path(path);
9421         return ret ? ret : err;
9422 }
9423
9424 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9425                            struct extent_record *rec)
9426 {
9427         struct extent_backref *back, *tmp;
9428         struct data_backref *dback;
9429         struct extent_entry *entry, *best = NULL;
9430         LIST_HEAD(entries);
9431         int nr_entries = 0;
9432         int broken_entries = 0;
9433         int ret = 0;
9434         short mismatch = 0;
9435
9436         /*
9437          * Metadata is easy and the backrefs should always agree on bytenr and
9438          * size, if not we've got bigger issues.
9439          */
9440         if (rec->metadata)
9441                 return 0;
9442
9443         rbtree_postorder_for_each_entry_safe(back, tmp,
9444                                              &rec->backref_tree, node) {
9445                 if (back->full_backref || !back->is_data)
9446                         continue;
9447
9448                 dback = to_data_backref(back);
9449
9450                 /*
9451                  * We only pay attention to backrefs that we found a real
9452                  * backref for.
9453                  */
9454                 if (dback->found_ref == 0)
9455                         continue;
9456
9457                 /*
9458                  * For now we only catch when the bytes don't match, not the
9459                  * bytenr.  We can easily do this at the same time, but I want
9460                  * to have a fs image to test on before we just add repair
9461                  * functionality willy-nilly so we know we won't screw up the
9462                  * repair.
9463                  */
9464
9465                 entry = find_entry(&entries, dback->disk_bytenr,
9466                                    dback->bytes);
9467                 if (!entry) {
9468                         entry = malloc(sizeof(struct extent_entry));
9469                         if (!entry) {
9470                                 ret = -ENOMEM;
9471                                 goto out;
9472                         }
9473                         memset(entry, 0, sizeof(*entry));
9474                         entry->bytenr = dback->disk_bytenr;
9475                         entry->bytes = dback->bytes;
9476                         list_add_tail(&entry->list, &entries);
9477                         nr_entries++;
9478                 }
9479
9480                 /*
9481                  * If we only have on entry we may think the entries agree when
9482                  * in reality they don't so we have to do some extra checking.
9483                  */
9484                 if (dback->disk_bytenr != rec->start ||
9485                     dback->bytes != rec->nr || back->broken)
9486                         mismatch = 1;
9487
9488                 if (back->broken) {
9489                         entry->broken++;
9490                         broken_entries++;
9491                 }
9492
9493                 entry->count++;
9494         }
9495
9496         /* Yay all the backrefs agree, carry on good sir */
9497         if (nr_entries <= 1 && !mismatch)
9498                 goto out;
9499
9500         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9501                 "%Lu\n", rec->start);
9502
9503         /*
9504          * First we want to see if the backrefs can agree amongst themselves who
9505          * is right, so figure out which one of the entries has the highest
9506          * count.
9507          */
9508         best = find_most_right_entry(&entries);
9509
9510         /*
9511          * Ok so we may have an even split between what the backrefs think, so
9512          * this is where we use the extent ref to see what it thinks.
9513          */
9514         if (!best) {
9515                 entry = find_entry(&entries, rec->start, rec->nr);
9516                 if (!entry && (!broken_entries || !rec->found_rec)) {
9517                         fprintf(stderr, "Backrefs don't agree with each other "
9518                                 "and extent record doesn't agree with anybody,"
9519                                 " so we can't fix bytenr %Lu bytes %Lu\n",
9520                                 rec->start, rec->nr);
9521                         ret = -EINVAL;
9522                         goto out;
9523                 } else if (!entry) {
9524                         /*
9525                          * Ok our backrefs were broken, we'll assume this is the
9526                          * correct value and add an entry for this range.
9527                          */
9528                         entry = malloc(sizeof(struct extent_entry));
9529                         if (!entry) {
9530                                 ret = -ENOMEM;
9531                                 goto out;
9532                         }
9533                         memset(entry, 0, sizeof(*entry));
9534                         entry->bytenr = rec->start;
9535                         entry->bytes = rec->nr;
9536                         list_add_tail(&entry->list, &entries);
9537                         nr_entries++;
9538                 }
9539                 entry->count++;
9540                 best = find_most_right_entry(&entries);
9541                 if (!best) {
9542                         fprintf(stderr, "Backrefs and extent record evenly "
9543                                 "split on who is right, this is going to "
9544                                 "require user input to fix bytenr %Lu bytes "
9545                                 "%Lu\n", rec->start, rec->nr);
9546                         ret = -EINVAL;
9547                         goto out;
9548                 }
9549         }
9550
9551         /*
9552          * I don't think this can happen currently as we'll abort() if we catch
9553          * this case higher up, but in case somebody removes that we still can't
9554          * deal with it properly here yet, so just bail out of that's the case.
9555          */
9556         if (best->bytenr != rec->start) {
9557                 fprintf(stderr, "Extent start and backref starts don't match, "
9558                         "please use btrfs-image on this file system and send "
9559                         "it to a btrfs developer so they can make fsck fix "
9560                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
9561                         rec->start, rec->nr);
9562                 ret = -EINVAL;
9563                 goto out;
9564         }
9565
9566         /*
9567          * Ok great we all agreed on an extent record, let's go find the real
9568          * references and fix up the ones that don't match.
9569          */
9570         rbtree_postorder_for_each_entry_safe(back, tmp,
9571                                              &rec->backref_tree, node) {
9572                 if (back->full_backref || !back->is_data)
9573                         continue;
9574
9575                 dback = to_data_backref(back);
9576
9577                 /*
9578                  * Still ignoring backrefs that don't have a real ref attached
9579                  * to them.
9580                  */
9581                 if (dback->found_ref == 0)
9582                         continue;
9583
9584                 if (dback->bytes == best->bytes &&
9585                     dback->disk_bytenr == best->bytenr)
9586                         continue;
9587
9588                 ret = repair_ref(info, path, dback, best);
9589                 if (ret)
9590                         goto out;
9591         }
9592
9593         /*
9594          * Ok we messed with the actual refs, which means we need to drop our
9595          * entire cache and go back and rescan.  I know this is a huge pain and
9596          * adds a lot of extra work, but it's the only way to be safe.  Once all
9597          * the backrefs agree we may not need to do anything to the extent
9598          * record itself.
9599          */
9600         ret = -EAGAIN;
9601 out:
9602         while (!list_empty(&entries)) {
9603                 entry = list_entry(entries.next, struct extent_entry, list);
9604                 list_del_init(&entry->list);
9605                 free(entry);
9606         }
9607         return ret;
9608 }
9609
9610 static int process_duplicates(struct cache_tree *extent_cache,
9611                               struct extent_record *rec)
9612 {
9613         struct extent_record *good, *tmp;
9614         struct cache_extent *cache;
9615         int ret;
9616
9617         /*
9618          * If we found a extent record for this extent then return, or if we
9619          * have more than one duplicate we are likely going to need to delete
9620          * something.
9621          */
9622         if (rec->found_rec || rec->num_duplicates > 1)
9623                 return 0;
9624
9625         /* Shouldn't happen but just in case */
9626         BUG_ON(!rec->num_duplicates);
9627
9628         /*
9629          * So this happens if we end up with a backref that doesn't match the
9630          * actual extent entry.  So either the backref is bad or the extent
9631          * entry is bad.  Either way we want to have the extent_record actually
9632          * reflect what we found in the extent_tree, so we need to take the
9633          * duplicate out and use that as the extent_record since the only way we
9634          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9635          */
9636         remove_cache_extent(extent_cache, &rec->cache);
9637
9638         good = to_extent_record(rec->dups.next);
9639         list_del_init(&good->list);
9640         INIT_LIST_HEAD(&good->backrefs);
9641         INIT_LIST_HEAD(&good->dups);
9642         good->cache.start = good->start;
9643         good->cache.size = good->nr;
9644         good->content_checked = 0;
9645         good->owner_ref_checked = 0;
9646         good->num_duplicates = 0;
9647         good->refs = rec->refs;
9648         list_splice_init(&rec->backrefs, &good->backrefs);
9649         while (1) {
9650                 cache = lookup_cache_extent(extent_cache, good->start,
9651                                             good->nr);
9652                 if (!cache)
9653                         break;
9654                 tmp = container_of(cache, struct extent_record, cache);
9655
9656                 /*
9657                  * If we find another overlapping extent and it's found_rec is
9658                  * set then it's a duplicate and we need to try and delete
9659                  * something.
9660                  */
9661                 if (tmp->found_rec || tmp->num_duplicates > 0) {
9662                         if (list_empty(&good->list))
9663                                 list_add_tail(&good->list,
9664                                               &duplicate_extents);
9665                         good->num_duplicates += tmp->num_duplicates + 1;
9666                         list_splice_init(&tmp->dups, &good->dups);
9667                         list_del_init(&tmp->list);
9668                         list_add_tail(&tmp->list, &good->dups);
9669                         remove_cache_extent(extent_cache, &tmp->cache);
9670                         continue;
9671                 }
9672
9673                 /*
9674                  * Ok we have another non extent item backed extent rec, so lets
9675                  * just add it to this extent and carry on like we did above.
9676                  */
9677                 good->refs += tmp->refs;
9678                 list_splice_init(&tmp->backrefs, &good->backrefs);
9679                 remove_cache_extent(extent_cache, &tmp->cache);
9680                 free(tmp);
9681         }
9682         ret = insert_cache_extent(extent_cache, &good->cache);
9683         BUG_ON(ret);
9684         free(rec);
9685         return good->num_duplicates ? 0 : 1;
9686 }
9687
9688 static int delete_duplicate_records(struct btrfs_root *root,
9689                                     struct extent_record *rec)
9690 {
9691         struct btrfs_trans_handle *trans;
9692         LIST_HEAD(delete_list);
9693         struct btrfs_path path;
9694         struct extent_record *tmp, *good, *n;
9695         int nr_del = 0;
9696         int ret = 0, err;
9697         struct btrfs_key key;
9698
9699         btrfs_init_path(&path);
9700
9701         good = rec;
9702         /* Find the record that covers all of the duplicates. */
9703         list_for_each_entry(tmp, &rec->dups, list) {
9704                 if (good->start < tmp->start)
9705                         continue;
9706                 if (good->nr > tmp->nr)
9707                         continue;
9708
9709                 if (tmp->start + tmp->nr < good->start + good->nr) {
9710                         fprintf(stderr, "Ok we have overlapping extents that "
9711                                 "aren't completely covered by each other, this "
9712                                 "is going to require more careful thought.  "
9713                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9714                                 tmp->start, tmp->nr, good->start, good->nr);
9715                         abort();
9716                 }
9717                 good = tmp;
9718         }
9719
9720         if (good != rec)
9721                 list_add_tail(&rec->list, &delete_list);
9722
9723         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9724                 if (tmp == good)
9725                         continue;
9726                 list_move_tail(&tmp->list, &delete_list);
9727         }
9728
9729         root = root->fs_info->extent_root;
9730         trans = btrfs_start_transaction(root, 1);
9731         if (IS_ERR(trans)) {
9732                 ret = PTR_ERR(trans);
9733                 goto out;
9734         }
9735
9736         list_for_each_entry(tmp, &delete_list, list) {
9737                 if (tmp->found_rec == 0)
9738                         continue;
9739                 key.objectid = tmp->start;
9740                 key.type = BTRFS_EXTENT_ITEM_KEY;
9741                 key.offset = tmp->nr;
9742
9743                 /* Shouldn't happen but just in case */
9744                 if (tmp->metadata) {
9745                         fprintf(stderr, "Well this shouldn't happen, extent "
9746                                 "record overlaps but is metadata? "
9747                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9748                         abort();
9749                 }
9750
9751                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9752                 if (ret) {
9753                         if (ret > 0)
9754                                 ret = -EINVAL;
9755                         break;
9756                 }
9757                 ret = btrfs_del_item(trans, root, &path);
9758                 if (ret)
9759                         break;
9760                 btrfs_release_path(&path);
9761                 nr_del++;
9762         }
9763         err = btrfs_commit_transaction(trans, root);
9764         if (err && !ret)
9765                 ret = err;
9766 out:
9767         while (!list_empty(&delete_list)) {
9768                 tmp = to_extent_record(delete_list.next);
9769                 list_del_init(&tmp->list);
9770                 if (tmp == rec)
9771                         continue;
9772                 free(tmp);
9773         }
9774
9775         while (!list_empty(&rec->dups)) {
9776                 tmp = to_extent_record(rec->dups.next);
9777                 list_del_init(&tmp->list);
9778                 free(tmp);
9779         }
9780
9781         btrfs_release_path(&path);
9782
9783         if (!ret && !nr_del)
9784                 rec->num_duplicates = 0;
9785
9786         return ret ? ret : nr_del;
9787 }
9788
9789 static int find_possible_backrefs(struct btrfs_fs_info *info,
9790                                   struct btrfs_path *path,
9791                                   struct cache_tree *extent_cache,
9792                                   struct extent_record *rec)
9793 {
9794         struct btrfs_root *root;
9795         struct extent_backref *back, *tmp;
9796         struct data_backref *dback;
9797         struct cache_extent *cache;
9798         struct btrfs_file_extent_item *fi;
9799         struct btrfs_key key;
9800         u64 bytenr, bytes;
9801         int ret;
9802
9803         rbtree_postorder_for_each_entry_safe(back, tmp,
9804                                              &rec->backref_tree, node) {
9805                 /* Don't care about full backrefs (poor unloved backrefs) */
9806                 if (back->full_backref || !back->is_data)
9807                         continue;
9808
9809                 dback = to_data_backref(back);
9810
9811                 /* We found this one, we don't need to do a lookup */
9812                 if (dback->found_ref)
9813                         continue;
9814
9815                 key.objectid = dback->root;
9816                 key.type = BTRFS_ROOT_ITEM_KEY;
9817                 key.offset = (u64)-1;
9818
9819                 root = btrfs_read_fs_root(info, &key);
9820
9821                 /* No root, definitely a bad ref, skip */
9822                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9823                         continue;
9824                 /* Other err, exit */
9825                 if (IS_ERR(root))
9826                         return PTR_ERR(root);
9827
9828                 key.objectid = dback->owner;
9829                 key.type = BTRFS_EXTENT_DATA_KEY;
9830                 key.offset = dback->offset;
9831                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9832                 if (ret) {
9833                         btrfs_release_path(path);
9834                         if (ret < 0)
9835                                 return ret;
9836                         /* Didn't find it, we can carry on */
9837                         ret = 0;
9838                         continue;
9839                 }
9840
9841                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9842                                     struct btrfs_file_extent_item);
9843                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9844                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9845                 btrfs_release_path(path);
9846                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9847                 if (cache) {
9848                         struct extent_record *tmp;
9849                         tmp = container_of(cache, struct extent_record, cache);
9850
9851                         /*
9852                          * If we found an extent record for the bytenr for this
9853                          * particular backref then we can't add it to our
9854                          * current extent record.  We only want to add backrefs
9855                          * that don't have a corresponding extent item in the
9856                          * extent tree since they likely belong to this record
9857                          * and we need to fix it if it doesn't match bytenrs.
9858                          */
9859                         if  (tmp->found_rec)
9860                                 continue;
9861                 }
9862
9863                 dback->found_ref += 1;
9864                 dback->disk_bytenr = bytenr;
9865                 dback->bytes = bytes;
9866
9867                 /*
9868                  * Set this so the verify backref code knows not to trust the
9869                  * values in this backref.
9870                  */
9871                 back->broken = 1;
9872         }
9873
9874         return 0;
9875 }
9876
9877 /*
9878  * Record orphan data ref into corresponding root.
9879  *
9880  * Return 0 if the extent item contains data ref and recorded.
9881  * Return 1 if the extent item contains no useful data ref
9882  *   On that case, it may contains only shared_dataref or metadata backref
9883  *   or the file extent exists(this should be handled by the extent bytenr
9884  *   recovery routine)
9885  * Return <0 if something goes wrong.
9886  */
9887 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9888                                       struct extent_record *rec)
9889 {
9890         struct btrfs_key key;
9891         struct btrfs_root *dest_root;
9892         struct extent_backref *back, *tmp;
9893         struct data_backref *dback;
9894         struct orphan_data_extent *orphan;
9895         struct btrfs_path path;
9896         int recorded_data_ref = 0;
9897         int ret = 0;
9898
9899         if (rec->metadata)
9900                 return 1;
9901         btrfs_init_path(&path);
9902         rbtree_postorder_for_each_entry_safe(back, tmp,
9903                                              &rec->backref_tree, node) {
9904                 if (back->full_backref || !back->is_data ||
9905                     !back->found_extent_tree)
9906                         continue;
9907                 dback = to_data_backref(back);
9908                 if (dback->found_ref)
9909                         continue;
9910                 key.objectid = dback->root;
9911                 key.type = BTRFS_ROOT_ITEM_KEY;
9912                 key.offset = (u64)-1;
9913
9914                 dest_root = btrfs_read_fs_root(fs_info, &key);
9915
9916                 /* For non-exist root we just skip it */
9917                 if (IS_ERR(dest_root) || !dest_root)
9918                         continue;
9919
9920                 key.objectid = dback->owner;
9921                 key.type = BTRFS_EXTENT_DATA_KEY;
9922                 key.offset = dback->offset;
9923
9924                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9925                 btrfs_release_path(&path);
9926                 /*
9927                  * For ret < 0, it's OK since the fs-tree may be corrupted,
9928                  * we need to record it for inode/file extent rebuild.
9929                  * For ret > 0, we record it only for file extent rebuild.
9930                  * For ret == 0, the file extent exists but only bytenr
9931                  * mismatch, let the original bytenr fix routine to handle,
9932                  * don't record it.
9933                  */
9934                 if (ret == 0)
9935                         continue;
9936                 ret = 0;
9937                 orphan = malloc(sizeof(*orphan));
9938                 if (!orphan) {
9939                         ret = -ENOMEM;
9940                         goto out;
9941                 }
9942                 INIT_LIST_HEAD(&orphan->list);
9943                 orphan->root = dback->root;
9944                 orphan->objectid = dback->owner;
9945                 orphan->offset = dback->offset;
9946                 orphan->disk_bytenr = rec->cache.start;
9947                 orphan->disk_len = rec->cache.size;
9948                 list_add(&dest_root->orphan_data_extents, &orphan->list);
9949                 recorded_data_ref = 1;
9950         }
9951 out:
9952         btrfs_release_path(&path);
9953         if (!ret)
9954                 return !recorded_data_ref;
9955         else
9956                 return ret;
9957 }
9958
9959 /*
9960  * when an incorrect extent item is found, this will delete
9961  * all of the existing entries for it and recreate them
9962  * based on what the tree scan found.
9963  */
9964 static int fixup_extent_refs(struct btrfs_fs_info *info,
9965                              struct cache_tree *extent_cache,
9966                              struct extent_record *rec)
9967 {
9968         struct btrfs_trans_handle *trans = NULL;
9969         int ret;
9970         struct btrfs_path path;
9971         struct cache_extent *cache;
9972         struct extent_backref *back, *tmp;
9973         int allocated = 0;
9974         u64 flags = 0;
9975
9976         if (rec->flag_block_full_backref)
9977                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9978
9979         btrfs_init_path(&path);
9980         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9981                 /*
9982                  * Sometimes the backrefs themselves are so broken they don't
9983                  * get attached to any meaningful rec, so first go back and
9984                  * check any of our backrefs that we couldn't find and throw
9985                  * them into the list if we find the backref so that
9986                  * verify_backrefs can figure out what to do.
9987                  */
9988                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9989                 if (ret < 0)
9990                         goto out;
9991         }
9992
9993         /* step one, make sure all of the backrefs agree */
9994         ret = verify_backrefs(info, &path, rec);
9995         if (ret < 0)
9996                 goto out;
9997
9998         trans = btrfs_start_transaction(info->extent_root, 1);
9999         if (IS_ERR(trans)) {
10000                 ret = PTR_ERR(trans);
10001                 goto out;
10002         }
10003
10004         /* step two, delete all the existing records */
10005         ret = delete_extent_records(trans, info->extent_root, &path,
10006                                     rec->start);
10007
10008         if (ret < 0)
10009                 goto out;
10010
10011         /* was this block corrupt?  If so, don't add references to it */
10012         cache = lookup_cache_extent(info->corrupt_blocks,
10013                                     rec->start, rec->max_size);
10014         if (cache) {
10015                 ret = 0;
10016                 goto out;
10017         }
10018
10019         /* step three, recreate all the refs we did find */
10020         rbtree_postorder_for_each_entry_safe(back, tmp,
10021                                              &rec->backref_tree, node) {
10022                 /*
10023                  * if we didn't find any references, don't create a
10024                  * new extent record
10025                  */
10026                 if (!back->found_ref)
10027                         continue;
10028
10029                 rec->bad_full_backref = 0;
10030                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10031                 allocated = 1;
10032
10033                 if (ret)
10034                         goto out;
10035         }
10036 out:
10037         if (trans) {
10038                 int err = btrfs_commit_transaction(trans, info->extent_root);
10039                 if (!ret)
10040                         ret = err;
10041         }
10042
10043         if (!ret)
10044                 fprintf(stderr, "Repaired extent references for %llu\n",
10045                                 (unsigned long long)rec->start);
10046
10047         btrfs_release_path(&path);
10048         return ret;
10049 }
10050
10051 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10052                               struct extent_record *rec)
10053 {
10054         struct btrfs_trans_handle *trans;
10055         struct btrfs_root *root = fs_info->extent_root;
10056         struct btrfs_path path;
10057         struct btrfs_extent_item *ei;
10058         struct btrfs_key key;
10059         u64 flags;
10060         int ret = 0;
10061
10062         key.objectid = rec->start;
10063         if (rec->metadata) {
10064                 key.type = BTRFS_METADATA_ITEM_KEY;
10065                 key.offset = rec->info_level;
10066         } else {
10067                 key.type = BTRFS_EXTENT_ITEM_KEY;
10068                 key.offset = rec->max_size;
10069         }
10070
10071         trans = btrfs_start_transaction(root, 0);
10072         if (IS_ERR(trans))
10073                 return PTR_ERR(trans);
10074
10075         btrfs_init_path(&path);
10076         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10077         if (ret < 0) {
10078                 btrfs_release_path(&path);
10079                 btrfs_commit_transaction(trans, root);
10080                 return ret;
10081         } else if (ret) {
10082                 fprintf(stderr, "Didn't find extent for %llu\n",
10083                         (unsigned long long)rec->start);
10084                 btrfs_release_path(&path);
10085                 btrfs_commit_transaction(trans, root);
10086                 return -ENOENT;
10087         }
10088
10089         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10090                             struct btrfs_extent_item);
10091         flags = btrfs_extent_flags(path.nodes[0], ei);
10092         if (rec->flag_block_full_backref) {
10093                 fprintf(stderr, "setting full backref on %llu\n",
10094                         (unsigned long long)key.objectid);
10095                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10096         } else {
10097                 fprintf(stderr, "clearing full backref on %llu\n",
10098                         (unsigned long long)key.objectid);
10099                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10100         }
10101         btrfs_set_extent_flags(path.nodes[0], ei, flags);
10102         btrfs_mark_buffer_dirty(path.nodes[0]);
10103         btrfs_release_path(&path);
10104         ret = btrfs_commit_transaction(trans, root);
10105         if (!ret)
10106                 fprintf(stderr, "Repaired extent flags for %llu\n",
10107                                 (unsigned long long)rec->start);
10108
10109         return ret;
10110 }
10111
10112 /* right now we only prune from the extent allocation tree */
10113 static int prune_one_block(struct btrfs_trans_handle *trans,
10114                            struct btrfs_fs_info *info,
10115                            struct btrfs_corrupt_block *corrupt)
10116 {
10117         int ret;
10118         struct btrfs_path path;
10119         struct extent_buffer *eb;
10120         u64 found;
10121         int slot;
10122         int nritems;
10123         int level = corrupt->level + 1;
10124
10125         btrfs_init_path(&path);
10126 again:
10127         /* we want to stop at the parent to our busted block */
10128         path.lowest_level = level;
10129
10130         ret = btrfs_search_slot(trans, info->extent_root,
10131                                 &corrupt->key, &path, -1, 1);
10132
10133         if (ret < 0)
10134                 goto out;
10135
10136         eb = path.nodes[level];
10137         if (!eb) {
10138                 ret = -ENOENT;
10139                 goto out;
10140         }
10141
10142         /*
10143          * hopefully the search gave us the block we want to prune,
10144          * lets try that first
10145          */
10146         slot = path.slots[level];
10147         found =  btrfs_node_blockptr(eb, slot);
10148         if (found == corrupt->cache.start)
10149                 goto del_ptr;
10150
10151         nritems = btrfs_header_nritems(eb);
10152
10153         /* the search failed, lets scan this node and hope we find it */
10154         for (slot = 0; slot < nritems; slot++) {
10155                 found =  btrfs_node_blockptr(eb, slot);
10156                 if (found == corrupt->cache.start)
10157                         goto del_ptr;
10158         }
10159         /*
10160          * we couldn't find the bad block.  TODO, search all the nodes for pointers
10161          * to this block
10162          */
10163         if (eb == info->extent_root->node) {
10164                 ret = -ENOENT;
10165                 goto out;
10166         } else {
10167                 level++;
10168                 btrfs_release_path(&path);
10169                 goto again;
10170         }
10171
10172 del_ptr:
10173         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10174         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10175
10176 out:
10177         btrfs_release_path(&path);
10178         return ret;
10179 }
10180
10181 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10182 {
10183         struct btrfs_trans_handle *trans = NULL;
10184         struct cache_extent *cache;
10185         struct btrfs_corrupt_block *corrupt;
10186
10187         while (1) {
10188                 cache = search_cache_extent(info->corrupt_blocks, 0);
10189                 if (!cache)
10190                         break;
10191                 if (!trans) {
10192                         trans = btrfs_start_transaction(info->extent_root, 1);
10193                         if (IS_ERR(trans))
10194                                 return PTR_ERR(trans);
10195                 }
10196                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10197                 prune_one_block(trans, info, corrupt);
10198                 remove_cache_extent(info->corrupt_blocks, cache);
10199         }
10200         if (trans)
10201                 return btrfs_commit_transaction(trans, info->extent_root);
10202         return 0;
10203 }
10204
10205 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10206 {
10207         struct btrfs_block_group_cache *cache;
10208         u64 start, end;
10209         int ret;
10210
10211         while (1) {
10212                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10213                                             &start, &end, EXTENT_DIRTY);
10214                 if (ret)
10215                         break;
10216                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10217         }
10218
10219         start = 0;
10220         while (1) {
10221                 cache = btrfs_lookup_first_block_group(fs_info, start);
10222                 if (!cache)
10223                         break;
10224                 if (cache->cached)
10225                         cache->cached = 0;
10226                 start = cache->key.objectid + cache->key.offset;
10227         }
10228 }
10229
10230 static int check_extent_refs(struct btrfs_root *root,
10231                              struct cache_tree *extent_cache)
10232 {
10233         struct extent_record *rec;
10234         struct cache_extent *cache;
10235         int ret = 0;
10236         int had_dups = 0;
10237         int err = 0;
10238
10239         if (repair) {
10240                 /*
10241                  * if we're doing a repair, we have to make sure
10242                  * we don't allocate from the problem extents.
10243                  * In the worst case, this will be all the
10244                  * extents in the FS
10245                  */
10246                 cache = search_cache_extent(extent_cache, 0);
10247                 while(cache) {
10248                         rec = container_of(cache, struct extent_record, cache);
10249                         set_extent_dirty(root->fs_info->excluded_extents,
10250                                          rec->start,
10251                                          rec->start + rec->max_size - 1);
10252                         cache = next_cache_extent(cache);
10253                 }
10254
10255                 /* pin down all the corrupted blocks too */
10256                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10257                 while(cache) {
10258                         set_extent_dirty(root->fs_info->excluded_extents,
10259                                          cache->start,
10260                                          cache->start + cache->size - 1);
10261                         cache = next_cache_extent(cache);
10262                 }
10263                 prune_corrupt_blocks(root->fs_info);
10264                 reset_cached_block_groups(root->fs_info);
10265         }
10266
10267         reset_cached_block_groups(root->fs_info);
10268
10269         /*
10270          * We need to delete any duplicate entries we find first otherwise we
10271          * could mess up the extent tree when we have backrefs that actually
10272          * belong to a different extent item and not the weird duplicate one.
10273          */
10274         while (repair && !list_empty(&duplicate_extents)) {
10275                 rec = to_extent_record(duplicate_extents.next);
10276                 list_del_init(&rec->list);
10277
10278                 /* Sometimes we can find a backref before we find an actual
10279                  * extent, so we need to process it a little bit to see if there
10280                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10281                  * if this is a backref screwup.  If we need to delete stuff
10282                  * process_duplicates() will return 0, otherwise it will return
10283                  * 1 and we
10284                  */
10285                 if (process_duplicates(extent_cache, rec))
10286                         continue;
10287                 ret = delete_duplicate_records(root, rec);
10288                 if (ret < 0)
10289                         return ret;
10290                 /*
10291                  * delete_duplicate_records will return the number of entries
10292                  * deleted, so if it's greater than 0 then we know we actually
10293                  * did something and we need to remove.
10294                  */
10295                 if (ret)
10296                         had_dups = 1;
10297         }
10298
10299         if (had_dups)
10300                 return -EAGAIN;
10301
10302         while(1) {
10303                 int cur_err = 0;
10304                 int fix = 0;
10305
10306                 cache = search_cache_extent(extent_cache, 0);
10307                 if (!cache)
10308                         break;
10309                 rec = container_of(cache, struct extent_record, cache);
10310                 if (rec->num_duplicates) {
10311                         fprintf(stderr, "extent item %llu has multiple extent "
10312                                 "items\n", (unsigned long long)rec->start);
10313                         cur_err = 1;
10314                 }
10315
10316                 if (rec->refs != rec->extent_item_refs) {
10317                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
10318                                 (unsigned long long)rec->start,
10319                                 (unsigned long long)rec->nr);
10320                         fprintf(stderr, "extent item %llu, found %llu\n",
10321                                 (unsigned long long)rec->extent_item_refs,
10322                                 (unsigned long long)rec->refs);
10323                         ret = record_orphan_data_extents(root->fs_info, rec);
10324                         if (ret < 0)
10325                                 goto repair_abort;
10326                         fix = ret;
10327                         cur_err = 1;
10328                 }
10329                 if (all_backpointers_checked(rec, 1)) {
10330                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10331                                 (unsigned long long)rec->start,
10332                                 (unsigned long long)rec->nr);
10333                         fix = 1;
10334                         cur_err = 1;
10335                 }
10336                 if (!rec->owner_ref_checked) {
10337                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10338                                 (unsigned long long)rec->start,
10339                                 (unsigned long long)rec->nr);
10340                         fix = 1;
10341                         cur_err = 1;
10342                 }
10343
10344                 if (repair && fix) {
10345                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10346                         if (ret)
10347                                 goto repair_abort;
10348                 }
10349
10350
10351                 if (rec->bad_full_backref) {
10352                         fprintf(stderr, "bad full backref, on [%llu]\n",
10353                                 (unsigned long long)rec->start);
10354                         if (repair) {
10355                                 ret = fixup_extent_flags(root->fs_info, rec);
10356                                 if (ret)
10357                                         goto repair_abort;
10358                                 fix = 1;
10359                         }
10360                         cur_err = 1;
10361                 }
10362                 /*
10363                  * Although it's not a extent ref's problem, we reuse this
10364                  * routine for error reporting.
10365                  * No repair function yet.
10366                  */
10367                 if (rec->crossing_stripes) {
10368                         fprintf(stderr,
10369                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10370                                 rec->start, rec->start + rec->max_size);
10371                         cur_err = 1;
10372                 }
10373
10374                 if (rec->wrong_chunk_type) {
10375                         fprintf(stderr,
10376                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
10377                                 rec->start, rec->start + rec->max_size);
10378                         cur_err = 1;
10379                 }
10380
10381                 err = cur_err;
10382                 remove_cache_extent(extent_cache, cache);
10383                 free_all_extent_backrefs(rec);
10384                 if (!init_extent_tree && repair && (!cur_err || fix))
10385                         clear_extent_dirty(root->fs_info->excluded_extents,
10386                                            rec->start,
10387                                            rec->start + rec->max_size - 1);
10388                 free(rec);
10389         }
10390 repair_abort:
10391         if (repair) {
10392                 if (ret && ret != -EAGAIN) {
10393                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10394                         exit(1);
10395                 } else if (!ret) {
10396                         struct btrfs_trans_handle *trans;
10397
10398                         root = root->fs_info->extent_root;
10399                         trans = btrfs_start_transaction(root, 1);
10400                         if (IS_ERR(trans)) {
10401                                 ret = PTR_ERR(trans);
10402                                 goto repair_abort;
10403                         }
10404
10405                         ret = btrfs_fix_block_accounting(trans, root);
10406                         if (ret)
10407                                 goto repair_abort;
10408                         ret = btrfs_commit_transaction(trans, root);
10409                         if (ret)
10410                                 goto repair_abort;
10411                 }
10412                 return ret;
10413         }
10414
10415         if (err)
10416                 err = -EIO;
10417         return err;
10418 }
10419
10420 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10421 {
10422         u64 stripe_size;
10423
10424         if (type & BTRFS_BLOCK_GROUP_RAID0) {
10425                 stripe_size = length;
10426                 stripe_size /= num_stripes;
10427         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10428                 stripe_size = length * 2;
10429                 stripe_size /= num_stripes;
10430         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10431                 stripe_size = length;
10432                 stripe_size /= (num_stripes - 1);
10433         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10434                 stripe_size = length;
10435                 stripe_size /= (num_stripes - 2);
10436         } else {
10437                 stripe_size = length;
10438         }
10439         return stripe_size;
10440 }
10441
10442 /*
10443  * Check the chunk with its block group/dev list ref:
10444  * Return 0 if all refs seems valid.
10445  * Return 1 if part of refs seems valid, need later check for rebuild ref
10446  * like missing block group and needs to search extent tree to rebuild them.
10447  * Return -1 if essential refs are missing and unable to rebuild.
10448  */
10449 static int check_chunk_refs(struct chunk_record *chunk_rec,
10450                             struct block_group_tree *block_group_cache,
10451                             struct device_extent_tree *dev_extent_cache,
10452                             int silent)
10453 {
10454         struct cache_extent *block_group_item;
10455         struct block_group_record *block_group_rec;
10456         struct cache_extent *dev_extent_item;
10457         struct device_extent_record *dev_extent_rec;
10458         u64 devid;
10459         u64 offset;
10460         u64 length;
10461         int metadump_v2 = 0;
10462         int i;
10463         int ret = 0;
10464
10465         block_group_item = lookup_cache_extent(&block_group_cache->tree,
10466                                                chunk_rec->offset,
10467                                                chunk_rec->length);
10468         if (block_group_item) {
10469                 block_group_rec = container_of(block_group_item,
10470                                                struct block_group_record,
10471                                                cache);
10472                 if (chunk_rec->length != block_group_rec->offset ||
10473                     chunk_rec->offset != block_group_rec->objectid ||
10474                     (!metadump_v2 &&
10475                      chunk_rec->type_flags != block_group_rec->flags)) {
10476                         if (!silent)
10477                                 fprintf(stderr,
10478                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10479                                         chunk_rec->objectid,
10480                                         chunk_rec->type,
10481                                         chunk_rec->offset,
10482                                         chunk_rec->length,
10483                                         chunk_rec->offset,
10484                                         chunk_rec->type_flags,
10485                                         block_group_rec->objectid,
10486                                         block_group_rec->type,
10487                                         block_group_rec->offset,
10488                                         block_group_rec->offset,
10489                                         block_group_rec->objectid,
10490                                         block_group_rec->flags);
10491                         ret = -1;
10492                 } else {
10493                         list_del_init(&block_group_rec->list);
10494                         chunk_rec->bg_rec = block_group_rec;
10495                 }
10496         } else {
10497                 if (!silent)
10498                         fprintf(stderr,
10499                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10500                                 chunk_rec->objectid,
10501                                 chunk_rec->type,
10502                                 chunk_rec->offset,
10503                                 chunk_rec->length,
10504                                 chunk_rec->offset,
10505                                 chunk_rec->type_flags);
10506                 ret = 1;
10507         }
10508
10509         if (metadump_v2)
10510                 return ret;
10511
10512         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10513                                     chunk_rec->num_stripes);
10514         for (i = 0; i < chunk_rec->num_stripes; ++i) {
10515                 devid = chunk_rec->stripes[i].devid;
10516                 offset = chunk_rec->stripes[i].offset;
10517                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10518                                                        devid, offset, length);
10519                 if (dev_extent_item) {
10520                         dev_extent_rec = container_of(dev_extent_item,
10521                                                 struct device_extent_record,
10522                                                 cache);
10523                         if (dev_extent_rec->objectid != devid ||
10524                             dev_extent_rec->offset != offset ||
10525                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
10526                             dev_extent_rec->length != length) {
10527                                 if (!silent)
10528                                         fprintf(stderr,
10529                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10530                                                 chunk_rec->objectid,
10531                                                 chunk_rec->type,
10532                                                 chunk_rec->offset,
10533                                                 chunk_rec->stripes[i].devid,
10534                                                 chunk_rec->stripes[i].offset,
10535                                                 dev_extent_rec->objectid,
10536                                                 dev_extent_rec->offset,
10537                                                 dev_extent_rec->length);
10538                                 ret = -1;
10539                         } else {
10540                                 list_move(&dev_extent_rec->chunk_list,
10541                                           &chunk_rec->dextents);
10542                         }
10543                 } else {
10544                         if (!silent)
10545                                 fprintf(stderr,
10546                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10547                                         chunk_rec->objectid,
10548                                         chunk_rec->type,
10549                                         chunk_rec->offset,
10550                                         chunk_rec->stripes[i].devid,
10551                                         chunk_rec->stripes[i].offset);
10552                         ret = -1;
10553                 }
10554         }
10555         return ret;
10556 }
10557
10558 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
10559 int check_chunks(struct cache_tree *chunk_cache,
10560                  struct block_group_tree *block_group_cache,
10561                  struct device_extent_tree *dev_extent_cache,
10562                  struct list_head *good, struct list_head *bad,
10563                  struct list_head *rebuild, int silent)
10564 {
10565         struct cache_extent *chunk_item;
10566         struct chunk_record *chunk_rec;
10567         struct block_group_record *bg_rec;
10568         struct device_extent_record *dext_rec;
10569         int err;
10570         int ret = 0;
10571
10572         chunk_item = first_cache_extent(chunk_cache);
10573         while (chunk_item) {
10574                 chunk_rec = container_of(chunk_item, struct chunk_record,
10575                                          cache);
10576                 err = check_chunk_refs(chunk_rec, block_group_cache,
10577                                        dev_extent_cache, silent);
10578                 if (err < 0)
10579                         ret = err;
10580                 if (err == 0 && good)
10581                         list_add_tail(&chunk_rec->list, good);
10582                 if (err > 0 && rebuild)
10583                         list_add_tail(&chunk_rec->list, rebuild);
10584                 if (err < 0 && bad)
10585                         list_add_tail(&chunk_rec->list, bad);
10586                 chunk_item = next_cache_extent(chunk_item);
10587         }
10588
10589         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10590                 if (!silent)
10591                         fprintf(stderr,
10592                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10593                                 bg_rec->objectid,
10594                                 bg_rec->offset,
10595                                 bg_rec->flags);
10596                 if (!ret)
10597                         ret = 1;
10598         }
10599
10600         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10601                             chunk_list) {
10602                 if (!silent)
10603                         fprintf(stderr,
10604                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10605                                 dext_rec->objectid,
10606                                 dext_rec->offset,
10607                                 dext_rec->length);
10608                 if (!ret)
10609                         ret = 1;
10610         }
10611         return ret;
10612 }
10613
10614
10615 static int check_device_used(struct device_record *dev_rec,
10616                              struct device_extent_tree *dext_cache)
10617 {
10618         struct cache_extent *cache;
10619         struct device_extent_record *dev_extent_rec;
10620         u64 total_byte = 0;
10621
10622         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10623         while (cache) {
10624                 dev_extent_rec = container_of(cache,
10625                                               struct device_extent_record,
10626                                               cache);
10627                 if (dev_extent_rec->objectid != dev_rec->devid)
10628                         break;
10629
10630                 list_del_init(&dev_extent_rec->device_list);
10631                 total_byte += dev_extent_rec->length;
10632                 cache = next_cache_extent(cache);
10633         }
10634
10635         if (total_byte != dev_rec->byte_used) {
10636                 fprintf(stderr,
10637                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10638                         total_byte, dev_rec->byte_used, dev_rec->objectid,
10639                         dev_rec->type, dev_rec->offset);
10640                 return -1;
10641         } else {
10642                 return 0;
10643         }
10644 }
10645
10646 /*
10647  * Unlike device size alignment check above, some super total_bytes check
10648  * failure can lead to mount failure for newer kernel.
10649  *
10650  * So this function will return the error for a fatal super total_bytes problem.
10651  */
10652 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
10653 {
10654         struct btrfs_device *dev;
10655         struct list_head *dev_list = &fs_info->fs_devices->devices;
10656         u64 total_bytes = 0;
10657         u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
10658
10659         list_for_each_entry(dev, dev_list, dev_list)
10660                 total_bytes += dev->total_bytes;
10661
10662         /* Important check, which can cause unmountable fs */
10663         if (super_bytes < total_bytes) {
10664                 error("super total bytes %llu smaller than real device(s) size %llu",
10665                         super_bytes, total_bytes);
10666                 error("mounting this fs may fail for newer kernels");
10667                 error("this can be fixed by 'btrfs rescue fix-device-size'");
10668                 return false;
10669         }
10670
10671         /*
10672          * Optional check, just to make everything aligned and match with each
10673          * other.
10674          *
10675          * For a btrfs-image restored fs, we don't need to check it anyway.
10676          */
10677         if (btrfs_super_flags(fs_info->super_copy) &
10678             (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
10679                 return true;
10680         if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
10681             !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
10682             super_bytes != total_bytes) {
10683                 warning("minor unaligned/mismatch device size detected");
10684                 warning(
10685                 "recommended to use 'btrfs rescue fix-device-size' to fix it");
10686         }
10687         return true;
10688 }
10689
10690 /* check btrfs_dev_item -> btrfs_dev_extent */
10691 static int check_devices(struct rb_root *dev_cache,
10692                          struct device_extent_tree *dev_extent_cache)
10693 {
10694         struct rb_node *dev_node;
10695         struct device_record *dev_rec;
10696         struct device_extent_record *dext_rec;
10697         int err;
10698         int ret = 0;
10699
10700         dev_node = rb_first(dev_cache);
10701         while (dev_node) {
10702                 dev_rec = container_of(dev_node, struct device_record, node);
10703                 err = check_device_used(dev_rec, dev_extent_cache);
10704                 if (err)
10705                         ret = err;
10706
10707                 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
10708                                          global_info->sectorsize);
10709                 dev_node = rb_next(dev_node);
10710         }
10711         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10712                             device_list) {
10713                 fprintf(stderr,
10714                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10715                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
10716                 if (!ret)
10717                         ret = 1;
10718         }
10719         return ret;
10720 }
10721
10722 static int add_root_item_to_list(struct list_head *head,
10723                                   u64 objectid, u64 bytenr, u64 last_snapshot,
10724                                   u8 level, u8 drop_level,
10725                                   struct btrfs_key *drop_key)
10726 {
10727
10728         struct root_item_record *ri_rec;
10729         ri_rec = malloc(sizeof(*ri_rec));
10730         if (!ri_rec)
10731                 return -ENOMEM;
10732         ri_rec->bytenr = bytenr;
10733         ri_rec->objectid = objectid;
10734         ri_rec->level = level;
10735         ri_rec->drop_level = drop_level;
10736         ri_rec->last_snapshot = last_snapshot;
10737         if (drop_key)
10738                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10739         list_add_tail(&ri_rec->list, head);
10740
10741         return 0;
10742 }
10743
10744 static void free_root_item_list(struct list_head *list)
10745 {
10746         struct root_item_record *ri_rec;
10747
10748         while (!list_empty(list)) {
10749                 ri_rec = list_first_entry(list, struct root_item_record,
10750                                           list);
10751                 list_del_init(&ri_rec->list);
10752                 free(ri_rec);
10753         }
10754 }
10755
10756 static int deal_root_from_list(struct list_head *list,
10757                                struct btrfs_root *root,
10758                                struct block_info *bits,
10759                                int bits_nr,
10760                                struct cache_tree *pending,
10761                                struct cache_tree *seen,
10762                                struct cache_tree *reada,
10763                                struct cache_tree *nodes,
10764                                struct cache_tree *extent_cache,
10765                                struct cache_tree *chunk_cache,
10766                                struct rb_root *dev_cache,
10767                                struct block_group_tree *block_group_cache,
10768                                struct device_extent_tree *dev_extent_cache)
10769 {
10770         int ret = 0;
10771         u64 last;
10772
10773         while (!list_empty(list)) {
10774                 struct root_item_record *rec;
10775                 struct extent_buffer *buf;
10776                 rec = list_entry(list->next,
10777                                  struct root_item_record, list);
10778                 last = 0;
10779                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10780                 if (!extent_buffer_uptodate(buf)) {
10781                         free_extent_buffer(buf);
10782                         ret = -EIO;
10783                         break;
10784                 }
10785                 ret = add_root_to_pending(buf, extent_cache, pending,
10786                                     seen, nodes, rec->objectid);
10787                 if (ret < 0)
10788                         break;
10789                 /*
10790                  * To rebuild extent tree, we need deal with snapshot
10791                  * one by one, otherwise we deal with node firstly which
10792                  * can maximize readahead.
10793                  */
10794                 while (1) {
10795                         ret = run_next_block(root, bits, bits_nr, &last,
10796                                              pending, seen, reada, nodes,
10797                                              extent_cache, chunk_cache,
10798                                              dev_cache, block_group_cache,
10799                                              dev_extent_cache, rec);
10800                         if (ret != 0)
10801                                 break;
10802                 }
10803                 free_extent_buffer(buf);
10804                 list_del(&rec->list);
10805                 free(rec);
10806                 if (ret < 0)
10807                         break;
10808         }
10809         while (ret >= 0) {
10810                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10811                                      reada, nodes, extent_cache, chunk_cache,
10812                                      dev_cache, block_group_cache,
10813                                      dev_extent_cache, NULL);
10814                 if (ret != 0) {
10815                         if (ret > 0)
10816                                 ret = 0;
10817                         break;
10818                 }
10819         }
10820         return ret;
10821 }
10822
10823 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10824 {
10825         struct rb_root dev_cache;
10826         struct cache_tree chunk_cache;
10827         struct block_group_tree block_group_cache;
10828         struct device_extent_tree dev_extent_cache;
10829         struct cache_tree extent_cache;
10830         struct cache_tree seen;
10831         struct cache_tree pending;
10832         struct cache_tree reada;
10833         struct cache_tree nodes;
10834         struct extent_io_tree excluded_extents;
10835         struct cache_tree corrupt_blocks;
10836         struct btrfs_path path;
10837         struct btrfs_key key;
10838         struct btrfs_key found_key;
10839         int ret, err = 0;
10840         struct block_info *bits;
10841         int bits_nr;
10842         struct extent_buffer *leaf;
10843         int slot;
10844         struct btrfs_root_item ri;
10845         struct list_head dropping_trees;
10846         struct list_head normal_trees;
10847         struct btrfs_root *root1;
10848         struct btrfs_root *root;
10849         u64 objectid;
10850         u8 level;
10851
10852         root = fs_info->fs_root;
10853         dev_cache = RB_ROOT;
10854         cache_tree_init(&chunk_cache);
10855         block_group_tree_init(&block_group_cache);
10856         device_extent_tree_init(&dev_extent_cache);
10857
10858         cache_tree_init(&extent_cache);
10859         cache_tree_init(&seen);
10860         cache_tree_init(&pending);
10861         cache_tree_init(&nodes);
10862         cache_tree_init(&reada);
10863         cache_tree_init(&corrupt_blocks);
10864         extent_io_tree_init(&excluded_extents);
10865         INIT_LIST_HEAD(&dropping_trees);
10866         INIT_LIST_HEAD(&normal_trees);
10867
10868         if (repair) {
10869                 fs_info->excluded_extents = &excluded_extents;
10870                 fs_info->fsck_extent_cache = &extent_cache;
10871                 fs_info->free_extent_hook = free_extent_hook;
10872                 fs_info->corrupt_blocks = &corrupt_blocks;
10873         }
10874
10875         bits_nr = 1024;
10876         bits = malloc(bits_nr * sizeof(struct block_info));
10877         if (!bits) {
10878                 perror("malloc");
10879                 exit(1);
10880         }
10881
10882         if (ctx.progress_enabled) {
10883                 ctx.tp = TASK_EXTENTS;
10884                 task_start(ctx.info);
10885         }
10886
10887 again:
10888         root1 = fs_info->tree_root;
10889         level = btrfs_header_level(root1->node);
10890         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10891                                     root1->node->start, 0, level, 0, NULL);
10892         if (ret < 0)
10893                 goto out;
10894         root1 = fs_info->chunk_root;
10895         level = btrfs_header_level(root1->node);
10896         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10897                                     root1->node->start, 0, level, 0, NULL);
10898         if (ret < 0)
10899                 goto out;
10900         btrfs_init_path(&path);
10901         key.offset = 0;
10902         key.objectid = 0;
10903         key.type = BTRFS_ROOT_ITEM_KEY;
10904         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10905         if (ret < 0)
10906                 goto out;
10907         while(1) {
10908                 leaf = path.nodes[0];
10909                 slot = path.slots[0];
10910                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10911                         ret = btrfs_next_leaf(root, &path);
10912                         if (ret != 0)
10913                                 break;
10914                         leaf = path.nodes[0];
10915                         slot = path.slots[0];
10916                 }
10917                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10918                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10919                         unsigned long offset;
10920                         u64 last_snapshot;
10921
10922                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10923                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10924                         last_snapshot = btrfs_root_last_snapshot(&ri);
10925                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10926                                 level = btrfs_root_level(&ri);
10927                                 ret = add_root_item_to_list(&normal_trees,
10928                                                 found_key.objectid,
10929                                                 btrfs_root_bytenr(&ri),
10930                                                 last_snapshot, level,
10931                                                 0, NULL);
10932                                 if (ret < 0)
10933                                         goto out;
10934                         } else {
10935                                 level = btrfs_root_level(&ri);
10936                                 objectid = found_key.objectid;
10937                                 btrfs_disk_key_to_cpu(&found_key,
10938                                                       &ri.drop_progress);
10939                                 ret = add_root_item_to_list(&dropping_trees,
10940                                                 objectid,
10941                                                 btrfs_root_bytenr(&ri),
10942                                                 last_snapshot, level,
10943                                                 ri.drop_level, &found_key);
10944                                 if (ret < 0)
10945                                         goto out;
10946                         }
10947                 }
10948                 path.slots[0]++;
10949         }
10950         btrfs_release_path(&path);
10951
10952         /*
10953          * check_block can return -EAGAIN if it fixes something, please keep
10954          * this in mind when dealing with return values from these functions, if
10955          * we get -EAGAIN we want to fall through and restart the loop.
10956          */
10957         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
10958                                   &seen, &reada, &nodes, &extent_cache,
10959                                   &chunk_cache, &dev_cache, &block_group_cache,
10960                                   &dev_extent_cache);
10961         if (ret < 0) {
10962                 if (ret == -EAGAIN)
10963                         goto loop;
10964                 goto out;
10965         }
10966         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
10967                                   &pending, &seen, &reada, &nodes,
10968                                   &extent_cache, &chunk_cache, &dev_cache,
10969                                   &block_group_cache, &dev_extent_cache);
10970         if (ret < 0) {
10971                 if (ret == -EAGAIN)
10972                         goto loop;
10973                 goto out;
10974         }
10975
10976         ret = check_chunks(&chunk_cache, &block_group_cache,
10977                            &dev_extent_cache, NULL, NULL, NULL, 0);
10978         if (ret) {
10979                 if (ret == -EAGAIN)
10980                         goto loop;
10981                 err = ret;
10982         }
10983
10984         ret = check_extent_refs(root, &extent_cache);
10985         if (ret < 0) {
10986                 if (ret == -EAGAIN)
10987                         goto loop;
10988                 goto out;
10989         }
10990
10991         ret = check_devices(&dev_cache, &dev_extent_cache);
10992         if (ret && err)
10993                 ret = err;
10994
10995 out:
10996         task_stop(ctx.info);
10997         if (repair) {
10998                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10999                 extent_io_tree_cleanup(&excluded_extents);
11000                 fs_info->fsck_extent_cache = NULL;
11001                 fs_info->free_extent_hook = NULL;
11002                 fs_info->corrupt_blocks = NULL;
11003                 fs_info->excluded_extents = NULL;
11004         }
11005         free(bits);
11006         free_chunk_cache_tree(&chunk_cache);
11007         free_device_cache_tree(&dev_cache);
11008         free_block_group_tree(&block_group_cache);
11009         free_device_extent_tree(&dev_extent_cache);
11010         free_extent_cache_tree(&seen);
11011         free_extent_cache_tree(&pending);
11012         free_extent_cache_tree(&reada);
11013         free_extent_cache_tree(&nodes);
11014         free_root_item_list(&normal_trees);
11015         free_root_item_list(&dropping_trees);
11016         return ret;
11017 loop:
11018         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11019         free_extent_cache_tree(&seen);
11020         free_extent_cache_tree(&pending);
11021         free_extent_cache_tree(&reada);
11022         free_extent_cache_tree(&nodes);
11023         free_chunk_cache_tree(&chunk_cache);
11024         free_block_group_tree(&block_group_cache);
11025         free_device_cache_tree(&dev_cache);
11026         free_device_extent_tree(&dev_extent_cache);
11027         free_extent_record_cache(&extent_cache);
11028         free_root_item_list(&normal_trees);
11029         free_root_item_list(&dropping_trees);
11030         extent_io_tree_cleanup(&excluded_extents);
11031         goto again;
11032 }
11033
11034 static int check_extent_inline_ref(struct extent_buffer *eb,
11035                    struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11036 {
11037         int ret;
11038         u8 type = btrfs_extent_inline_ref_type(eb, iref);
11039
11040         switch (type) {
11041         case BTRFS_TREE_BLOCK_REF_KEY:
11042         case BTRFS_EXTENT_DATA_REF_KEY:
11043         case BTRFS_SHARED_BLOCK_REF_KEY:
11044         case BTRFS_SHARED_DATA_REF_KEY:
11045                 ret = 0;
11046                 break;
11047         default:
11048                 error("extent[%llu %u %llu] has unknown ref type: %d",
11049                       key->objectid, key->type, key->offset, type);
11050                 ret = UNKNOWN_TYPE;
11051                 break;
11052         }
11053
11054         return ret;
11055 }
11056
11057 /*
11058  * Check backrefs of a tree block given by @bytenr or @eb.
11059  *
11060  * @root:       the root containing the @bytenr or @eb
11061  * @eb:         tree block extent buffer, can be NULL
11062  * @bytenr:     bytenr of the tree block to search
11063  * @level:      tree level of the tree block
11064  * @owner:      owner of the tree block
11065  *
11066  * Return >0 for any error found and output error message
11067  * Return 0 for no error found
11068  */
11069 static int check_tree_block_ref(struct btrfs_root *root,
11070                                 struct extent_buffer *eb, u64 bytenr,
11071                                 int level, u64 owner, struct node_refs *nrefs)
11072 {
11073         struct btrfs_key key;
11074         struct btrfs_root *extent_root = root->fs_info->extent_root;
11075         struct btrfs_path path;
11076         struct btrfs_extent_item *ei;
11077         struct btrfs_extent_inline_ref *iref;
11078         struct extent_buffer *leaf;
11079         unsigned long end;
11080         unsigned long ptr;
11081         int slot;
11082         int skinny_level;
11083         int root_level = btrfs_header_level(root->node);
11084         int type;
11085         u32 nodesize = root->fs_info->nodesize;
11086         u32 item_size;
11087         u64 offset;
11088         int found_ref = 0;
11089         int err = 0;
11090         int ret;
11091         int strict = 1;
11092         int parent = 0;
11093
11094         btrfs_init_path(&path);
11095         key.objectid = bytenr;
11096         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11097                 key.type = BTRFS_METADATA_ITEM_KEY;
11098         else
11099                 key.type = BTRFS_EXTENT_ITEM_KEY;
11100         key.offset = (u64)-1;
11101
11102         /* Search for the backref in extent tree */
11103         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11104         if (ret < 0) {
11105                 err |= BACKREF_MISSING;
11106                 goto out;
11107         }
11108         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11109         if (ret) {
11110                 err |= BACKREF_MISSING;
11111                 goto out;
11112         }
11113
11114         leaf = path.nodes[0];
11115         slot = path.slots[0];
11116         btrfs_item_key_to_cpu(leaf, &key, slot);
11117
11118         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11119
11120         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11121                 skinny_level = (int)key.offset;
11122                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11123         } else {
11124                 struct btrfs_tree_block_info *info;
11125
11126                 info = (struct btrfs_tree_block_info *)(ei + 1);
11127                 skinny_level = btrfs_tree_block_level(leaf, info);
11128                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11129         }
11130
11131
11132         if (eb) {
11133                 u64 header_gen;
11134                 u64 extent_gen;
11135
11136                 /*
11137                  * Due to the feature of shared tree blocks, if the upper node
11138                  * is a fs root or shared node, the extent of checked node may
11139                  * not be updated until the next CoW.
11140                  */
11141                 if (nrefs)
11142                         strict = should_check_extent_strictly(root, nrefs,
11143                                         level);
11144                 if (!(btrfs_extent_flags(leaf, ei) &
11145                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11146                         error(
11147                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11148                                 key.objectid, nodesize,
11149                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11150                         err = BACKREF_MISMATCH;
11151                 }
11152                 header_gen = btrfs_header_generation(eb);
11153                 extent_gen = btrfs_extent_generation(leaf, ei);
11154                 if (header_gen != extent_gen) {
11155                         error(
11156         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11157                                 key.objectid, nodesize, header_gen,
11158                                 extent_gen);
11159                         err = BACKREF_MISMATCH;
11160                 }
11161                 if (level != skinny_level) {
11162                         error(
11163                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11164                                 key.objectid, nodesize, level, skinny_level);
11165                         err = BACKREF_MISMATCH;
11166                 }
11167                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11168                         error(
11169                         "extent[%llu %u] is referred by other roots than %llu",
11170                                 key.objectid, nodesize, root->objectid);
11171                         err = BACKREF_MISMATCH;
11172                 }
11173         }
11174
11175         /*
11176          * Iterate the extent/metadata item to find the exact backref
11177          */
11178         item_size = btrfs_item_size_nr(leaf, slot);
11179         ptr = (unsigned long)iref;
11180         end = (unsigned long)ei + item_size;
11181
11182         while (ptr < end) {
11183                 iref = (struct btrfs_extent_inline_ref *)ptr;
11184                 type = btrfs_extent_inline_ref_type(leaf, iref);
11185                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11186
11187                 ret = check_extent_inline_ref(leaf, &key, iref);
11188                 if (ret) {
11189                         err |= ret;
11190                         break;
11191                 }
11192                 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11193                         if (offset == root->objectid)
11194                                 found_ref = 1;
11195                         if (!strict && owner == offset)
11196                                 found_ref = 1;
11197                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11198                         /*
11199                          * Backref of tree reloc root points to itself, no need
11200                          * to check backref any more.
11201                          *
11202                          * This may be an error of loop backref, but extent tree
11203                          * checker should have already handled it.
11204                          * Here we only need to avoid infinite iteration.
11205                          */
11206                         if (offset == bytenr) {
11207                                 found_ref = 1;
11208                         } else {
11209                                 /*
11210                                  * Check if the backref points to valid
11211                                  * referencer
11212                                  */
11213                                 found_ref = !check_tree_block_ref( root, NULL,
11214                                                 offset, level + 1, owner,
11215                                                 NULL);
11216                         }
11217                 }
11218
11219                 if (found_ref)
11220                         break;
11221                 ptr += btrfs_extent_inline_ref_size(type);
11222         }
11223
11224         /*
11225          * Inlined extent item doesn't have what we need, check
11226          * TREE_BLOCK_REF_KEY
11227          */
11228         if (!found_ref) {
11229                 btrfs_release_path(&path);
11230                 key.objectid = bytenr;
11231                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11232                 key.offset = root->objectid;
11233
11234                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11235                 if (!ret)
11236                         found_ref = 1;
11237         }
11238         /*
11239          * Finally check SHARED BLOCK REF, any found will be good
11240          * Here we're not doing comprehensive extent backref checking,
11241          * only need to ensure there is some extent referring to this
11242          * tree block.
11243          */
11244         if (!found_ref) {
11245                 btrfs_release_path(&path);
11246                 key.objectid = bytenr;
11247                 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
11248                 key.offset = (u64)-1;
11249
11250                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11251                 if (ret < 0) {
11252                         err |= BACKREF_MISSING;
11253                         goto out;
11254                 }
11255                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11256                 if (ret) {
11257                         err |= BACKREF_MISSING;
11258                         goto out;
11259                 }
11260                 found_ref = 1;
11261         }
11262         if (!found_ref)
11263                 err |= BACKREF_MISSING;
11264 out:
11265         btrfs_release_path(&path);
11266         if (nrefs && strict &&
11267             level < root_level && nrefs->full_backref[level + 1])
11268                 parent = nrefs->bytenr[level + 1];
11269         if (eb && (err & BACKREF_MISSING))
11270                 error(
11271         "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11272                       bytenr, nodesize, owner, level,
11273                       parent ? "parent" : "root",
11274                       parent ? parent : root->objectid);
11275         return err;
11276 }
11277
11278 /*
11279  * If @err contains BACKREF_MISSING then add extent of the
11280  * file_extent_data_item.
11281  *
11282  * Returns error bits after reapir.
11283  */
11284 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11285                                    struct btrfs_root *root,
11286                                    struct btrfs_path *pathp,
11287                                    struct node_refs *nrefs,
11288                                    int err)
11289 {
11290         struct btrfs_file_extent_item *fi;
11291         struct btrfs_key fi_key;
11292         struct btrfs_key key;
11293         struct btrfs_extent_item *ei;
11294         struct btrfs_path path;
11295         struct btrfs_root *extent_root = root->fs_info->extent_root;
11296         struct extent_buffer *eb;
11297         u64 size;
11298         u64 disk_bytenr;
11299         u64 num_bytes;
11300         u64 parent;
11301         u64 offset;
11302         u64 extent_offset;
11303         u64 file_offset;
11304         int generation;
11305         int slot;
11306         int ret = 0;
11307
11308         eb = pathp->nodes[0];
11309         slot = pathp->slots[0];
11310         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11311         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11312
11313         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11314             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11315                 return err;
11316
11317         file_offset = fi_key.offset;
11318         generation = btrfs_file_extent_generation(eb, fi);
11319         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11320         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11321         extent_offset = btrfs_file_extent_offset(eb, fi);
11322         offset = file_offset - extent_offset;
11323
11324         /* now repair only adds backref */
11325         if ((err & BACKREF_MISSING) == 0)
11326                 return err;
11327
11328         /* search extent item */
11329         key.objectid = disk_bytenr;
11330         key.type = BTRFS_EXTENT_ITEM_KEY;
11331         key.offset = num_bytes;
11332
11333         btrfs_init_path(&path);
11334         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11335         if (ret < 0) {
11336                 ret = -EIO;
11337                 goto out;
11338         }
11339
11340         /* insert an extent item */
11341         if (ret > 0) {
11342                 key.objectid = disk_bytenr;
11343                 key.type = BTRFS_EXTENT_ITEM_KEY;
11344                 key.offset = num_bytes;
11345                 size = sizeof(*ei);
11346
11347                 btrfs_release_path(&path);
11348                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11349                                               size);
11350                 if (ret)
11351                         goto out;
11352                 eb = path.nodes[0];
11353                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
11354
11355                 btrfs_set_extent_refs(eb, ei, 0);
11356                 btrfs_set_extent_generation(eb, ei, generation);
11357                 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11358
11359                 btrfs_mark_buffer_dirty(eb);
11360                 ret = btrfs_update_block_group(extent_root, disk_bytenr,
11361                                                num_bytes, 1, 0);
11362                 btrfs_release_path(&path);
11363         }
11364
11365         if (nrefs->full_backref[0])
11366                 parent = btrfs_header_bytenr(eb);
11367         else
11368                 parent = 0;
11369
11370         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11371                                    root->objectid,
11372                    parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11373                                    offset);
11374         if (ret) {
11375                 error(
11376                 "failed to increase extent data backref[%llu %llu] root %llu",
11377                       disk_bytenr, num_bytes, root->objectid);
11378                 goto out;
11379         } else {
11380                 printf("Add one extent data backref [%llu %llu]\n",
11381                        disk_bytenr, num_bytes);
11382         }
11383
11384         err &= ~BACKREF_MISSING;
11385 out:
11386         if (ret)
11387                 error("can't repair root %llu extent data item[%llu %llu]",
11388                       root->objectid, disk_bytenr, num_bytes);
11389         return err;
11390 }
11391
11392 /*
11393  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11394  *
11395  * Return >0 any error found and output error message
11396  * Return 0 for no error found
11397  */
11398 static int check_extent_data_item(struct btrfs_root *root,
11399                                   struct btrfs_path *pathp,
11400                                   struct node_refs *nrefs,  int account_bytes)
11401 {
11402         struct btrfs_file_extent_item *fi;
11403         struct extent_buffer *eb = pathp->nodes[0];
11404         struct btrfs_path path;
11405         struct btrfs_root *extent_root = root->fs_info->extent_root;
11406         struct btrfs_key fi_key;
11407         struct btrfs_key dbref_key;
11408         struct extent_buffer *leaf;
11409         struct btrfs_extent_item *ei;
11410         struct btrfs_extent_inline_ref *iref;
11411         struct btrfs_extent_data_ref *dref;
11412         u64 owner;
11413         u64 disk_bytenr;
11414         u64 disk_num_bytes;
11415         u64 extent_num_bytes;
11416         u64 extent_flags;
11417         u64 offset;
11418         u32 item_size;
11419         unsigned long end;
11420         unsigned long ptr;
11421         int type;
11422         int found_dbackref = 0;
11423         int slot = pathp->slots[0];
11424         int err = 0;
11425         int ret;
11426         int strict;
11427
11428         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11429         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11430
11431         /* Nothing to check for hole and inline data extents */
11432         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11433             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11434                 return 0;
11435
11436         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11437         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11438         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11439         offset = btrfs_file_extent_offset(eb, fi);
11440
11441         /* Check unaligned disk_num_bytes and num_bytes */
11442         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11443                 error(
11444 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11445                         fi_key.objectid, fi_key.offset, disk_num_bytes,
11446                         root->fs_info->sectorsize);
11447                 err |= BYTES_UNALIGNED;
11448         } else if (account_bytes) {
11449                 data_bytes_allocated += disk_num_bytes;
11450         }
11451         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11452                 error(
11453 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11454                         fi_key.objectid, fi_key.offset, extent_num_bytes,
11455                         root->fs_info->sectorsize);
11456                 err |= BYTES_UNALIGNED;
11457         } else if (account_bytes) {
11458                 data_bytes_referenced += extent_num_bytes;
11459         }
11460         owner = btrfs_header_owner(eb);
11461
11462         /* Check the extent item of the file extent in extent tree */
11463         btrfs_init_path(&path);
11464         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11465         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11466         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11467
11468         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11469         if (ret)
11470                 goto out;
11471
11472         leaf = path.nodes[0];
11473         slot = path.slots[0];
11474         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11475
11476         extent_flags = btrfs_extent_flags(leaf, ei);
11477
11478         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11479                 error(
11480                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11481                     disk_bytenr, disk_num_bytes,
11482                     BTRFS_EXTENT_FLAG_DATA);
11483                 err |= BACKREF_MISMATCH;
11484         }
11485
11486         /* Check data backref inside that extent item */
11487         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11488         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11489         ptr = (unsigned long)iref;
11490         end = (unsigned long)ei + item_size;
11491         strict = should_check_extent_strictly(root, nrefs, -1);
11492
11493         while (ptr < end) {
11494                 u64 ref_root;
11495                 u64 ref_objectid;
11496                 u64 ref_offset;
11497                 bool match = false;
11498
11499                 iref = (struct btrfs_extent_inline_ref *)ptr;
11500                 type = btrfs_extent_inline_ref_type(leaf, iref);
11501                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11502
11503                 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
11504                 if (ret) {
11505                         err |= ret;
11506                         break;
11507                 }
11508                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11509                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
11510                         ref_objectid = btrfs_extent_data_ref_objectid(leaf, dref);
11511                         ref_offset = btrfs_extent_data_ref_offset(leaf, dref);
11512
11513                         if (ref_objectid == fi_key.objectid &&
11514                             ref_offset == fi_key.offset - offset)
11515                                 match = true;
11516                         if (ref_root == root->objectid && match)
11517                                 found_dbackref = 1;
11518                         else if (!strict && owner == ref_root && match)
11519                                 found_dbackref = 1;
11520                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
11521                         found_dbackref = !check_tree_block_ref(root, NULL,
11522                                 btrfs_extent_inline_ref_offset(leaf, iref),
11523                                 0, owner, NULL);
11524                 }
11525
11526                 if (found_dbackref)
11527                         break;
11528                 ptr += btrfs_extent_inline_ref_size(type);
11529         }
11530
11531         if (!found_dbackref) {
11532                 btrfs_release_path(&path);
11533
11534                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11535                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11536                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
11537                 dbref_key.offset = hash_extent_data_ref(root->objectid,
11538                                 fi_key.objectid, fi_key.offset - offset);
11539
11540                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11541                                         &dbref_key, &path, 0, 0);
11542                 if (!ret) {
11543                         found_dbackref = 1;
11544                         goto out;
11545                 }
11546
11547                 btrfs_release_path(&path);
11548
11549                 /*
11550                  * Neither inlined nor EXTENT_DATA_REF found, try
11551                  * SHARED_DATA_REF as last chance.
11552                  */
11553                 dbref_key.objectid = disk_bytenr;
11554                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11555                 dbref_key.offset = eb->start;
11556
11557                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11558                                         &dbref_key, &path, 0, 0);
11559                 if (!ret) {
11560                         found_dbackref = 1;
11561                         goto out;
11562                 }
11563         }
11564
11565 out:
11566         if (!found_dbackref)
11567                 err |= BACKREF_MISSING;
11568         btrfs_release_path(&path);
11569         if (err & BACKREF_MISSING) {
11570                 error("data extent[%llu %llu] backref lost",
11571                       disk_bytenr, disk_num_bytes);
11572         }
11573         return err;
11574 }
11575
11576 /*
11577  * Get real tree block level for the case like shared block
11578  * Return >= 0 as tree level
11579  * Return <0 for error
11580  */
11581 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11582 {
11583         struct extent_buffer *eb;
11584         struct btrfs_path path;
11585         struct btrfs_key key;
11586         struct btrfs_extent_item *ei;
11587         u64 flags;
11588         u64 transid;
11589         u8 backref_level;
11590         u8 header_level;
11591         int ret;
11592
11593         /* Search extent tree for extent generation and level */
11594         key.objectid = bytenr;
11595         key.type = BTRFS_METADATA_ITEM_KEY;
11596         key.offset = (u64)-1;
11597
11598         btrfs_init_path(&path);
11599         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11600         if (ret < 0)
11601                 goto release_out;
11602         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11603         if (ret < 0)
11604                 goto release_out;
11605         if (ret > 0) {
11606                 ret = -ENOENT;
11607                 goto release_out;
11608         }
11609
11610         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11611         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11612                             struct btrfs_extent_item);
11613         flags = btrfs_extent_flags(path.nodes[0], ei);
11614         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11615                 ret = -ENOENT;
11616                 goto release_out;
11617         }
11618
11619         /* Get transid for later read_tree_block() check */
11620         transid = btrfs_extent_generation(path.nodes[0], ei);
11621
11622         /* Get backref level as one source */
11623         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11624                 backref_level = key.offset;
11625         } else {
11626                 struct btrfs_tree_block_info *info;
11627
11628                 info = (struct btrfs_tree_block_info *)(ei + 1);
11629                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11630         }
11631         btrfs_release_path(&path);
11632
11633         /* Get level from tree block as an alternative source */
11634         eb = read_tree_block(fs_info, bytenr, transid);
11635         if (!extent_buffer_uptodate(eb)) {
11636                 free_extent_buffer(eb);
11637                 return -EIO;
11638         }
11639         header_level = btrfs_header_level(eb);
11640         free_extent_buffer(eb);
11641
11642         if (header_level != backref_level)
11643                 return -EIO;
11644         return header_level;
11645
11646 release_out:
11647         btrfs_release_path(&path);
11648         return ret;
11649 }
11650
11651 /*
11652  * Check if a tree block backref is valid (points to a valid tree block)
11653  * if level == -1, level will be resolved
11654  * Return >0 for any error found and print error message
11655  */
11656 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11657                                     u64 bytenr, int level)
11658 {
11659         struct btrfs_root *root;
11660         struct btrfs_key key;
11661         struct btrfs_path path;
11662         struct extent_buffer *eb;
11663         struct extent_buffer *node;
11664         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11665         int err = 0;
11666         int ret;
11667
11668         /* Query level for level == -1 special case */
11669         if (level == -1)
11670                 level = query_tree_block_level(fs_info, bytenr);
11671         if (level < 0) {
11672                 err |= REFERENCER_MISSING;
11673                 goto out;
11674         }
11675
11676         key.objectid = root_id;
11677         key.type = BTRFS_ROOT_ITEM_KEY;
11678         key.offset = (u64)-1;
11679
11680         root = btrfs_read_fs_root(fs_info, &key);
11681         if (IS_ERR(root)) {
11682                 err |= REFERENCER_MISSING;
11683                 goto out;
11684         }
11685
11686         /* Read out the tree block to get item/node key */
11687         eb = read_tree_block(fs_info, bytenr, 0);
11688         if (!extent_buffer_uptodate(eb)) {
11689                 err |= REFERENCER_MISSING;
11690                 free_extent_buffer(eb);
11691                 goto out;
11692         }
11693
11694         /* Empty tree, no need to check key */
11695         if (!btrfs_header_nritems(eb) && !level) {
11696                 free_extent_buffer(eb);
11697                 goto out;
11698         }
11699
11700         if (level)
11701                 btrfs_node_key_to_cpu(eb, &key, 0);
11702         else
11703                 btrfs_item_key_to_cpu(eb, &key, 0);
11704
11705         free_extent_buffer(eb);
11706
11707         btrfs_init_path(&path);
11708         path.lowest_level = level;
11709         /* Search with the first key, to ensure we can reach it */
11710         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11711         if (ret < 0) {
11712                 err |= REFERENCER_MISSING;
11713                 goto release_out;
11714         }
11715
11716         node = path.nodes[level];
11717         if (btrfs_header_bytenr(node) != bytenr) {
11718                 error(
11719         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11720                         bytenr, nodesize, bytenr,
11721                         btrfs_header_bytenr(node));
11722                 err |= REFERENCER_MISMATCH;
11723         }
11724         if (btrfs_header_level(node) != level) {
11725                 error(
11726         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11727                         bytenr, nodesize, level,
11728                         btrfs_header_level(node));
11729                 err |= REFERENCER_MISMATCH;
11730         }
11731
11732 release_out:
11733         btrfs_release_path(&path);
11734 out:
11735         if (err & REFERENCER_MISSING) {
11736                 if (level < 0)
11737                         error("extent [%llu %d] lost referencer (owner: %llu)",
11738                                 bytenr, nodesize, root_id);
11739                 else
11740                         error(
11741                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11742                                 bytenr, nodesize, root_id, level);
11743         }
11744
11745         return err;
11746 }
11747
11748 /*
11749  * Check if tree block @eb is tree reloc root.
11750  * Return 0 if it's not or any problem happens
11751  * Return 1 if it's a tree reloc root
11752  */
11753 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11754                                  struct extent_buffer *eb)
11755 {
11756         struct btrfs_root *tree_reloc_root;
11757         struct btrfs_key key;
11758         u64 bytenr = btrfs_header_bytenr(eb);
11759         u64 owner = btrfs_header_owner(eb);
11760         int ret = 0;
11761
11762         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11763         key.offset = owner;
11764         key.type = BTRFS_ROOT_ITEM_KEY;
11765
11766         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11767         if (IS_ERR(tree_reloc_root))
11768                 return 0;
11769
11770         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
11771                 ret = 1;
11772         btrfs_free_fs_root(tree_reloc_root);
11773         return ret;
11774 }
11775
11776 /*
11777  * Check referencer for shared block backref
11778  * If level == -1, this function will resolve the level.
11779  */
11780 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11781                                      u64 parent, u64 bytenr, int level)
11782 {
11783         struct extent_buffer *eb;
11784         u32 nr;
11785         int found_parent = 0;
11786         int i;
11787
11788         eb = read_tree_block(fs_info, parent, 0);
11789         if (!extent_buffer_uptodate(eb))
11790                 goto out;
11791
11792         if (level == -1)
11793                 level = query_tree_block_level(fs_info, bytenr);
11794         if (level < 0)
11795                 goto out;
11796
11797         /* It's possible it's a tree reloc root */
11798         if (parent == bytenr) {
11799                 if (is_tree_reloc_root(fs_info, eb))
11800                         found_parent = 1;
11801                 goto out;
11802         }
11803
11804         if (level + 1 != btrfs_header_level(eb))
11805                 goto out;
11806
11807         nr = btrfs_header_nritems(eb);
11808         for (i = 0; i < nr; i++) {
11809                 if (bytenr == btrfs_node_blockptr(eb, i)) {
11810                         found_parent = 1;
11811                         break;
11812                 }
11813         }
11814 out:
11815         free_extent_buffer(eb);
11816         if (!found_parent) {
11817                 error(
11818         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
11819                         bytenr, fs_info->nodesize, parent, level);
11820                 return REFERENCER_MISSING;
11821         }
11822         return 0;
11823 }
11824
11825 /*
11826  * Check referencer for normal (inlined) data ref
11827  * If len == 0, it will be resolved by searching in extent tree
11828  */
11829 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
11830                                      u64 root_id, u64 objectid, u64 offset,
11831                                      u64 bytenr, u64 len, u32 count)
11832 {
11833         struct btrfs_root *root;
11834         struct btrfs_root *extent_root = fs_info->extent_root;
11835         struct btrfs_key key;
11836         struct btrfs_path path;
11837         struct extent_buffer *leaf;
11838         struct btrfs_file_extent_item *fi;
11839         u32 found_count = 0;
11840         int slot;
11841         int ret = 0;
11842
11843         if (!len) {
11844                 key.objectid = bytenr;
11845                 key.type = BTRFS_EXTENT_ITEM_KEY;
11846                 key.offset = (u64)-1;
11847
11848                 btrfs_init_path(&path);
11849                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11850                 if (ret < 0)
11851                         goto out;
11852                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11853                 if (ret)
11854                         goto out;
11855                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11856                 if (key.objectid != bytenr ||
11857                     key.type != BTRFS_EXTENT_ITEM_KEY)
11858                         goto out;
11859                 len = key.offset;
11860                 btrfs_release_path(&path);
11861         }
11862         key.objectid = root_id;
11863         key.type = BTRFS_ROOT_ITEM_KEY;
11864         key.offset = (u64)-1;
11865         btrfs_init_path(&path);
11866
11867         root = btrfs_read_fs_root(fs_info, &key);
11868         if (IS_ERR(root))
11869                 goto out;
11870
11871         key.objectid = objectid;
11872         key.type = BTRFS_EXTENT_DATA_KEY;
11873         /*
11874          * It can be nasty as data backref offset is
11875          * file offset - file extent offset, which is smaller or
11876          * equal to original backref offset.  The only special case is
11877          * overflow.  So we need to special check and do further search.
11878          */
11879         key.offset = offset & (1ULL << 63) ? 0 : offset;
11880
11881         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11882         if (ret < 0)
11883                 goto out;
11884
11885         /*
11886          * Search afterwards to get correct one
11887          * NOTE: As we must do a comprehensive check on the data backref to
11888          * make sure the dref count also matches, we must iterate all file
11889          * extents for that inode.
11890          */
11891         while (1) {
11892                 leaf = path.nodes[0];
11893                 slot = path.slots[0];
11894
11895                 if (slot >= btrfs_header_nritems(leaf) ||
11896                     btrfs_header_owner(leaf) != root_id)
11897                         goto next;
11898                 btrfs_item_key_to_cpu(leaf, &key, slot);
11899                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11900                         break;
11901                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11902                 /*
11903                  * Except normal disk bytenr and disk num bytes, we still
11904                  * need to do extra check on dbackref offset as
11905                  * dbackref offset = file_offset - file_extent_offset
11906                  *
11907                  * Also, we must check the leaf owner.
11908                  * In case of shared tree blocks (snapshots) we can inherit
11909                  * leaves from source snapshot.
11910                  * In that case, reference from source snapshot should not
11911                  * count.
11912                  */
11913                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11914                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11915                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11916                     offset && btrfs_header_owner(leaf) == root_id)
11917                         found_count++;
11918
11919 next:
11920                 ret = btrfs_next_item(root, &path);
11921                 if (ret)
11922                         break;
11923         }
11924 out:
11925         btrfs_release_path(&path);
11926         if (found_count != count) {
11927                 error(
11928 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11929                         bytenr, len, root_id, objectid, offset, count, found_count);
11930                 return REFERENCER_MISSING;
11931         }
11932         return 0;
11933 }
11934
11935 /*
11936  * Check if the referencer of a shared data backref exists
11937  */
11938 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11939                                      u64 parent, u64 bytenr)
11940 {
11941         struct extent_buffer *eb;
11942         struct btrfs_key key;
11943         struct btrfs_file_extent_item *fi;
11944         u32 nr;
11945         int found_parent = 0;
11946         int i;
11947
11948         eb = read_tree_block(fs_info, parent, 0);
11949         if (!extent_buffer_uptodate(eb))
11950                 goto out;
11951
11952         nr = btrfs_header_nritems(eb);
11953         for (i = 0; i < nr; i++) {
11954                 btrfs_item_key_to_cpu(eb, &key, i);
11955                 if (key.type != BTRFS_EXTENT_DATA_KEY)
11956                         continue;
11957
11958                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
11959                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11960                         continue;
11961
11962                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11963                         found_parent = 1;
11964                         break;
11965                 }
11966         }
11967
11968 out:
11969         free_extent_buffer(eb);
11970         if (!found_parent) {
11971                 error("shared extent %llu referencer lost (parent: %llu)",
11972                         bytenr, parent);
11973                 return REFERENCER_MISSING;
11974         }
11975         return 0;
11976 }
11977
11978 /*
11979  * Only delete backref if REFERENCER_MISSING now
11980  *
11981  * Returns <0   the extent was deleted
11982  * Returns >0   the backref was deleted but extent still exists, returned value
11983  *               means error after repair
11984  * Returns  0   nothing happened
11985  */
11986 static int repair_extent_item(struct btrfs_trans_handle *trans,
11987                       struct btrfs_root *root, struct btrfs_path *path,
11988                       u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
11989                       u64 owner, u64 offset, int err)
11990 {
11991         struct btrfs_key old_key;
11992         int freed = 0;
11993         int ret;
11994
11995         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
11996
11997         if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
11998                 /* delete the backref */
11999                 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12000                           num_bytes, parent, root_objectid, owner, offset);
12001                 if (!ret) {
12002                         freed = 1;
12003                         err &= ~REFERENCER_MISSING;
12004                         printf("Delete backref in extent [%llu %llu]\n",
12005                                bytenr, num_bytes);
12006                 } else {
12007                         error("fail to delete backref in extent [%llu %llu]",
12008                                bytenr, num_bytes);
12009                 }
12010         }
12011
12012         /* btrfs_free_extent may delete the extent */
12013         btrfs_release_path(path);
12014         ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12015
12016         if (ret)
12017                 ret = -ENOENT;
12018         else if (freed)
12019                 ret = err;
12020         return ret;
12021 }
12022
12023 /*
12024  * This function will check a given extent item, including its backref and
12025  * itself (like crossing stripe boundary and type)
12026  *
12027  * Since we don't use extent_record anymore, introduce new error bit
12028  */
12029 static int check_extent_item(struct btrfs_trans_handle *trans,
12030                              struct btrfs_fs_info *fs_info,
12031                              struct btrfs_path *path)
12032 {
12033         struct btrfs_extent_item *ei;
12034         struct btrfs_extent_inline_ref *iref;
12035         struct btrfs_extent_data_ref *dref;
12036         struct extent_buffer *eb = path->nodes[0];
12037         unsigned long end;
12038         unsigned long ptr;
12039         int slot = path->slots[0];
12040         int type;
12041         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12042         u32 item_size = btrfs_item_size_nr(eb, slot);
12043         u64 flags;
12044         u64 offset;
12045         u64 parent;
12046         u64 num_bytes;
12047         u64 root_objectid;
12048         u64 owner;
12049         u64 owner_offset;
12050         int metadata = 0;
12051         int level;
12052         struct btrfs_key key;
12053         int ret;
12054         int err = 0;
12055
12056         btrfs_item_key_to_cpu(eb, &key, slot);
12057         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12058                 bytes_used += key.offset;
12059                 num_bytes = key.offset;
12060         } else {
12061                 bytes_used += nodesize;
12062                 num_bytes = nodesize;
12063         }
12064
12065         if (item_size < sizeof(*ei)) {
12066                 /*
12067                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12068                  * old thing when on disk format is still un-determined.
12069                  * No need to care about it anymore
12070                  */
12071                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12072                 return -ENOTTY;
12073         }
12074
12075         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12076         flags = btrfs_extent_flags(eb, ei);
12077
12078         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12079                 metadata = 1;
12080         if (metadata && check_crossing_stripes(global_info, key.objectid,
12081                                                eb->len)) {
12082                 error("bad metadata [%llu, %llu) crossing stripe boundary",
12083                       key.objectid, key.objectid + nodesize);
12084                 err |= CROSSING_STRIPE_BOUNDARY;
12085         }
12086
12087         ptr = (unsigned long)(ei + 1);
12088
12089         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12090                 /* Old EXTENT_ITEM metadata */
12091                 struct btrfs_tree_block_info *info;
12092
12093                 info = (struct btrfs_tree_block_info *)ptr;
12094                 level = btrfs_tree_block_level(eb, info);
12095                 ptr += sizeof(struct btrfs_tree_block_info);
12096         } else {
12097                 /* New METADATA_ITEM */
12098                 level = key.offset;
12099         }
12100         end = (unsigned long)ei + item_size;
12101
12102 next:
12103         /* Reached extent item end normally */
12104         if (ptr == end)
12105                 goto out;
12106
12107         /* Beyond extent item end, wrong item size */
12108         if (ptr > end) {
12109                 err |= ITEM_SIZE_MISMATCH;
12110                 error("extent item at bytenr %llu slot %d has wrong size",
12111                         eb->start, slot);
12112                 goto out;
12113         }
12114
12115         parent = 0;
12116         root_objectid = 0;
12117         owner = 0;
12118         owner_offset = 0;
12119         /* Now check every backref in this extent item */
12120         iref = (struct btrfs_extent_inline_ref *)ptr;
12121         type = btrfs_extent_inline_ref_type(eb, iref);
12122         offset = btrfs_extent_inline_ref_offset(eb, iref);
12123         switch (type) {
12124         case BTRFS_TREE_BLOCK_REF_KEY:
12125                 root_objectid = offset;
12126                 owner = level;
12127                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12128                                                level);
12129                 err |= ret;
12130                 break;
12131         case BTRFS_SHARED_BLOCK_REF_KEY:
12132                 parent = offset;
12133                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12134                                                  level);
12135                 err |= ret;
12136                 break;
12137         case BTRFS_EXTENT_DATA_REF_KEY:
12138                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12139                 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12140                 owner = btrfs_extent_data_ref_objectid(eb, dref);
12141                 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12142                 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12143                                         owner_offset, key.objectid, key.offset,
12144                                         btrfs_extent_data_ref_count(eb, dref));
12145                 err |= ret;
12146                 break;
12147         case BTRFS_SHARED_DATA_REF_KEY:
12148                 parent = offset;
12149                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12150                 err |= ret;
12151                 break;
12152         default:
12153                 error("extent[%llu %d %llu] has unknown ref type: %d",
12154                         key.objectid, key.type, key.offset, type);
12155                 ret = UNKNOWN_TYPE;
12156                 err |= ret;
12157                 goto out;
12158         }
12159
12160         if (err && repair) {
12161                 ret = repair_extent_item(trans, fs_info->extent_root, path,
12162                          key.objectid, num_bytes, parent, root_objectid,
12163                          owner, owner_offset, ret);
12164                 if (ret < 0)
12165                         goto out;
12166                 if (ret) {
12167                         goto next;
12168                         err = ret;
12169                 }
12170         }
12171
12172         ptr += btrfs_extent_inline_ref_size(type);
12173         goto next;
12174
12175 out:
12176         return err;
12177 }
12178
12179 /*
12180  * Check if a dev extent item is referred correctly by its chunk
12181  */
12182 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12183                                  struct extent_buffer *eb, int slot)
12184 {
12185         struct btrfs_root *chunk_root = fs_info->chunk_root;
12186         struct btrfs_dev_extent *ptr;
12187         struct btrfs_path path;
12188         struct btrfs_key chunk_key;
12189         struct btrfs_key devext_key;
12190         struct btrfs_chunk *chunk;
12191         struct extent_buffer *l;
12192         int num_stripes;
12193         u64 length;
12194         int i;
12195         int found_chunk = 0;
12196         int ret;
12197
12198         btrfs_item_key_to_cpu(eb, &devext_key, slot);
12199         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12200         length = btrfs_dev_extent_length(eb, ptr);
12201
12202         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12203         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12204         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12205
12206         btrfs_init_path(&path);
12207         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12208         if (ret)
12209                 goto out;
12210
12211         l = path.nodes[0];
12212         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12213         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12214                                       chunk_key.offset);
12215         if (ret < 0)
12216                 goto out;
12217
12218         if (btrfs_stripe_length(fs_info, l, chunk) != length)
12219                 goto out;
12220
12221         num_stripes = btrfs_chunk_num_stripes(l, chunk);
12222         for (i = 0; i < num_stripes; i++) {
12223                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12224                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12225
12226                 if (devid == devext_key.objectid &&
12227                     offset == devext_key.offset) {
12228                         found_chunk = 1;
12229                         break;
12230                 }
12231         }
12232 out:
12233         btrfs_release_path(&path);
12234         if (!found_chunk) {
12235                 error(
12236                 "device extent[%llu, %llu, %llu] did not find the related chunk",
12237                         devext_key.objectid, devext_key.offset, length);
12238                 return REFERENCER_MISSING;
12239         }
12240         return 0;
12241 }
12242
12243 /*
12244  * Check if the used space is correct with the dev item
12245  */
12246 static int check_dev_item(struct btrfs_fs_info *fs_info,
12247                           struct extent_buffer *eb, int slot)
12248 {
12249         struct btrfs_root *dev_root = fs_info->dev_root;
12250         struct btrfs_dev_item *dev_item;
12251         struct btrfs_path path;
12252         struct btrfs_key key;
12253         struct btrfs_dev_extent *ptr;
12254         u64 total_bytes;
12255         u64 dev_id;
12256         u64 used;
12257         u64 total = 0;
12258         int ret;
12259
12260         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12261         dev_id = btrfs_device_id(eb, dev_item);
12262         used = btrfs_device_bytes_used(eb, dev_item);
12263         total_bytes = btrfs_device_total_bytes(eb, dev_item);
12264
12265         key.objectid = dev_id;
12266         key.type = BTRFS_DEV_EXTENT_KEY;
12267         key.offset = 0;
12268
12269         btrfs_init_path(&path);
12270         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
12271         if (ret < 0) {
12272                 btrfs_item_key_to_cpu(eb, &key, slot);
12273                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12274                         key.objectid, key.type, key.offset);
12275                 btrfs_release_path(&path);
12276                 return REFERENCER_MISSING;
12277         }
12278
12279         /* Iterate dev_extents to calculate the used space of a device */
12280         while (1) {
12281                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12282                         goto next;
12283
12284                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12285                 if (key.objectid > dev_id)
12286                         break;
12287                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12288                         goto next;
12289
12290                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12291                                      struct btrfs_dev_extent);
12292                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12293 next:
12294                 ret = btrfs_next_item(dev_root, &path);
12295                 if (ret)
12296                         break;
12297         }
12298         btrfs_release_path(&path);
12299
12300         if (used != total) {
12301                 btrfs_item_key_to_cpu(eb, &key, slot);
12302                 error(
12303 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12304                         total, used, BTRFS_ROOT_TREE_OBJECTID,
12305                         BTRFS_DEV_EXTENT_KEY, dev_id);
12306                 return ACCOUNTING_MISMATCH;
12307         }
12308         check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize);
12309
12310         return 0;
12311 }
12312
12313 /*
12314  * Check a block group item with its referener (chunk) and its used space
12315  * with extent/metadata item
12316  */
12317 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12318                                   struct extent_buffer *eb, int slot)
12319 {
12320         struct btrfs_root *extent_root = fs_info->extent_root;
12321         struct btrfs_root *chunk_root = fs_info->chunk_root;
12322         struct btrfs_block_group_item *bi;
12323         struct btrfs_block_group_item bg_item;
12324         struct btrfs_path path;
12325         struct btrfs_key bg_key;
12326         struct btrfs_key chunk_key;
12327         struct btrfs_key extent_key;
12328         struct btrfs_chunk *chunk;
12329         struct extent_buffer *leaf;
12330         struct btrfs_extent_item *ei;
12331         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12332         u64 flags;
12333         u64 bg_flags;
12334         u64 used;
12335         u64 total = 0;
12336         int ret;
12337         int err = 0;
12338
12339         btrfs_item_key_to_cpu(eb, &bg_key, slot);
12340         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
12341         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12342         used = btrfs_block_group_used(&bg_item);
12343         bg_flags = btrfs_block_group_flags(&bg_item);
12344
12345         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12346         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12347         chunk_key.offset = bg_key.objectid;
12348
12349         btrfs_init_path(&path);
12350         /* Search for the referencer chunk */
12351         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12352         if (ret) {
12353                 error(
12354                 "block group[%llu %llu] did not find the related chunk item",
12355                         bg_key.objectid, bg_key.offset);
12356                 err |= REFERENCER_MISSING;
12357         } else {
12358                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12359                                         struct btrfs_chunk);
12360                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12361                                                 bg_key.offset) {
12362                         error(
12363         "block group[%llu %llu] related chunk item length does not match",
12364                                 bg_key.objectid, bg_key.offset);
12365                         err |= REFERENCER_MISMATCH;
12366                 }
12367         }
12368         btrfs_release_path(&path);
12369
12370         /* Search from the block group bytenr */
12371         extent_key.objectid = bg_key.objectid;
12372         extent_key.type = 0;
12373         extent_key.offset = 0;
12374
12375         btrfs_init_path(&path);
12376         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12377         if (ret < 0)
12378                 goto out;
12379
12380         /* Iterate extent tree to account used space */
12381         while (1) {
12382                 leaf = path.nodes[0];
12383
12384                 /* Search slot can point to the last item beyond leaf nritems */
12385                 if (path.slots[0] >= btrfs_header_nritems(leaf))
12386                         goto next;
12387
12388                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
12389                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12390                         break;
12391
12392                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12393                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12394                         goto next;
12395                 if (extent_key.objectid < bg_key.objectid)
12396                         goto next;
12397
12398                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12399                         total += nodesize;
12400                 else
12401                         total += extent_key.offset;
12402
12403                 ei = btrfs_item_ptr(leaf, path.slots[0],
12404                                     struct btrfs_extent_item);
12405                 flags = btrfs_extent_flags(leaf, ei);
12406                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12407                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12408                                 error(
12409                         "bad extent[%llu, %llu) type mismatch with chunk",
12410                                         extent_key.objectid,
12411                                         extent_key.objectid + extent_key.offset);
12412                                 err |= CHUNK_TYPE_MISMATCH;
12413                         }
12414                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12415                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12416                                     BTRFS_BLOCK_GROUP_METADATA))) {
12417                                 error(
12418                         "bad extent[%llu, %llu) type mismatch with chunk",
12419                                         extent_key.objectid,
12420                                         extent_key.objectid + nodesize);
12421                                 err |= CHUNK_TYPE_MISMATCH;
12422                         }
12423                 }
12424 next:
12425                 ret = btrfs_next_item(extent_root, &path);
12426                 if (ret)
12427                         break;
12428         }
12429
12430 out:
12431         btrfs_release_path(&path);
12432
12433         if (total != used) {
12434                 error(
12435                 "block group[%llu %llu] used %llu but extent items used %llu",
12436                         bg_key.objectid, bg_key.offset, used, total);
12437                 err |= BG_ACCOUNTING_ERROR;
12438         }
12439         return err;
12440 }
12441
12442 /*
12443  * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
12444  * FIXME: We still need to repair error of dev_item.
12445  *
12446  * Returns error after repair.
12447  */
12448 static int repair_chunk_item(struct btrfs_trans_handle *trans,
12449                              struct btrfs_root *chunk_root,
12450                              struct btrfs_path *path, int err)
12451 {
12452         struct btrfs_chunk *chunk;
12453         struct btrfs_key chunk_key;
12454         struct extent_buffer *eb = path->nodes[0];
12455         u64 length;
12456         int slot = path->slots[0];
12457         u64 type;
12458         int ret = 0;
12459
12460         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12461         if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
12462                 return err;
12463         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12464         type = btrfs_chunk_type(path->nodes[0], chunk);
12465         length = btrfs_chunk_length(eb, chunk);
12466
12467         if (err & REFERENCER_MISSING) {
12468                 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
12469                                              type, chunk_key.offset, length);
12470                 if (ret) {
12471                         error("fail to add block group item[%llu %llu]",
12472                               chunk_key.offset, length);
12473                         goto out;
12474                 } else {
12475                         err &= ~REFERENCER_MISSING;
12476                         printf("Added block group item[%llu %llu]\n",
12477                                chunk_key.offset, length);
12478                 }
12479         }
12480
12481 out:
12482         return err;
12483 }
12484
12485 /*
12486  * Check a chunk item.
12487  * Including checking all referred dev_extents and block group
12488  */
12489 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12490                             struct extent_buffer *eb, int slot)
12491 {
12492         struct btrfs_root *extent_root = fs_info->extent_root;
12493         struct btrfs_root *dev_root = fs_info->dev_root;
12494         struct btrfs_path path;
12495         struct btrfs_key chunk_key;
12496         struct btrfs_key bg_key;
12497         struct btrfs_key devext_key;
12498         struct btrfs_chunk *chunk;
12499         struct extent_buffer *leaf;
12500         struct btrfs_block_group_item *bi;
12501         struct btrfs_block_group_item bg_item;
12502         struct btrfs_dev_extent *ptr;
12503         u64 length;
12504         u64 chunk_end;
12505         u64 stripe_len;
12506         u64 type;
12507         int num_stripes;
12508         u64 offset;
12509         u64 objectid;
12510         int i;
12511         int ret;
12512         int err = 0;
12513
12514         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12515         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12516         length = btrfs_chunk_length(eb, chunk);
12517         chunk_end = chunk_key.offset + length;
12518         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12519                                       chunk_key.offset);
12520         if (ret < 0) {
12521                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12522                         chunk_end);
12523                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12524                 goto out;
12525         }
12526         type = btrfs_chunk_type(eb, chunk);
12527
12528         bg_key.objectid = chunk_key.offset;
12529         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12530         bg_key.offset = length;
12531
12532         btrfs_init_path(&path);
12533         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12534         if (ret) {
12535                 error(
12536                 "chunk[%llu %llu) did not find the related block group item",
12537                         chunk_key.offset, chunk_end);
12538                 err |= REFERENCER_MISSING;
12539         } else{
12540                 leaf = path.nodes[0];
12541                 bi = btrfs_item_ptr(leaf, path.slots[0],
12542                                     struct btrfs_block_group_item);
12543                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
12544                                    sizeof(bg_item));
12545                 if (btrfs_block_group_flags(&bg_item) != type) {
12546                         error(
12547 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12548                                 chunk_key.offset, chunk_end, type,
12549                                 btrfs_block_group_flags(&bg_item));
12550                         err |= REFERENCER_MISSING;
12551                 }
12552         }
12553
12554         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12555         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
12556         for (i = 0; i < num_stripes; i++) {
12557                 btrfs_release_path(&path);
12558                 btrfs_init_path(&path);
12559                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12560                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12561                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12562
12563                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12564                                         0, 0);
12565                 if (ret)
12566                         goto not_match_dev;
12567
12568                 leaf = path.nodes[0];
12569                 ptr = btrfs_item_ptr(leaf, path.slots[0],
12570                                      struct btrfs_dev_extent);
12571                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12572                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
12573                 if (objectid != chunk_key.objectid ||
12574                     offset != chunk_key.offset ||
12575                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12576                         goto not_match_dev;
12577                 continue;
12578 not_match_dev:
12579                 err |= BACKREF_MISSING;
12580                 error(
12581                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12582                         chunk_key.objectid, chunk_end, i);
12583                 continue;
12584         }
12585         btrfs_release_path(&path);
12586 out:
12587         return err;
12588 }
12589
12590 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
12591                                    struct btrfs_root *root,
12592                                    struct btrfs_path *path)
12593 {
12594         struct btrfs_key key;
12595         int ret = 0;
12596
12597         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
12598         btrfs_release_path(path);
12599         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
12600         if (ret) {
12601                 ret = -ENOENT;
12602                 goto out;
12603         }
12604
12605         ret = btrfs_del_item(trans, root, path);
12606         if (ret)
12607                 goto out;
12608
12609         if (path->slots[0] == 0)
12610                 btrfs_prev_leaf(root, path);
12611         else
12612                 path->slots[0]--;
12613 out:
12614         if (ret)
12615                 error("failed to delete root %llu item[%llu, %u, %llu]",
12616                       root->objectid, key.objectid, key.type, key.offset);
12617         else
12618                 printf("Deleted root %llu item[%llu, %u, %llu]\n",
12619                        root->objectid, key.objectid, key.type, key.offset);
12620         return ret;
12621 }
12622
12623 /*
12624  * Main entry function to check known items and update related accounting info
12625  */
12626 static int check_leaf_items(struct btrfs_trans_handle *trans,
12627                             struct btrfs_root *root, struct btrfs_path *path,
12628                             struct node_refs *nrefs, int account_bytes)
12629 {
12630         struct btrfs_fs_info *fs_info = root->fs_info;
12631         struct btrfs_key key;
12632         struct extent_buffer *eb;
12633         int slot;
12634         int type;
12635         struct btrfs_extent_data_ref *dref;
12636         int ret = 0;
12637         int err = 0;
12638
12639 again:
12640         eb = path->nodes[0];
12641         slot = path->slots[0];
12642         if (slot >= btrfs_header_nritems(eb)) {
12643                 if (slot == 0) {
12644                         error("empty leaf [%llu %u] root %llu", eb->start,
12645                                 root->fs_info->nodesize, root->objectid);
12646                         err |= EIO;
12647                 }
12648                 goto out;
12649         }
12650
12651         btrfs_item_key_to_cpu(eb, &key, slot);
12652         type = key.type;
12653
12654         switch (type) {
12655         case BTRFS_EXTENT_DATA_KEY:
12656                 ret = check_extent_data_item(root, path, nrefs, account_bytes);
12657                 if (repair && ret)
12658                         ret = repair_extent_data_item(trans, root, path, nrefs,
12659                                                       ret);
12660                 err |= ret;
12661                 break;
12662         case BTRFS_BLOCK_GROUP_ITEM_KEY:
12663                 ret = check_block_group_item(fs_info, eb, slot);
12664                 if (repair &&
12665                     ret & REFERENCER_MISSING)
12666                         ret = delete_extent_tree_item(trans, root, path);
12667                 err |= ret;
12668                 break;
12669         case BTRFS_DEV_ITEM_KEY:
12670                 ret = check_dev_item(fs_info, eb, slot);
12671                 err |= ret;
12672                 break;
12673         case BTRFS_CHUNK_ITEM_KEY:
12674                 ret = check_chunk_item(fs_info, eb, slot);
12675                 if (repair && ret)
12676                         ret = repair_chunk_item(trans, root, path, ret);
12677                 err |= ret;
12678                 break;
12679         case BTRFS_DEV_EXTENT_KEY:
12680                 ret = check_dev_extent_item(fs_info, eb, slot);
12681                 err |= ret;
12682                 break;
12683         case BTRFS_EXTENT_ITEM_KEY:
12684         case BTRFS_METADATA_ITEM_KEY:
12685                 ret = check_extent_item(trans, fs_info, path);
12686                 err |= ret;
12687                 break;
12688         case BTRFS_EXTENT_CSUM_KEY:
12689                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
12690                 err |= ret;
12691                 break;
12692         case BTRFS_TREE_BLOCK_REF_KEY:
12693                 ret = check_tree_block_backref(fs_info, key.offset,
12694                                                key.objectid, -1);
12695                 if (repair &&
12696                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12697                         ret = delete_extent_tree_item(trans, root, path);
12698                 err |= ret;
12699                 break;
12700         case BTRFS_EXTENT_DATA_REF_KEY:
12701                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
12702                 ret = check_extent_data_backref(fs_info,
12703                                 btrfs_extent_data_ref_root(eb, dref),
12704                                 btrfs_extent_data_ref_objectid(eb, dref),
12705                                 btrfs_extent_data_ref_offset(eb, dref),
12706                                 key.objectid, 0,
12707                                 btrfs_extent_data_ref_count(eb, dref));
12708                 if (repair &&
12709                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12710                         ret = delete_extent_tree_item(trans, root, path);
12711                 err |= ret;
12712                 break;
12713         case BTRFS_SHARED_BLOCK_REF_KEY:
12714                 ret = check_shared_block_backref(fs_info, key.offset,
12715                                                  key.objectid, -1);
12716                 if (repair &&
12717                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12718                         ret = delete_extent_tree_item(trans, root, path);
12719                 err |= ret;
12720                 break;
12721         case BTRFS_SHARED_DATA_REF_KEY:
12722                 ret = check_shared_data_backref(fs_info, key.offset,
12723                                                 key.objectid);
12724                 if (repair &&
12725                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12726                         ret = delete_extent_tree_item(trans, root, path);
12727                 err |= ret;
12728                 break;
12729         default:
12730                 break;
12731         }
12732
12733         ++path->slots[0];
12734         goto again;
12735 out:
12736         return err;
12737 }
12738
12739 /*
12740  * Low memory usage version check_chunks_and_extents.
12741  */
12742 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12743 {
12744         struct btrfs_trans_handle *trans = NULL;
12745         struct btrfs_path path;
12746         struct btrfs_key old_key;
12747         struct btrfs_key key;
12748         struct btrfs_root *root1;
12749         struct btrfs_root *root;
12750         struct btrfs_root *cur_root;
12751         int err = 0;
12752         int ret;
12753
12754         root = fs_info->fs_root;
12755
12756         if (repair) {
12757                 trans = btrfs_start_transaction(fs_info->extent_root, 1);
12758                 if (IS_ERR(trans)) {
12759                         error("failed to start transaction before check");
12760                         return PTR_ERR(trans);
12761                 }
12762         }
12763
12764         root1 = root->fs_info->chunk_root;
12765         ret = check_btrfs_root(trans, root1, 0, 1);
12766         err |= ret;
12767
12768         root1 = root->fs_info->tree_root;
12769         ret = check_btrfs_root(trans, root1, 0, 1);
12770         err |= ret;
12771
12772         btrfs_init_path(&path);
12773         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12774         key.offset = 0;
12775         key.type = BTRFS_ROOT_ITEM_KEY;
12776
12777         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
12778         if (ret) {
12779                 error("cannot find extent tree in tree_root");
12780                 goto out;
12781         }
12782
12783         while (1) {
12784                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12785                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12786                         goto next;
12787                 old_key = key;
12788                 key.offset = (u64)-1;
12789
12790                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12791                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
12792                                         &key);
12793                 else
12794                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
12795                 if (IS_ERR(cur_root) || !cur_root) {
12796                         error("failed to read tree: %lld", key.objectid);
12797                         goto next;
12798                 }
12799
12800                 ret = check_btrfs_root(trans, cur_root, 0, 1);
12801                 err |= ret;
12802
12803                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12804                         btrfs_free_fs_root(cur_root);
12805
12806                 btrfs_release_path(&path);
12807                 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
12808                                         &old_key, &path, 0, 0);
12809                 if (ret)
12810                         goto out;
12811 next:
12812                 ret = btrfs_next_item(root1, &path);
12813                 if (ret)
12814                         goto out;
12815         }
12816 out:
12817
12818         /* if repair, update block accounting */
12819         if (repair) {
12820                 ret = btrfs_fix_block_accounting(trans, root);
12821                 if (ret)
12822                         err |= ret;
12823                 else
12824                         err &= ~BG_ACCOUNTING_ERROR;
12825         }
12826
12827         if (trans)
12828                 btrfs_commit_transaction(trans, root->fs_info->extent_root);
12829
12830         btrfs_release_path(&path);
12831
12832         return err;
12833 }
12834
12835 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
12836 {
12837         int ret;
12838
12839         if (!ctx.progress_enabled)
12840                 fprintf(stderr, "checking extents\n");
12841         if (check_mode == CHECK_MODE_LOWMEM)
12842                 ret = check_chunks_and_extents_v2(fs_info);
12843         else
12844                 ret = check_chunks_and_extents(fs_info);
12845
12846         /* Also repair device size related problems */
12847         if (repair && !ret) {
12848                 ret = btrfs_fix_device_and_super_size(fs_info);
12849                 if (ret > 0)
12850                         ret = 0;
12851         }
12852         return ret;
12853 }
12854
12855 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
12856                            struct btrfs_root *root, int overwrite)
12857 {
12858         struct extent_buffer *c;
12859         struct extent_buffer *old = root->node;
12860         int level;
12861         int ret;
12862         struct btrfs_disk_key disk_key = {0,0,0};
12863
12864         level = 0;
12865
12866         if (overwrite) {
12867                 c = old;
12868                 extent_buffer_get(c);
12869                 goto init;
12870         }
12871         c = btrfs_alloc_free_block(trans, root,
12872                                    root->fs_info->nodesize,
12873                                    root->root_key.objectid,
12874                                    &disk_key, level, 0, 0);
12875         if (IS_ERR(c)) {
12876                 c = old;
12877                 extent_buffer_get(c);
12878                 overwrite = 1;
12879         }
12880 init:
12881         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
12882         btrfs_set_header_level(c, level);
12883         btrfs_set_header_bytenr(c, c->start);
12884         btrfs_set_header_generation(c, trans->transid);
12885         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
12886         btrfs_set_header_owner(c, root->root_key.objectid);
12887
12888         write_extent_buffer(c, root->fs_info->fsid,
12889                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
12890
12891         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
12892                             btrfs_header_chunk_tree_uuid(c),
12893                             BTRFS_UUID_SIZE);
12894
12895         btrfs_mark_buffer_dirty(c);
12896         /*
12897          * this case can happen in the following case:
12898          *
12899          * 1.overwrite previous root.
12900          *
12901          * 2.reinit reloc data root, this is because we skip pin
12902          * down reloc data tree before which means we can allocate
12903          * same block bytenr here.
12904          */
12905         if (old->start == c->start) {
12906                 btrfs_set_root_generation(&root->root_item,
12907                                           trans->transid);
12908                 root->root_item.level = btrfs_header_level(root->node);
12909                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12910                                         &root->root_key, &root->root_item);
12911                 if (ret) {
12912                         free_extent_buffer(c);
12913                         return ret;
12914                 }
12915         }
12916         free_extent_buffer(old);
12917         root->node = c;
12918         add_root_to_dirty_list(root);
12919         return 0;
12920 }
12921
12922 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12923                                 struct extent_buffer *eb, int tree_root)
12924 {
12925         struct extent_buffer *tmp;
12926         struct btrfs_root_item *ri;
12927         struct btrfs_key key;
12928         u64 bytenr;
12929         int level = btrfs_header_level(eb);
12930         int nritems;
12931         int ret;
12932         int i;
12933
12934         /*
12935          * If we have pinned this block before, don't pin it again.
12936          * This can not only avoid forever loop with broken filesystem
12937          * but also give us some speedups.
12938          */
12939         if (test_range_bit(&fs_info->pinned_extents, eb->start,
12940                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12941                 return 0;
12942
12943         btrfs_pin_extent(fs_info, eb->start, eb->len);
12944
12945         nritems = btrfs_header_nritems(eb);
12946         for (i = 0; i < nritems; i++) {
12947                 if (level == 0) {
12948                         btrfs_item_key_to_cpu(eb, &key, i);
12949                         if (key.type != BTRFS_ROOT_ITEM_KEY)
12950                                 continue;
12951                         /* Skip the extent root and reloc roots */
12952                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
12953                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12954                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12955                                 continue;
12956                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12957                         bytenr = btrfs_disk_root_bytenr(eb, ri);
12958
12959                         /*
12960                          * If at any point we start needing the real root we
12961                          * will have to build a stump root for the root we are
12962                          * in, but for now this doesn't actually use the root so
12963                          * just pass in extent_root.
12964                          */
12965                         tmp = read_tree_block(fs_info, bytenr, 0);
12966                         if (!extent_buffer_uptodate(tmp)) {
12967                                 fprintf(stderr, "Error reading root block\n");
12968                                 return -EIO;
12969                         }
12970                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
12971                         free_extent_buffer(tmp);
12972                         if (ret)
12973                                 return ret;
12974                 } else {
12975                         bytenr = btrfs_node_blockptr(eb, i);
12976
12977                         /* If we aren't the tree root don't read the block */
12978                         if (level == 1 && !tree_root) {
12979                                 btrfs_pin_extent(fs_info, bytenr,
12980                                                 fs_info->nodesize);
12981                                 continue;
12982                         }
12983
12984                         tmp = read_tree_block(fs_info, bytenr, 0);
12985                         if (!extent_buffer_uptodate(tmp)) {
12986                                 fprintf(stderr, "Error reading tree block\n");
12987                                 return -EIO;
12988                         }
12989                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
12990                         free_extent_buffer(tmp);
12991                         if (ret)
12992                                 return ret;
12993                 }
12994         }
12995
12996         return 0;
12997 }
12998
12999 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13000 {
13001         int ret;
13002
13003         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13004         if (ret)
13005                 return ret;
13006
13007         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
13008 }
13009
13010 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13011 {
13012         struct btrfs_block_group_cache *cache;
13013         struct btrfs_path path;
13014         struct extent_buffer *leaf;
13015         struct btrfs_chunk *chunk;
13016         struct btrfs_key key;
13017         int ret;
13018         u64 start;
13019
13020         btrfs_init_path(&path);
13021         key.objectid = 0;
13022         key.type = BTRFS_CHUNK_ITEM_KEY;
13023         key.offset = 0;
13024         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13025         if (ret < 0) {
13026                 btrfs_release_path(&path);
13027                 return ret;
13028         }
13029
13030         /*
13031          * We do this in case the block groups were screwed up and had alloc
13032          * bits that aren't actually set on the chunks.  This happens with
13033          * restored images every time and could happen in real life I guess.
13034          */
13035         fs_info->avail_data_alloc_bits = 0;
13036         fs_info->avail_metadata_alloc_bits = 0;
13037         fs_info->avail_system_alloc_bits = 0;
13038
13039         /* First we need to create the in-memory block groups */
13040         while (1) {
13041                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13042                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13043                         if (ret < 0) {
13044                                 btrfs_release_path(&path);
13045                                 return ret;
13046                         }
13047                         if (ret) {
13048                                 ret = 0;
13049                                 break;
13050                         }
13051                 }
13052                 leaf = path.nodes[0];
13053                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13054                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13055                         path.slots[0]++;
13056                         continue;
13057                 }
13058
13059                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13060                 btrfs_add_block_group(fs_info, 0,
13061                                       btrfs_chunk_type(leaf, chunk), key.offset,
13062                                       btrfs_chunk_length(leaf, chunk));
13063                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13064                                  key.offset + btrfs_chunk_length(leaf, chunk));
13065                 path.slots[0]++;
13066         }
13067         start = 0;
13068         while (1) {
13069                 cache = btrfs_lookup_first_block_group(fs_info, start);
13070                 if (!cache)
13071                         break;
13072                 cache->cached = 1;
13073                 start = cache->key.objectid + cache->key.offset;
13074         }
13075
13076         btrfs_release_path(&path);
13077         return 0;
13078 }
13079
13080 static int reset_balance(struct btrfs_trans_handle *trans,
13081                          struct btrfs_fs_info *fs_info)
13082 {
13083         struct btrfs_root *root = fs_info->tree_root;
13084         struct btrfs_path path;
13085         struct extent_buffer *leaf;
13086         struct btrfs_key key;
13087         int del_slot, del_nr = 0;
13088         int ret;
13089         int found = 0;
13090
13091         btrfs_init_path(&path);
13092         key.objectid = BTRFS_BALANCE_OBJECTID;
13093         key.type = BTRFS_BALANCE_ITEM_KEY;
13094         key.offset = 0;
13095         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13096         if (ret) {
13097                 if (ret > 0)
13098                         ret = 0;
13099                 if (!ret)
13100                         goto reinit_data_reloc;
13101                 else
13102                         goto out;
13103         }
13104
13105         ret = btrfs_del_item(trans, root, &path);
13106         if (ret)
13107                 goto out;
13108         btrfs_release_path(&path);
13109
13110         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13111         key.type = BTRFS_ROOT_ITEM_KEY;
13112         key.offset = 0;
13113         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13114         if (ret < 0)
13115                 goto out;
13116         while (1) {
13117                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13118                         if (!found)
13119                                 break;
13120
13121                         if (del_nr) {
13122                                 ret = btrfs_del_items(trans, root, &path,
13123                                                       del_slot, del_nr);
13124                                 del_nr = 0;
13125                                 if (ret)
13126                                         goto out;
13127                         }
13128                         key.offset++;
13129                         btrfs_release_path(&path);
13130
13131                         found = 0;
13132                         ret = btrfs_search_slot(trans, root, &key, &path,
13133                                                 -1, 1);
13134                         if (ret < 0)
13135                                 goto out;
13136                         continue;
13137                 }
13138                 found = 1;
13139                 leaf = path.nodes[0];
13140                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13141                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13142                         break;
13143                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13144                         path.slots[0]++;
13145                         continue;
13146                 }
13147                 if (!del_nr) {
13148                         del_slot = path.slots[0];
13149                         del_nr = 1;
13150                 } else {
13151                         del_nr++;
13152                 }
13153                 path.slots[0]++;
13154         }
13155
13156         if (del_nr) {
13157                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13158                 if (ret)
13159                         goto out;
13160         }
13161         btrfs_release_path(&path);
13162
13163 reinit_data_reloc:
13164         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13165         key.type = BTRFS_ROOT_ITEM_KEY;
13166         key.offset = (u64)-1;
13167         root = btrfs_read_fs_root(fs_info, &key);
13168         if (IS_ERR(root)) {
13169                 fprintf(stderr, "Error reading data reloc tree\n");
13170                 ret = PTR_ERR(root);
13171                 goto out;
13172         }
13173         record_root_in_trans(trans, root);
13174         ret = btrfs_fsck_reinit_root(trans, root, 0);
13175         if (ret)
13176                 goto out;
13177         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13178 out:
13179         btrfs_release_path(&path);
13180         return ret;
13181 }
13182
13183 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13184                               struct btrfs_fs_info *fs_info)
13185 {
13186         u64 start = 0;
13187         int ret;
13188
13189         /*
13190          * The only reason we don't do this is because right now we're just
13191          * walking the trees we find and pinning down their bytes, we don't look
13192          * at any of the leaves.  In order to do mixed groups we'd have to check
13193          * the leaves of any fs roots and pin down the bytes for any file
13194          * extents we find.  Not hard but why do it if we don't have to?
13195          */
13196         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13197                 fprintf(stderr, "We don't support re-initing the extent tree "
13198                         "for mixed block groups yet, please notify a btrfs "
13199                         "developer you want to do this so they can add this "
13200                         "functionality.\n");
13201                 return -EINVAL;
13202         }
13203
13204         /*
13205          * first we need to walk all of the trees except the extent tree and pin
13206          * down the bytes that are in use so we don't overwrite any existing
13207          * metadata.
13208          */
13209         ret = pin_metadata_blocks(fs_info);
13210         if (ret) {
13211                 fprintf(stderr, "error pinning down used bytes\n");
13212                 return ret;
13213         }
13214
13215         /*
13216          * Need to drop all the block groups since we're going to recreate all
13217          * of them again.
13218          */
13219         btrfs_free_block_groups(fs_info);
13220         ret = reset_block_groups(fs_info);
13221         if (ret) {
13222                 fprintf(stderr, "error resetting the block groups\n");
13223                 return ret;
13224         }
13225
13226         /* Ok we can allocate now, reinit the extent root */
13227         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13228         if (ret) {
13229                 fprintf(stderr, "extent root initialization failed\n");
13230                 /*
13231                  * When the transaction code is updated we should end the
13232                  * transaction, but for now progs only knows about commit so
13233                  * just return an error.
13234                  */
13235                 return ret;
13236         }
13237
13238         /*
13239          * Now we have all the in-memory block groups setup so we can make
13240          * allocations properly, and the metadata we care about is safe since we
13241          * pinned all of it above.
13242          */
13243         while (1) {
13244                 struct btrfs_block_group_cache *cache;
13245
13246                 cache = btrfs_lookup_first_block_group(fs_info, start);
13247                 if (!cache)
13248                         break;
13249                 start = cache->key.objectid + cache->key.offset;
13250                 ret = btrfs_insert_item(trans, fs_info->extent_root,
13251                                         &cache->key, &cache->item,
13252                                         sizeof(cache->item));
13253                 if (ret) {
13254                         fprintf(stderr, "Error adding block group\n");
13255                         return ret;
13256                 }
13257                 btrfs_extent_post_op(trans, fs_info->extent_root);
13258         }
13259
13260         ret = reset_balance(trans, fs_info);
13261         if (ret)
13262                 fprintf(stderr, "error resetting the pending balance\n");
13263
13264         return ret;
13265 }
13266
13267 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13268 {
13269         struct btrfs_path path;
13270         struct btrfs_trans_handle *trans;
13271         struct btrfs_key key;
13272         int ret;
13273
13274         printf("Recowing metadata block %llu\n", eb->start);
13275         key.objectid = btrfs_header_owner(eb);
13276         key.type = BTRFS_ROOT_ITEM_KEY;
13277         key.offset = (u64)-1;
13278
13279         root = btrfs_read_fs_root(root->fs_info, &key);
13280         if (IS_ERR(root)) {
13281                 fprintf(stderr, "Couldn't find owner root %llu\n",
13282                         key.objectid);
13283                 return PTR_ERR(root);
13284         }
13285
13286         trans = btrfs_start_transaction(root, 1);
13287         if (IS_ERR(trans))
13288                 return PTR_ERR(trans);
13289
13290         btrfs_init_path(&path);
13291         path.lowest_level = btrfs_header_level(eb);
13292         if (path.lowest_level)
13293                 btrfs_node_key_to_cpu(eb, &key, 0);
13294         else
13295                 btrfs_item_key_to_cpu(eb, &key, 0);
13296
13297         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13298         btrfs_commit_transaction(trans, root);
13299         btrfs_release_path(&path);
13300         return ret;
13301 }
13302
13303 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13304 {
13305         struct btrfs_path path;
13306         struct btrfs_trans_handle *trans;
13307         struct btrfs_key key;
13308         int ret;
13309
13310         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13311                bad->key.type, bad->key.offset);
13312         key.objectid = bad->root_id;
13313         key.type = BTRFS_ROOT_ITEM_KEY;
13314         key.offset = (u64)-1;
13315
13316         root = btrfs_read_fs_root(root->fs_info, &key);
13317         if (IS_ERR(root)) {
13318                 fprintf(stderr, "Couldn't find owner root %llu\n",
13319                         key.objectid);
13320                 return PTR_ERR(root);
13321         }
13322
13323         trans = btrfs_start_transaction(root, 1);
13324         if (IS_ERR(trans))
13325                 return PTR_ERR(trans);
13326
13327         btrfs_init_path(&path);
13328         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13329         if (ret) {
13330                 if (ret > 0)
13331                         ret = 0;
13332                 goto out;
13333         }
13334         ret = btrfs_del_item(trans, root, &path);
13335 out:
13336         btrfs_commit_transaction(trans, root);
13337         btrfs_release_path(&path);
13338         return ret;
13339 }
13340
13341 static int zero_log_tree(struct btrfs_root *root)
13342 {
13343         struct btrfs_trans_handle *trans;
13344         int ret;
13345
13346         trans = btrfs_start_transaction(root, 1);
13347         if (IS_ERR(trans)) {
13348                 ret = PTR_ERR(trans);
13349                 return ret;
13350         }
13351         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13352         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13353         ret = btrfs_commit_transaction(trans, root);
13354         return ret;
13355 }
13356
13357 static int populate_csum(struct btrfs_trans_handle *trans,
13358                          struct btrfs_root *csum_root, char *buf, u64 start,
13359                          u64 len)
13360 {
13361         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13362         u64 offset = 0;
13363         u64 sectorsize;
13364         int ret = 0;
13365
13366         while (offset < len) {
13367                 sectorsize = fs_info->sectorsize;
13368                 ret = read_extent_data(fs_info, buf, start + offset,
13369                                        &sectorsize, 0);
13370                 if (ret)
13371                         break;
13372                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13373                                             start + offset, buf, sectorsize);
13374                 if (ret)
13375                         break;
13376                 offset += sectorsize;
13377         }
13378         return ret;
13379 }
13380
13381 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13382                                       struct btrfs_root *csum_root,
13383                                       struct btrfs_root *cur_root)
13384 {
13385         struct btrfs_path path;
13386         struct btrfs_key key;
13387         struct extent_buffer *node;
13388         struct btrfs_file_extent_item *fi;
13389         char *buf = NULL;
13390         u64 start = 0;
13391         u64 len = 0;
13392         int slot = 0;
13393         int ret = 0;
13394
13395         buf = malloc(cur_root->fs_info->sectorsize);
13396         if (!buf)
13397                 return -ENOMEM;
13398
13399         btrfs_init_path(&path);
13400         key.objectid = 0;
13401         key.offset = 0;
13402         key.type = 0;
13403         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13404         if (ret < 0)
13405                 goto out;
13406         /* Iterate all regular file extents and fill its csum */
13407         while (1) {
13408                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13409
13410                 if (key.type != BTRFS_EXTENT_DATA_KEY)
13411                         goto next;
13412                 node = path.nodes[0];
13413                 slot = path.slots[0];
13414                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13415                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
13416                         goto next;
13417                 start = btrfs_file_extent_disk_bytenr(node, fi);
13418                 len = btrfs_file_extent_disk_num_bytes(node, fi);
13419
13420                 ret = populate_csum(trans, csum_root, buf, start, len);
13421                 if (ret == -EEXIST)
13422                         ret = 0;
13423                 if (ret < 0)
13424                         goto out;
13425 next:
13426                 /*
13427                  * TODO: if next leaf is corrupted, jump to nearest next valid
13428                  * leaf.
13429                  */
13430                 ret = btrfs_next_item(cur_root, &path);
13431                 if (ret < 0)
13432                         goto out;
13433                 if (ret > 0) {
13434                         ret = 0;
13435                         goto out;
13436                 }
13437         }
13438
13439 out:
13440         btrfs_release_path(&path);
13441         free(buf);
13442         return ret;
13443 }
13444
13445 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13446                                   struct btrfs_root *csum_root)
13447 {
13448         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13449         struct btrfs_path path;
13450         struct btrfs_root *tree_root = fs_info->tree_root;
13451         struct btrfs_root *cur_root;
13452         struct extent_buffer *node;
13453         struct btrfs_key key;
13454         int slot = 0;
13455         int ret = 0;
13456
13457         btrfs_init_path(&path);
13458         key.objectid = BTRFS_FS_TREE_OBJECTID;
13459         key.offset = 0;
13460         key.type = BTRFS_ROOT_ITEM_KEY;
13461         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13462         if (ret < 0)
13463                 goto out;
13464         if (ret > 0) {
13465                 ret = -ENOENT;
13466                 goto out;
13467         }
13468
13469         while (1) {
13470                 node = path.nodes[0];
13471                 slot = path.slots[0];
13472                 btrfs_item_key_to_cpu(node, &key, slot);
13473                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13474                         goto out;
13475                 if (key.type != BTRFS_ROOT_ITEM_KEY)
13476                         goto next;
13477                 if (!is_fstree(key.objectid))
13478                         goto next;
13479                 key.offset = (u64)-1;
13480
13481                 cur_root = btrfs_read_fs_root(fs_info, &key);
13482                 if (IS_ERR(cur_root) || !cur_root) {
13483                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13484                                 key.objectid);
13485                         goto out;
13486                 }
13487                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13488                                 cur_root);
13489                 if (ret < 0)
13490                         goto out;
13491 next:
13492                 ret = btrfs_next_item(tree_root, &path);
13493                 if (ret > 0) {
13494                         ret = 0;
13495                         goto out;
13496                 }
13497                 if (ret < 0)
13498                         goto out;
13499         }
13500
13501 out:
13502         btrfs_release_path(&path);
13503         return ret;
13504 }
13505
13506 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13507                                       struct btrfs_root *csum_root)
13508 {
13509         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13510         struct btrfs_path path;
13511         struct btrfs_extent_item *ei;
13512         struct extent_buffer *leaf;
13513         char *buf;
13514         struct btrfs_key key;
13515         int ret;
13516
13517         btrfs_init_path(&path);
13518         key.objectid = 0;
13519         key.type = BTRFS_EXTENT_ITEM_KEY;
13520         key.offset = 0;
13521         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13522         if (ret < 0) {
13523                 btrfs_release_path(&path);
13524                 return ret;
13525         }
13526
13527         buf = malloc(csum_root->fs_info->sectorsize);
13528         if (!buf) {
13529                 btrfs_release_path(&path);
13530                 return -ENOMEM;
13531         }
13532
13533         while (1) {
13534                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13535                         ret = btrfs_next_leaf(extent_root, &path);
13536                         if (ret < 0)
13537                                 break;
13538                         if (ret) {
13539                                 ret = 0;
13540                                 break;
13541                         }
13542                 }
13543                 leaf = path.nodes[0];
13544
13545                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13546                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13547                         path.slots[0]++;
13548                         continue;
13549                 }
13550
13551                 ei = btrfs_item_ptr(leaf, path.slots[0],
13552                                     struct btrfs_extent_item);
13553                 if (!(btrfs_extent_flags(leaf, ei) &
13554                       BTRFS_EXTENT_FLAG_DATA)) {
13555                         path.slots[0]++;
13556                         continue;
13557                 }
13558
13559                 ret = populate_csum(trans, csum_root, buf, key.objectid,
13560                                     key.offset);
13561                 if (ret)
13562                         break;
13563                 path.slots[0]++;
13564         }
13565
13566         btrfs_release_path(&path);
13567         free(buf);
13568         return ret;
13569 }
13570
/*
 * Recalculate the checksums and store them in the csum tree.
 *
 * An extent tree init wipes out all extent info, so in that case the extent
 * tree cannot be used as the source and the fs/subvolume trees are walked
 * instead.  @search_fs_tree selects the source: non-zero uses the fs/subvol
 * trees, zero uses the extent tree.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
			      : fill_csum_tree_from_extent(trans, csum_root);
}
13587
13588 static void free_roots_info_cache(void)
13589 {
13590         if (!roots_info_cache)
13591                 return;
13592
13593         while (!cache_tree_empty(roots_info_cache)) {
13594                 struct cache_extent *entry;
13595                 struct root_item_info *rii;
13596
13597                 entry = first_cache_extent(roots_info_cache);
13598                 if (!entry)
13599                         break;
13600                 remove_cache_extent(roots_info_cache, entry);
13601                 rii = container_of(entry, struct root_item_info, cache_extent);
13602                 free(rii);
13603         }
13604
13605         free(roots_info_cache);
13606         roots_info_cache = NULL;
13607 }
13608
13609 static int build_roots_info_cache(struct btrfs_fs_info *info)
13610 {
13611         int ret = 0;
13612         struct btrfs_key key;
13613         struct extent_buffer *leaf;
13614         struct btrfs_path path;
13615
13616         if (!roots_info_cache) {
13617                 roots_info_cache = malloc(sizeof(*roots_info_cache));
13618                 if (!roots_info_cache)
13619                         return -ENOMEM;
13620                 cache_tree_init(roots_info_cache);
13621         }
13622
13623         btrfs_init_path(&path);
13624         key.objectid = 0;
13625         key.type = BTRFS_EXTENT_ITEM_KEY;
13626         key.offset = 0;
13627         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13628         if (ret < 0)
13629                 goto out;
13630         leaf = path.nodes[0];
13631
13632         while (1) {
13633                 struct btrfs_key found_key;
13634                 struct btrfs_extent_item *ei;
13635                 struct btrfs_extent_inline_ref *iref;
13636                 int slot = path.slots[0];
13637                 int type;
13638                 u64 flags;
13639                 u64 root_id;
13640                 u8 level;
13641                 struct cache_extent *entry;
13642                 struct root_item_info *rii;
13643
13644                 if (slot >= btrfs_header_nritems(leaf)) {
13645                         ret = btrfs_next_leaf(info->extent_root, &path);
13646                         if (ret < 0) {
13647                                 break;
13648                         } else if (ret) {
13649                                 ret = 0;
13650                                 break;
13651                         }
13652                         leaf = path.nodes[0];
13653                         slot = path.slots[0];
13654                 }
13655
13656                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13657
13658                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13659                     found_key.type != BTRFS_METADATA_ITEM_KEY)
13660                         goto next;
13661
13662                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13663                 flags = btrfs_extent_flags(leaf, ei);
13664
13665                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13666                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
13667                         goto next;
13668
13669                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13670                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13671                         level = found_key.offset;
13672                 } else {
13673                         struct btrfs_tree_block_info *binfo;
13674
13675                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
13676                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13677                         level = btrfs_tree_block_level(leaf, binfo);
13678                 }
13679
13680                 /*
13681                  * For a root extent, it must be of the following type and the
13682                  * first (and only one) iref in the item.
13683                  */
13684                 type = btrfs_extent_inline_ref_type(leaf, iref);
13685                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
13686                         goto next;
13687
13688                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13689                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13690                 if (!entry) {
13691                         rii = malloc(sizeof(struct root_item_info));
13692                         if (!rii) {
13693                                 ret = -ENOMEM;
13694                                 goto out;
13695                         }
13696                         rii->cache_extent.start = root_id;
13697                         rii->cache_extent.size = 1;
13698                         rii->level = (u8)-1;
13699                         entry = &rii->cache_extent;
13700                         ret = insert_cache_extent(roots_info_cache, entry);
13701                         ASSERT(ret == 0);
13702                 } else {
13703                         rii = container_of(entry, struct root_item_info,
13704                                            cache_extent);
13705                 }
13706
13707                 ASSERT(rii->cache_extent.start == root_id);
13708                 ASSERT(rii->cache_extent.size == 1);
13709
13710                 if (level > rii->level || rii->level == (u8)-1) {
13711                         rii->level = level;
13712                         rii->bytenr = found_key.objectid;
13713                         rii->gen = btrfs_extent_generation(leaf, ei);
13714                         rii->node_count = 1;
13715                 } else if (level == rii->level) {
13716                         rii->node_count++;
13717                 }
13718 next:
13719                 path.slots[0]++;
13720         }
13721
13722 out:
13723         btrfs_release_path(&path);
13724
13725         return ret;
13726 }
13727
13728 static int maybe_repair_root_item(struct btrfs_path *path,
13729                                   const struct btrfs_key *root_key,
13730                                   const int read_only_mode)
13731 {
13732         const u64 root_id = root_key->objectid;
13733         struct cache_extent *entry;
13734         struct root_item_info *rii;
13735         struct btrfs_root_item ri;
13736         unsigned long offset;
13737
13738         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13739         if (!entry) {
13740                 fprintf(stderr,
13741                         "Error: could not find extent items for root %llu\n",
13742                         root_key->objectid);
13743                 return -ENOENT;
13744         }
13745
13746         rii = container_of(entry, struct root_item_info, cache_extent);
13747         ASSERT(rii->cache_extent.start == root_id);
13748         ASSERT(rii->cache_extent.size == 1);
13749
13750         if (rii->node_count != 1) {
13751                 fprintf(stderr,
13752                         "Error: could not find btree root extent for root %llu\n",
13753                         root_id);
13754                 return -ENOENT;
13755         }
13756
13757         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13758         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
13759
13760         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13761             btrfs_root_level(&ri) != rii->level ||
13762             btrfs_root_generation(&ri) != rii->gen) {
13763
13764                 /*
13765                  * If we're in repair mode but our caller told us to not update
13766                  * the root item, i.e. just check if it needs to be updated, don't
13767                  * print this message, since the caller will call us again shortly
13768                  * for the same root item without read only mode (the caller will
13769                  * open a transaction first).
13770                  */
13771                 if (!(read_only_mode && repair))
13772                         fprintf(stderr,
13773                                 "%sroot item for root %llu,"
13774                                 " current bytenr %llu, current gen %llu, current level %u,"
13775                                 " new bytenr %llu, new gen %llu, new level %u\n",
13776                                 (read_only_mode ? "" : "fixing "),
13777                                 root_id,
13778                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13779                                 btrfs_root_level(&ri),
13780                                 rii->bytenr, rii->gen, rii->level);
13781
13782                 if (btrfs_root_generation(&ri) > rii->gen) {
13783                         fprintf(stderr,
13784                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13785                                 root_id, btrfs_root_generation(&ri), rii->gen);
13786                         return -EINVAL;
13787                 }
13788
13789                 if (!read_only_mode) {
13790                         btrfs_set_root_bytenr(&ri, rii->bytenr);
13791                         btrfs_set_root_level(&ri, rii->level);
13792                         btrfs_set_root_generation(&ri, rii->gen);
13793                         write_extent_buffer(path->nodes[0], &ri,
13794                                             offset, sizeof(ri));
13795                 }
13796
13797                 return 1;
13798         }
13799
13800         return 0;
13801 }
13802
13803 /*
13804  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
13805  * caused read-only snapshots to be corrupted if they were created at a moment
13806  * when the source subvolume/snapshot had orphan items. The issue was that the
13807  * on-disk root items became incorrect, referring to the pre orphan cleanup root
13808  * node instead of the post orphan cleanup root node.
13809  * So this function, and its callees, just detects and fixes those cases. Even
13810  * though the regression was for read-only snapshots, this function applies to
13811  * any snapshot/subvolume root.
13812  * This must be run before any other repair code - not doing it so, makes other
13813  * repair code delete or modify backrefs in the extent tree for example, which
13814  * will result in an inconsistent fs after repairing the root items.
13815  */
13816 static int repair_root_items(struct btrfs_fs_info *info)
13817 {
13818         struct btrfs_path path;
13819         struct btrfs_key key;
13820         struct extent_buffer *leaf;
13821         struct btrfs_trans_handle *trans = NULL;
13822         int ret = 0;
13823         int bad_roots = 0;
13824         int need_trans = 0;
13825
13826         btrfs_init_path(&path);
13827
13828         ret = build_roots_info_cache(info);
13829         if (ret)
13830                 goto out;
13831
13832         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
13833         key.type = BTRFS_ROOT_ITEM_KEY;
13834         key.offset = 0;
13835
13836 again:
13837         /*
13838          * Avoid opening and committing transactions if a leaf doesn't have
13839          * any root items that need to be fixed, so that we avoid rotating
13840          * backup roots unnecessarily.
13841          */
13842         if (need_trans) {
13843                 trans = btrfs_start_transaction(info->tree_root, 1);
13844                 if (IS_ERR(trans)) {
13845                         ret = PTR_ERR(trans);
13846                         goto out;
13847                 }
13848         }
13849
13850         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
13851                                 0, trans ? 1 : 0);
13852         if (ret < 0)
13853                 goto out;
13854         leaf = path.nodes[0];
13855
13856         while (1) {
13857                 struct btrfs_key found_key;
13858
13859                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
13860                         int no_more_keys = find_next_key(&path, &key);
13861
13862                         btrfs_release_path(&path);
13863                         if (trans) {
13864                                 ret = btrfs_commit_transaction(trans,
13865                                                                info->tree_root);
13866                                 trans = NULL;
13867                                 if (ret < 0)
13868                                         goto out;
13869                         }
13870                         need_trans = 0;
13871                         if (no_more_keys)
13872                                 break;
13873                         goto again;
13874                 }
13875
13876                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13877
13878                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
13879                         goto next;
13880                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13881                         goto next;
13882
13883                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
13884                 if (ret < 0)
13885                         goto out;
13886                 if (ret) {
13887                         if (!trans && repair) {
13888                                 need_trans = 1;
13889                                 key = found_key;
13890                                 btrfs_release_path(&path);
13891                                 goto again;
13892                         }
13893                         bad_roots++;
13894                 }
13895 next:
13896                 path.slots[0]++;
13897         }
13898         ret = 0;
13899 out:
13900         free_roots_info_cache();
13901         btrfs_release_path(&path);
13902         if (trans)
13903                 btrfs_commit_transaction(trans, info->tree_root);
13904         if (ret < 0)
13905                 return ret;
13906
13907         return bad_roots;
13908 }
13909
13910 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13911 {
13912         struct btrfs_trans_handle *trans;
13913         struct btrfs_block_group_cache *bg_cache;
13914         u64 current = 0;
13915         int ret = 0;
13916
13917         /* Clear all free space cache inodes and its extent data */
13918         while (1) {
13919                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13920                 if (!bg_cache)
13921                         break;
13922                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
13923                 if (ret < 0)
13924                         return ret;
13925                 current = bg_cache->key.objectid + bg_cache->key.offset;
13926         }
13927
13928         /* Don't forget to set cache_generation to -1 */
13929         trans = btrfs_start_transaction(fs_info->tree_root, 0);
13930         if (IS_ERR(trans)) {
13931                 error("failed to update super block cache generation");
13932                 return PTR_ERR(trans);
13933         }
13934         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
13935         btrfs_commit_transaction(trans, fs_info->tree_root);
13936
13937         return ret;
13938 }
13939
13940 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13941                 int clear_version)
13942 {
13943         int ret = 0;
13944
13945         if (clear_version == 1) {
13946                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13947                         error(
13948                 "free space cache v2 detected, use --clear-space-cache v2");
13949                         ret = 1;
13950                         goto close_out;
13951                 }
13952                 printf("Clearing free space cache\n");
13953                 ret = clear_free_space_cache(fs_info);
13954                 if (ret) {
13955                         error("failed to clear free space cache");
13956                         ret = 1;
13957                 } else {
13958                         printf("Free space cache cleared\n");
13959                 }
13960         } else if (clear_version == 2) {
13961                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13962                         printf("no free space cache v2 to clear\n");
13963                         ret = 0;
13964                         goto close_out;
13965                 }
13966                 printf("Clear free space cache v2\n");
13967                 ret = btrfs_clear_free_space_tree(fs_info);
13968                 if (ret) {
13969                         error("failed to clear free space cache v2: %d", ret);
13970                         ret = 1;
13971                 } else {
13972                         printf("free space cache v2 cleared\n");
13973                 }
13974         }
13975 close_out:
13976         return ret;
13977 }
13978
/*
 * Help text for "btrfs check"; cmd_check() hands this array to usage().
 *
 * The array is NULL-terminated.  It starts with the synopsis line and a
 * one-line summary, followed by the longer description and then one or more
 * lines per option.
 * NOTE(review): how usage() interprets the individual entries (e.g. the empty
 * string as a separator) is not visible here -- confirm against help.c.
 */
13979 const char * const cmd_check_usage[] = {
13980         "btrfs check [options] <device>",
13981         "Check structural integrity of a filesystem (unmounted).",
13982         "Check structural integrity of an unmounted filesystem. Verify internal",
13983         "trees' consistency and item connectivity. In the repair mode try to",
13984         "fix the problems found. ",
13985         "WARNING: the repair mode is considered dangerous",
13986         "",
13987         "-s|--super <superblock>     use this superblock copy",
13988         "-b|--backup                 use the first valid backup root copy",
13989         "--force                     skip mount checks, repair is not possible",
13990         "--repair                    try to repair the filesystem",
13991         "--readonly                  run in read-only mode (default)",
13992         "--init-csum-tree            create a new CRC tree",
13993         "--init-extent-tree          create a new extent tree",
13994         "--mode <MODE>               allows choice of memory/IO trade-offs",
13995         "                            where MODE is one of:",
13996         "                            original - read inodes and extents to memory (requires",
13997         "                                       more memory, does less IO)",
13998         "                            lowmem   - try to use less memory but read blocks again",
13999         "                                       when needed",
14000         "--check-data-csum           verify checksums of data blocks",
14001         "-Q|--qgroup-report          print a report on qgroup consistency",
14002         "-E|--subvol-extents <subvolid>",
14003         "                            print subvolume extents and sharing state",
14004         "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
14005         "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
14006         "-p|--progress               indicate progress",
14007         "--clear-space-cache v1|v2   clear space cache for v1 or v2",
14008         NULL
14009 };
14010
14011 int cmd_check(int argc, char **argv)
14012 {
14013         struct cache_tree root_cache;
14014         struct btrfs_root *root;
14015         struct btrfs_fs_info *info;
14016         u64 bytenr = 0;
14017         u64 subvolid = 0;
14018         u64 tree_root_bytenr = 0;
14019         u64 chunk_root_bytenr = 0;
14020         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14021         int ret = 0;
14022         int err = 0;
14023         u64 num;
14024         int init_csum_tree = 0;
14025         int readonly = 0;
14026         int clear_space_cache = 0;
14027         int qgroup_report = 0;
14028         int qgroups_repaired = 0;
14029         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
14030         int force = 0;
14031
14032         while(1) {
14033                 int c;
14034                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14035                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14036                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14037                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14038                         GETOPT_VAL_FORCE };
14039                 static const struct option long_options[] = {
14040                         { "super", required_argument, NULL, 's' },
14041                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14042                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14043                         { "init-csum-tree", no_argument, NULL,
14044                                 GETOPT_VAL_INIT_CSUM },
14045                         { "init-extent-tree", no_argument, NULL,
14046                                 GETOPT_VAL_INIT_EXTENT },
14047                         { "check-data-csum", no_argument, NULL,
14048                                 GETOPT_VAL_CHECK_CSUM },
14049                         { "backup", no_argument, NULL, 'b' },
14050                         { "subvol-extents", required_argument, NULL, 'E' },
14051                         { "qgroup-report", no_argument, NULL, 'Q' },
14052                         { "tree-root", required_argument, NULL, 'r' },
14053                         { "chunk-root", required_argument, NULL,
14054                                 GETOPT_VAL_CHUNK_TREE },
14055                         { "progress", no_argument, NULL, 'p' },
14056                         { "mode", required_argument, NULL,
14057                                 GETOPT_VAL_MODE },
14058                         { "clear-space-cache", required_argument, NULL,
14059                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
14060                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14061                         { NULL, 0, NULL, 0}
14062                 };
14063
14064                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14065                 if (c < 0)
14066                         break;
14067                 switch(c) {
14068                         case 'a': /* ignored */ break;
14069                         case 'b':
14070                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14071                                 break;
14072                         case 's':
14073                                 num = arg_strtou64(optarg);
14074                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14075                                         error(
14076                                         "super mirror should be less than %d",
14077                                                 BTRFS_SUPER_MIRROR_MAX);
14078                                         exit(1);
14079                                 }
14080                                 bytenr = btrfs_sb_offset(((int)num));
14081                                 printf("using SB copy %llu, bytenr %llu\n", num,
14082                                        (unsigned long long)bytenr);
14083                                 break;
14084                         case 'Q':
14085                                 qgroup_report = 1;
14086                                 break;
14087                         case 'E':
14088                                 subvolid = arg_strtou64(optarg);
14089                                 break;
14090                         case 'r':
14091                                 tree_root_bytenr = arg_strtou64(optarg);
14092                                 break;
14093                         case GETOPT_VAL_CHUNK_TREE:
14094                                 chunk_root_bytenr = arg_strtou64(optarg);
14095                                 break;
14096                         case 'p':
14097                                 ctx.progress_enabled = true;
14098                                 break;
14099                         case '?':
14100                         case 'h':
14101                                 usage(cmd_check_usage);
14102                         case GETOPT_VAL_REPAIR:
14103                                 printf("enabling repair mode\n");
14104                                 repair = 1;
14105                                 ctree_flags |= OPEN_CTREE_WRITES;
14106                                 break;
14107                         case GETOPT_VAL_READONLY:
14108                                 readonly = 1;
14109                                 break;
14110                         case GETOPT_VAL_INIT_CSUM:
14111                                 printf("Creating a new CRC tree\n");
14112                                 init_csum_tree = 1;
14113                                 repair = 1;
14114                                 ctree_flags |= OPEN_CTREE_WRITES;
14115                                 break;
14116                         case GETOPT_VAL_INIT_EXTENT:
14117                                 init_extent_tree = 1;
14118                                 ctree_flags |= (OPEN_CTREE_WRITES |
14119                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
14120                                 repair = 1;
14121                                 break;
14122                         case GETOPT_VAL_CHECK_CSUM:
14123                                 check_data_csum = 1;
14124                                 break;
14125                         case GETOPT_VAL_MODE:
14126                                 check_mode = parse_check_mode(optarg);
14127                                 if (check_mode == CHECK_MODE_UNKNOWN) {
14128                                         error("unknown mode: %s", optarg);
14129                                         exit(1);
14130                                 }
14131                                 break;
14132                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
14133                                 if (strcmp(optarg, "v1") == 0) {
14134                                         clear_space_cache = 1;
14135                                 } else if (strcmp(optarg, "v2") == 0) {
14136                                         clear_space_cache = 2;
14137                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14138                                 } else {
14139                                         error(
14140                 "invalid argument to --clear-space-cache, must be v1 or v2");
14141                                         exit(1);
14142                                 }
14143                                 ctree_flags |= OPEN_CTREE_WRITES;
14144                                 break;
14145                         case GETOPT_VAL_FORCE:
14146                                 force = 1;
14147                                 break;
14148                 }
14149         }
14150
14151         if (check_argc_exact(argc - optind, 1))
14152                 usage(cmd_check_usage);
14153
14154         if (ctx.progress_enabled) {
14155                 ctx.tp = TASK_NOTHING;
14156                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14157         }
14158
14159         /* This check is the only reason for --readonly to exist */
14160         if (readonly && repair) {
14161                 error("repair options are not compatible with --readonly");
14162                 exit(1);
14163         }
14164
14165         /*
14166          * experimental and dangerous
14167          */
14168         if (repair && check_mode == CHECK_MODE_LOWMEM)
14169                 warning("low-memory mode repair support is only partial");
14170
14171         radix_tree_init();
14172         cache_tree_init(&root_cache);
14173
14174         ret = check_mounted(argv[optind]);
14175         if (!force) {
14176                 if (ret < 0) {
14177                         error("could not check mount status: %s",
14178                                         strerror(-ret));
14179                         err |= !!ret;
14180                         goto err_out;
14181                 } else if (ret) {
14182                         error(
14183 "%s is currently mounted, use --force if you really intend to check the filesystem",
14184                                 argv[optind]);
14185                         ret = -EBUSY;
14186                         err |= !!ret;
14187                         goto err_out;
14188                 }
14189         } else {
14190                 if (repair) {
14191                         error("repair and --force is not yet supported");
14192                         ret = 1;
14193                         err |= !!ret;
14194                         goto err_out;
14195                 }
14196                 if (ret < 0) {
14197                         warning(
14198 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14199                                 argv[optind]);
14200                 } else if (ret) {
14201                         warning(
14202                         "filesystem mounted, continuing because of --force");
14203                 }
14204                 /* A block device is mounted in exclusive mode by kernel */
14205                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14206         }
14207
14208         /* only allow partial opening under repair mode */
14209         if (repair)
14210                 ctree_flags |= OPEN_CTREE_PARTIAL;
14211
14212         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14213                                   chunk_root_bytenr, ctree_flags);
14214         if (!info) {
14215                 error("cannot open file system");
14216                 ret = -EIO;
14217                 err |= !!ret;
14218                 goto err_out;
14219         }
14220
14221         global_info = info;
14222         root = info->fs_root;
14223         uuid_unparse(info->super_copy->fsid, uuidbuf);
14224
14225         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14226
14227         /*
14228          * Check the bare minimum before starting anything else that could rely
14229          * on it, namely the tree roots, any local consistency checks
14230          */
14231         if (!extent_buffer_uptodate(info->tree_root->node) ||
14232             !extent_buffer_uptodate(info->dev_root->node) ||
14233             !extent_buffer_uptodate(info->chunk_root->node)) {
14234                 error("critical roots corrupted, unable to check the filesystem");
14235                 err |= !!ret;
14236                 ret = -EIO;
14237                 goto close_out;
14238         }
14239
14240         if (clear_space_cache) {
14241                 ret = do_clear_free_space_cache(info, clear_space_cache);
14242                 err |= !!ret;
14243                 goto close_out;
14244         }
14245
14246         /*
14247          * repair mode will force us to commit transaction which
14248          * will make us fail to load log tree when mounting.
14249          */
14250         if (repair && btrfs_super_log_root(info->super_copy)) {
14251                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14252                 if (!ret) {
14253                         ret = 1;
14254                         err |= !!ret;
14255                         goto close_out;
14256                 }
14257                 ret = zero_log_tree(root);
14258                 err |= !!ret;
14259                 if (ret) {
14260                         error("failed to zero log tree: %d", ret);
14261                         goto close_out;
14262                 }
14263         }
14264
14265         if (qgroup_report) {
14266                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14267                        uuidbuf);
14268                 ret = qgroup_verify_all(info);
14269                 err |= !!ret;
14270                 if (ret == 0)
14271                         report_qgroups(1);
14272                 goto close_out;
14273         }
14274         if (subvolid) {
14275                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14276                        subvolid, argv[optind], uuidbuf);
14277                 ret = print_extent_state(info, subvolid);
14278                 err |= !!ret;
14279                 goto close_out;
14280         }
14281
14282         if (init_extent_tree || init_csum_tree) {
14283                 struct btrfs_trans_handle *trans;
14284
14285                 trans = btrfs_start_transaction(info->extent_root, 0);
14286                 if (IS_ERR(trans)) {
14287                         error("error starting transaction");
14288                         ret = PTR_ERR(trans);
14289                         err |= !!ret;
14290                         goto close_out;
14291                 }
14292
14293                 if (init_extent_tree) {
14294                         printf("Creating a new extent tree\n");
14295                         ret = reinit_extent_tree(trans, info);
14296                         err |= !!ret;
14297                         if (ret)
14298                                 goto close_out;
14299                 }
14300
14301                 if (init_csum_tree) {
14302                         printf("Reinitialize checksum tree\n");
14303                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14304                         if (ret) {
14305                                 error("checksum tree initialization failed: %d",
14306                                                 ret);
14307                                 ret = -EIO;
14308                                 err |= !!ret;
14309                                 goto close_out;
14310                         }
14311
14312                         ret = fill_csum_tree(trans, info->csum_root,
14313                                              init_extent_tree);
14314                         err |= !!ret;
14315                         if (ret) {
14316                                 error("checksum tree refilling failed: %d", ret);
14317                                 return -EIO;
14318                         }
14319                 }
14320                 /*
14321                  * Ok now we commit and run the normal fsck, which will add
14322                  * extent entries for all of the items it finds.
14323                  */
14324                 ret = btrfs_commit_transaction(trans, info->extent_root);
14325                 err |= !!ret;
14326                 if (ret)
14327                         goto close_out;
14328         }
14329         if (!extent_buffer_uptodate(info->extent_root->node)) {
14330                 error("critical: extent_root, unable to check the filesystem");
14331                 ret = -EIO;
14332                 err |= !!ret;
14333                 goto close_out;
14334         }
14335         if (!extent_buffer_uptodate(info->csum_root->node)) {
14336                 error("critical: csum_root, unable to check the filesystem");
14337                 ret = -EIO;
14338                 err |= !!ret;
14339                 goto close_out;
14340         }
14341
14342         if (!init_extent_tree) {
14343                 ret = repair_root_items(info);
14344                 if (ret < 0) {
14345                         err = !!ret;
14346                         error("failed to repair root items: %s", strerror(-ret));
14347                         goto close_out;
14348                 }
14349                 if (repair) {
14350                         fprintf(stderr, "Fixed %d roots.\n", ret);
14351                         ret = 0;
14352                 } else if (ret > 0) {
14353                         fprintf(stderr,
14354                                 "Found %d roots with an outdated root item.\n",
14355                                 ret);
14356                         fprintf(stderr,
14357         "Please run a filesystem check with the option --repair to fix them.\n");
14358                         ret = 1;
14359                         err |= ret;
14360                         goto close_out;
14361                 }
14362         }
14363
14364         ret = do_check_chunks_and_extents(info);
14365         err |= !!ret;
14366         if (ret)
14367                 error(
14368                 "errors found in extent allocation tree or chunk allocation");
14369
14370         /* Only re-check super size after we checked and repaired the fs */
14371         err |= !is_super_size_valid(info);
14372
14373         if (!ctx.progress_enabled) {
14374                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14375                         fprintf(stderr, "checking free space tree\n");
14376                 else
14377                         fprintf(stderr, "checking free space cache\n");
14378         }
14379         ret = check_space_cache(root);
14380         err |= !!ret;
14381         if (ret) {
14382                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14383                         error("errors found in free space tree");
14384                 else
14385                         error("errors found in free space cache");
14386                 goto out;
14387         }
14388
14389         /*
14390          * We used to have to have these hole extents in between our real
14391          * extents so if we don't have this flag set we need to make sure there
14392          * are no gaps in the file extents for inodes, otherwise we can just
14393          * ignore it when this happens.
14394          */
14395         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14396         ret = do_check_fs_roots(info, &root_cache);
14397         err |= !!ret;
14398         if (ret) {
14399                 error("errors found in fs roots");
14400                 goto out;
14401         }
14402
14403         fprintf(stderr, "checking csums\n");
14404         ret = check_csums(root);
14405         err |= !!ret;
14406         if (ret) {
14407                 error("errors found in csum tree");
14408                 goto out;
14409         }
14410
14411         fprintf(stderr, "checking root refs\n");
14412         /* For low memory mode, check_fs_roots_v2 handles root refs */
14413         if (check_mode != CHECK_MODE_LOWMEM) {
14414                 ret = check_root_refs(root, &root_cache);
14415                 err |= !!ret;
14416                 if (ret) {
14417                         error("errors found in root refs");
14418                         goto out;
14419                 }
14420         }
14421
14422         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14423                 struct extent_buffer *eb;
14424
14425                 eb = list_first_entry(&root->fs_info->recow_ebs,
14426                                       struct extent_buffer, recow);
14427                 list_del_init(&eb->recow);
14428                 ret = recow_extent_buffer(root, eb);
14429                 err |= !!ret;
14430                 if (ret) {
14431                         error("fails to fix transid errors");
14432                         break;
14433                 }
14434         }
14435
14436         while (!list_empty(&delete_items)) {
14437                 struct bad_item *bad;
14438
14439                 bad = list_first_entry(&delete_items, struct bad_item, list);
14440                 list_del_init(&bad->list);
14441                 if (repair) {
14442                         ret = delete_bad_item(root, bad);
14443                         err |= !!ret;
14444                 }
14445                 free(bad);
14446         }
14447
14448         if (info->quota_enabled) {
14449                 fprintf(stderr, "checking quota groups\n");
14450                 ret = qgroup_verify_all(info);
14451                 err |= !!ret;
14452                 if (ret) {
14453                         error("failed to check quota groups");
14454                         goto out;
14455                 }
14456                 report_qgroups(0);
14457                 ret = repair_qgroups(info, &qgroups_repaired);
14458                 err |= !!ret;
14459                 if (err) {
14460                         error("failed to repair quota groups");
14461                         goto out;
14462                 }
14463                 ret = 0;
14464         }
14465
14466         if (!list_empty(&root->fs_info->recow_ebs)) {
14467                 error("transid errors in file system");
14468                 ret = 1;
14469                 err |= !!ret;
14470         }
14471 out:
14472         printf("found %llu bytes used, ",
14473                (unsigned long long)bytes_used);
14474         if (err)
14475                 printf("error(s) found\n");
14476         else
14477                 printf("no error found\n");
14478         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14479         printf("total tree bytes: %llu\n",
14480                (unsigned long long)total_btree_bytes);
14481         printf("total fs tree bytes: %llu\n",
14482                (unsigned long long)total_fs_tree_bytes);
14483         printf("total extent tree bytes: %llu\n",
14484                (unsigned long long)total_extent_tree_bytes);
14485         printf("btree space waste bytes: %llu\n",
14486                (unsigned long long)btree_space_waste);
14487         printf("file data blocks allocated: %llu\n referenced %llu\n",
14488                 (unsigned long long)data_bytes_allocated,
14489                 (unsigned long long)data_bytes_referenced);
14490
14491         free_qgroup_counts();
14492         free_root_recs_tree(&root_cache);
14493 close_out:
14494         close_ctree(root);
14495 err_out:
14496         if (ctx.progress_enabled)
14497                 task_deinit(ctx.info);
14498
14499         return err;
14500 }