btrfs-progs: check: move reada_walk_down to check/common.c
[platform/upstream/btrfs-progs.git] / check / main.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46 #include "check/original.h"
47 #include "check/lowmem.h"
48 #include "check/common.h"
49
/*
 * Phase of the check that the progress indicator reports.
 *
 * print_status_check() uses the value as an index into its table of status
 * strings, which holds one entry for every phase listed before TASK_NOTHING,
 * and returns without printing anything when the value is TASK_NOTHING.
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING, /* must remain the last element */
};
56
/* Context handed to the progress callbacks (see print_status_check()). */
struct task_ctx {
	/* NOTE(review): presumably non-zero when progress output was requested;
	 * the code that sets/reads it is not visible here -- confirm. */
	int progress_enabled;
	/* Phase whose status string print_status_check() prints */
	enum task_position tp;

	/* Handle passed to task_period_start() and task_period_wait() */
	struct task_info *info;
};
63
/*
 * File-scope state of the checker.  All of these objects have external
 * linkage.
 * NOTE(review): presumably they are shared with the other check/ sources
 * through the check/ headers -- confirm before making any of them static.
 */

/* Byte counters; judging by the names they are statistics -- TODO confirm
 * where they are updated, it is not visible in this part of the file. */
u64 bytes_used = 0;
u64 total_csum_bytes = 0;
u64 total_btree_bytes = 0;
u64 total_fs_tree_bytes = 0;
u64 total_extent_tree_bytes = 0;
u64 btree_space_waste = 0;
u64 data_bytes_allocated = 0;
u64 data_bytes_referenced = 0;
/* Global list heads, initially empty */
LIST_HEAD(duplicate_extents);
LIST_HEAD(delete_items);
/* When non-zero, maybe_free_inode_rec() does not flag
 * I_ERR_FILE_EXTENT_DISCOUNT for regular files and symlinks */
int no_holes = 0;
int init_extent_tree = 0;
int check_data_csum = 0;
struct btrfs_fs_info *global_info;
/* Progress reporting context, zero-initialized */
struct task_ctx ctx = { 0 };
struct cache_tree *roots_info_cache = NULL;
80
/*
 * Check implementation to run.  parse_check_mode() maps a mode name to one
 * of these values and yields CHECK_MODE_UNKNOWN for an unrecognized name.
 */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	CHECK_MODE_UNKNOWN,
	/* Alias: the default mode is the original one */
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Currently selected mode; starts out as the default */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
89
90 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
91 {
92         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
93         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
94         struct data_backref *back1 = to_data_backref(ext1);
95         struct data_backref *back2 = to_data_backref(ext2);
96
97         WARN_ON(!ext1->is_data);
98         WARN_ON(!ext2->is_data);
99
100         /* parent and root are a union, so this covers both */
101         if (back1->parent > back2->parent)
102                 return 1;
103         if (back1->parent < back2->parent)
104                 return -1;
105
106         /* This is a full backref and the parents match. */
107         if (back1->node.full_backref)
108                 return 0;
109
110         if (back1->owner > back2->owner)
111                 return 1;
112         if (back1->owner < back2->owner)
113                 return -1;
114
115         if (back1->offset > back2->offset)
116                 return 1;
117         if (back1->offset < back2->offset)
118                 return -1;
119
120         if (back1->found_ref && back2->found_ref) {
121                 if (back1->disk_bytenr > back2->disk_bytenr)
122                         return 1;
123                 if (back1->disk_bytenr < back2->disk_bytenr)
124                         return -1;
125
126                 if (back1->bytes > back2->bytes)
127                         return 1;
128                 if (back1->bytes < back2->bytes)
129                         return -1;
130         }
131
132         return 0;
133 }
134
135 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
136 {
137         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
138         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
139         struct tree_backref *back1 = to_tree_backref(ext1);
140         struct tree_backref *back2 = to_tree_backref(ext2);
141
142         WARN_ON(ext1->is_data);
143         WARN_ON(ext2->is_data);
144
145         /* parent and root are a union, so this covers both */
146         if (back1->parent > back2->parent)
147                 return 1;
148         if (back1->parent < back2->parent)
149                 return -1;
150
151         return 0;
152 }
153
154 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
155 {
156         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
157         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
158
159         if (ext1->is_data > ext2->is_data)
160                 return 1;
161
162         if (ext1->is_data < ext2->is_data)
163                 return -1;
164
165         if (ext1->full_backref > ext2->full_backref)
166                 return 1;
167         if (ext1->full_backref < ext2->full_backref)
168                 return -1;
169
170         if (ext1->is_data)
171                 return compare_data_backref(node1, node2);
172         else
173                 return compare_tree_backref(node1, node2);
174 }
175
176
177 static void *print_status_check(void *p)
178 {
179         struct task_ctx *priv = p;
180         const char work_indicator[] = { '.', 'o', 'O', 'o' };
181         uint32_t count = 0;
182         static char *task_position_string[] = {
183                 "checking extents",
184                 "checking free space cache",
185                 "checking fs roots",
186         };
187
188         task_period_start(priv->info, 1000 /* 1s */);
189
190         if (priv->tp == TASK_NOTHING)
191                 return NULL;
192
193         while (1) {
194                 printf("%s [%c]\r", task_position_string[priv->tp],
195                                 work_indicator[count % 4]);
196                 count++;
197                 fflush(stdout);
198                 task_period_wait(priv->info);
199         }
200         return NULL;
201 }
202
/*
 * Progress callback: writes a newline to stdout and flushes it.
 *
 * @p: unused
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
210
211 static enum btrfs_check_mode parse_check_mode(const char *str)
212 {
213         if (strcmp(str, "lowmem") == 0)
214                 return CHECK_MODE_LOWMEM;
215         if (strcmp(str, "orig") == 0)
216                 return CHECK_MODE_ORIGINAL;
217         if (strcmp(str, "original") == 0)
218                 return CHECK_MODE_ORIGINAL;
219
220         return CHECK_MODE_UNKNOWN;
221 }
222
223 /* Compatible function to allow reuse of old codes */
224 static u64 first_extent_gap(struct rb_root *holes)
225 {
226         struct file_extent_hole *hole;
227
228         if (RB_EMPTY_ROOT(holes))
229                 return (u64)-1;
230
231         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
232         return hole->start;
233 }
234
235 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
236 {
237         struct file_extent_hole *hole1;
238         struct file_extent_hole *hole2;
239
240         hole1 = rb_entry(node1, struct file_extent_hole, node);
241         hole2 = rb_entry(node2, struct file_extent_hole, node);
242
243         if (hole1->start > hole2->start)
244                 return -1;
245         if (hole1->start < hole2->start)
246                 return 1;
247         /* Now hole1->start == hole2->start */
248         if (hole1->len >= hole2->len)
249                 /*
250                  * Hole 1 will be merge center
251                  * Same hole will be merged later
252                  */
253                 return -1;
254         /* Hole 2 will be merge center */
255         return 1;
256 }
257
258 /*
259  * Add a hole to the record
260  *
261  * This will do hole merge for copy_file_extent_holes(),
262  * which will ensure there won't be continuous holes.
263  */
264 static int add_file_extent_hole(struct rb_root *holes,
265                                 u64 start, u64 len)
266 {
267         struct file_extent_hole *hole;
268         struct file_extent_hole *prev = NULL;
269         struct file_extent_hole *next = NULL;
270
271         hole = malloc(sizeof(*hole));
272         if (!hole)
273                 return -ENOMEM;
274         hole->start = start;
275         hole->len = len;
276         /* Since compare will not return 0, no -EEXIST will happen */
277         rb_insert(holes, &hole->node, compare_hole);
278
279         /* simple merge with previous hole */
280         if (rb_prev(&hole->node))
281                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
282                                 node);
283         if (prev && prev->start + prev->len >= hole->start) {
284                 hole->len = hole->start + hole->len - prev->start;
285                 hole->start = prev->start;
286                 rb_erase(&prev->node, holes);
287                 free(prev);
288                 prev = NULL;
289         }
290
291         /* iterate merge with next holes */
292         while (1) {
293                 if (!rb_next(&hole->node))
294                         break;
295                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
296                                         node);
297                 if (hole->start + hole->len >= next->start) {
298                         if (hole->start + hole->len <= next->start + next->len)
299                                 hole->len = next->start + next->len -
300                                             hole->start;
301                         rb_erase(&next->node, holes);
302                         free(next);
303                         next = NULL;
304                 } else
305                         break;
306         }
307         return 0;
308 }
309
310 static int compare_hole_range(struct rb_node *node, void *data)
311 {
312         struct file_extent_hole *hole;
313         u64 start;
314
315         hole = (struct file_extent_hole *)data;
316         start = hole->start;
317
318         hole = rb_entry(node, struct file_extent_hole, node);
319         if (start < hole->start)
320                 return -1;
321         if (start >= hole->start && start < hole->start + hole->len)
322                 return 0;
323         return 1;
324 }
325
326 /*
327  * Delete a hole in the record
328  *
329  * This will do the hole split and is much restrict than add.
330  */
331 static int del_file_extent_hole(struct rb_root *holes,
332                                 u64 start, u64 len)
333 {
334         struct file_extent_hole *hole;
335         struct file_extent_hole tmp;
336         u64 prev_start = 0;
337         u64 prev_len = 0;
338         u64 next_start = 0;
339         u64 next_len = 0;
340         struct rb_node *node;
341         int have_prev = 0;
342         int have_next = 0;
343         int ret = 0;
344
345         tmp.start = start;
346         tmp.len = len;
347         node = rb_search(holes, &tmp, compare_hole_range, NULL);
348         if (!node)
349                 return -EEXIST;
350         hole = rb_entry(node, struct file_extent_hole, node);
351         if (start + len > hole->start + hole->len)
352                 return -EEXIST;
353
354         /*
355          * Now there will be no overlap, delete the hole and re-add the
356          * split(s) if they exists.
357          */
358         if (start > hole->start) {
359                 prev_start = hole->start;
360                 prev_len = start - hole->start;
361                 have_prev = 1;
362         }
363         if (hole->start + hole->len > start + len) {
364                 next_start = start + len;
365                 next_len = hole->start + hole->len - start - len;
366                 have_next = 1;
367         }
368         rb_erase(node, holes);
369         free(hole);
370         if (have_prev) {
371                 ret = add_file_extent_hole(holes, prev_start, prev_len);
372                 if (ret < 0)
373                         return ret;
374         }
375         if (have_next) {
376                 ret = add_file_extent_hole(holes, next_start, next_len);
377                 if (ret < 0)
378                         return ret;
379         }
380         return 0;
381 }
382
383 static int copy_file_extent_holes(struct rb_root *dst,
384                                   struct rb_root *src)
385 {
386         struct file_extent_hole *hole;
387         struct rb_node *node;
388         int ret = 0;
389
390         node = rb_first(src);
391         while (node) {
392                 hole = rb_entry(node, struct file_extent_hole, node);
393                 ret = add_file_extent_hole(dst, hole->start, hole->len);
394                 if (ret)
395                         break;
396                 node = rb_next(node);
397         }
398         return ret;
399 }
400
401 static void free_file_extent_holes(struct rb_root *holes)
402 {
403         struct rb_node *node;
404         struct file_extent_hole *hole;
405
406         node = rb_first(holes);
407         while (node) {
408                 hole = rb_entry(node, struct file_extent_hole, node);
409                 rb_erase(node, holes);
410                 free(hole);
411                 node = rb_first(holes);
412         }
413 }
414
/* Forward declaration; the definition is not in this part of the file. */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
416
417 static void record_root_in_trans(struct btrfs_trans_handle *trans,
418                                  struct btrfs_root *root)
419 {
420         if (root->last_trans != trans->transid) {
421                 root->track_dirty = 1;
422                 root->last_trans = trans->transid;
423                 root->commit_root = root->node;
424                 extent_buffer_get(root->node);
425         }
426 }
427
428 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
429 {
430         struct device_record *rec1;
431         struct device_record *rec2;
432
433         rec1 = rb_entry(node1, struct device_record, node);
434         rec2 = rb_entry(node2, struct device_record, node);
435         if (rec1->devid > rec2->devid)
436                 return -1;
437         else if (rec1->devid < rec2->devid)
438                 return 1;
439         else
440                 return 0;
441 }
442
443 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
444 {
445         struct inode_record *rec;
446         struct inode_backref *backref;
447         struct inode_backref *orig;
448         struct inode_backref *tmp;
449         struct orphan_data_extent *src_orphan;
450         struct orphan_data_extent *dst_orphan;
451         struct rb_node *rb;
452         size_t size;
453         int ret;
454
455         rec = malloc(sizeof(*rec));
456         if (!rec)
457                 return ERR_PTR(-ENOMEM);
458         memcpy(rec, orig_rec, sizeof(*rec));
459         rec->refs = 1;
460         INIT_LIST_HEAD(&rec->backrefs);
461         INIT_LIST_HEAD(&rec->orphan_extents);
462         rec->holes = RB_ROOT;
463
464         list_for_each_entry(orig, &orig_rec->backrefs, list) {
465                 size = sizeof(*orig) + orig->namelen + 1;
466                 backref = malloc(size);
467                 if (!backref) {
468                         ret = -ENOMEM;
469                         goto cleanup;
470                 }
471                 memcpy(backref, orig, size);
472                 list_add_tail(&backref->list, &rec->backrefs);
473         }
474         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
475                 dst_orphan = malloc(sizeof(*dst_orphan));
476                 if (!dst_orphan) {
477                         ret = -ENOMEM;
478                         goto cleanup;
479                 }
480                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
481                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
482         }
483         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
484         if (ret < 0)
485                 goto cleanup_rb;
486
487         return rec;
488
489 cleanup_rb:
490         rb = rb_first(&rec->holes);
491         while (rb) {
492                 struct file_extent_hole *hole;
493
494                 hole = rb_entry(rb, struct file_extent_hole, node);
495                 rb = rb_next(rb);
496                 free(hole);
497         }
498
499 cleanup:
500         if (!list_empty(&rec->backrefs))
501                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
502                         list_del(&orig->list);
503                         free(orig);
504                 }
505
506         if (!list_empty(&rec->orphan_extents))
507                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
508                         list_del(&orig->list);
509                         free(orig);
510                 }
511
512         free(rec);
513
514         return ERR_PTR(ret);
515 }
516
517 static void print_orphan_data_extents(struct list_head *orphan_extents,
518                                       u64 objectid)
519 {
520         struct orphan_data_extent *orphan;
521
522         if (list_empty(orphan_extents))
523                 return;
524         printf("The following data extent is lost in tree %llu:\n",
525                objectid);
526         list_for_each_entry(orphan, orphan_extents, list) {
527                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
528                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
529                        orphan->disk_len);
530         }
531 }
532
533 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
534 {
535         u64 root_objectid = root->root_key.objectid;
536         int errors = rec->errors;
537
538         if (!errors)
539                 return;
540         /* reloc root errors, we print its corresponding fs root objectid*/
541         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
542                 root_objectid = root->root_key.offset;
543                 fprintf(stderr, "reloc");
544         }
545         fprintf(stderr, "root %llu inode %llu errors %x",
546                 (unsigned long long) root_objectid,
547                 (unsigned long long) rec->ino, rec->errors);
548
549         if (errors & I_ERR_NO_INODE_ITEM)
550                 fprintf(stderr, ", no inode item");
551         if (errors & I_ERR_NO_ORPHAN_ITEM)
552                 fprintf(stderr, ", no orphan item");
553         if (errors & I_ERR_DUP_INODE_ITEM)
554                 fprintf(stderr, ", dup inode item");
555         if (errors & I_ERR_DUP_DIR_INDEX)
556                 fprintf(stderr, ", dup dir index");
557         if (errors & I_ERR_ODD_DIR_ITEM)
558                 fprintf(stderr, ", odd dir item");
559         if (errors & I_ERR_ODD_FILE_EXTENT)
560                 fprintf(stderr, ", odd file extent");
561         if (errors & I_ERR_BAD_FILE_EXTENT)
562                 fprintf(stderr, ", bad file extent");
563         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
564                 fprintf(stderr, ", file extent overlap");
565         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
566                 fprintf(stderr, ", file extent discount");
567         if (errors & I_ERR_DIR_ISIZE_WRONG)
568                 fprintf(stderr, ", dir isize wrong");
569         if (errors & I_ERR_FILE_NBYTES_WRONG)
570                 fprintf(stderr, ", nbytes wrong");
571         if (errors & I_ERR_ODD_CSUM_ITEM)
572                 fprintf(stderr, ", odd csum item");
573         if (errors & I_ERR_SOME_CSUM_MISSING)
574                 fprintf(stderr, ", some csum missing");
575         if (errors & I_ERR_LINK_COUNT_WRONG)
576                 fprintf(stderr, ", link count wrong");
577         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
578                 fprintf(stderr, ", orphan file extent");
579         fprintf(stderr, "\n");
580         /* Print the orphan extents if needed */
581         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
582                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
583
584         /* Print the holes if needed */
585         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
586                 struct file_extent_hole *hole;
587                 struct rb_node *node;
588                 int found = 0;
589
590                 node = rb_first(&rec->holes);
591                 fprintf(stderr, "Found file extent holes:\n");
592                 while (node) {
593                         found = 1;
594                         hole = rb_entry(node, struct file_extent_hole, node);
595                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
596                                 hole->start, hole->len);
597                         node = rb_next(node);
598                 }
599                 if (!found)
600                         fprintf(stderr, "\tstart: 0, len: %llu\n",
601                                 round_up(rec->isize,
602                                          root->fs_info->sectorsize));
603         }
604 }
605
606 static void print_ref_error(int errors)
607 {
608         if (errors & REF_ERR_NO_DIR_ITEM)
609                 fprintf(stderr, ", no dir item");
610         if (errors & REF_ERR_NO_DIR_INDEX)
611                 fprintf(stderr, ", no dir index");
612         if (errors & REF_ERR_NO_INODE_REF)
613                 fprintf(stderr, ", no inode ref");
614         if (errors & REF_ERR_DUP_DIR_ITEM)
615                 fprintf(stderr, ", dup dir item");
616         if (errors & REF_ERR_DUP_DIR_INDEX)
617                 fprintf(stderr, ", dup dir index");
618         if (errors & REF_ERR_DUP_INODE_REF)
619                 fprintf(stderr, ", dup inode ref");
620         if (errors & REF_ERR_INDEX_UNMATCH)
621                 fprintf(stderr, ", index mismatch");
622         if (errors & REF_ERR_FILETYPE_UNMATCH)
623                 fprintf(stderr, ", filetype mismatch");
624         if (errors & REF_ERR_NAME_TOO_LONG)
625                 fprintf(stderr, ", name too long");
626         if (errors & REF_ERR_NO_ROOT_REF)
627                 fprintf(stderr, ", no root ref");
628         if (errors & REF_ERR_NO_ROOT_BACKREF)
629                 fprintf(stderr, ", no root backref");
630         if (errors & REF_ERR_DUP_ROOT_REF)
631                 fprintf(stderr, ", dup root ref");
632         if (errors & REF_ERR_DUP_ROOT_BACKREF)
633                 fprintf(stderr, ", dup root backref");
634         fprintf(stderr, "\n");
635 }
636
637 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
638                                           u64 ino, int mod)
639 {
640         struct ptr_node *node;
641         struct cache_extent *cache;
642         struct inode_record *rec = NULL;
643         int ret;
644
645         cache = lookup_cache_extent(inode_cache, ino, 1);
646         if (cache) {
647                 node = container_of(cache, struct ptr_node, cache);
648                 rec = node->data;
649                 if (mod && rec->refs > 1) {
650                         node->data = clone_inode_rec(rec);
651                         if (IS_ERR(node->data))
652                                 return node->data;
653                         rec->refs--;
654                         rec = node->data;
655                 }
656         } else if (mod) {
657                 rec = calloc(1, sizeof(*rec));
658                 if (!rec)
659                         return ERR_PTR(-ENOMEM);
660                 rec->ino = ino;
661                 rec->extent_start = (u64)-1;
662                 rec->refs = 1;
663                 INIT_LIST_HEAD(&rec->backrefs);
664                 INIT_LIST_HEAD(&rec->orphan_extents);
665                 rec->holes = RB_ROOT;
666
667                 node = malloc(sizeof(*node));
668                 if (!node) {
669                         free(rec);
670                         return ERR_PTR(-ENOMEM);
671                 }
672                 node->cache.start = ino;
673                 node->cache.size = 1;
674                 node->data = rec;
675
676                 if (ino == BTRFS_FREE_INO_OBJECTID)
677                         rec->found_link = 1;
678
679                 ret = insert_cache_extent(inode_cache, &node->cache);
680                 if (ret)
681                         return ERR_PTR(-EEXIST);
682         }
683         return rec;
684 }
685
686 static void free_orphan_data_extents(struct list_head *orphan_extents)
687 {
688         struct orphan_data_extent *orphan;
689
690         while (!list_empty(orphan_extents)) {
691                 orphan = list_entry(orphan_extents->next,
692                                     struct orphan_data_extent, list);
693                 list_del(&orphan->list);
694                 free(orphan);
695         }
696 }
697
698 static void free_inode_rec(struct inode_record *rec)
699 {
700         struct inode_backref *backref;
701
702         if (--rec->refs > 0)
703                 return;
704
705         while (!list_empty(&rec->backrefs)) {
706                 backref = to_inode_backref(rec->backrefs.next);
707                 list_del(&backref->list);
708                 free(backref);
709         }
710         free_orphan_data_extents(&rec->orphan_extents);
711         free_file_extent_holes(&rec->holes);
712         free(rec);
713 }
714
715 static int can_free_inode_rec(struct inode_record *rec)
716 {
717         if (!rec->errors && rec->checked && rec->found_inode_item &&
718             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
719                 return 1;
720         return 0;
721 }
722
723 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
724                                  struct inode_record *rec)
725 {
726         struct cache_extent *cache;
727         struct inode_backref *tmp, *backref;
728         struct ptr_node *node;
729         u8 filetype;
730
731         if (!rec->found_inode_item)
732                 return;
733
734         filetype = imode_to_type(rec->imode);
735         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
736                 if (backref->found_dir_item && backref->found_dir_index) {
737                         if (backref->filetype != filetype)
738                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
739                         if (!backref->errors && backref->found_inode_ref &&
740                             rec->nlink == rec->found_link) {
741                                 list_del(&backref->list);
742                                 free(backref);
743                         }
744                 }
745         }
746
747         if (!rec->checked || rec->merging)
748                 return;
749
750         if (S_ISDIR(rec->imode)) {
751                 if (rec->found_size != rec->isize)
752                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
753                 if (rec->found_file_extent)
754                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
755         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
756                 if (rec->found_dir_item)
757                         rec->errors |= I_ERR_ODD_DIR_ITEM;
758                 if (rec->found_size != rec->nbytes)
759                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
760                 if (rec->nlink > 0 && !no_holes &&
761                     (rec->extent_end < rec->isize ||
762                      first_extent_gap(&rec->holes) < rec->isize))
763                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
764         }
765
766         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
767                 if (rec->found_csum_item && rec->nodatasum)
768                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
769                 if (rec->some_csum_missing && !rec->nodatasum)
770                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
771         }
772
773         BUG_ON(rec->refs != 1);
774         if (can_free_inode_rec(rec)) {
775                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
776                 node = container_of(cache, struct ptr_node, cache);
777                 BUG_ON(node->data != rec);
778                 remove_cache_extent(inode_cache, &node->cache);
779                 free(node);
780                 free_inode_rec(rec);
781         }
782 }
783
784 static int check_orphan_item(struct btrfs_root *root, u64 ino)
785 {
786         struct btrfs_path path;
787         struct btrfs_key key;
788         int ret;
789
790         key.objectid = BTRFS_ORPHAN_OBJECTID;
791         key.type = BTRFS_ORPHAN_ITEM_KEY;
792         key.offset = ino;
793
794         btrfs_init_path(&path);
795         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
796         btrfs_release_path(&path);
797         if (ret > 0)
798                 ret = -ENOENT;
799         return ret;
800 }
801
802 static int process_inode_item(struct extent_buffer *eb,
803                               int slot, struct btrfs_key *key,
804                               struct shared_node *active_node)
805 {
806         struct inode_record *rec;
807         struct btrfs_inode_item *item;
808
809         rec = active_node->current;
810         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
811         if (rec->found_inode_item) {
812                 rec->errors |= I_ERR_DUP_INODE_ITEM;
813                 return 1;
814         }
815         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
816         rec->nlink = btrfs_inode_nlink(eb, item);
817         rec->isize = btrfs_inode_size(eb, item);
818         rec->nbytes = btrfs_inode_nbytes(eb, item);
819         rec->imode = btrfs_inode_mode(eb, item);
820         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
821                 rec->nodatasum = 1;
822         rec->found_inode_item = 1;
823         if (rec->nlink == 0)
824                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
825         maybe_free_inode_rec(&active_node->inode_cache, rec);
826         return 0;
827 }
828
829 static struct inode_backref *get_inode_backref(struct inode_record *rec,
830                                                 const char *name,
831                                                 int namelen, u64 dir)
832 {
833         struct inode_backref *backref;
834
835         list_for_each_entry(backref, &rec->backrefs, list) {
836                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
837                         break;
838                 if (backref->dir != dir || backref->namelen != namelen)
839                         continue;
840                 if (memcmp(name, backref->name, namelen))
841                         continue;
842                 return backref;
843         }
844
845         backref = malloc(sizeof(*backref) + namelen + 1);
846         if (!backref)
847                 return NULL;
848         memset(backref, 0, sizeof(*backref));
849         backref->dir = dir;
850         backref->namelen = namelen;
851         memcpy(backref->name, name, namelen);
852         backref->name[namelen] = '\0';
853         list_add_tail(&backref->list, &rec->backrefs);
854         return backref;
855 }
856
857 static int add_inode_backref(struct cache_tree *inode_cache,
858                              u64 ino, u64 dir, u64 index,
859                              const char *name, int namelen,
860                              u8 filetype, u8 itemtype, int errors)
861 {
862         struct inode_record *rec;
863         struct inode_backref *backref;
864
865         rec = get_inode_rec(inode_cache, ino, 1);
866         BUG_ON(IS_ERR(rec));
867         backref = get_inode_backref(rec, name, namelen, dir);
868         BUG_ON(!backref);
869         if (errors)
870                 backref->errors |= errors;
871         if (itemtype == BTRFS_DIR_INDEX_KEY) {
872                 if (backref->found_dir_index)
873                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
874                 if (backref->found_inode_ref && backref->index != index)
875                         backref->errors |= REF_ERR_INDEX_UNMATCH;
876                 if (backref->found_dir_item && backref->filetype != filetype)
877                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
878
879                 backref->index = index;
880                 backref->filetype = filetype;
881                 backref->found_dir_index = 1;
882         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
883                 rec->found_link++;
884                 if (backref->found_dir_item)
885                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
886                 if (backref->found_dir_index && backref->filetype != filetype)
887                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
888
889                 backref->filetype = filetype;
890                 backref->found_dir_item = 1;
891         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
892                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
893                 if (backref->found_inode_ref)
894                         backref->errors |= REF_ERR_DUP_INODE_REF;
895                 if (backref->found_dir_index && backref->index != index)
896                         backref->errors |= REF_ERR_INDEX_UNMATCH;
897                 else
898                         backref->index = index;
899
900                 backref->ref_type = itemtype;
901                 backref->found_inode_ref = 1;
902         } else {
903                 BUG_ON(1);
904         }
905
906         maybe_free_inode_rec(inode_cache, rec);
907         return 0;
908 }
909
910 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
911                             struct cache_tree *dst_cache)
912 {
913         struct inode_backref *backref;
914         u32 dir_count = 0;
915         int ret = 0;
916
917         dst->merging = 1;
918         list_for_each_entry(backref, &src->backrefs, list) {
919                 if (backref->found_dir_index) {
920                         add_inode_backref(dst_cache, dst->ino, backref->dir,
921                                         backref->index, backref->name,
922                                         backref->namelen, backref->filetype,
923                                         BTRFS_DIR_INDEX_KEY, backref->errors);
924                 }
925                 if (backref->found_dir_item) {
926                         dir_count++;
927                         add_inode_backref(dst_cache, dst->ino,
928                                         backref->dir, 0, backref->name,
929                                         backref->namelen, backref->filetype,
930                                         BTRFS_DIR_ITEM_KEY, backref->errors);
931                 }
932                 if (backref->found_inode_ref) {
933                         add_inode_backref(dst_cache, dst->ino,
934                                         backref->dir, backref->index,
935                                         backref->name, backref->namelen, 0,
936                                         backref->ref_type, backref->errors);
937                 }
938         }
939
940         if (src->found_dir_item)
941                 dst->found_dir_item = 1;
942         if (src->found_file_extent)
943                 dst->found_file_extent = 1;
944         if (src->found_csum_item)
945                 dst->found_csum_item = 1;
946         if (src->some_csum_missing)
947                 dst->some_csum_missing = 1;
948         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
949                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
950                 if (ret < 0)
951                         return ret;
952         }
953
954         BUG_ON(src->found_link < dir_count);
955         dst->found_link += src->found_link - dir_count;
956         dst->found_size += src->found_size;
957         if (src->extent_start != (u64)-1) {
958                 if (dst->extent_start == (u64)-1) {
959                         dst->extent_start = src->extent_start;
960                         dst->extent_end = src->extent_end;
961                 } else {
962                         if (dst->extent_end > src->extent_start)
963                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
964                         else if (dst->extent_end < src->extent_start) {
965                                 ret = add_file_extent_hole(&dst->holes,
966                                         dst->extent_end,
967                                         src->extent_start - dst->extent_end);
968                         }
969                         if (dst->extent_end < src->extent_end)
970                                 dst->extent_end = src->extent_end;
971                 }
972         }
973
974         dst->errors |= src->errors;
975         if (src->found_inode_item) {
976                 if (!dst->found_inode_item) {
977                         dst->nlink = src->nlink;
978                         dst->isize = src->isize;
979                         dst->nbytes = src->nbytes;
980                         dst->imode = src->imode;
981                         dst->nodatasum = src->nodatasum;
982                         dst->found_inode_item = 1;
983                 } else {
984                         dst->errors |= I_ERR_DUP_INODE_ITEM;
985                 }
986         }
987         dst->merging = 0;
988
989         return 0;
990 }
991
992 static int splice_shared_node(struct shared_node *src_node,
993                               struct shared_node *dst_node)
994 {
995         struct cache_extent *cache;
996         struct ptr_node *node, *ins;
997         struct cache_tree *src, *dst;
998         struct inode_record *rec, *conflict;
999         u64 current_ino = 0;
1000         int splice = 0;
1001         int ret;
1002
1003         if (--src_node->refs == 0)
1004                 splice = 1;
1005         if (src_node->current)
1006                 current_ino = src_node->current->ino;
1007
1008         src = &src_node->root_cache;
1009         dst = &dst_node->root_cache;
1010 again:
1011         cache = search_cache_extent(src, 0);
1012         while (cache) {
1013                 node = container_of(cache, struct ptr_node, cache);
1014                 rec = node->data;
1015                 cache = next_cache_extent(cache);
1016
1017                 if (splice) {
1018                         remove_cache_extent(src, &node->cache);
1019                         ins = node;
1020                 } else {
1021                         ins = malloc(sizeof(*ins));
1022                         BUG_ON(!ins);
1023                         ins->cache.start = node->cache.start;
1024                         ins->cache.size = node->cache.size;
1025                         ins->data = rec;
1026                         rec->refs++;
1027                 }
1028                 ret = insert_cache_extent(dst, &ins->cache);
1029                 if (ret == -EEXIST) {
1030                         conflict = get_inode_rec(dst, rec->ino, 1);
1031                         BUG_ON(IS_ERR(conflict));
1032                         merge_inode_recs(rec, conflict, dst);
1033                         if (rec->checked) {
1034                                 conflict->checked = 1;
1035                                 if (dst_node->current == conflict)
1036                                         dst_node->current = NULL;
1037                         }
1038                         maybe_free_inode_rec(dst, conflict);
1039                         free_inode_rec(rec);
1040                         free(ins);
1041                 } else {
1042                         BUG_ON(ret);
1043                 }
1044         }
1045
1046         if (src == &src_node->root_cache) {
1047                 src = &src_node->inode_cache;
1048                 dst = &dst_node->inode_cache;
1049                 goto again;
1050         }
1051
1052         if (current_ino > 0 && (!dst_node->current ||
1053             current_ino > dst_node->current->ino)) {
1054                 if (dst_node->current) {
1055                         dst_node->current->checked = 1;
1056                         maybe_free_inode_rec(dst, dst_node->current);
1057                 }
1058                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1059                 BUG_ON(IS_ERR(dst_node->current));
1060         }
1061         return 0;
1062 }
1063
1064 static void free_inode_ptr(struct cache_extent *cache)
1065 {
1066         struct ptr_node *node;
1067         struct inode_record *rec;
1068
1069         node = container_of(cache, struct ptr_node, cache);
1070         rec = node->data;
1071         free_inode_rec(rec);
1072         free(node);
1073 }
1074
1075 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1076
1077 static struct shared_node *find_shared_node(struct cache_tree *shared,
1078                                             u64 bytenr)
1079 {
1080         struct cache_extent *cache;
1081         struct shared_node *node;
1082
1083         cache = lookup_cache_extent(shared, bytenr, 1);
1084         if (cache) {
1085                 node = container_of(cache, struct shared_node, cache);
1086                 return node;
1087         }
1088         return NULL;
1089 }
1090
1091 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1092 {
1093         int ret;
1094         struct shared_node *node;
1095
1096         node = calloc(1, sizeof(*node));
1097         if (!node)
1098                 return -ENOMEM;
1099         node->cache.start = bytenr;
1100         node->cache.size = 1;
1101         cache_tree_init(&node->root_cache);
1102         cache_tree_init(&node->inode_cache);
1103         node->refs = refs;
1104
1105         ret = insert_cache_extent(shared, &node->cache);
1106
1107         return ret;
1108 }
1109
1110 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1111                              struct walk_control *wc, int level)
1112 {
1113         struct shared_node *node;
1114         struct shared_node *dest;
1115         int ret;
1116
1117         if (level == wc->active_node)
1118                 return 0;
1119
1120         BUG_ON(wc->active_node <= level);
1121         node = find_shared_node(&wc->shared, bytenr);
1122         if (!node) {
1123                 ret = add_shared_node(&wc->shared, bytenr, refs);
1124                 BUG_ON(ret);
1125                 node = find_shared_node(&wc->shared, bytenr);
1126                 wc->nodes[level] = node;
1127                 wc->active_node = level;
1128                 return 0;
1129         }
1130
1131         if (wc->root_level == wc->active_node &&
1132             btrfs_root_refs(&root->root_item) == 0) {
1133                 if (--node->refs == 0) {
1134                         free_inode_recs_tree(&node->root_cache);
1135                         free_inode_recs_tree(&node->inode_cache);
1136                         remove_cache_extent(&wc->shared, &node->cache);
1137                         free(node);
1138                 }
1139                 return 1;
1140         }
1141
1142         dest = wc->nodes[wc->active_node];
1143         splice_shared_node(node, dest);
1144         if (node->refs == 0) {
1145                 remove_cache_extent(&wc->shared, &node->cache);
1146                 free(node);
1147         }
1148         return 1;
1149 }
1150
1151 static int leave_shared_node(struct btrfs_root *root,
1152                              struct walk_control *wc, int level)
1153 {
1154         struct shared_node *node;
1155         struct shared_node *dest;
1156         int i;
1157
1158         if (level == wc->root_level)
1159                 return 0;
1160
1161         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1162                 if (wc->nodes[i])
1163                         break;
1164         }
1165         BUG_ON(i >= BTRFS_MAX_LEVEL);
1166
1167         node = wc->nodes[wc->active_node];
1168         wc->nodes[wc->active_node] = NULL;
1169         wc->active_node = i;
1170
1171         dest = wc->nodes[wc->active_node];
1172         if (wc->active_node < wc->root_level ||
1173             btrfs_root_refs(&root->root_item) > 0) {
1174                 BUG_ON(node->refs <= 1);
1175                 splice_shared_node(node, dest);
1176         } else {
1177                 BUG_ON(node->refs < 2);
1178                 node->refs--;
1179         }
1180         return 0;
1181 }
1182
1183 /*
1184  * Returns:
1185  * < 0 - on error
1186  * 1   - if the root with id child_root_id is a child of root parent_root_id
1187  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1188  *       has other root(s) as parent(s)
1189  * 2   - if the root child_root_id doesn't have any parent roots
1190  */
1191 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1192                          u64 child_root_id)
1193 {
1194         struct btrfs_path path;
1195         struct btrfs_key key;
1196         struct extent_buffer *leaf;
1197         int has_parent = 0;
1198         int ret;
1199
1200         btrfs_init_path(&path);
1201
1202         key.objectid = parent_root_id;
1203         key.type = BTRFS_ROOT_REF_KEY;
1204         key.offset = child_root_id;
1205         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1206                                 0, 0);
1207         if (ret < 0)
1208                 return ret;
1209         btrfs_release_path(&path);
1210         if (!ret)
1211                 return 1;
1212
1213         key.objectid = child_root_id;
1214         key.type = BTRFS_ROOT_BACKREF_KEY;
1215         key.offset = 0;
1216         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1217                                 0, 0);
1218         if (ret < 0)
1219                 goto out;
1220
1221         while (1) {
1222                 leaf = path.nodes[0];
1223                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1224                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1225                         if (ret)
1226                                 break;
1227                         leaf = path.nodes[0];
1228                 }
1229
1230                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1231                 if (key.objectid != child_root_id ||
1232                     key.type != BTRFS_ROOT_BACKREF_KEY)
1233                         break;
1234
1235                 has_parent = 1;
1236
1237                 if (key.offset == parent_root_id) {
1238                         btrfs_release_path(&path);
1239                         return 1;
1240                 }
1241
1242                 path.slots[0]++;
1243         }
1244 out:
1245         btrfs_release_path(&path);
1246         if (ret < 0)
1247                 return ret;
1248         return has_parent ? 0 : 2;
1249 }
1250
1251 static int process_dir_item(struct extent_buffer *eb,
1252                             int slot, struct btrfs_key *key,
1253                             struct shared_node *active_node)
1254 {
1255         u32 total;
1256         u32 cur = 0;
1257         u32 len;
1258         u32 name_len;
1259         u32 data_len;
1260         int error;
1261         int nritems = 0;
1262         u8 filetype;
1263         struct btrfs_dir_item *di;
1264         struct inode_record *rec;
1265         struct cache_tree *root_cache;
1266         struct cache_tree *inode_cache;
1267         struct btrfs_key location;
1268         char namebuf[BTRFS_NAME_LEN];
1269
1270         root_cache = &active_node->root_cache;
1271         inode_cache = &active_node->inode_cache;
1272         rec = active_node->current;
1273         rec->found_dir_item = 1;
1274
1275         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1276         total = btrfs_item_size_nr(eb, slot);
1277         while (cur < total) {
1278                 nritems++;
1279                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1280                 name_len = btrfs_dir_name_len(eb, di);
1281                 data_len = btrfs_dir_data_len(eb, di);
1282                 filetype = btrfs_dir_type(eb, di);
1283
1284                 rec->found_size += name_len;
1285                 if (cur + sizeof(*di) + name_len > total ||
1286                     name_len > BTRFS_NAME_LEN) {
1287                         error = REF_ERR_NAME_TOO_LONG;
1288
1289                         if (cur + sizeof(*di) > total)
1290                                 break;
1291                         len = min_t(u32, total - cur - sizeof(*di),
1292                                     BTRFS_NAME_LEN);
1293                 } else {
1294                         len = name_len;
1295                         error = 0;
1296                 }
1297
1298                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1299
1300                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1301                     key->offset != btrfs_name_hash(namebuf, len)) {
1302                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1303                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1304                         key->objectid, key->offset, namebuf, len, filetype,
1305                         key->offset, btrfs_name_hash(namebuf, len));
1306                 }
1307
1308                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1309                         add_inode_backref(inode_cache, location.objectid,
1310                                           key->objectid, key->offset, namebuf,
1311                                           len, filetype, key->type, error);
1312                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1313                         add_inode_backref(root_cache, location.objectid,
1314                                           key->objectid, key->offset,
1315                                           namebuf, len, filetype,
1316                                           key->type, error);
1317                 } else {
1318                         fprintf(stderr,
1319                                 "unknown location type %d in DIR_ITEM[%llu %llu]\n",
1320                                 location.type, key->objectid, key->offset);
1321                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1322                                           key->objectid, key->offset, namebuf,
1323                                           len, filetype, key->type, error);
1324                 }
1325
1326                 len = sizeof(*di) + name_len + data_len;
1327                 di = (struct btrfs_dir_item *)((char *)di + len);
1328                 cur += len;
1329         }
1330         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1331                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1332
1333         return 0;
1334 }
1335
1336 static int process_inode_ref(struct extent_buffer *eb,
1337                              int slot, struct btrfs_key *key,
1338                              struct shared_node *active_node)
1339 {
1340         u32 total;
1341         u32 cur = 0;
1342         u32 len;
1343         u32 name_len;
1344         u64 index;
1345         int error;
1346         struct cache_tree *inode_cache;
1347         struct btrfs_inode_ref *ref;
1348         char namebuf[BTRFS_NAME_LEN];
1349
1350         inode_cache = &active_node->inode_cache;
1351
1352         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1353         total = btrfs_item_size_nr(eb, slot);
1354         while (cur < total) {
1355                 name_len = btrfs_inode_ref_name_len(eb, ref);
1356                 index = btrfs_inode_ref_index(eb, ref);
1357
1358                 /* inode_ref + namelen should not cross item boundary */
1359                 if (cur + sizeof(*ref) + name_len > total ||
1360                     name_len > BTRFS_NAME_LEN) {
1361                         if (total < cur + sizeof(*ref))
1362                                 break;
1363
1364                         /* Still try to read out the remaining part */
1365                         len = min_t(u32, total - cur - sizeof(*ref),
1366                                     BTRFS_NAME_LEN);
1367                         error = REF_ERR_NAME_TOO_LONG;
1368                 } else {
1369                         len = name_len;
1370                         error = 0;
1371                 }
1372
1373                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1374                 add_inode_backref(inode_cache, key->objectid, key->offset,
1375                                   index, namebuf, len, 0, key->type, error);
1376
1377                 len = sizeof(*ref) + name_len;
1378                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1379                 cur += len;
1380         }
1381         return 0;
1382 }
1383
1384 static int process_inode_extref(struct extent_buffer *eb,
1385                                 int slot, struct btrfs_key *key,
1386                                 struct shared_node *active_node)
1387 {
1388         u32 total;
1389         u32 cur = 0;
1390         u32 len;
1391         u32 name_len;
1392         u64 index;
1393         u64 parent;
1394         int error;
1395         struct cache_tree *inode_cache;
1396         struct btrfs_inode_extref *extref;
1397         char namebuf[BTRFS_NAME_LEN];
1398
1399         inode_cache = &active_node->inode_cache;
1400
1401         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1402         total = btrfs_item_size_nr(eb, slot);
1403         while (cur < total) {
1404                 name_len = btrfs_inode_extref_name_len(eb, extref);
1405                 index = btrfs_inode_extref_index(eb, extref);
1406                 parent = btrfs_inode_extref_parent(eb, extref);
1407                 if (name_len <= BTRFS_NAME_LEN) {
1408                         len = name_len;
1409                         error = 0;
1410                 } else {
1411                         len = BTRFS_NAME_LEN;
1412                         error = REF_ERR_NAME_TOO_LONG;
1413                 }
1414                 read_extent_buffer(eb, namebuf,
1415                                    (unsigned long)(extref + 1), len);
1416                 add_inode_backref(inode_cache, key->objectid, parent,
1417                                   index, namebuf, len, 0, key->type, error);
1418
1419                 len = sizeof(*extref) + name_len;
1420                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1421                 cur += len;
1422         }
1423         return 0;
1424
1425 }
1426
1427 static int process_file_extent(struct btrfs_root *root,
1428                                 struct extent_buffer *eb,
1429                                 int slot, struct btrfs_key *key,
1430                                 struct shared_node *active_node)
1431 {
1432         struct inode_record *rec;
1433         struct btrfs_file_extent_item *fi;
1434         u64 num_bytes = 0;
1435         u64 disk_bytenr = 0;
1436         u64 extent_offset = 0;
1437         u64 mask = root->fs_info->sectorsize - 1;
1438         int extent_type;
1439         int ret;
1440
1441         rec = active_node->current;
1442         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1443         rec->found_file_extent = 1;
1444
1445         if (rec->extent_start == (u64)-1) {
1446                 rec->extent_start = key->offset;
1447                 rec->extent_end = key->offset;
1448         }
1449
1450         if (rec->extent_end > key->offset)
1451                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1452         else if (rec->extent_end < key->offset) {
1453                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1454                                            key->offset - rec->extent_end);
1455                 if (ret < 0)
1456                         return ret;
1457         }
1458
1459         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1460         extent_type = btrfs_file_extent_type(eb, fi);
1461
1462         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1463                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1464                 if (num_bytes == 0)
1465                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1466                 rec->found_size += num_bytes;
1467                 num_bytes = (num_bytes + mask) & ~mask;
1468         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1469                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1470                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1471                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1472                 extent_offset = btrfs_file_extent_offset(eb, fi);
1473                 if (num_bytes == 0 || (num_bytes & mask))
1474                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1475                 if (num_bytes + extent_offset >
1476                     btrfs_file_extent_ram_bytes(eb, fi))
1477                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1478                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1479                     (btrfs_file_extent_compression(eb, fi) ||
1480                      btrfs_file_extent_encryption(eb, fi) ||
1481                      btrfs_file_extent_other_encoding(eb, fi)))
1482                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1483                 if (disk_bytenr > 0)
1484                         rec->found_size += num_bytes;
1485         } else {
1486                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1487         }
1488         rec->extent_end = key->offset + num_bytes;
1489
1490         /*
1491          * The data reloc tree will copy full extents into its inode and then
1492          * copy the corresponding csums.  Because the extent it copied could be
1493          * a preallocated extent that hasn't been written to yet there may be no
1494          * csums to copy, ergo we won't have csums for our file extent.  This is
1495          * ok so just don't bother checking csums if the inode belongs to the
1496          * data reloc tree.
1497          */
1498         if (disk_bytenr > 0 &&
1499             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1500                 u64 found;
1501                 if (btrfs_file_extent_compression(eb, fi))
1502                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1503                 else
1504                         disk_bytenr += extent_offset;
1505
1506                 ret = count_csum_range(root->fs_info, disk_bytenr, num_bytes,
1507                                        &found);
1508                 if (ret < 0)
1509                         return ret;
1510                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1511                         if (found > 0)
1512                                 rec->found_csum_item = 1;
1513                         if (found < num_bytes)
1514                                 rec->some_csum_missing = 1;
1515                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1516                         if (found > 0)
1517                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1518                 }
1519         }
1520         return 0;
1521 }
1522
1523 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1524                             struct walk_control *wc)
1525 {
1526         struct btrfs_key key;
1527         u32 nritems;
1528         int i;
1529         int ret = 0;
1530         struct cache_tree *inode_cache;
1531         struct shared_node *active_node;
1532
1533         if (wc->root_level == wc->active_node &&
1534             btrfs_root_refs(&root->root_item) == 0)
1535                 return 0;
1536
1537         active_node = wc->nodes[wc->active_node];
1538         inode_cache = &active_node->inode_cache;
1539         nritems = btrfs_header_nritems(eb);
1540         for (i = 0; i < nritems; i++) {
1541                 btrfs_item_key_to_cpu(eb, &key, i);
1542
1543                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1544                         continue;
1545                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1546                         continue;
1547
1548                 if (active_node->current == NULL ||
1549                     active_node->current->ino < key.objectid) {
1550                         if (active_node->current) {
1551                                 active_node->current->checked = 1;
1552                                 maybe_free_inode_rec(inode_cache,
1553                                                      active_node->current);
1554                         }
1555                         active_node->current = get_inode_rec(inode_cache,
1556                                                              key.objectid, 1);
1557                         BUG_ON(IS_ERR(active_node->current));
1558                 }
1559                 switch (key.type) {
1560                 case BTRFS_DIR_ITEM_KEY:
1561                 case BTRFS_DIR_INDEX_KEY:
1562                         ret = process_dir_item(eb, i, &key, active_node);
1563                         break;
1564                 case BTRFS_INODE_REF_KEY:
1565                         ret = process_inode_ref(eb, i, &key, active_node);
1566                         break;
1567                 case BTRFS_INODE_EXTREF_KEY:
1568                         ret = process_inode_extref(eb, i, &key, active_node);
1569                         break;
1570                 case BTRFS_INODE_ITEM_KEY:
1571                         ret = process_inode_item(eb, i, &key, active_node);
1572                         break;
1573                 case BTRFS_EXTENT_DATA_KEY:
1574                         ret = process_file_extent(root, eb, i, &key,
1575                                                   active_node);
1576                         break;
1577                 default:
1578                         break;
1579                 };
1580         }
1581         return ret;
1582 }
1583
1584 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1585                              struct extent_buffer *eb, struct node_refs *nrefs,
1586                              u64 level, int check_all);
1587 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1588                             unsigned int ext_ref);
1589
1590 /*
1591  * Returns >0  Found error, not fatal, should continue
1592  * Returns <0  Fatal error, must exit the whole check
1593  * Returns 0   No errors found
1594  */
1595 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1596                                struct node_refs *nrefs, int *level, int ext_ref)
1597 {
1598         struct extent_buffer *cur = path->nodes[0];
1599         struct btrfs_key key;
1600         u64 cur_bytenr;
1601         u32 nritems;
1602         u64 first_ino = 0;
1603         int root_level = btrfs_header_level(root->node);
1604         int i;
1605         int ret = 0; /* Final return value */
1606         int err = 0; /* Positive error bitmap */
1607
1608         cur_bytenr = cur->start;
1609
1610         /* skip to first inode item or the first inode number change */
1611         nritems = btrfs_header_nritems(cur);
1612         for (i = 0; i < nritems; i++) {
1613                 btrfs_item_key_to_cpu(cur, &key, i);
1614                 if (i == 0)
1615                         first_ino = key.objectid;
1616                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1617                     (first_ino && first_ino != key.objectid))
1618                         break;
1619         }
1620         if (i == nritems) {
1621                 path->slots[0] = nritems;
1622                 return 0;
1623         }
1624         path->slots[0] = i;
1625
1626 again:
1627         err |= check_inode_item(root, path, ext_ref);
1628
1629         /* modify cur since check_inode_item may change path */
1630         cur = path->nodes[0];
1631
1632         if (err & LAST_ITEM)
1633                 goto out;
1634
1635         /* still have inode items in thie leaf */
1636         if (cur->start == cur_bytenr)
1637                 goto again;
1638
1639         /*
1640          * we have switched to another leaf, above nodes may
1641          * have changed, here walk down the path, if a node
1642          * or leaf is shared, check whether we can skip this
1643          * node or leaf.
1644          */
1645         for (i = root_level; i >= 0; i--) {
1646                 if (path->nodes[i]->start == nrefs->bytenr[i])
1647                         continue;
1648
1649                 ret = update_nodes_refs(root, path->nodes[i]->start,
1650                                 path->nodes[i], nrefs, i, 0);
1651                 if (ret)
1652                         goto out;
1653
1654                 if (!nrefs->need_check[i]) {
1655                         *level += 1;
1656                         break;
1657                 }
1658         }
1659
1660         for (i = 0; i < *level; i++) {
1661                 free_extent_buffer(path->nodes[i]);
1662                 path->nodes[i] = NULL;
1663         }
1664 out:
1665         err &= ~LAST_ITEM;
1666         if (err && !ret)
1667                 ret = err;
1668         return ret;
1669 }
1670
1671 /*
1672  * Check the child node/leaf by the following condition:
1673  * 1. the first item key of the node/leaf should be the same with the one
1674  *    in parent.
1675  * 2. block in parent node should match the child node/leaf.
1676  * 3. generation of parent node and child's header should be consistent.
1677  *
1678  * Or the child node/leaf pointed by the key in parent is not valid.
1679  *
1680  * We hope to check leaf owner too, but since subvol may share leaves,
1681  * which makes leaf owner check not so strong, key check should be
1682  * sufficient enough for that case.
1683  */
1684 static int check_child_node(struct extent_buffer *parent, int slot,
1685                             struct extent_buffer *child)
1686 {
1687         struct btrfs_key parent_key;
1688         struct btrfs_key child_key;
1689         int ret = 0;
1690
1691         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1692         if (btrfs_header_level(child) == 0)
1693                 btrfs_item_key_to_cpu(child, &child_key, 0);
1694         else
1695                 btrfs_node_key_to_cpu(child, &child_key, 0);
1696
1697         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1698                 ret = -EINVAL;
1699                 fprintf(stderr,
1700                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1701                         parent_key.objectid, parent_key.type, parent_key.offset,
1702                         child_key.objectid, child_key.type, child_key.offset);
1703         }
1704         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1705                 ret = -EINVAL;
1706                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1707                         btrfs_node_blockptr(parent, slot),
1708                         btrfs_header_bytenr(child));
1709         }
1710         if (btrfs_node_ptr_generation(parent, slot) !=
1711             btrfs_header_generation(child)) {
1712                 ret = -EINVAL;
1713                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1714                         btrfs_header_generation(child),
1715                         btrfs_node_ptr_generation(parent, slot));
1716         }
1717         return ret;
1718 }
1719
1720 /*
1721  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
1722  * in every fs or file tree check. Here we find its all root ids, and only check
1723  * it in the fs or file tree which has the smallest root id.
1724  */
1725 static int need_check(struct btrfs_root *root, struct ulist *roots)
1726 {
1727         struct rb_node *node;
1728         struct ulist_node *u;
1729
1730         /*
1731          * @roots can be empty if it belongs to tree reloc tree
1732          * In that case, we should always check the leaf, as we can't use
1733          * the tree owner to ensure some other root will check it.
1734          */
1735         if (roots->nnodes == 1 || roots->nnodes == 0)
1736                 return 1;
1737
1738         node = rb_first(&roots->root);
1739         u = rb_entry(node, struct ulist_node, rb_node);
1740         /*
1741          * current root id is not smallest, we skip it and let it be checked
1742          * in the fs or file tree who hash the smallest root id.
1743          */
1744         if (root->objectid != u->val)
1745                 return 0;
1746
1747         return 1;
1748 }
1749
1750 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
1751                                u64 *flags_ret)
1752 {
1753         struct btrfs_root *extent_root = root->fs_info->extent_root;
1754         struct btrfs_root_item *ri = &root->root_item;
1755         struct btrfs_extent_inline_ref *iref;
1756         struct btrfs_extent_item *ei;
1757         struct btrfs_key key;
1758         struct btrfs_path *path = NULL;
1759         unsigned long ptr;
1760         unsigned long end;
1761         u64 flags;
1762         u64 owner = 0;
1763         u64 offset;
1764         int slot;
1765         int type;
1766         int ret = 0;
1767
1768         /*
1769          * Except file/reloc tree, we can not have FULL BACKREF MODE
1770          */
1771         if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
1772                 goto normal;
1773
1774         /* root node */
1775         if (eb->start == btrfs_root_bytenr(ri))
1776                 goto normal;
1777
1778         if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
1779                 goto full_backref;
1780
1781         owner = btrfs_header_owner(eb);
1782         if (owner == root->objectid)
1783                 goto normal;
1784
1785         path = btrfs_alloc_path();
1786         if (!path)
1787                 return -ENOMEM;
1788
1789         key.objectid = btrfs_header_bytenr(eb);
1790         key.type = (u8)-1;
1791         key.offset = (u64)-1;
1792
1793         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
1794         if (ret <= 0) {
1795                 ret = -EIO;
1796                 goto out;
1797         }
1798
1799         if (ret > 0) {
1800                 ret = btrfs_previous_extent_item(extent_root, path,
1801                                                  key.objectid);
1802                 if (ret)
1803                         goto full_backref;
1804
1805         }
1806         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1807
1808         eb = path->nodes[0];
1809         slot = path->slots[0];
1810         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
1811
1812         flags = btrfs_extent_flags(eb, ei);
1813         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
1814                 goto full_backref;
1815
1816         ptr = (unsigned long)(ei + 1);
1817         end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
1818
1819         if (key.type == BTRFS_EXTENT_ITEM_KEY)
1820                 ptr += sizeof(struct btrfs_tree_block_info);
1821
1822 next:
1823         /* Reached extent item ends normally */
1824         if (ptr == end)
1825                 goto full_backref;
1826
1827         /* Beyond extent item end, wrong item size */
1828         if (ptr > end) {
1829                 error("extent item at bytenr %llu slot %d has wrong size",
1830                         eb->start, slot);
1831                 goto full_backref;
1832         }
1833
1834         iref = (struct btrfs_extent_inline_ref *)ptr;
1835         offset = btrfs_extent_inline_ref_offset(eb, iref);
1836         type = btrfs_extent_inline_ref_type(eb, iref);
1837
1838         if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
1839                 goto normal;
1840         ptr += btrfs_extent_inline_ref_size(type);
1841         goto next;
1842
1843 normal:
1844         *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
1845         goto out;
1846
1847 full_backref:
1848         *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
1849 out:
1850         btrfs_free_path(path);
1851         return ret;
1852 }
1853
1854 /*
1855  * for a tree node or leaf, we record its reference count, so later if we still
1856  * process this node or leaf, don't need to compute its reference count again.
1857  *
1858  * @bytenr  if @bytenr == (u64)-1, only update nrefs->full_backref[level]
1859  */
1860 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1861                              struct extent_buffer *eb, struct node_refs *nrefs,
1862                              u64 level, int check_all)
1863 {
1864         struct ulist *roots;
1865         u64 refs = 0;
1866         u64 flags = 0;
1867         int root_level = btrfs_header_level(root->node);
1868         int check;
1869         int ret;
1870
1871         if (nrefs->bytenr[level] == bytenr)
1872                 return 0;
1873
1874         if (bytenr != (u64)-1) {
1875                 /* the return value of this function seems a mistake */
1876                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1877                                        level, 1, &refs, &flags);
1878                 /* temporary fix */
1879                 if (ret < 0 && !check_all)
1880                         return ret;
1881
1882                 nrefs->bytenr[level] = bytenr;
1883                 nrefs->refs[level] = refs;
1884                 nrefs->full_backref[level] = 0;
1885                 nrefs->checked[level] = 0;
1886
1887                 if (refs > 1) {
1888                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
1889                                                    0, &roots);
1890                         if (ret)
1891                                 return -EIO;
1892
1893                         check = need_check(root, roots);
1894                         ulist_free(roots);
1895                         nrefs->need_check[level] = check;
1896                 } else {
1897                         if (!check_all) {
1898                                 nrefs->need_check[level] = 1;
1899                         } else {
1900                                 if (level == root_level) {
1901                                         nrefs->need_check[level] = 1;
1902                                 } else {
1903                                         /*
1904                                          * The node refs may have not been
1905                                          * updated if upper needs checking (the
1906                                          * lowest root_objectid) the node can
1907                                          * be checked.
1908                                          */
1909                                         nrefs->need_check[level] =
1910                                                 nrefs->need_check[level + 1];
1911                                 }
1912                         }
1913                 }
1914         }
1915
1916         if (check_all && eb) {
1917                 calc_extent_flag_v2(root, eb, &flags);
1918                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
1919                         nrefs->full_backref[level] = 1;
1920         }
1921
1922         return 0;
1923 }
1924
1925 /*
1926  * @level           if @level == -1 means extent data item
1927  *                  else normal treeblocl.
1928  */
1929 static int should_check_extent_strictly(struct btrfs_root *root,
1930                                         struct node_refs *nrefs, int level)
1931 {
1932         int root_level = btrfs_header_level(root->node);
1933
1934         if (level > root_level || level < -1)
1935                 return 1;
1936         if (level == root_level)
1937                 return 1;
1938         /*
1939          * if the upper node is marked full backref, it should contain shared
1940          * backref of the parent (except owner == root->objectid).
1941          */
1942         while (++level <= root_level)
1943                 if (nrefs->refs[level] > 1)
1944                         return 0;
1945
1946         return 1;
1947 }
1948
1949 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1950                           struct walk_control *wc, int *level,
1951                           struct node_refs *nrefs)
1952 {
1953         enum btrfs_tree_block_status status;
1954         u64 bytenr;
1955         u64 ptr_gen;
1956         struct btrfs_fs_info *fs_info = root->fs_info;
1957         struct extent_buffer *next;
1958         struct extent_buffer *cur;
1959         int ret, err = 0;
1960         u64 refs;
1961
1962         WARN_ON(*level < 0);
1963         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1964
1965         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1966                 refs = nrefs->refs[*level];
1967                 ret = 0;
1968         } else {
1969                 ret = btrfs_lookup_extent_info(NULL, root,
1970                                        path->nodes[*level]->start,
1971                                        *level, 1, &refs, NULL);
1972                 if (ret < 0) {
1973                         err = ret;
1974                         goto out;
1975                 }
1976                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1977                 nrefs->refs[*level] = refs;
1978         }
1979
1980         if (refs > 1) {
1981                 ret = enter_shared_node(root, path->nodes[*level]->start,
1982                                         refs, wc, *level);
1983                 if (ret > 0) {
1984                         err = ret;
1985                         goto out;
1986                 }
1987         }
1988
1989         while (*level >= 0) {
1990                 WARN_ON(*level < 0);
1991                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1992                 cur = path->nodes[*level];
1993
1994                 if (btrfs_header_level(cur) != *level)
1995                         WARN_ON(1);
1996
1997                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1998                         break;
1999                 if (*level == 0) {
2000                         ret = process_one_leaf(root, cur, wc);
2001                         if (ret < 0)
2002                                 err = ret;
2003                         break;
2004                 }
2005                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2006                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2007
2008                 if (bytenr == nrefs->bytenr[*level - 1]) {
2009                         refs = nrefs->refs[*level - 1];
2010                 } else {
2011                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2012                                         *level - 1, 1, &refs, NULL);
2013                         if (ret < 0) {
2014                                 refs = 0;
2015                         } else {
2016                                 nrefs->bytenr[*level - 1] = bytenr;
2017                                 nrefs->refs[*level - 1] = refs;
2018                         }
2019                 }
2020
2021                 if (refs > 1) {
2022                         ret = enter_shared_node(root, bytenr, refs,
2023                                                 wc, *level - 1);
2024                         if (ret > 0) {
2025                                 path->slots[*level]++;
2026                                 continue;
2027                         }
2028                 }
2029
2030                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2031                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2032                         free_extent_buffer(next);
2033                         reada_walk_down(root, cur, path->slots[*level]);
2034                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2035                         if (!extent_buffer_uptodate(next)) {
2036                                 struct btrfs_key node_key;
2037
2038                                 btrfs_node_key_to_cpu(path->nodes[*level],
2039                                                       &node_key,
2040                                                       path->slots[*level]);
2041                                 btrfs_add_corrupt_extent_record(root->fs_info,
2042                                                 &node_key,
2043                                                 path->nodes[*level]->start,
2044                                                 root->fs_info->nodesize,
2045                                                 *level);
2046                                 err = -EIO;
2047                                 goto out;
2048                         }
2049                 }
2050
2051                 ret = check_child_node(cur, path->slots[*level], next);
2052                 if (ret) {
2053                         free_extent_buffer(next);
2054                         err = ret;
2055                         goto out;
2056                 }
2057
2058                 if (btrfs_is_leaf(next))
2059                         status = btrfs_check_leaf(root, NULL, next);
2060                 else
2061                         status = btrfs_check_node(root, NULL, next);
2062                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2063                         free_extent_buffer(next);
2064                         err = -EIO;
2065                         goto out;
2066                 }
2067
2068                 *level = *level - 1;
2069                 free_extent_buffer(path->nodes[*level]);
2070                 path->nodes[*level] = next;
2071                 path->slots[*level] = 0;
2072         }
2073 out:
2074         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2075         return err;
2076 }
2077
2078 /*
2079  * Update global fs information.
2080  */
2081 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2082                          int level)
2083 {
2084         u32 free_nrs;
2085         struct extent_buffer *eb = path->nodes[level];
2086
2087         total_btree_bytes += eb->len;
2088         if (fs_root_objectid(root->objectid))
2089                 total_fs_tree_bytes += eb->len;
2090         if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2091                 total_extent_tree_bytes += eb->len;
2092
2093         if (level == 0) {
2094                 btree_space_waste += btrfs_leaf_free_space(root, eb);
2095         } else {
2096                 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root->fs_info) -
2097                             btrfs_header_nritems(eb));
2098                 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2099         }
2100 }
2101
2102 /*
2103  * This function only handles BACKREF_MISSING,
2104  * If corresponding extent item exists, increase the ref, else insert an extent
2105  * item and backref.
2106  *
2107  * Returns error bits after repair.
2108  */
2109 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2110                                  struct btrfs_root *root,
2111                                  struct extent_buffer *node,
2112                                  struct node_refs *nrefs, int level, int err)
2113 {
2114         struct btrfs_fs_info *fs_info = root->fs_info;
2115         struct btrfs_root *extent_root = fs_info->extent_root;
2116         struct btrfs_path path;
2117         struct btrfs_extent_item *ei;
2118         struct btrfs_tree_block_info *bi;
2119         struct btrfs_key key;
2120         struct extent_buffer *eb;
2121         u32 size = sizeof(*ei);
2122         u32 node_size = root->fs_info->nodesize;
2123         int insert_extent = 0;
2124         int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2125         int root_level = btrfs_header_level(root->node);
2126         int generation;
2127         int ret;
2128         u64 owner;
2129         u64 bytenr;
2130         u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
2131         u64 parent = 0;
2132
2133         if ((err & BACKREF_MISSING) == 0)
2134                 return err;
2135
2136         WARN_ON(level > BTRFS_MAX_LEVEL);
2137         WARN_ON(level < 0);
2138
2139         btrfs_init_path(&path);
2140         bytenr = btrfs_header_bytenr(node);
2141         owner = btrfs_header_owner(node);
2142         generation = btrfs_header_generation(node);
2143
2144         key.objectid = bytenr;
2145         key.type = (u8)-1;
2146         key.offset = (u64)-1;
2147
2148         /* Search for the extent item */
2149         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2150         if (ret <= 0) {
2151                 ret = -EIO;
2152                 goto out;
2153         }
2154
2155         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2156         if (ret)
2157                 insert_extent = 1;
2158
2159         /* calculate if the extent item flag is full backref or not */
2160         if (nrefs->full_backref[level] != 0)
2161                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2162
2163         /* insert an extent item */
2164         if (insert_extent) {
2165                 struct btrfs_disk_key copy_key;
2166
2167                 generation = btrfs_header_generation(node);
2168
2169                 if (level < root_level && nrefs->full_backref[level + 1] &&
2170                     owner != root->objectid) {
2171                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2172                 }
2173
2174                 key.objectid = bytenr;
2175                 if (!skinny_metadata) {
2176                         key.type = BTRFS_EXTENT_ITEM_KEY;
2177                         key.offset = node_size;
2178                         size += sizeof(*bi);
2179                 } else {
2180                         key.type = BTRFS_METADATA_ITEM_KEY;
2181                         key.offset = level;
2182                 }
2183
2184                 btrfs_release_path(&path);
2185                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2186                                               size);
2187                 if (ret)
2188                         goto out;
2189
2190                 eb = path.nodes[0];
2191                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
2192
2193                 btrfs_set_extent_refs(eb, ei, 0);
2194                 btrfs_set_extent_generation(eb, ei, generation);
2195                 btrfs_set_extent_flags(eb, ei, flags);
2196
2197                 if (!skinny_metadata) {
2198                         bi = (struct btrfs_tree_block_info *)(ei + 1);
2199                         memset_extent_buffer(eb, 0, (unsigned long)bi,
2200                                              sizeof(*bi));
2201                         btrfs_set_disk_key_objectid(&copy_key, root->objectid);
2202                         btrfs_set_disk_key_type(&copy_key, 0);
2203                         btrfs_set_disk_key_offset(&copy_key, 0);
2204
2205                         btrfs_set_tree_block_level(eb, bi, level);
2206                         btrfs_set_tree_block_key(eb, bi, &copy_key);
2207                 }
2208                 btrfs_mark_buffer_dirty(eb);
2209                 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2210                 btrfs_update_block_group(extent_root, bytenr, node_size, 1, 0);
2211
2212                 nrefs->refs[level] = 0;
2213                 nrefs->full_backref[level] =
2214                         flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2215                 btrfs_release_path(&path);
2216         }
2217
2218         if (level < root_level && nrefs->full_backref[level + 1] &&
2219             owner != root->objectid)
2220                 parent = nrefs->bytenr[level + 1];
2221
2222         /* increase the ref */
2223         ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2224                         parent, root->objectid, level, 0);
2225
2226         nrefs->refs[level]++;
2227 out:
2228         btrfs_release_path(&path);
2229         if (ret) {
2230                 error(
2231         "failed to repair tree block ref start %llu root %llu due to %s",
2232                       bytenr, root->objectid, strerror(-ret));
2233         } else {
2234                 printf("Added one tree block ref start %llu %s %llu\n",
2235                        bytenr, parent ? "parent" : "root",
2236                        parent ? parent : root->objectid);
2237                 err &= ~BACKREF_MISSING;
2238         }
2239
2240         return err;
2241 }
2242
2243 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2244                             unsigned int ext_ref);
2245 static int check_tree_block_ref(struct btrfs_root *root,
2246                                 struct extent_buffer *eb, u64 bytenr,
2247                                 int level, u64 owner, struct node_refs *nrefs);
2248 static int check_leaf_items(struct btrfs_trans_handle *trans,
2249                             struct btrfs_root *root, struct btrfs_path *path,
2250                             struct node_refs *nrefs, int account_bytes);
2251
2252 /*
2253  * @trans      just for lowmem repair mode
2254  * @check all  if not 0 then check all tree block backrefs and items
2255  *             0 then just check relationship of items in fs tree(s)
2256  *
2257  * Returns >0  Found error, should continue
2258  * Returns <0  Fatal error, must exit the whole check
2259  * Returns 0   No errors found
2260  */
2261 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2262                              struct btrfs_root *root, struct btrfs_path *path,
2263                              int *level, struct node_refs *nrefs, int ext_ref,
2264                              int check_all)
2265
2266 {
2267         enum btrfs_tree_block_status status;
2268         u64 bytenr;
2269         u64 ptr_gen;
2270         struct btrfs_fs_info *fs_info = root->fs_info;
2271         struct extent_buffer *next;
2272         struct extent_buffer *cur;
2273         int ret;
2274         int err = 0;
2275         int check;
2276         int account_file_data = 0;
2277
2278         WARN_ON(*level < 0);
2279         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2280
2281         ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2282                                 path->nodes[*level], nrefs, *level, check_all);
2283         if (ret < 0)
2284                 return ret;
2285
2286         while (*level >= 0) {
2287                 WARN_ON(*level < 0);
2288                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2289                 cur = path->nodes[*level];
2290                 bytenr = btrfs_header_bytenr(cur);
2291                 check = nrefs->need_check[*level];
2292
2293                 if (btrfs_header_level(cur) != *level)
2294                         WARN_ON(1);
2295                /*
2296                 * Update bytes accounting and check tree block ref
2297                 * NOTE: Doing accounting and check before checking nritems
2298                 * is necessary because of empty node/leaf.
2299                 */
2300                 if ((check_all && !nrefs->checked[*level]) ||
2301                     (!check_all && nrefs->need_check[*level])) {
2302                         ret = check_tree_block_ref(root, cur,
2303                            btrfs_header_bytenr(cur), btrfs_header_level(cur),
2304                            btrfs_header_owner(cur), nrefs);
2305
2306                         if (repair && ret)
2307                                 ret = repair_tree_block_ref(trans, root,
2308                                     path->nodes[*level], nrefs, *level, ret);
2309                         err |= ret;
2310
2311                         if (check_all && nrefs->need_check[*level] &&
2312                                 nrefs->refs[*level]) {
2313                                 account_bytes(root, path, *level);
2314                                 account_file_data = 1;
2315                         }
2316                         nrefs->checked[*level] = 1;
2317                 }
2318
2319                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2320                         break;
2321
2322                 /* Don't forgot to check leaf/node validation */
2323                 if (*level == 0) {
2324                         /* skip duplicate check */
2325                         if (check || !check_all) {
2326                                 ret = btrfs_check_leaf(root, NULL, cur);
2327                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2328                                         err |= -EIO;
2329                                         break;
2330                                 }
2331                         }
2332
2333                         ret = 0;
2334                         if (!check_all)
2335                                 ret = process_one_leaf_v2(root, path, nrefs,
2336                                                           level, ext_ref);
2337                         else
2338                                 ret = check_leaf_items(trans, root, path,
2339                                                nrefs, account_file_data);
2340                         err |= ret;
2341                         break;
2342                 } else {
2343                         if (check || !check_all) {
2344                                 ret = btrfs_check_node(root, NULL, cur);
2345                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2346                                         err |= -EIO;
2347                                         break;
2348                                 }
2349                         }
2350                 }
2351
2352                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2353                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2354
2355                 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2356                                         check_all);
2357                 if (ret < 0)
2358                         break;
2359                 /*
2360                  * check all trees in check_chunks_and_extent_v2
2361                  * check shared node once in check_fs_roots
2362                  */
2363                 if (!check_all && !nrefs->need_check[*level - 1]) {
2364                         path->slots[*level]++;
2365                         continue;
2366                 }
2367
2368                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2369                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2370                         free_extent_buffer(next);
2371                         reada_walk_down(root, cur, path->slots[*level]);
2372                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2373                         if (!extent_buffer_uptodate(next)) {
2374                                 struct btrfs_key node_key;
2375
2376                                 btrfs_node_key_to_cpu(path->nodes[*level],
2377                                                       &node_key,
2378                                                       path->slots[*level]);
2379                                 btrfs_add_corrupt_extent_record(fs_info,
2380                                         &node_key, path->nodes[*level]->start,
2381                                         fs_info->nodesize, *level);
2382                                 err |= -EIO;
2383                                 break;
2384                         }
2385                 }
2386
2387                 ret = check_child_node(cur, path->slots[*level], next);
2388                 err |= ret;
2389                 if (ret < 0) 
2390                         break;
2391
2392                 if (btrfs_is_leaf(next))
2393                         status = btrfs_check_leaf(root, NULL, next);
2394                 else
2395                         status = btrfs_check_node(root, NULL, next);
2396                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2397                         free_extent_buffer(next);
2398                         err |= -EIO;
2399                         break;
2400                 }
2401
2402                 *level = *level - 1;
2403                 free_extent_buffer(path->nodes[*level]);
2404                 path->nodes[*level] = next;
2405                 path->slots[*level] = 0;
2406                 account_file_data = 0;
2407
2408                 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
2409         }
2410         return err;
2411 }
2412
2413 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2414                         struct walk_control *wc, int *level)
2415 {
2416         int i;
2417         struct extent_buffer *leaf;
2418
2419         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2420                 leaf = path->nodes[i];
2421                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2422                         path->slots[i]++;
2423                         *level = i;
2424                         return 0;
2425                 } else {
2426                         free_extent_buffer(path->nodes[*level]);
2427                         path->nodes[*level] = NULL;
2428                         BUG_ON(*level > wc->active_node);
2429                         if (*level == wc->active_node)
2430                                 leave_shared_node(root, wc, *level);
2431                         *level = i + 1;
2432                 }
2433         }
2434         return 1;
2435 }
2436
2437 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2438                            int *level)
2439 {
2440         int i;
2441         struct extent_buffer *leaf;
2442
2443         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2444                 leaf = path->nodes[i];
2445                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2446                         path->slots[i]++;
2447                         *level = i;
2448                         return 0;
2449                 } else {
2450                         free_extent_buffer(path->nodes[*level]);
2451                         path->nodes[*level] = NULL;
2452                         *level = i + 1;
2453                 }
2454         }
2455         return 1;
2456 }
2457
2458 static int check_root_dir(struct inode_record *rec)
2459 {
2460         struct inode_backref *backref;
2461         int ret = -1;
2462
2463         if (!rec->found_inode_item || rec->errors)
2464                 goto out;
2465         if (rec->nlink != 1 || rec->found_link != 0)
2466                 goto out;
2467         if (list_empty(&rec->backrefs))
2468                 goto out;
2469         backref = to_inode_backref(rec->backrefs.next);
2470         if (!backref->found_inode_ref)
2471                 goto out;
2472         if (backref->index != 0 || backref->namelen != 2 ||
2473             memcmp(backref->name, "..", 2))
2474                 goto out;
2475         if (backref->found_dir_index || backref->found_dir_item)
2476                 goto out;
2477         ret = 0;
2478 out:
2479         return ret;
2480 }
2481
2482 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2483                               struct btrfs_root *root, struct btrfs_path *path,
2484                               struct inode_record *rec)
2485 {
2486         struct btrfs_inode_item *ei;
2487         struct btrfs_key key;
2488         int ret;
2489
2490         key.objectid = rec->ino;
2491         key.type = BTRFS_INODE_ITEM_KEY;
2492         key.offset = (u64)-1;
2493
2494         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2495         if (ret < 0)
2496                 goto out;
2497         if (ret) {
2498                 if (!path->slots[0]) {
2499                         ret = -ENOENT;
2500                         goto out;
2501                 }
2502                 path->slots[0]--;
2503                 ret = 0;
2504         }
2505         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2506         if (key.objectid != rec->ino) {
2507                 ret = -ENOENT;
2508                 goto out;
2509         }
2510
2511         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2512                             struct btrfs_inode_item);
2513         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2514         btrfs_mark_buffer_dirty(path->nodes[0]);
2515         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2516         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2517                root->root_key.objectid);
2518 out:
2519         btrfs_release_path(path);
2520         return ret;
2521 }
2522
2523 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2524                                     struct btrfs_root *root,
2525                                     struct btrfs_path *path,
2526                                     struct inode_record *rec)
2527 {
2528         int ret;
2529
2530         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2531         btrfs_release_path(path);
2532         if (!ret)
2533                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2534         return ret;
2535 }
2536
2537 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2538                                struct btrfs_root *root,
2539                                struct btrfs_path *path,
2540                                struct inode_record *rec)
2541 {
2542         struct btrfs_inode_item *ei;
2543         struct btrfs_key key;
2544         int ret = 0;
2545
2546         key.objectid = rec->ino;
2547         key.type = BTRFS_INODE_ITEM_KEY;
2548         key.offset = 0;
2549
2550         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2551         if (ret) {
2552                 if (ret > 0)
2553                         ret = -ENOENT;
2554                 goto out;
2555         }
2556
2557         /* Since ret == 0, no need to check anything */
2558         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2559                             struct btrfs_inode_item);
2560         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2561         btrfs_mark_buffer_dirty(path->nodes[0]);
2562         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2563         printf("reset nbytes for ino %llu root %llu\n",
2564                rec->ino, root->root_key.objectid);
2565 out:
2566         btrfs_release_path(path);
2567         return ret;
2568 }
2569
2570 static int add_missing_dir_index(struct btrfs_root *root,
2571                                  struct cache_tree *inode_cache,
2572                                  struct inode_record *rec,
2573                                  struct inode_backref *backref)
2574 {
2575         struct btrfs_path path;
2576         struct btrfs_trans_handle *trans;
2577         struct btrfs_dir_item *dir_item;
2578         struct extent_buffer *leaf;
2579         struct btrfs_key key;
2580         struct btrfs_disk_key disk_key;
2581         struct inode_record *dir_rec;
2582         unsigned long name_ptr;
2583         u32 data_size = sizeof(*dir_item) + backref->namelen;
2584         int ret;
2585
2586         trans = btrfs_start_transaction(root, 1);
2587         if (IS_ERR(trans))
2588                 return PTR_ERR(trans);
2589
2590         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2591                 (unsigned long long)rec->ino);
2592
2593         btrfs_init_path(&path);
2594         key.objectid = backref->dir;
2595         key.type = BTRFS_DIR_INDEX_KEY;
2596         key.offset = backref->index;
2597         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2598         BUG_ON(ret);
2599
2600         leaf = path.nodes[0];
2601         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2602
2603         disk_key.objectid = cpu_to_le64(rec->ino);
2604         disk_key.type = BTRFS_INODE_ITEM_KEY;
2605         disk_key.offset = 0;
2606
2607         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2608         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2609         btrfs_set_dir_data_len(leaf, dir_item, 0);
2610         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2611         name_ptr = (unsigned long)(dir_item + 1);
2612         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2613         btrfs_mark_buffer_dirty(leaf);
2614         btrfs_release_path(&path);
2615         btrfs_commit_transaction(trans, root);
2616
2617         backref->found_dir_index = 1;
2618         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2619         BUG_ON(IS_ERR(dir_rec));
2620         if (!dir_rec)
2621                 return 0;
2622         dir_rec->found_size += backref->namelen;
2623         if (dir_rec->found_size == dir_rec->isize &&
2624             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2625                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2626         if (dir_rec->found_size != dir_rec->isize)
2627                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2628
2629         return 0;
2630 }
2631
2632 static int delete_dir_index(struct btrfs_root *root,
2633                             struct inode_backref *backref)
2634 {
2635         struct btrfs_trans_handle *trans;
2636         struct btrfs_dir_item *di;
2637         struct btrfs_path path;
2638         int ret = 0;
2639
2640         trans = btrfs_start_transaction(root, 1);
2641         if (IS_ERR(trans))
2642                 return PTR_ERR(trans);
2643
2644         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2645                 (unsigned long long)backref->dir,
2646                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2647                 (unsigned long long)root->objectid);
2648
2649         btrfs_init_path(&path);
2650         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2651                                     backref->name, backref->namelen,
2652                                     backref->index, -1);
2653         if (IS_ERR(di)) {
2654                 ret = PTR_ERR(di);
2655                 btrfs_release_path(&path);
2656                 btrfs_commit_transaction(trans, root);
2657                 if (ret == -ENOENT)
2658                         return 0;
2659                 return ret;
2660         }
2661
2662         if (!di)
2663                 ret = btrfs_del_item(trans, root, &path);
2664         else
2665                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2666         BUG_ON(ret);
2667         btrfs_release_path(&path);
2668         btrfs_commit_transaction(trans, root);
2669         return ret;
2670 }
2671
2672 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2673                                     struct btrfs_root *root, u64 ino,
2674                                     u8 filetype)
2675 {
2676         u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2677
2678         return insert_inode_item(trans, root, ino, 0, 0, 0, mode);
2679 }
2680
2681 static int create_inode_item(struct btrfs_root *root,
2682                              struct inode_record *rec, int root_dir)
2683 {
2684         struct btrfs_trans_handle *trans;
2685         u64 nlink = 0;
2686         u32 mode = 0;
2687         u64 size = 0;
2688         int ret;
2689
2690         trans = btrfs_start_transaction(root, 1);
2691         if (IS_ERR(trans)) {
2692                 ret = PTR_ERR(trans);
2693                 return ret;
2694         }
2695
2696         nlink = root_dir ? 1 : rec->found_link;
2697         if (rec->found_dir_item) {
2698                 if (rec->found_file_extent)
2699                         fprintf(stderr, "root %llu inode %llu has both a dir "
2700                                 "item and extents, unsure if it is a dir or a "
2701                                 "regular file so setting it as a directory\n",
2702                                 (unsigned long long)root->objectid,
2703                                 (unsigned long long)rec->ino);
2704                 mode = S_IFDIR | 0755;
2705                 size = rec->found_size;
2706         } else if (!rec->found_dir_item) {
2707                 size = rec->extent_end;
2708                 mode =  S_IFREG | 0755;
2709         }
2710
2711         ret = insert_inode_item(trans, root, rec->ino, size, rec->nbytes,
2712                                   nlink, mode);
2713         btrfs_commit_transaction(trans, root);
2714         return 0;
2715 }
2716
2717 static int repair_inode_backrefs(struct btrfs_root *root,
2718                                  struct inode_record *rec,
2719                                  struct cache_tree *inode_cache,
2720                                  int delete)
2721 {
2722         struct inode_backref *tmp, *backref;
2723         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2724         int ret = 0;
2725         int repaired = 0;
2726
2727         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2728                 if (!delete && rec->ino == root_dirid) {
2729                         if (!rec->found_inode_item) {
2730                                 ret = create_inode_item(root, rec, 1);
2731                                 if (ret)
2732                                         break;
2733                                 repaired++;
2734                         }
2735                 }
2736
2737                 /* Index 0 for root dir's are special, don't mess with it */
2738                 if (rec->ino == root_dirid && backref->index == 0)
2739                         continue;
2740
2741                 if (delete &&
2742                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2743                      (backref->found_dir_index && backref->found_inode_ref &&
2744                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2745                         ret = delete_dir_index(root, backref);
2746                         if (ret)
2747                                 break;
2748                         repaired++;
2749                         list_del(&backref->list);
2750                         free(backref);
2751                         continue;
2752                 }
2753
2754                 if (!delete && !backref->found_dir_index &&
2755                     backref->found_dir_item && backref->found_inode_ref) {
2756                         ret = add_missing_dir_index(root, inode_cache, rec,
2757                                                     backref);
2758                         if (ret)
2759                                 break;
2760                         repaired++;
2761                         if (backref->found_dir_item &&
2762                             backref->found_dir_index) {
2763                                 if (!backref->errors &&
2764                                     backref->found_inode_ref) {
2765                                         list_del(&backref->list);
2766                                         free(backref);
2767                                         continue;
2768                                 }
2769                         }
2770                 }
2771
2772                 if (!delete && (!backref->found_dir_index &&
2773                                 !backref->found_dir_item &&
2774                                 backref->found_inode_ref)) {
2775                         struct btrfs_trans_handle *trans;
2776                         struct btrfs_key location;
2777
2778                         ret = check_dir_conflict(root, backref->name,
2779                                                  backref->namelen,
2780                                                  backref->dir,
2781                                                  backref->index);
2782                         if (ret) {
2783                                 /*
2784                                  * let nlink fixing routine to handle it,
2785                                  * which can do it better.
2786                                  */
2787                                 ret = 0;
2788                                 break;
2789                         }
2790                         location.objectid = rec->ino;
2791                         location.type = BTRFS_INODE_ITEM_KEY;
2792                         location.offset = 0;
2793
2794                         trans = btrfs_start_transaction(root, 1);
2795                         if (IS_ERR(trans)) {
2796                                 ret = PTR_ERR(trans);
2797                                 break;
2798                         }
2799                         fprintf(stderr, "adding missing dir index/item pair "
2800                                 "for inode %llu\n",
2801                                 (unsigned long long)rec->ino);
2802                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2803                                                     backref->namelen,
2804                                                     backref->dir, &location,
2805                                                     imode_to_type(rec->imode),
2806                                                     backref->index);
2807                         BUG_ON(ret);
2808                         btrfs_commit_transaction(trans, root);
2809                         repaired++;
2810                 }
2811
2812                 if (!delete && (backref->found_inode_ref &&
2813                                 backref->found_dir_index &&
2814                                 backref->found_dir_item &&
2815                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2816                                 !rec->found_inode_item)) {
2817                         ret = create_inode_item(root, rec, 0);
2818                         if (ret)
2819                                 break;
2820                         repaired++;
2821                 }
2822
2823         }
2824         return ret ? ret : repaired;
2825 }
2826
2827 /*
2828  * To determine the file type for nlink/inode_item repair
2829  *
2830  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2831  * Return -ENOENT if file type is not found.
2832  */
2833 static int find_file_type(struct inode_record *rec, u8 *type)
2834 {
2835         struct inode_backref *backref;
2836
2837         /* For inode item recovered case */
2838         if (rec->found_inode_item) {
2839                 *type = imode_to_type(rec->imode);
2840                 return 0;
2841         }
2842
2843         list_for_each_entry(backref, &rec->backrefs, list) {
2844                 if (backref->found_dir_index || backref->found_dir_item) {
2845                         *type = backref->filetype;
2846                         return 0;
2847                 }
2848         }
2849         return -ENOENT;
2850 }
2851
2852 /*
2853  * To determine the file name for nlink repair
2854  *
2855  * Return 0 if file name is found, set name and namelen.
2856  * Return -ENOENT if file name is not found.
2857  */
2858 static int find_file_name(struct inode_record *rec,
2859                           char *name, int *namelen)
2860 {
2861         struct inode_backref *backref;
2862
2863         list_for_each_entry(backref, &rec->backrefs, list) {
2864                 if (backref->found_dir_index || backref->found_dir_item ||
2865                     backref->found_inode_ref) {
2866                         memcpy(name, backref->name, backref->namelen);
2867                         *namelen = backref->namelen;
2868                         return 0;
2869                 }
2870         }
2871         return -ENOENT;
2872 }
2873
2874 /* Reset the nlink of the inode to the correct one */
2875 static int reset_nlink(struct btrfs_trans_handle *trans,
2876                        struct btrfs_root *root,
2877                        struct btrfs_path *path,
2878                        struct inode_record *rec)
2879 {
2880         struct inode_backref *backref;
2881         struct inode_backref *tmp;
2882         struct btrfs_key key;
2883         struct btrfs_inode_item *inode_item;
2884         int ret = 0;
2885
2886         /* We don't believe this either, reset it and iterate backref */
2887         rec->found_link = 0;
2888
2889         /* Remove all backref including the valid ones */
2890         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2891                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2892                                    backref->index, backref->name,
2893                                    backref->namelen, 0);
2894                 if (ret < 0)
2895                         goto out;
2896
2897                 /* remove invalid backref, so it won't be added back */
2898                 if (!(backref->found_dir_index &&
2899                       backref->found_dir_item &&
2900                       backref->found_inode_ref)) {
2901                         list_del(&backref->list);
2902                         free(backref);
2903                 } else {
2904                         rec->found_link++;
2905                 }
2906         }
2907
2908         /* Set nlink to 0 */
2909         key.objectid = rec->ino;
2910         key.type = BTRFS_INODE_ITEM_KEY;
2911         key.offset = 0;
2912         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2913         if (ret < 0)
2914                 goto out;
2915         if (ret > 0) {
2916                 ret = -ENOENT;
2917                 goto out;
2918         }
2919         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2920                                     struct btrfs_inode_item);
2921         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2922         btrfs_mark_buffer_dirty(path->nodes[0]);
2923         btrfs_release_path(path);
2924
2925         /*
2926          * Add back valid inode_ref/dir_item/dir_index,
2927          * add_link() will handle the nlink inc, so new nlink must be correct
2928          */
2929         list_for_each_entry(backref, &rec->backrefs, list) {
2930                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2931                                      backref->name, backref->namelen,
2932                                      backref->filetype, &backref->index, 1, 0);
2933                 if (ret < 0)
2934                         goto out;
2935         }
2936 out:
2937         btrfs_release_path(path);
2938         return ret;
2939 }
2940
2941 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2942                                struct btrfs_root *root,
2943                                struct btrfs_path *path,
2944                                struct inode_record *rec)
2945 {
2946         char namebuf[BTRFS_NAME_LEN] = {0};
2947         u8 type = 0;
2948         int namelen = 0;
2949         int name_recovered = 0;
2950         int type_recovered = 0;
2951         int ret = 0;
2952
2953         /*
2954          * Get file name and type first before these invalid inode ref
2955          * are deleted by remove_all_invalid_backref()
2956          */
2957         name_recovered = !find_file_name(rec, namebuf, &namelen);
2958         type_recovered = !find_file_type(rec, &type);
2959
2960         if (!name_recovered) {
2961                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2962                        rec->ino, rec->ino);
2963                 namelen = count_digits(rec->ino);
2964                 sprintf(namebuf, "%llu", rec->ino);
2965                 name_recovered = 1;
2966         }
2967         if (!type_recovered) {
2968                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2969                        rec->ino);
2970                 type = BTRFS_FT_REG_FILE;
2971                 type_recovered = 1;
2972         }
2973
2974         ret = reset_nlink(trans, root, path, rec);
2975         if (ret < 0) {
2976                 fprintf(stderr,
2977                         "Failed to reset nlink for inode %llu: %s\n",
2978                         rec->ino, strerror(-ret));
2979                 goto out;
2980         }
2981
2982         if (rec->found_link == 0) {
2983                 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
2984                                               namebuf, namelen, type,
2985                                               (u64 *)&rec->found_link);
2986                 if (ret)
2987                         goto out;
2988         }
2989         printf("Fixed the nlink of inode %llu\n", rec->ino);
2990 out:
2991         /*
2992          * Clear the flag anyway, or we will loop forever for the same inode
2993          * as it will not be removed from the bad inode list and the dead loop
2994          * happens.
2995          */
2996         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2997         btrfs_release_path(path);
2998         return ret;
2999 }
3000
3001 /*
3002  * Check if there is any normal(reg or prealloc) file extent for given
3003  * ino.
3004  * This is used to determine the file type when neither its dir_index/item or
3005  * inode_item exists.
3006  *
3007  * This will *NOT* report error, if any error happens, just consider it does
3008  * not have any normal file extent.
3009  */
3010 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3011 {
3012         struct btrfs_path path;
3013         struct btrfs_key key;
3014         struct btrfs_key found_key;
3015         struct btrfs_file_extent_item *fi;
3016         u8 type;
3017         int ret = 0;
3018
3019         btrfs_init_path(&path);
3020         key.objectid = ino;
3021         key.type = BTRFS_EXTENT_DATA_KEY;
3022         key.offset = 0;
3023
3024         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3025         if (ret < 0) {
3026                 ret = 0;
3027                 goto out;
3028         }
3029         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3030                 ret = btrfs_next_leaf(root, &path);
3031                 if (ret) {
3032                         ret = 0;
3033                         goto out;
3034                 }
3035         }
3036         while (1) {
3037                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3038                                       path.slots[0]);
3039                 if (found_key.objectid != ino ||
3040                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3041                         break;
3042                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3043                                     struct btrfs_file_extent_item);
3044                 type = btrfs_file_extent_type(path.nodes[0], fi);
3045                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3046                         ret = 1;
3047                         goto out;
3048                 }
3049         }
3050 out:
3051         btrfs_release_path(&path);
3052         return ret;
3053 }
3054
3055 static u32 btrfs_type_to_imode(u8 type)
3056 {
3057         static u32 imode_by_btrfs_type[] = {
3058                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3059                 [BTRFS_FT_DIR]          = S_IFDIR,
3060                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3061                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3062                 [BTRFS_FT_FIFO]         = S_IFIFO,
3063                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3064                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3065         };
3066
3067         return imode_by_btrfs_type[(type)];
3068 }
3069
3070 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3071                                 struct btrfs_root *root,
3072                                 struct btrfs_path *path,
3073                                 struct inode_record *rec)
3074 {
3075         u8 filetype;
3076         u32 mode = 0700;
3077         int type_recovered = 0;
3078         int ret = 0;
3079
3080         printf("Trying to rebuild inode:%llu\n", rec->ino);
3081
3082         type_recovered = !find_file_type(rec, &filetype);
3083
3084         /*
3085          * Try to determine inode type if type not found.
3086          *
3087          * For found regular file extent, it must be FILE.
3088          * For found dir_item/index, it must be DIR.
3089          *
3090          * For undetermined one, use FILE as fallback.
3091          *
3092          * TODO:
3093          * 1. If found backref(inode_index/item is already handled) to it,
3094          *    it must be DIR.
3095          *    Need new inode-inode ref structure to allow search for that.
3096          */
3097         if (!type_recovered) {
3098                 if (rec->found_file_extent &&
3099                     find_normal_file_extent(root, rec->ino)) {
3100                         type_recovered = 1;
3101                         filetype = BTRFS_FT_REG_FILE;
3102                 } else if (rec->found_dir_item) {
3103                         type_recovered = 1;
3104                         filetype = BTRFS_FT_DIR;
3105                 } else if (!list_empty(&rec->orphan_extents)) {
3106                         type_recovered = 1;
3107                         filetype = BTRFS_FT_REG_FILE;
3108                 } else{
3109                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3110                                rec->ino);
3111                         type_recovered = 1;
3112                         filetype = BTRFS_FT_REG_FILE;
3113                 }
3114         }
3115
3116         ret = btrfs_new_inode(trans, root, rec->ino,
3117                               mode | btrfs_type_to_imode(filetype));
3118         if (ret < 0)
3119                 goto out;
3120
3121         /*
3122          * Here inode rebuild is done, we only rebuild the inode item,
3123          * don't repair the nlink(like move to lost+found).
3124          * That is the job of nlink repair.
3125          *
3126          * We just fill the record and return
3127          */
3128         rec->found_dir_item = 1;
3129         rec->imode = mode | btrfs_type_to_imode(filetype);
3130         rec->nlink = 0;
3131         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3132         /* Ensure the inode_nlinks repair function will be called */
3133         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3134 out:
3135         return ret;
3136 }
3137
3138 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3139                                       struct btrfs_root *root,
3140                                       struct btrfs_path *path,
3141                                       struct inode_record *rec)
3142 {
3143         struct orphan_data_extent *orphan;
3144         struct orphan_data_extent *tmp;
3145         int ret = 0;
3146
3147         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3148                 /*
3149                  * Check for conflicting file extents
3150                  *
3151                  * Here we don't know whether the extents is compressed or not,
3152                  * so we can only assume it not compressed nor data offset,
3153                  * and use its disk_len as extent length.
3154                  */
3155                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3156                                        orphan->offset, orphan->disk_len, 0);
3157                 btrfs_release_path(path);
3158                 if (ret < 0)
3159                         goto out;
3160                 if (!ret) {
3161                         fprintf(stderr,
3162                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3163                                 orphan->disk_bytenr, orphan->disk_len);
3164                         ret = btrfs_free_extent(trans,
3165                                         root->fs_info->extent_root,
3166                                         orphan->disk_bytenr, orphan->disk_len,
3167                                         0, root->objectid, orphan->objectid,
3168                                         orphan->offset);
3169                         if (ret < 0)
3170                                 goto out;
3171                 }
3172                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3173                                 orphan->offset, orphan->disk_bytenr,
3174                                 orphan->disk_len, orphan->disk_len);
3175                 if (ret < 0)
3176                         goto out;
3177
3178                 /* Update file size info */
3179                 rec->found_size += orphan->disk_len;
3180                 if (rec->found_size == rec->nbytes)
3181                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3182
3183                 /* Update the file extent hole info too */
3184                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3185                                            orphan->disk_len);
3186                 if (ret < 0)
3187                         goto out;
3188                 if (RB_EMPTY_ROOT(&rec->holes))
3189                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3190
3191                 list_del(&orphan->list);
3192                 free(orphan);
3193         }
3194         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3195 out:
3196         return ret;
3197 }
3198
3199 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3200                                         struct btrfs_root *root,
3201                                         struct btrfs_path *path,
3202                                         struct inode_record *rec)
3203 {
3204         struct rb_node *node;
3205         struct file_extent_hole *hole;
3206         int found = 0;
3207         int ret = 0;
3208
3209         node = rb_first(&rec->holes);
3210
3211         while (node) {
3212                 found = 1;
3213                 hole = rb_entry(node, struct file_extent_hole, node);
3214                 ret = btrfs_punch_hole(trans, root, rec->ino,
3215                                        hole->start, hole->len);
3216                 if (ret < 0)
3217                         goto out;
3218                 ret = del_file_extent_hole(&rec->holes, hole->start,
3219                                            hole->len);
3220                 if (ret < 0)
3221                         goto out;
3222                 if (RB_EMPTY_ROOT(&rec->holes))
3223                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3224                 node = rb_first(&rec->holes);
3225         }
3226         /* special case for a file losing all its file extent */
3227         if (!found) {
3228                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3229                                        round_up(rec->isize,
3230                                                 root->fs_info->sectorsize));
3231                 if (ret < 0)
3232                         goto out;
3233         }
3234         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3235                rec->ino, root->objectid);
3236 out:
3237         return ret;
3238 }
3239
3240 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3241 {
3242         struct btrfs_trans_handle *trans;
3243         struct btrfs_path path;
3244         int ret = 0;
3245
3246         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3247                              I_ERR_NO_ORPHAN_ITEM |
3248                              I_ERR_LINK_COUNT_WRONG |
3249                              I_ERR_NO_INODE_ITEM |
3250                              I_ERR_FILE_EXTENT_ORPHAN |
3251                              I_ERR_FILE_EXTENT_DISCOUNT|
3252                              I_ERR_FILE_NBYTES_WRONG)))
3253                 return rec->errors;
3254
3255         /*
3256          * For nlink repair, it may create a dir and add link, so
3257          * 2 for parent(256)'s dir_index and dir_item
3258          * 2 for lost+found dir's inode_item and inode_ref
3259          * 1 for the new inode_ref of the file
3260          * 2 for lost+found dir's dir_index and dir_item for the file
3261          */
3262         trans = btrfs_start_transaction(root, 7);
3263         if (IS_ERR(trans))
3264                 return PTR_ERR(trans);
3265
3266         btrfs_init_path(&path);
3267         if (rec->errors & I_ERR_NO_INODE_ITEM)
3268                 ret = repair_inode_no_item(trans, root, &path, rec);
3269         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3270                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3271         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3272                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3273         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3274                 ret = repair_inode_isize(trans, root, &path, rec);
3275         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3276                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3277         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3278                 ret = repair_inode_nlinks(trans, root, &path, rec);
3279         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3280                 ret = repair_inode_nbytes(trans, root, &path, rec);
3281         btrfs_commit_transaction(trans, root);
3282         btrfs_release_path(&path);
3283         return ret;
3284 }
3285
3286 static int check_inode_recs(struct btrfs_root *root,
3287                             struct cache_tree *inode_cache)
3288 {
3289         struct cache_extent *cache;
3290         struct ptr_node *node;
3291         struct inode_record *rec;
3292         struct inode_backref *backref;
3293         int stage = 0;
3294         int ret = 0;
3295         int err = 0;
3296         u64 error = 0;
3297         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3298
3299         if (btrfs_root_refs(&root->root_item) == 0) {
3300                 if (!cache_tree_empty(inode_cache))
3301                         fprintf(stderr, "warning line %d\n", __LINE__);
3302                 return 0;
3303         }
3304
3305         /*
3306          * We need to repair backrefs first because we could change some of the
3307          * errors in the inode recs.
3308          *
3309          * We also need to go through and delete invalid backrefs first and then
3310          * add the correct ones second.  We do this because we may get EEXIST
3311          * when adding back the correct index because we hadn't yet deleted the
3312          * invalid index.
3313          *
3314          * For example, if we were missing a dir index then the directories
3315          * isize would be wrong, so if we fixed the isize to what we thought it
3316          * would be and then fixed the backref we'd still have a invalid fs, so
3317          * we need to add back the dir index and then check to see if the isize
3318          * is still wrong.
3319          */
3320         while (stage < 3) {
3321                 stage++;
3322                 if (stage == 3 && !err)
3323                         break;
3324
3325                 cache = search_cache_extent(inode_cache, 0);
3326                 while (repair && cache) {
3327                         node = container_of(cache, struct ptr_node, cache);
3328                         rec = node->data;
3329                         cache = next_cache_extent(cache);
3330
3331                         /* Need to free everything up and rescan */
3332                         if (stage == 3) {
3333                                 remove_cache_extent(inode_cache, &node->cache);
3334                                 free(node);
3335                                 free_inode_rec(rec);
3336                                 continue;
3337                         }
3338
3339                         if (list_empty(&rec->backrefs))
3340                                 continue;
3341
3342                         ret = repair_inode_backrefs(root, rec, inode_cache,
3343                                                     stage == 1);
3344                         if (ret < 0) {
3345                                 err = ret;
3346                                 stage = 2;
3347                                 break;
3348                         } if (ret > 0) {
3349                                 err = -EAGAIN;
3350                         }
3351                 }
3352         }
3353         if (err)
3354                 return err;
3355
3356         rec = get_inode_rec(inode_cache, root_dirid, 0);
3357         BUG_ON(IS_ERR(rec));
3358         if (rec) {
3359                 ret = check_root_dir(rec);
3360                 if (ret) {
3361                         fprintf(stderr, "root %llu root dir %llu error\n",
3362                                 (unsigned long long)root->root_key.objectid,
3363                                 (unsigned long long)root_dirid);
3364                         print_inode_error(root, rec);
3365                         error++;
3366                 }
3367         } else {
3368                 if (repair) {
3369                         struct btrfs_trans_handle *trans;
3370
3371                         trans = btrfs_start_transaction(root, 1);
3372                         if (IS_ERR(trans)) {
3373                                 err = PTR_ERR(trans);
3374                                 return err;
3375                         }
3376
3377                         fprintf(stderr,
3378                                 "root %llu missing its root dir, recreating\n",
3379                                 (unsigned long long)root->objectid);
3380
3381                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3382                         BUG_ON(ret);
3383
3384                         btrfs_commit_transaction(trans, root);
3385                         return -EAGAIN;
3386                 }
3387
3388                 fprintf(stderr, "root %llu root dir %llu not found\n",
3389                         (unsigned long long)root->root_key.objectid,
3390                         (unsigned long long)root_dirid);
3391         }
3392
3393         while (1) {
3394                 cache = search_cache_extent(inode_cache, 0);
3395                 if (!cache)
3396                         break;
3397                 node = container_of(cache, struct ptr_node, cache);
3398                 rec = node->data;
3399                 remove_cache_extent(inode_cache, &node->cache);
3400                 free(node);
3401                 if (rec->ino == root_dirid ||
3402                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3403                         free_inode_rec(rec);
3404                         continue;
3405                 }
3406
3407                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3408                         ret = check_orphan_item(root, rec->ino);
3409                         if (ret == 0)
3410                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3411                         if (can_free_inode_rec(rec)) {
3412                                 free_inode_rec(rec);
3413                                 continue;
3414                         }
3415                 }
3416
3417                 if (!rec->found_inode_item)
3418                         rec->errors |= I_ERR_NO_INODE_ITEM;
3419                 if (rec->found_link != rec->nlink)
3420                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3421                 if (repair) {
3422                         ret = try_repair_inode(root, rec);
3423                         if (ret == 0 && can_free_inode_rec(rec)) {
3424                                 free_inode_rec(rec);
3425                                 continue;
3426                         }
3427                         ret = 0;
3428                 }
3429
3430                 if (!(repair && ret == 0))
3431                         error++;
3432                 print_inode_error(root, rec);
3433                 list_for_each_entry(backref, &rec->backrefs, list) {
3434                         if (!backref->found_dir_item)
3435                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3436                         if (!backref->found_dir_index)
3437                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3438                         if (!backref->found_inode_ref)
3439                                 backref->errors |= REF_ERR_NO_INODE_REF;
3440                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3441                                 " namelen %u name %s filetype %d errors %x",
3442                                 (unsigned long long)backref->dir,
3443                                 (unsigned long long)backref->index,
3444                                 backref->namelen, backref->name,
3445                                 backref->filetype, backref->errors);
3446                         print_ref_error(backref->errors);
3447                 }
3448                 free_inode_rec(rec);
3449         }
3450         return (error > 0) ? -1 : 0;
3451 }
3452
3453 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3454                                         u64 objectid)
3455 {
3456         struct cache_extent *cache;
3457         struct root_record *rec = NULL;
3458         int ret;
3459
3460         cache = lookup_cache_extent(root_cache, objectid, 1);
3461         if (cache) {
3462                 rec = container_of(cache, struct root_record, cache);
3463         } else {
3464                 rec = calloc(1, sizeof(*rec));
3465                 if (!rec)
3466                         return ERR_PTR(-ENOMEM);
3467                 rec->objectid = objectid;
3468                 INIT_LIST_HEAD(&rec->backrefs);
3469                 rec->cache.start = objectid;
3470                 rec->cache.size = 1;
3471
3472                 ret = insert_cache_extent(root_cache, &rec->cache);
3473                 if (ret)
3474                         return ERR_PTR(-EEXIST);
3475         }
3476         return rec;
3477 }
3478
3479 static struct root_backref *get_root_backref(struct root_record *rec,
3480                                              u64 ref_root, u64 dir, u64 index,
3481                                              const char *name, int namelen)
3482 {
3483         struct root_backref *backref;
3484
3485         list_for_each_entry(backref, &rec->backrefs, list) {
3486                 if (backref->ref_root != ref_root || backref->dir != dir ||
3487                     backref->namelen != namelen)
3488                         continue;
3489                 if (memcmp(name, backref->name, namelen))
3490                         continue;
3491                 return backref;
3492         }
3493
3494         backref = calloc(1, sizeof(*backref) + namelen + 1);
3495         if (!backref)
3496                 return NULL;
3497         backref->ref_root = ref_root;
3498         backref->dir = dir;
3499         backref->index = index;
3500         backref->namelen = namelen;
3501         memcpy(backref->name, name, namelen);
3502         backref->name[namelen] = '\0';
3503         list_add_tail(&backref->list, &rec->backrefs);
3504         return backref;
3505 }
3506
3507 static void free_root_record(struct cache_extent *cache)
3508 {
3509         struct root_record *rec;
3510         struct root_backref *backref;
3511
3512         rec = container_of(cache, struct root_record, cache);
3513         while (!list_empty(&rec->backrefs)) {
3514                 backref = to_root_backref(rec->backrefs.next);
3515                 list_del(&backref->list);
3516                 free(backref);
3517         }
3518
3519         free(rec);
3520 }
3521
3522 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3523
3524 static int add_root_backref(struct cache_tree *root_cache,
3525                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3526                             const char *name, int namelen,
3527                             int item_type, int errors)
3528 {
3529         struct root_record *rec;
3530         struct root_backref *backref;
3531
3532         rec = get_root_rec(root_cache, root_id);
3533         BUG_ON(IS_ERR(rec));
3534         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3535         BUG_ON(!backref);
3536
3537         backref->errors |= errors;
3538
3539         if (item_type != BTRFS_DIR_ITEM_KEY) {
3540                 if (backref->found_dir_index || backref->found_back_ref ||
3541                     backref->found_forward_ref) {
3542                         if (backref->index != index)
3543                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3544                 } else {
3545                         backref->index = index;
3546                 }
3547         }
3548
3549         if (item_type == BTRFS_DIR_ITEM_KEY) {
3550                 if (backref->found_forward_ref)
3551                         rec->found_ref++;
3552                 backref->found_dir_item = 1;
3553         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3554                 backref->found_dir_index = 1;
3555         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3556                 if (backref->found_forward_ref)
3557                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3558                 else if (backref->found_dir_item)
3559                         rec->found_ref++;
3560                 backref->found_forward_ref = 1;
3561         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3562                 if (backref->found_back_ref)
3563                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3564                 backref->found_back_ref = 1;
3565         } else {
3566                 BUG_ON(1);
3567         }
3568
3569         if (backref->found_forward_ref && backref->found_dir_item)
3570                 backref->reachable = 1;
3571         return 0;
3572 }
3573
3574 static int merge_root_recs(struct btrfs_root *root,
3575                            struct cache_tree *src_cache,
3576                            struct cache_tree *dst_cache)
3577 {
3578         struct cache_extent *cache;
3579         struct ptr_node *node;
3580         struct inode_record *rec;
3581         struct inode_backref *backref;
3582         int ret = 0;
3583
3584         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3585                 free_inode_recs_tree(src_cache);
3586                 return 0;
3587         }
3588
3589         while (1) {
3590                 cache = search_cache_extent(src_cache, 0);
3591                 if (!cache)
3592                         break;
3593                 node = container_of(cache, struct ptr_node, cache);
3594                 rec = node->data;
3595                 remove_cache_extent(src_cache, &node->cache);
3596                 free(node);
3597
3598                 ret = is_child_root(root, root->objectid, rec->ino);
3599                 if (ret < 0)
3600                         break;
3601                 else if (ret == 0)
3602                         goto skip;
3603
3604                 list_for_each_entry(backref, &rec->backrefs, list) {
3605                         BUG_ON(backref->found_inode_ref);
3606                         if (backref->found_dir_item)
3607                                 add_root_backref(dst_cache, rec->ino,
3608                                         root->root_key.objectid, backref->dir,
3609                                         backref->index, backref->name,
3610                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3611                                         backref->errors);
3612                         if (backref->found_dir_index)
3613                                 add_root_backref(dst_cache, rec->ino,
3614                                         root->root_key.objectid, backref->dir,
3615                                         backref->index, backref->name,
3616                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3617                                         backref->errors);
3618                 }
3619 skip:
3620                 free_inode_rec(rec);
3621         }
3622         if (ret < 0)
3623                 return ret;
3624         return 0;
3625 }
3626
3627 static int check_root_refs(struct btrfs_root *root,
3628                            struct cache_tree *root_cache)
3629 {
3630         struct root_record *rec;
3631         struct root_record *ref_root;
3632         struct root_backref *backref;
3633         struct cache_extent *cache;
3634         int loop = 1;
3635         int ret;
3636         int error;
3637         int errors = 0;
3638
3639         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3640         BUG_ON(IS_ERR(rec));
3641         rec->found_ref = 1;
3642
3643         /* fixme: this can not detect circular references */
3644         while (loop) {
3645                 loop = 0;
3646                 cache = search_cache_extent(root_cache, 0);
3647                 while (1) {
3648                         if (!cache)
3649                                 break;
3650                         rec = container_of(cache, struct root_record, cache);
3651                         cache = next_cache_extent(cache);
3652
3653                         if (rec->found_ref == 0)
3654                                 continue;
3655
3656                         list_for_each_entry(backref, &rec->backrefs, list) {
3657                                 if (!backref->reachable)
3658                                         continue;
3659
3660                                 ref_root = get_root_rec(root_cache,
3661                                                         backref->ref_root);
3662                                 BUG_ON(IS_ERR(ref_root));
3663                                 if (ref_root->found_ref > 0)
3664                                         continue;
3665
3666                                 backref->reachable = 0;
3667                                 rec->found_ref--;
3668                                 if (rec->found_ref == 0)
3669                                         loop = 1;
3670                         }
3671                 }
3672         }
3673
3674         cache = search_cache_extent(root_cache, 0);
3675         while (1) {
3676                 if (!cache)
3677                         break;
3678                 rec = container_of(cache, struct root_record, cache);
3679                 cache = next_cache_extent(cache);
3680
3681                 if (rec->found_ref == 0 &&
3682                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3683                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3684                         ret = check_orphan_item(root->fs_info->tree_root,
3685                                                 rec->objectid);
3686                         if (ret == 0)
3687                                 continue;
3688
3689                         /*
3690                          * If we don't have a root item then we likely just have
3691                          * a dir item in a snapshot for this root but no actual
3692                          * ref key or anything so it's meaningless.
3693                          */
3694                         if (!rec->found_root_item)
3695                                 continue;
3696                         errors++;
3697                         fprintf(stderr, "fs tree %llu not referenced\n",
3698                                 (unsigned long long)rec->objectid);
3699                 }
3700
3701                 error = 0;
3702                 if (rec->found_ref > 0 && !rec->found_root_item)
3703                         error = 1;
3704                 list_for_each_entry(backref, &rec->backrefs, list) {
3705                         if (!backref->found_dir_item)
3706                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3707                         if (!backref->found_dir_index)
3708                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3709                         if (!backref->found_back_ref)
3710                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3711                         if (!backref->found_forward_ref)
3712                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3713                         if (backref->reachable && backref->errors)
3714                                 error = 1;
3715                 }
3716                 if (!error)
3717                         continue;
3718
3719                 errors++;
3720                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3721                         (unsigned long long)rec->objectid, rec->found_ref,
3722                          rec->found_root_item ? "" : "not found");
3723
3724                 list_for_each_entry(backref, &rec->backrefs, list) {
3725                         if (!backref->reachable)
3726                                 continue;
3727                         if (!backref->errors && rec->found_root_item)
3728                                 continue;
3729                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3730                                 " index %llu namelen %u name %s errors %x\n",
3731                                 (unsigned long long)backref->ref_root,
3732                                 (unsigned long long)backref->dir,
3733                                 (unsigned long long)backref->index,
3734                                 backref->namelen, backref->name,
3735                                 backref->errors);
3736                         print_ref_error(backref->errors);
3737                 }
3738         }
3739         return errors > 0 ? 1 : 0;
3740 }
3741
3742 static int process_root_ref(struct extent_buffer *eb, int slot,
3743                             struct btrfs_key *key,
3744                             struct cache_tree *root_cache)
3745 {
3746         u64 dirid;
3747         u64 index;
3748         u32 len;
3749         u32 name_len;
3750         struct btrfs_root_ref *ref;
3751         char namebuf[BTRFS_NAME_LEN];
3752         int error;
3753
3754         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3755
3756         dirid = btrfs_root_ref_dirid(eb, ref);
3757         index = btrfs_root_ref_sequence(eb, ref);
3758         name_len = btrfs_root_ref_name_len(eb, ref);
3759
3760         if (name_len <= BTRFS_NAME_LEN) {
3761                 len = name_len;
3762                 error = 0;
3763         } else {
3764                 len = BTRFS_NAME_LEN;
3765                 error = REF_ERR_NAME_TOO_LONG;
3766         }
3767         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3768
3769         if (key->type == BTRFS_ROOT_REF_KEY) {
3770                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3771                                  index, namebuf, len, key->type, error);
3772         } else {
3773                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3774                                  index, namebuf, len, key->type, error);
3775         }
3776         return 0;
3777 }
3778
3779 static void free_corrupt_block(struct cache_extent *cache)
3780 {
3781         struct btrfs_corrupt_block *corrupt;
3782
3783         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3784         free(corrupt);
3785 }
3786
3787 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3788
3789 /*
3790  * Repair the btree of the given root.
3791  *
3792  * The fix is to remove the node key in corrupt_blocks cache_tree.
3793  * and rebalance the tree.
3794  * After the fix, the btree should be writeable.
3795  */
3796 static int repair_btree(struct btrfs_root *root,
3797                         struct cache_tree *corrupt_blocks)
3798 {
3799         struct btrfs_trans_handle *trans;
3800         struct btrfs_path path;
3801         struct btrfs_corrupt_block *corrupt;
3802         struct cache_extent *cache;
3803         struct btrfs_key key;
3804         u64 offset;
3805         int level;
3806         int ret = 0;
3807
3808         if (cache_tree_empty(corrupt_blocks))
3809                 return 0;
3810
3811         trans = btrfs_start_transaction(root, 1);
3812         if (IS_ERR(trans)) {
3813                 ret = PTR_ERR(trans);
3814                 fprintf(stderr, "Error starting transaction: %s\n",
3815                         strerror(-ret));
3816                 return ret;
3817         }
3818         btrfs_init_path(&path);
3819         cache = first_cache_extent(corrupt_blocks);
3820         while (cache) {
3821                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3822                                        cache);
3823                 level = corrupt->level;
3824                 path.lowest_level = level;
3825                 key.objectid = corrupt->key.objectid;
3826                 key.type = corrupt->key.type;
3827                 key.offset = corrupt->key.offset;
3828
3829                 /*
3830                  * Here we don't want to do any tree balance, since it may
3831                  * cause a balance with corrupted brother leaf/node,
3832                  * so ins_len set to 0 here.
3833                  * Balance will be done after all corrupt node/leaf is deleted.
3834                  */
3835                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3836                 if (ret < 0)
3837                         goto out;
3838                 offset = btrfs_node_blockptr(path.nodes[level],
3839                                              path.slots[level]);
3840
3841                 /* Remove the ptr */
3842                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3843                 if (ret < 0)
3844                         goto out;
3845                 /*
3846                  * Remove the corresponding extent
3847                  * return value is not concerned.
3848                  */
3849                 btrfs_release_path(&path);
3850                 ret = btrfs_free_extent(trans, root, offset,
3851                                 root->fs_info->nodesize, 0,
3852                                 root->root_key.objectid, level - 1, 0);
3853                 cache = next_cache_extent(cache);
3854         }
3855
3856         /* Balance the btree using btrfs_search_slot() */
3857         cache = first_cache_extent(corrupt_blocks);
3858         while (cache) {
3859                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3860                                        cache);
3861                 memcpy(&key, &corrupt->key, sizeof(key));
3862                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3863                 if (ret < 0)
3864                         goto out;
3865                 /* return will always >0 since it won't find the item */
3866                 ret = 0;
3867                 btrfs_release_path(&path);
3868                 cache = next_cache_extent(cache);
3869         }
3870 out:
3871         btrfs_commit_transaction(trans, root);
3872         btrfs_release_path(&path);
3873         return ret;
3874 }
3875
3876 static int check_fs_root(struct btrfs_root *root,
3877                          struct cache_tree *root_cache,
3878                          struct walk_control *wc)
3879 {
3880         int ret = 0;
3881         int err = 0;
3882         int wret;
3883         int level;
3884         struct btrfs_path path;
3885         struct shared_node root_node;
3886         struct root_record *rec;
3887         struct btrfs_root_item *root_item = &root->root_item;
3888         struct cache_tree corrupt_blocks;
3889         struct orphan_data_extent *orphan;
3890         struct orphan_data_extent *tmp;
3891         enum btrfs_tree_block_status status;
3892         struct node_refs nrefs;
3893
3894         /*
3895          * Reuse the corrupt_block cache tree to record corrupted tree block
3896          *
3897          * Unlike the usage in extent tree check, here we do it in a per
3898          * fs/subvol tree base.
3899          */
3900         cache_tree_init(&corrupt_blocks);
3901         root->fs_info->corrupt_blocks = &corrupt_blocks;
3902
3903         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3904                 rec = get_root_rec(root_cache, root->root_key.objectid);
3905                 BUG_ON(IS_ERR(rec));
3906                 if (btrfs_root_refs(root_item) > 0)
3907                         rec->found_root_item = 1;
3908         }
3909
3910         btrfs_init_path(&path);
3911         memset(&root_node, 0, sizeof(root_node));
3912         cache_tree_init(&root_node.root_cache);
3913         cache_tree_init(&root_node.inode_cache);
3914         memset(&nrefs, 0, sizeof(nrefs));
3915
3916         /* Move the orphan extent record to corresponding inode_record */
3917         list_for_each_entry_safe(orphan, tmp,
3918                                  &root->orphan_data_extents, list) {
3919                 struct inode_record *inode;
3920
3921                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3922                                       1);
3923                 BUG_ON(IS_ERR(inode));
3924                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3925                 list_move(&orphan->list, &inode->orphan_extents);
3926         }
3927
3928         level = btrfs_header_level(root->node);
3929         memset(wc->nodes, 0, sizeof(wc->nodes));
3930         wc->nodes[level] = &root_node;
3931         wc->active_node = level;
3932         wc->root_level = level;
3933
3934         /* We may not have checked the root block, lets do that now */
3935         if (btrfs_is_leaf(root->node))
3936                 status = btrfs_check_leaf(root, NULL, root->node);
3937         else
3938                 status = btrfs_check_node(root, NULL, root->node);
3939         if (status != BTRFS_TREE_BLOCK_CLEAN)
3940                 return -EIO;
3941
3942         if (btrfs_root_refs(root_item) > 0 ||
3943             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3944                 path.nodes[level] = root->node;
3945                 extent_buffer_get(root->node);
3946                 path.slots[level] = 0;
3947         } else {
3948                 struct btrfs_key key;
3949                 struct btrfs_disk_key found_key;
3950
3951                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3952                 level = root_item->drop_level;
3953                 path.lowest_level = level;
3954                 if (level > btrfs_header_level(root->node) ||
3955                     level >= BTRFS_MAX_LEVEL) {
3956                         error("ignoring invalid drop level: %u", level);
3957                         goto skip_walking;
3958                 }
3959                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3960                 if (wret < 0)
3961                         goto skip_walking;
3962                 btrfs_node_key(path.nodes[level], &found_key,
3963                                 path.slots[level]);
3964                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3965                                         sizeof(found_key)));
3966         }
3967
3968         while (1) {
3969                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3970                 if (wret < 0)
3971                         ret = wret;
3972                 if (wret != 0)
3973                         break;
3974
3975                 wret = walk_up_tree(root, &path, wc, &level);
3976                 if (wret < 0)
3977                         ret = wret;
3978                 if (wret != 0)
3979                         break;
3980         }
3981 skip_walking:
3982         btrfs_release_path(&path);
3983
3984         if (!cache_tree_empty(&corrupt_blocks)) {
3985                 struct cache_extent *cache;
3986                 struct btrfs_corrupt_block *corrupt;
3987
3988                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3989                        root->root_key.objectid);
3990                 cache = first_cache_extent(&corrupt_blocks);
3991                 while (cache) {
3992                         corrupt = container_of(cache,
3993                                                struct btrfs_corrupt_block,
3994                                                cache);
3995                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3996                                cache->start, corrupt->level,
3997                                corrupt->key.objectid, corrupt->key.type,
3998                                corrupt->key.offset);
3999                         cache = next_cache_extent(cache);
4000                 }
4001                 if (repair) {
4002                         printf("Try to repair the btree for root %llu\n",
4003                                root->root_key.objectid);
4004                         ret = repair_btree(root, &corrupt_blocks);
4005                         if (ret < 0)
4006                                 fprintf(stderr, "Failed to repair btree: %s\n",
4007                                         strerror(-ret));
4008                         if (!ret)
4009                                 printf("Btree for root %llu is fixed\n",
4010                                        root->root_key.objectid);
4011                 }
4012         }
4013
4014         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4015         if (err < 0)
4016                 ret = err;
4017
4018         if (root_node.current) {
4019                 root_node.current->checked = 1;
4020                 maybe_free_inode_rec(&root_node.inode_cache,
4021                                 root_node.current);
4022         }
4023
4024         err = check_inode_recs(root, &root_node.inode_cache);
4025         if (!ret)
4026                 ret = err;
4027
4028         free_corrupt_blocks_tree(&corrupt_blocks);
4029         root->fs_info->corrupt_blocks = NULL;
4030         free_orphan_data_extents(&root->orphan_data_extents);
4031         return ret;
4032 }
4033
4034 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4035                           struct cache_tree *root_cache)
4036 {
4037         struct btrfs_path path;
4038         struct btrfs_key key;
4039         struct walk_control wc;
4040         struct extent_buffer *leaf, *tree_node;
4041         struct btrfs_root *tmp_root;
4042         struct btrfs_root *tree_root = fs_info->tree_root;
4043         int ret;
4044         int err = 0;
4045
4046         if (ctx.progress_enabled) {
4047                 ctx.tp = TASK_FS_ROOTS;
4048                 task_start(ctx.info);
4049         }
4050
4051         /*
4052          * Just in case we made any changes to the extent tree that weren't
4053          * reflected into the free space cache yet.
4054          */
4055         if (repair)
4056                 reset_cached_block_groups(fs_info);
4057         memset(&wc, 0, sizeof(wc));
4058         cache_tree_init(&wc.shared);
4059         btrfs_init_path(&path);
4060
4061 again:
4062         key.offset = 0;
4063         key.objectid = 0;
4064         key.type = BTRFS_ROOT_ITEM_KEY;
4065         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4066         if (ret < 0) {
4067                 err = 1;
4068                 goto out;
4069         }
4070         tree_node = tree_root->node;
4071         while (1) {
4072                 if (tree_node != tree_root->node) {
4073                         free_root_recs_tree(root_cache);
4074                         btrfs_release_path(&path);
4075                         goto again;
4076                 }
4077                 leaf = path.nodes[0];
4078                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4079                         ret = btrfs_next_leaf(tree_root, &path);
4080                         if (ret) {
4081                                 if (ret < 0)
4082                                         err = 1;
4083                                 break;
4084                         }
4085                         leaf = path.nodes[0];
4086                 }
4087                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4088                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4089                     fs_root_objectid(key.objectid)) {
4090                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4091                                 tmp_root = btrfs_read_fs_root_no_cache(
4092                                                 fs_info, &key);
4093                         } else {
4094                                 key.offset = (u64)-1;
4095                                 tmp_root = btrfs_read_fs_root(
4096                                                 fs_info, &key);
4097                         }
4098                         if (IS_ERR(tmp_root)) {
4099                                 err = 1;
4100                                 goto next;
4101                         }
4102                         ret = check_fs_root(tmp_root, root_cache, &wc);
4103                         if (ret == -EAGAIN) {
4104                                 free_root_recs_tree(root_cache);
4105                                 btrfs_release_path(&path);
4106                                 goto again;
4107                         }
4108                         if (ret)
4109                                 err = 1;
4110                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4111                                 btrfs_free_fs_root(tmp_root);
4112                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4113                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4114                         process_root_ref(leaf, path.slots[0], &key,
4115                                          root_cache);
4116                 }
4117 next:
4118                 path.slots[0]++;
4119         }
4120 out:
4121         btrfs_release_path(&path);
4122         if (err)
4123                 free_extent_cache_tree(&wc.shared);
4124         if (!cache_tree_empty(&wc.shared))
4125                 fprintf(stderr, "warning line %d\n", __LINE__);
4126
4127         task_stop(ctx.info);
4128
4129         return err;
4130 }
4131
4132 /*
4133  * Find the @index according by @ino and name.
4134  * Notice:time efficiency is O(N)
4135  *
4136  * @root:       the root of the fs/file tree
4137  * @index_ret:  the index as return value
4138  * @namebuf:    the name to match
4139  * @name_len:   the length of name to match
4140  * @file_type:  the file_type of INODE_ITEM to match
4141  *
4142  * Returns 0 if found and *@index_ret will be modified with right value
4143  * Returns< 0 not found and *@index_ret will be (u64)-1
4144  */
4145 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4146                           u64 *index_ret, char *namebuf, u32 name_len,
4147                           u8 file_type)
4148 {
4149         struct btrfs_path path;
4150         struct extent_buffer *node;
4151         struct btrfs_dir_item *di;
4152         struct btrfs_key key;
4153         struct btrfs_key location;
4154         char name[BTRFS_NAME_LEN] = {0};
4155
4156         u32 total;
4157         u32 cur = 0;
4158         u32 len;
4159         u32 data_len;
4160         u8 filetype;
4161         int slot;
4162         int ret;
4163
4164         ASSERT(index_ret);
4165
4166         /* search from the last index */
4167         key.objectid = dirid;
4168         key.offset = (u64)-1;
4169         key.type = BTRFS_DIR_INDEX_KEY;
4170
4171         btrfs_init_path(&path);
4172         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4173         if (ret < 0)
4174                 return ret;
4175
4176 loop:
4177         ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4178         if (ret) {
4179                 ret = -ENOENT;
4180                 *index_ret = (64)-1;
4181                 goto out;
4182         }
4183         /* Check whether inode_id/filetype/name match */
4184         node = path.nodes[0];
4185         slot = path.slots[0];
4186         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4187         total = btrfs_item_size_nr(node, slot);
4188         while (cur < total) {
4189                 ret = -ENOENT;
4190                 len = btrfs_dir_name_len(node, di);
4191                 data_len = btrfs_dir_data_len(node, di);
4192
4193                 btrfs_dir_item_key_to_cpu(node, di, &location);
4194                 if (location.objectid != location_id ||
4195                     location.type != BTRFS_INODE_ITEM_KEY ||
4196                     location.offset != 0)
4197                         goto next;
4198
4199                 filetype = btrfs_dir_type(node, di);
4200                 if (file_type != filetype)
4201                         goto next;
4202
4203                 if (len > BTRFS_NAME_LEN)
4204                         len = BTRFS_NAME_LEN;
4205
4206                 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4207                 if (len != name_len || strncmp(namebuf, name, len))
4208                         goto next;
4209
4210                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4211                 *index_ret = key.offset;
4212                 ret = 0;
4213                 goto out;
4214 next:
4215                 len += sizeof(*di) + data_len;
4216                 di = (struct btrfs_dir_item *)((char *)di + len);
4217                 cur += len;
4218         }
4219         goto loop;
4220
4221 out:
4222         btrfs_release_path(&path);
4223         return ret;
4224 }
4225
4226 /*
4227  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4228  * INODE_REF/INODE_EXTREF match.
4229  *
4230  * @root:       the root of the fs/file tree
4231  * @key:        the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4232  *              value while find index
4233  * @location_key: location key of the struct btrfs_dir_item to match
4234  * @name:       the name to match
4235  * @namelen:    the length of name
4236  * @file_type:  the type of file to math
4237  *
4238  * Return 0 if no error occurred.
4239  * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4240  * DIR_ITEM/DIR_INDEX
4241  * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4242  * and DIR_ITEM/DIR_INDEX mismatch
4243  */
4244 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4245                          struct btrfs_key *location_key, char *name,
4246                          u32 namelen, u8 file_type)
4247 {
4248         struct btrfs_path path;
4249         struct extent_buffer *node;
4250         struct btrfs_dir_item *di;
4251         struct btrfs_key location;
4252         char namebuf[BTRFS_NAME_LEN] = {0};
4253         u32 total;
4254         u32 cur = 0;
4255         u32 len;
4256         u32 data_len;
4257         u8 filetype;
4258         int slot;
4259         int ret;
4260
4261         /* get the index by traversing all index */
4262         if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4263                 ret = find_dir_index(root, key->objectid,
4264                                      location_key->objectid, &key->offset,
4265                                      name, namelen, file_type);
4266                 if (ret)
4267                         ret = DIR_INDEX_MISSING;
4268                 return ret;
4269         }
4270
4271         btrfs_init_path(&path);
4272         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4273         if (ret) {
4274                 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4275                         DIR_INDEX_MISSING;
4276                 goto out;
4277         }
4278
4279         /* Check whether inode_id/filetype/name match */
4280         node = path.nodes[0];
4281         slot = path.slots[0];
4282         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4283         total = btrfs_item_size_nr(node, slot);
4284         while (cur < total) {
4285                 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4286                         DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4287
4288                 len = btrfs_dir_name_len(node, di);
4289                 data_len = btrfs_dir_data_len(node, di);
4290
4291                 btrfs_dir_item_key_to_cpu(node, di, &location);
4292                 if (location.objectid != location_key->objectid ||
4293                     location.type != location_key->type ||
4294                     location.offset != location_key->offset)
4295                         goto next;
4296
4297                 filetype = btrfs_dir_type(node, di);
4298                 if (file_type != filetype)
4299                         goto next;
4300
4301                 if (len > BTRFS_NAME_LEN) {
4302                         len = BTRFS_NAME_LEN;
4303                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4304                         root->objectid,
4305                         key->type == BTRFS_DIR_ITEM_KEY ?
4306                         "DIR_ITEM" : "DIR_INDEX",
4307                         key->objectid, key->offset, len);
4308                 }
4309                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4310                                    len);
4311                 if (len != namelen || strncmp(namebuf, name, len))
4312                         goto next;
4313
4314                 ret = 0;
4315                 goto out;
4316 next:
4317                 len += sizeof(*di) + data_len;
4318                 di = (struct btrfs_dir_item *)((char *)di + len);
4319                 cur += len;
4320         }
4321
4322 out:
4323         btrfs_release_path(&path);
4324         return ret;
4325 }
4326
4327 /*
4328  * Prints inode ref error message
4329  */
4330 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4331                                 u64 index, const char *namebuf, int name_len,
4332                                 u8 filetype, int err)
4333 {
4334         if (!err)
4335                 return;
4336
4337         /* root dir error */
4338         if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4339                 error(
4340         "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4341                       root->objectid, key->objectid, key->offset, namebuf);
4342                 return;
4343         }
4344
4345         /* normal error */
4346         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4347                 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4348                       root->objectid, key->offset,
4349                       btrfs_name_hash(namebuf, name_len),
4350                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4351                       namebuf, filetype);
4352         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4353                 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4354                       root->objectid, key->offset, index,
4355                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4356                       namebuf, filetype);
4357 }
4358
4359 /*
4360  * Insert the missing inode item.
4361  *
4362  * Returns 0 means success.
4363  * Returns <0 means error.
4364  */
4365 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4366                                      u8 filetype)
4367 {
4368         struct btrfs_key key;
4369         struct btrfs_trans_handle *trans;
4370         struct btrfs_path path;
4371         int ret;
4372
4373         key.objectid = ino;
4374         key.type = BTRFS_INODE_ITEM_KEY;
4375         key.offset = 0;
4376
4377         btrfs_init_path(&path);
4378         trans = btrfs_start_transaction(root, 1);
4379         if (IS_ERR(trans)) {
4380                 ret = -EIO;
4381                 goto out;
4382         }
4383
4384         ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4385         if (ret < 0 || !ret)
4386                 goto fail;
4387
4388         /* insert inode item */
4389         create_inode_item_lowmem(trans, root, ino, filetype);
4390         ret = 0;
4391 fail:
4392         btrfs_commit_transaction(trans, root);
4393 out:
4394         if (ret)
4395                 error("failed to repair root %llu INODE ITEM[%llu] missing",
4396                       root->objectid, ino);
4397         btrfs_release_path(&path);
4398         return ret;
4399 }
4400
4401 /*
4402  * The ternary means dir item, dir index and relative inode ref.
4403  * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4404  * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4405  * strategy:
4406  * If two of three is missing or mismatched, delete the existing one.
4407  * If one of three is missing or mismatched, add the missing one.
4408  *
4409  * returns 0 means success.
4410  * returns not 0 means on error;
4411  */
4412 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4413                           u64 index, char *name, int name_len, u8 filetype,
4414                           int err)
4415 {
4416         struct btrfs_trans_handle *trans;
4417         int stage = 0;
4418         int ret = 0;
4419
4420         /*
4421          * stage shall be one of following valild values:
4422          *      0: Fine, nothing to do.
4423          *      1: One of three is wrong, so add missing one.
4424          *      2: Two of three is wrong, so delete existed one.
4425          */
4426         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4427                 stage++;
4428         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4429                 stage++;
4430         if (err & (INODE_REF_MISSING))
4431                 stage++;
4432
4433         /* stage must be smllarer than 3 */
4434         ASSERT(stage < 3);
4435
4436         trans = btrfs_start_transaction(root, 1);
4437         if (stage == 2) {
4438                 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4439                                    name_len, 0);
4440                 goto out;
4441         }
4442         if (stage == 1) {
4443                 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4444                                filetype, &index, 1, 1);
4445                 goto out;
4446         }
4447 out:
4448         btrfs_commit_transaction(trans, root);
4449
4450         if (ret)
4451                 error("fail to repair inode %llu name %s filetype %u",
4452                       ino, name, filetype);
4453         else
4454                 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4455                        stage == 2 ? "Delete" : "Add",
4456                        ino, name, filetype);
4457
4458         return ret;
4459 }
4460
4461 /*
4462  * Traverse the given INODE_REF and call find_dir_item() to find related
4463  * DIR_ITEM/DIR_INDEX.
4464  *
4465  * @root:       the root of the fs/file tree
4466  * @ref_key:    the key of the INODE_REF
4467  * @path        the path provides node and slot
4468  * @refs:       the count of INODE_REF
4469  * @mode:       the st_mode of INODE_ITEM
4470  * @name_ret:   returns with the first ref's name
4471  * @name_len_ret:    len of the name_ret
4472  *
4473  * Return 0 if no error occurred.
4474  */
4475 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4476                            struct btrfs_path *path, char *name_ret,
4477                            u32 *namelen_ret, u64 *refs_ret, int mode)
4478 {
4479         struct btrfs_key key;
4480         struct btrfs_key location;
4481         struct btrfs_inode_ref *ref;
4482         struct extent_buffer *node;
4483         char namebuf[BTRFS_NAME_LEN] = {0};
4484         u32 total;
4485         u32 cur = 0;
4486         u32 len;
4487         u32 name_len;
4488         u64 index;
4489         int ret;
4490         int err = 0;
4491         int tmp_err;
4492         int slot;
4493         int need_research = 0;
4494         u64 refs;
4495
4496 begin:
4497         err = 0;
4498         cur = 0;
4499         refs = *refs_ret;
4500
4501         /* since after repair, path and the dir item may be changed */
4502         if (need_research) {
4503                 need_research = 0;
4504                 btrfs_release_path(path);
4505                 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4506                 /* the item was deleted, let path point to the last checked item */
4507                 if (ret > 0) {
4508                         if (path->slots[0] == 0)
4509                                 btrfs_prev_leaf(root, path);
4510                         else
4511                                 path->slots[0]--;
4512                 }
4513                 if (ret)
4514                         goto out;
4515         }
4516
4517         location.objectid = ref_key->objectid;
4518         location.type = BTRFS_INODE_ITEM_KEY;
4519         location.offset = 0;
4520         node = path->nodes[0];
4521         slot = path->slots[0];
4522
4523         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4524         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4525         total = btrfs_item_size_nr(node, slot);
4526
4527 next:
4528         /* Update inode ref count */
4529         refs++;
4530         tmp_err = 0;
4531         index = btrfs_inode_ref_index(node, ref);
4532         name_len = btrfs_inode_ref_name_len(node, ref);
4533
4534         if (name_len <= BTRFS_NAME_LEN) {
4535                 len = name_len;
4536         } else {
4537                 len = BTRFS_NAME_LEN;
4538                 warning("root %llu INODE_REF[%llu %llu] name too long",
4539                         root->objectid, ref_key->objectid, ref_key->offset);
4540         }
4541
4542         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4543
4544         /* copy the first name found to name_ret */
4545         if (refs == 1 && name_ret) {
4546                 memcpy(name_ret, namebuf, len);
4547                 *namelen_ret = len;
4548         }
4549
4550         /* Check root dir ref */
4551         if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4552                 if (index != 0 || len != strlen("..") ||
4553                     strncmp("..", namebuf, len) ||
4554                     ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4555                         /* set err bits then repair will delete the ref */
4556                         err |= DIR_INDEX_MISSING;
4557                         err |= DIR_ITEM_MISSING;
4558                 }
4559                 goto end;
4560         }
4561
4562         /* Find related DIR_INDEX */
4563         key.objectid = ref_key->offset;
4564         key.type = BTRFS_DIR_INDEX_KEY;
4565         key.offset = index;
4566         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4567                             imode_to_type(mode));
4568
4569         /* Find related dir_item */
4570         key.objectid = ref_key->offset;
4571         key.type = BTRFS_DIR_ITEM_KEY;
4572         key.offset = btrfs_name_hash(namebuf, len);
4573         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4574                             imode_to_type(mode));
4575 end:
4576         if (tmp_err && repair) {
4577                 ret = repair_ternary_lowmem(root, ref_key->offset,
4578                                             ref_key->objectid, index, namebuf,
4579                                             name_len, imode_to_type(mode),
4580                                             tmp_err);
4581                 if (!ret) {
4582                         need_research = 1;
4583                         goto begin;
4584                 }
4585         }
4586         print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4587                             imode_to_type(mode), tmp_err);
4588         err |= tmp_err;
4589         len = sizeof(*ref) + name_len;
4590         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4591         cur += len;
4592         if (cur < total)
4593                 goto next;
4594
4595 out:
4596         *refs_ret = refs;
4597         return err;
4598 }
4599
4600 /*
4601  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4602  * DIR_ITEM/DIR_INDEX.
4603  *
4604  * @root:       the root of the fs/file tree
4605  * @ref_key:    the key of the INODE_EXTREF
4606  * @refs:       the count of INODE_EXTREF
4607  * @mode:       the st_mode of INODE_ITEM
4608  *
4609  * Return 0 if no error occurred.
4610  */
4611 static int check_inode_extref(struct btrfs_root *root,
4612                               struct btrfs_key *ref_key,
4613                               struct extent_buffer *node, int slot, u64 *refs,
4614                               int mode)
4615 {
4616         struct btrfs_key key;
4617         struct btrfs_key location;
4618         struct btrfs_inode_extref *extref;
4619         char namebuf[BTRFS_NAME_LEN] = {0};
4620         u32 total;
4621         u32 cur = 0;
4622         u32 len;
4623         u32 name_len;
4624         u64 index;
4625         u64 parent;
4626         int ret;
4627         int err = 0;
4628
4629         location.objectid = ref_key->objectid;
4630         location.type = BTRFS_INODE_ITEM_KEY;
4631         location.offset = 0;
4632
4633         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4634         total = btrfs_item_size_nr(node, slot);
4635
4636 next:
4637         /* update inode ref count */
4638         (*refs)++;
4639         name_len = btrfs_inode_extref_name_len(node, extref);
4640         index = btrfs_inode_extref_index(node, extref);
4641         parent = btrfs_inode_extref_parent(node, extref);
4642         if (name_len <= BTRFS_NAME_LEN) {
4643                 len = name_len;
4644         } else {
4645                 len = BTRFS_NAME_LEN;
4646                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4647                         root->objectid, ref_key->objectid, ref_key->offset);
4648         }
4649         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4650
4651         /* Check root dir ref name */
4652         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4653                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4654                       root->objectid, ref_key->objectid, ref_key->offset,
4655                       namebuf);
4656                 err |= ROOT_DIR_ERROR;
4657         }
4658
4659         /* find related dir_index */
4660         key.objectid = parent;
4661         key.type = BTRFS_DIR_INDEX_KEY;
4662         key.offset = index;
4663         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4664         err |= ret;
4665
4666         /* find related dir_item */
4667         key.objectid = parent;
4668         key.type = BTRFS_DIR_ITEM_KEY;
4669         key.offset = btrfs_name_hash(namebuf, len);
4670         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4671         err |= ret;
4672
4673         len = sizeof(*extref) + name_len;
4674         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4675         cur += len;
4676
4677         if (cur < total)
4678                 goto next;
4679
4680         return err;
4681 }
4682
4683 /*
4684  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4685  * DIR_ITEM/DIR_INDEX match.
4686  * Return with @index_ret.
4687  *
4688  * @root:       the root of the fs/file tree
4689  * @key:        the key of the INODE_REF/INODE_EXTREF
4690  * @name:       the name in the INODE_REF/INODE_EXTREF
4691  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4692  * @index_ret:  the index in the INODE_REF/INODE_EXTREF,
4693  *              value (64)-1 means do not check index
4694  * @ext_ref:    the EXTENDED_IREF feature
4695  *
4696  * Return 0 if no error occurred.
4697  * Return >0 for error bitmap
4698  */
4699 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4700                           char *name, int namelen, u64 *index_ret,
4701                           unsigned int ext_ref)
4702 {
4703         struct btrfs_path path;
4704         struct btrfs_inode_ref *ref;
4705         struct btrfs_inode_extref *extref;
4706         struct extent_buffer *node;
4707         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4708         u32 total;
4709         u32 cur = 0;
4710         u32 len;
4711         u32 ref_namelen;
4712         u64 ref_index;
4713         u64 parent;
4714         u64 dir_id;
4715         int slot;
4716         int ret;
4717
4718         ASSERT(index_ret);
4719
4720         btrfs_init_path(&path);
4721         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4722         if (ret) {
4723                 ret = INODE_REF_MISSING;
4724                 goto extref;
4725         }
4726
4727         node = path.nodes[0];
4728         slot = path.slots[0];
4729
4730         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4731         total = btrfs_item_size_nr(node, slot);
4732
4733         /* Iterate all entry of INODE_REF */
4734         while (cur < total) {
4735                 ret = INODE_REF_MISSING;
4736
4737                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4738                 ref_index = btrfs_inode_ref_index(node, ref);
4739                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4740                         goto next_ref;
4741
4742                 if (cur + sizeof(*ref) + ref_namelen > total ||
4743                     ref_namelen > BTRFS_NAME_LEN) {
4744                         warning("root %llu INODE %s[%llu %llu] name too long",
4745                                 root->objectid,
4746                                 key->type == BTRFS_INODE_REF_KEY ?
4747                                         "REF" : "EXTREF",
4748                                 key->objectid, key->offset);
4749
4750                         if (cur + sizeof(*ref) > total)
4751                                 break;
4752                         len = min_t(u32, total - cur - sizeof(*ref),
4753                                     BTRFS_NAME_LEN);
4754                 } else {
4755                         len = ref_namelen;
4756                 }
4757
4758                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4759                                    len);
4760
4761                 if (len != namelen || strncmp(ref_namebuf, name, len))
4762                         goto next_ref;
4763
4764                 *index_ret = ref_index;
4765                 ret = 0;
4766                 goto out;
4767 next_ref:
4768                 len = sizeof(*ref) + ref_namelen;
4769                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4770                 cur += len;
4771         }
4772
4773 extref:
4774         /* Skip if not support EXTENDED_IREF feature */
4775         if (!ext_ref)
4776                 goto out;
4777
4778         btrfs_release_path(&path);
4779         btrfs_init_path(&path);
4780
4781         dir_id = key->offset;
4782         key->type = BTRFS_INODE_EXTREF_KEY;
4783         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4784
4785         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4786         if (ret) {
4787                 ret = INODE_REF_MISSING;
4788                 goto out;
4789         }
4790
4791         node = path.nodes[0];
4792         slot = path.slots[0];
4793
4794         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4795         cur = 0;
4796         total = btrfs_item_size_nr(node, slot);
4797
4798         /* Iterate all entry of INODE_EXTREF */
4799         while (cur < total) {
4800                 ret = INODE_REF_MISSING;
4801
4802                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4803                 ref_index = btrfs_inode_extref_index(node, extref);
4804                 parent = btrfs_inode_extref_parent(node, extref);
4805                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4806                         goto next_extref;
4807
4808                 if (parent != dir_id)
4809                         goto next_extref;
4810
4811                 if (ref_namelen <= BTRFS_NAME_LEN) {
4812                         len = ref_namelen;
4813                 } else {
4814                         len = BTRFS_NAME_LEN;
4815                         warning("root %llu INODE %s[%llu %llu] name too long",
4816                                 root->objectid,
4817                                 key->type == BTRFS_INODE_REF_KEY ?
4818                                         "REF" : "EXTREF",
4819                                 key->objectid, key->offset);
4820                 }
4821                 read_extent_buffer(node, ref_namebuf,
4822                                    (unsigned long)(extref + 1), len);
4823
4824                 if (len != namelen || strncmp(ref_namebuf, name, len))
4825                         goto next_extref;
4826
4827                 *index_ret = ref_index;
4828                 ret = 0;
4829                 goto out;
4830
4831 next_extref:
4832                 len = sizeof(*extref) + ref_namelen;
4833                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4834                 cur += len;
4835
4836         }
4837 out:
4838         btrfs_release_path(&path);
4839         return ret;
4840 }
4841
4842 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
4843                                u64 ino, u64 index, const char *namebuf,
4844                                int name_len, u8 filetype, int err)
4845 {
4846         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
4847                 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
4848                       root->objectid, key->objectid, key->offset, namebuf,
4849                       filetype,
4850                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
4851         }
4852
4853         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
4854                 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
4855                       root->objectid, key->objectid, index, namebuf, filetype,
4856                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
4857         }
4858
4859         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
4860                 error(
4861                 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
4862                       root->objectid, ino, index, namebuf, filetype,
4863                       err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
4864         }
4865
4866         if (err & INODE_REF_MISSING)
4867                 error(
4868                 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
4869                       root->objectid, ino, key->objectid, namebuf, filetype);
4870
4871 }
4872
4873 /*
4874  * Call repair_inode_item_missing and repair_ternary_lowmem to repair
4875  *
4876  * Returns error after repair
4877  */
4878 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
4879                            u64 index, u8 filetype, char *namebuf, u32 name_len,
4880                            int err)
4881 {
4882         int ret;
4883
4884         if (err & INODE_ITEM_MISSING) {
4885                 ret = repair_inode_item_missing(root, ino, filetype);
4886                 if (!ret)
4887                         err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
4888         }
4889
4890         if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
4891                 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
4892                                             name_len, filetype, err);
4893                 if (!ret) {
4894                         err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
4895                         err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
4896                         err &= ~(INODE_REF_MISSING);
4897                 }
4898         }
4899         return err;
4900 }
4901
4902 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
4903                 u64 *size_ret)
4904 {
4905         struct btrfs_key key;
4906         struct btrfs_path path;
4907         u32 len;
4908         struct btrfs_dir_item *di;
4909         int ret;
4910         int cur = 0;
4911         int total = 0;
4912
4913         ASSERT(size_ret);
4914         *size_ret = 0;
4915
4916         key.objectid = ino;
4917         key.type = type;
4918         key.offset = (u64)-1;
4919
4920         btrfs_init_path(&path);
4921         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4922         if (ret < 0) {
4923                 ret = -EIO;
4924                 goto out;
4925         }
4926         /* if found, go to spacial case */
4927         if (ret == 0)
4928                 goto special_case;
4929
4930 loop:
4931         ret = btrfs_previous_item(root, &path, ino, type);
4932
4933         if (ret) {
4934                 ret = 0;
4935                 goto out;
4936         }
4937
4938 special_case:
4939         di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
4940         cur = 0;
4941         total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
4942
4943         while (cur < total) {
4944                 len = btrfs_dir_name_len(path.nodes[0], di);
4945                 if (len > BTRFS_NAME_LEN)
4946                         len = BTRFS_NAME_LEN;
4947                 *size_ret += len;
4948
4949                 len += btrfs_dir_data_len(path.nodes[0], di);
4950                 len += sizeof(*di);
4951                 di = (struct btrfs_dir_item *)((char *)di + len);
4952                 cur += len;
4953         }
4954         goto loop;
4955
4956 out:
4957         btrfs_release_path(&path);
4958         return ret;
4959 }
4960
4961 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
4962 {
4963         u64 item_size;
4964         u64 index_size;
4965         int ret;
4966
4967         ASSERT(size);
4968         ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
4969         if (ret)
4970                 goto out;
4971
4972         ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
4973         if (ret)
4974                 goto out;
4975
4976         *size = item_size + index_size;
4977
4978 out:
4979         if (ret)
4980                 error("failed to count root %llu INODE[%llu] root size",
4981                       root->objectid, ino);
4982         return ret;
4983 }
4984
4985 /*
4986  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4987  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4988  *
4989  * @root:       the root of the fs/file tree
4990  * @key:        the key of the INODE_REF/INODE_EXTREF
4991  * @path:       the path
4992  * @size:       the st_size of the INODE_ITEM
4993  * @ext_ref:    the EXTENDED_IREF feature
4994  *
4995  * Return 0 if no error occurred.
4996  * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
4997  */
4998 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
4999                           struct btrfs_path *path, u64 *size,
5000                           unsigned int ext_ref)
5001 {
5002         struct btrfs_dir_item *di;
5003         struct btrfs_inode_item *ii;
5004         struct btrfs_key key;
5005         struct btrfs_key location;
5006         struct extent_buffer *node;
5007         int slot;
5008         char namebuf[BTRFS_NAME_LEN] = {0};
5009         u32 total;
5010         u32 cur = 0;
5011         u32 len;
5012         u32 name_len;
5013         u32 data_len;
5014         u8 filetype;
5015         u32 mode = 0;
5016         u64 index;
5017         int ret;
5018         int err;
5019         int tmp_err;
5020         int need_research = 0;
5021
5022         /*
5023          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5024          * ignore index check.
5025          */
5026         if (di_key->type == BTRFS_DIR_INDEX_KEY)
5027                 index = di_key->offset;
5028         else
5029                 index = (u64)-1;
5030 begin:
5031         err = 0;
5032         cur = 0;
5033
5034         /* since after repair, path and the dir item may be changed */
5035         if (need_research) {
5036                 need_research = 0;
5037                 err |= DIR_COUNT_AGAIN;
5038                 btrfs_release_path(path);
5039                 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5040                 /* the item was deleted, let path point the last checked item */
5041                 if (ret > 0) {
5042                         if (path->slots[0] == 0)
5043                                 btrfs_prev_leaf(root, path);
5044                         else
5045                                 path->slots[0]--;
5046                 }
5047                 if (ret)
5048                         goto out;
5049         }
5050
5051         node = path->nodes[0];
5052         slot = path->slots[0];
5053
5054         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5055         total = btrfs_item_size_nr(node, slot);
5056         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5057
5058         while (cur < total) {
5059                 data_len = btrfs_dir_data_len(node, di);
5060                 tmp_err = 0;
5061                 if (data_len)
5062                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5063                               root->objectid,
5064               di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5065                               di_key->objectid, di_key->offset, data_len);
5066
5067                 name_len = btrfs_dir_name_len(node, di);
5068                 if (name_len <= BTRFS_NAME_LEN) {
5069                         len = name_len;
5070                 } else {
5071                         len = BTRFS_NAME_LEN;
5072                         warning("root %llu %s[%llu %llu] name too long",
5073                                 root->objectid,
5074                 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5075                                 di_key->objectid, di_key->offset);
5076                 }
5077                 (*size) += name_len;
5078                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5079                                    len);
5080                 filetype = btrfs_dir_type(node, di);
5081
5082                 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5083                     di_key->offset != btrfs_name_hash(namebuf, len)) {
5084                         err |= -EIO;
5085                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5086                         root->objectid, di_key->objectid, di_key->offset,
5087                         namebuf, len, filetype, di_key->offset,
5088                         btrfs_name_hash(namebuf, len));
5089                 }
5090
5091                 btrfs_dir_item_key_to_cpu(node, di, &location);
5092                 /* Ignore related ROOT_ITEM check */
5093                 if (location.type == BTRFS_ROOT_ITEM_KEY)
5094                         goto next;
5095
5096                 btrfs_release_path(path);
5097                 /* Check relative INODE_ITEM(existence/filetype) */
5098                 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5099                 if (ret) {
5100                         tmp_err |= INODE_ITEM_MISSING;
5101                         goto next;
5102                 }
5103
5104                 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5105                                     struct btrfs_inode_item);
5106                 mode = btrfs_inode_mode(path->nodes[0], ii);
5107                 if (imode_to_type(mode) != filetype) {
5108                         tmp_err |= INODE_ITEM_MISMATCH;
5109                         goto next;
5110                 }
5111
5112                 /* Check relative INODE_REF/INODE_EXTREF */
5113                 key.objectid = location.objectid;
5114                 key.type = BTRFS_INODE_REF_KEY;
5115                 key.offset = di_key->objectid;
5116                 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5117                                           &index, ext_ref);
5118
5119                 /* check relative INDEX/ITEM */
5120                 key.objectid = di_key->objectid;
5121                 if (key.type == BTRFS_DIR_ITEM_KEY) {
5122                         key.type = BTRFS_DIR_INDEX_KEY;
5123                         key.offset = index;
5124                 } else {
5125                         key.type = BTRFS_DIR_ITEM_KEY;
5126                         key.offset = btrfs_name_hash(namebuf, name_len);
5127                 }
5128
5129                 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5130                                          name_len, filetype);
5131                 /* find_dir_item may find index */
5132                 if (key.type == BTRFS_DIR_INDEX_KEY)
5133                         index = key.offset;
5134 next:
5135
5136                 if (tmp_err && repair) {
5137                         ret = repair_dir_item(root, di_key->objectid,
5138                                               location.objectid, index,
5139                                               imode_to_type(mode), namebuf,
5140                                               name_len, tmp_err);
5141                         if (ret != tmp_err) {
5142                                 need_research = 1;
5143                                 goto begin;
5144                         }
5145                 }
5146                 btrfs_release_path(path);
5147                 print_dir_item_err(root, di_key, location.objectid, index,
5148                                    namebuf, name_len, filetype, tmp_err);
5149                 err |= tmp_err;
5150                 len = sizeof(*di) + name_len + data_len;
5151                 di = (struct btrfs_dir_item *)((char *)di + len);
5152                 cur += len;
5153
5154                 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5155                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5156                               root->objectid, di_key->objectid,
5157                               di_key->offset);
5158                         break;
5159                 }
5160         }
5161 out:
5162         /* research path */
5163         btrfs_release_path(path);
5164         ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5165         if (ret)
5166                 err |= ret > 0 ? -ENOENT : ret;
5167         return err;
5168 }
5169
5170 /*
5171  * Wrapper function of btrfs_punch_hole.
5172  *
5173  * Returns 0 means success.
5174  * Returns not 0 means error.
5175  */
5176 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5177                              u64 len)
5178 {
5179         struct btrfs_trans_handle *trans;
5180         int ret = 0;
5181
5182         trans = btrfs_start_transaction(root, 1);
5183         if (IS_ERR(trans))
5184                 return PTR_ERR(trans);
5185
5186         ret = btrfs_punch_hole(trans, root, ino, start, len);
5187         if (ret)
5188                 error("failed to add hole [%llu, %llu] in inode [%llu]",
5189                       start, len, ino);
5190         else
5191                 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5192                        ino);
5193
5194         btrfs_commit_transaction(trans, root);
5195         return ret;
5196 }
5197
5198 /*
5199  * Check file extent datasum/hole, update the size of the file extents,
5200  * check and update the last offset of the file extent.
5201  *
5202  * @root:       the root of fs/file tree.
5203  * @fkey:       the key of the file extent.
5204  * @nodatasum:  INODE_NODATASUM feature.
5205  * @size:       the sum of all EXTENT_DATA items size for this inode.
5206  * @end:        the offset of the last extent.
5207  *
5208  * Return 0 if no error occurred.
5209  */
5210 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5211                              struct extent_buffer *node, int slot,
5212                              unsigned int nodatasum, u64 *size, u64 *end)
5213 {
5214         struct btrfs_file_extent_item *fi;
5215         u64 disk_bytenr;
5216         u64 disk_num_bytes;
5217         u64 extent_num_bytes;
5218         u64 extent_offset;
5219         u64 csum_found;         /* In byte size, sectorsize aligned */
5220         u64 search_start;       /* Logical range start we search for csum */
5221         u64 search_len;         /* Logical range len we search for csum */
5222         unsigned int extent_type;
5223         unsigned int is_hole;
5224         int compressed = 0;
5225         int ret;
5226         int err = 0;
5227
5228         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5229
5230         /* Check inline extent */
5231         extent_type = btrfs_file_extent_type(node, fi);
5232         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5233                 struct btrfs_item *e = btrfs_item_nr(slot);
5234                 u32 item_inline_len;
5235
5236                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5237                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5238                 compressed = btrfs_file_extent_compression(node, fi);
5239                 if (extent_num_bytes == 0) {
5240                         error(
5241                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5242                                 root->objectid, fkey->objectid, fkey->offset);
5243                         err |= FILE_EXTENT_ERROR;
5244                 }
5245                 if (!compressed && extent_num_bytes != item_inline_len) {
5246                         error(
5247                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5248                                 root->objectid, fkey->objectid, fkey->offset,
5249                                 extent_num_bytes, item_inline_len);
5250                         err |= FILE_EXTENT_ERROR;
5251                 }
5252                 *end += extent_num_bytes;
5253                 *size += extent_num_bytes;
5254                 return err;
5255         }
5256
5257         /* Check extent type */
5258         if (extent_type != BTRFS_FILE_EXTENT_REG &&
5259                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5260                 err |= FILE_EXTENT_ERROR;
5261                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5262                       root->objectid, fkey->objectid, fkey->offset);
5263                 return err;
5264         }
5265
5266         /* Check REG_EXTENT/PREALLOC_EXTENT */
5267         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5268         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5269         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5270         extent_offset = btrfs_file_extent_offset(node, fi);
5271         compressed = btrfs_file_extent_compression(node, fi);
5272         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5273
5274         /*
5275          * Check EXTENT_DATA csum
5276          *
5277          * For plain (uncompressed) extent, we should only check the range
5278          * we're referring to, as it's possible that part of prealloc extent
5279          * has been written, and has csum:
5280          *
5281          * |<--- Original large preallocated extent A ---->|
5282          * |<- Prealloc File Extent ->|<- Regular Extent ->|
5283          *      No csum                         Has csum
5284          *
5285          * For compressed extent, we should check the whole range.
5286          */
5287         if (!compressed) {
5288                 search_start = disk_bytenr + extent_offset;
5289                 search_len = extent_num_bytes;
5290         } else {
5291                 search_start = disk_bytenr;
5292                 search_len = disk_num_bytes;
5293         }
5294         ret = count_csum_range(root->fs_info, search_start, search_len, &csum_found);
5295         if (csum_found > 0 && nodatasum) {
5296                 err |= ODD_CSUM_ITEM;
5297                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5298                       root->objectid, fkey->objectid, fkey->offset);
5299         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5300                    !is_hole && (ret < 0 || csum_found < search_len)) {
5301                 err |= CSUM_ITEM_MISSING;
5302                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5303                       root->objectid, fkey->objectid, fkey->offset,
5304                       csum_found, search_len);
5305         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5306                 err |= ODD_CSUM_ITEM;
5307                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5308                       root->objectid, fkey->objectid, fkey->offset, csum_found);
5309         }
5310
5311         /* Check EXTENT_DATA hole */
5312         if (!no_holes && *end != fkey->offset) {
5313                 if (repair)
5314                         ret = punch_extent_hole(root, fkey->objectid,
5315                                                 *end, fkey->offset - *end);
5316                 if (!repair || ret) {
5317                         err |= FILE_EXTENT_ERROR;
5318                         error(
5319 "root %llu EXTENT_DATA[%llu %llu] gap exists, expected: EXTENT_DATA[%llu %llu]",
5320                                 root->objectid, fkey->objectid, fkey->offset,
5321                                 fkey->objectid, *end);
5322                 }
5323         }
5324
5325         *end += extent_num_bytes;
5326         if (!is_hole)
5327                 *size += extent_num_bytes;
5328
5329         return err;
5330 }
5331
5332 /*
5333  * Set inode item nbytes to @nbytes
5334  *
5335  * Returns  0     on success
5336  * Returns  != 0  on error
5337  */
5338 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5339                                       struct btrfs_path *path,
5340                                       u64 ino, u64 nbytes)
5341 {
5342         struct btrfs_trans_handle *trans;
5343         struct btrfs_inode_item *ii;
5344         struct btrfs_key key;
5345         struct btrfs_key research_key;
5346         int err = 0;
5347         int ret;
5348
5349         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5350
5351         key.objectid = ino;
5352         key.type = BTRFS_INODE_ITEM_KEY;
5353         key.offset = 0;
5354
5355         trans = btrfs_start_transaction(root, 1);
5356         if (IS_ERR(trans)) {
5357                 ret = PTR_ERR(trans);
5358                 err |= ret;
5359                 goto out;
5360         }
5361
5362         btrfs_release_path(path);
5363         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5364         if (ret > 0)
5365                 ret = -ENOENT;
5366         if (ret) {
5367                 err |= ret;
5368                 goto fail;
5369         }
5370
5371         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5372                             struct btrfs_inode_item);
5373         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5374         btrfs_mark_buffer_dirty(path->nodes[0]);
5375 fail:
5376         btrfs_commit_transaction(trans, root);
5377 out:
5378         if (ret)
5379                 error("failed to set nbytes in inode %llu root %llu",
5380                       ino, root->root_key.objectid);
5381         else
5382                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5383                        root->root_key.objectid, nbytes);
5384
5385         /* research path */
5386         btrfs_release_path(path);
5387         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5388         err |= ret;
5389
5390         return err;
5391 }
5392
5393 /*
5394  * Set directory inode isize to @isize.
5395  *
5396  * Returns 0     on success.
5397  * Returns != 0  on error.
5398  */
5399 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5400                                    struct btrfs_path *path,
5401                                    u64 ino, u64 isize)
5402 {
5403         struct btrfs_trans_handle *trans;
5404         struct btrfs_inode_item *ii;
5405         struct btrfs_key key;
5406         struct btrfs_key research_key;
5407         int ret;
5408         int err = 0;
5409
5410         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5411
5412         key.objectid = ino;
5413         key.type = BTRFS_INODE_ITEM_KEY;
5414         key.offset = 0;
5415
5416         trans = btrfs_start_transaction(root, 1);
5417         if (IS_ERR(trans)) {
5418                 ret = PTR_ERR(trans);
5419                 err |= ret;
5420                 goto out;
5421         }
5422
5423         btrfs_release_path(path);
5424         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5425         if (ret > 0)
5426                 ret = -ENOENT;
5427         if (ret) {
5428                 err |= ret;
5429                 goto fail;
5430         }
5431
5432         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5433                             struct btrfs_inode_item);
5434         btrfs_set_inode_size(path->nodes[0], ii, isize);
5435         btrfs_mark_buffer_dirty(path->nodes[0]);
5436 fail:
5437         btrfs_commit_transaction(trans, root);
5438 out:
5439         if (ret)
5440                 error("failed to set isize in inode %llu root %llu",
5441                       ino, root->root_key.objectid);
5442         else
5443                 printf("Set isize in inode %llu root %llu to %llu\n",
5444                        ino, root->root_key.objectid, isize);
5445
5446         btrfs_release_path(path);
5447         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5448         err |= ret;
5449
5450         return err;
5451 }
5452
5453 /*
5454  * Wrapper function for btrfs_add_orphan_item().
5455  *
5456  * Returns 0     on success.
5457  * Returns != 0  on error.
5458  */
5459 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5460                                            struct btrfs_path *path, u64 ino)
5461 {
5462         struct btrfs_trans_handle *trans;
5463         struct btrfs_key research_key;
5464         int ret;
5465         int err = 0;
5466
5467         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5468
5469         trans = btrfs_start_transaction(root, 1);
5470         if (IS_ERR(trans)) {
5471                 ret = PTR_ERR(trans);
5472                 err |= ret;
5473                 goto out;
5474         }
5475
5476         btrfs_release_path(path);
5477         ret = btrfs_add_orphan_item(trans, root, path, ino);
5478         err |= ret;
5479         btrfs_commit_transaction(trans, root);
5480 out:
5481         if (ret)
5482                 error("failed to add inode %llu as orphan item root %llu",
5483                       ino, root->root_key.objectid);
5484         else
5485                 printf("Added inode %llu as orphan item root %llu\n",
5486                        ino, root->root_key.objectid);
5487
5488         btrfs_release_path(path);
5489         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5490         err |= ret;
5491
5492         return err;
5493 }
5494
5495 /* Set inode_item nlink to @ref_count.
5496  * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
5497  *
5498  * Returns 0 on success
5499  */
5500 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
5501                                       struct btrfs_path *path, u64 ino,
5502                                       const char *name, u32 namelen,
5503                                       u64 ref_count, u8 filetype, u64 *nlink)
5504 {
5505         struct btrfs_trans_handle *trans;
5506         struct btrfs_inode_item *ii;
5507         struct btrfs_key key;
5508         struct btrfs_key old_key;
5509         char namebuf[BTRFS_NAME_LEN] = {0};
5510         int name_len;
5511         int ret;
5512         int ret2;
5513
5514         /* save the key */
5515         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
5516
5517         if (name && namelen) {
5518                 ASSERT(namelen <= BTRFS_NAME_LEN);
5519                 memcpy(namebuf, name, namelen);
5520                 name_len = namelen;
5521         } else {
5522                 sprintf(namebuf, "%llu", ino);
5523                 name_len = count_digits(ino);
5524                 printf("Can't find file name for inode %llu, use %s instead\n",
5525                        ino, namebuf);
5526         }
5527
5528         trans = btrfs_start_transaction(root, 1);
5529         if (IS_ERR(trans)) {
5530                 ret = PTR_ERR(trans);
5531                 goto out;
5532         }
5533
5534         btrfs_release_path(path);
5535         /* if refs is 0, put it into lostfound */
5536         if (ref_count == 0) {
5537                 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
5538                                               name_len, filetype, &ref_count);
5539                 if (ret)
5540                         goto fail;
5541         }
5542
5543         /* reset inode_item's nlink to ref_count */
5544         key.objectid = ino;
5545         key.type = BTRFS_INODE_ITEM_KEY;
5546         key.offset = 0;
5547
5548         btrfs_release_path(path);
5549         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5550         if (ret > 0)
5551                 ret = -ENOENT;
5552         if (ret)
5553                 goto fail;
5554
5555         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5556                             struct btrfs_inode_item);
5557         btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
5558         btrfs_mark_buffer_dirty(path->nodes[0]);
5559
5560         if (nlink)
5561                 *nlink = ref_count;
5562 fail:
5563         btrfs_commit_transaction(trans, root);
5564 out:
5565         if (ret)
5566                 error(
5567         "fail to repair nlink of inode %llu root %llu name %s filetype %u",
5568                        root->objectid, ino, namebuf, filetype);
5569         else
5570                 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
5571                        root->objectid, ino, namebuf, filetype);
5572
5573         /* research */
5574         btrfs_release_path(path);
5575         ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
5576         if (ret2 < 0)
5577                 return ret |= ret2;
5578         return ret;
5579 }
5580
5581 /*
5582  * Check INODE_ITEM and related ITEMs (the same inode number)
5583  * 1. check link count
5584  * 2. check inode ref/extref
5585  * 3. check dir item/index
5586  *
5587  * @ext_ref:    the EXTENDED_IREF feature
5588  *
5589  * Return 0 if no error occurred.
5590  * Return >0 for error or hit the traversal is done(by error bitmap)
5591  */
5592 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5593                             unsigned int ext_ref)
5594 {
5595         struct extent_buffer *node;
5596         struct btrfs_inode_item *ii;
5597         struct btrfs_key key;
5598         struct btrfs_key last_key;
5599         u64 inode_id;
5600         u32 mode;
5601         u64 nlink;
5602         u64 nbytes;
5603         u64 isize;
5604         u64 size = 0;
5605         u64 refs = 0;
5606         u64 extent_end = 0;
5607         u64 extent_size = 0;
5608         unsigned int dir;
5609         unsigned int nodatasum;
5610         int slot;
5611         int ret;
5612         int err = 0;
5613         char namebuf[BTRFS_NAME_LEN] = {0};
5614         u32 name_len = 0;
5615
5616         node = path->nodes[0];
5617         slot = path->slots[0];
5618
5619         btrfs_item_key_to_cpu(node, &key, slot);
5620         inode_id = key.objectid;
5621
5622         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5623                 ret = btrfs_next_item(root, path);
5624                 if (ret > 0)
5625                         err |= LAST_ITEM;
5626                 return err;
5627         }
5628
5629         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5630         isize = btrfs_inode_size(node, ii);
5631         nbytes = btrfs_inode_nbytes(node, ii);
5632         mode = btrfs_inode_mode(node, ii);
5633         dir = imode_to_type(mode) == BTRFS_FT_DIR;
5634         nlink = btrfs_inode_nlink(node, ii);
5635         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5636
5637         while (1) {
5638                 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
5639                 ret = btrfs_next_item(root, path);
5640                 if (ret < 0) {
5641                         /* out will fill 'err' rusing current statistics */
5642                         goto out;
5643                 } else if (ret > 0) {
5644                         err |= LAST_ITEM;
5645                         goto out;
5646                 }
5647
5648                 node = path->nodes[0];
5649                 slot = path->slots[0];
5650                 btrfs_item_key_to_cpu(node, &key, slot);
5651                 if (key.objectid != inode_id)
5652                         goto out;
5653
5654                 switch (key.type) {
5655                 case BTRFS_INODE_REF_KEY:
5656                         ret = check_inode_ref(root, &key, path, namebuf,
5657                                               &name_len, &refs, mode);
5658                         err |= ret;
5659                         break;
5660                 case BTRFS_INODE_EXTREF_KEY:
5661                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5662                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
5663                                         root->objectid, key.objectid,
5664                                         key.offset);
5665                         ret = check_inode_extref(root, &key, node, slot, &refs,
5666                                                  mode);
5667                         err |= ret;
5668                         break;
5669                 case BTRFS_DIR_ITEM_KEY:
5670                 case BTRFS_DIR_INDEX_KEY:
5671                         if (!dir) {
5672                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5673                                         root->objectid, inode_id,
5674                                         imode_to_type(mode), key.objectid,
5675                                         key.offset);
5676                         }
5677                         ret = check_dir_item(root, &key, path, &size, ext_ref);
5678                         err |= ret;
5679                         break;
5680                 case BTRFS_EXTENT_DATA_KEY:
5681                         if (dir) {
5682                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5683                                         root->objectid, inode_id, key.objectid,
5684                                         key.offset);
5685                         }
5686                         ret = check_file_extent(root, &key, node, slot,
5687                                                 nodatasum, &extent_size,
5688                                                 &extent_end);
5689                         err |= ret;
5690                         break;
5691                 case BTRFS_XATTR_ITEM_KEY:
5692                         break;
5693                 default:
5694                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5695                               key.objectid, key.type, key.offset);
5696                 }
5697         }
5698
5699 out:
5700         if (err & LAST_ITEM) {
5701                 btrfs_release_path(path);
5702                 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
5703                 if (ret)
5704                         return err;
5705         }
5706
5707         /* verify INODE_ITEM nlink/isize/nbytes */
5708         if (dir) {
5709                 if (repair && (err & DIR_COUNT_AGAIN)) {
5710                         err &= ~DIR_COUNT_AGAIN;
5711                         count_dir_isize(root, inode_id, &size);
5712                 }
5713
5714                 if ((nlink != 1 || refs != 1) && repair) {
5715                         ret = repair_inode_nlinks_lowmem(root, path, inode_id,
5716                                 namebuf, name_len, refs, imode_to_type(mode),
5717                                 &nlink);
5718                 }
5719
5720                 if (nlink != 1) {
5721                         err |= LINK_COUNT_ERROR;
5722                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5723                               root->objectid, inode_id, nlink);
5724                 }
5725
5726                 /*
5727                  * Just a warning, as dir inode nbytes is just an
5728                  * instructive value.
5729                  */
5730                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5731                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5732                                 root->objectid, inode_id,
5733                                 root->fs_info->nodesize);
5734                 }
5735
5736                 if (isize != size) {
5737                         if (repair)
5738                                 ret = repair_dir_isize_lowmem(root, path,
5739                                                               inode_id, size);
5740                         if (!repair || ret) {
5741                                 err |= ISIZE_ERROR;
5742                                 error(
5743                 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5744                                       root->objectid, inode_id, isize, size);
5745                         }
5746                 }
5747         } else {
5748                 if (nlink != refs) {
5749                         if (repair)
5750                                 ret = repair_inode_nlinks_lowmem(root, path,
5751                                          inode_id, namebuf, name_len, refs,
5752                                          imode_to_type(mode), &nlink);
5753                         if (!repair || ret) {
5754                                 err |= LINK_COUNT_ERROR;
5755                                 error(
5756                 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5757                                       root->objectid, inode_id, nlink, refs);
5758                         }
5759                 } else if (!nlink) {
5760                         if (repair)
5761                                 ret = repair_inode_orphan_item_lowmem(root,
5762                                                               path, inode_id);
5763                         if (!repair || ret) {
5764                                 err |= ORPHAN_ITEM;
5765                                 error("root %llu INODE[%llu] is orphan item",
5766                                       root->objectid, inode_id);
5767                         }
5768                 }
5769
5770                 if (!nbytes && !no_holes && extent_end < isize) {
5771                         if (repair)
5772                                 ret = punch_extent_hole(root, inode_id,
5773                                                 extent_end, isize - extent_end);
5774                         if (!repair || ret) {
5775                                 err |= NBYTES_ERROR;
5776                                 error(
5777         "root %llu INODE[%llu] size %llu should have a file extent hole",
5778                                       root->objectid, inode_id, isize);
5779                         }
5780                 }
5781
5782                 if (nbytes != extent_size) {
5783                         if (repair)
5784                                 ret = repair_inode_nbytes_lowmem(root, path,
5785                                                          inode_id, extent_size);
5786                         if (!repair || ret) {
5787                                 err |= NBYTES_ERROR;
5788                                 error(
5789         "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5790                                       root->objectid, inode_id, nbytes,
5791                                       extent_size);
5792                         }
5793                 }
5794         }
5795
5796         if (err & LAST_ITEM)
5797                 btrfs_next_item(root, path);
5798         return err;
5799 }
5800
5801 /*
5802  * Insert the missing inode item and inode ref.
5803  *
5804  * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
5805  * Root dir should be handled specially because root dir is the root of fs.
5806  *
5807  * returns err (>0 or 0) after repair
5808  */
5809 static int repair_fs_first_inode(struct btrfs_root *root, int err)
5810 {
5811         struct btrfs_trans_handle *trans;
5812         struct btrfs_key key;
5813         struct btrfs_path path;
5814         int filetype = BTRFS_FT_DIR;
5815         int ret = 0;
5816
5817         btrfs_init_path(&path);
5818
5819         if (err & INODE_REF_MISSING) {
5820                 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5821                 key.type = BTRFS_INODE_REF_KEY;
5822                 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5823
5824                 trans = btrfs_start_transaction(root, 1);
5825                 if (IS_ERR(trans)) {
5826                         ret = PTR_ERR(trans);
5827                         goto out;
5828                 }
5829
5830                 btrfs_release_path(&path);
5831                 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
5832                 if (ret)
5833                         goto trans_fail;
5834
5835                 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
5836                                              BTRFS_FIRST_FREE_OBJECTID,
5837                                              BTRFS_FIRST_FREE_OBJECTID, 0);
5838                 if (ret)
5839                         goto trans_fail;
5840
5841                 printf("Add INODE_REF[%llu %llu] name %s\n",
5842                        BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
5843                        "..");
5844                 err &= ~INODE_REF_MISSING;
5845 trans_fail:
5846                 if (ret)
5847                         error("fail to insert first inode's ref");
5848                 btrfs_commit_transaction(trans, root);
5849         }
5850
5851         if (err & INODE_ITEM_MISSING) {
5852                 ret = repair_inode_item_missing(root,
5853                                         BTRFS_FIRST_FREE_OBJECTID, filetype);
5854                 if (ret)
5855                         goto out;
5856                 err &= ~INODE_ITEM_MISSING;
5857         }
5858 out:
5859         if (ret)
5860                 error("fail to repair first inode");
5861         btrfs_release_path(&path);
5862         return err;
5863 }
5864
5865 /*
5866  * check first root dir's inode_item and inode_ref
5867  *
5868  * returns 0 means no error
5869  * returns >0 means error
5870  * returns <0 means fatal error
5871  */
5872 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5873 {
5874         struct btrfs_path path;
5875         struct btrfs_key key;
5876         struct btrfs_inode_item *ii;
5877         u64 index;
5878         u32 mode;
5879         int err = 0;
5880         int ret;
5881
5882         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5883         key.type = BTRFS_INODE_ITEM_KEY;
5884         key.offset = 0;
5885
5886         /* For root being dropped, we don't need to check first inode */
5887         if (btrfs_root_refs(&root->root_item) == 0 &&
5888             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5889             BTRFS_FIRST_FREE_OBJECTID)
5890                 return 0;
5891
5892         btrfs_init_path(&path);
5893         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5894         if (ret < 0)
5895                 goto out;
5896         if (ret > 0) {
5897                 ret = 0;
5898                 err |= INODE_ITEM_MISSING;
5899         } else {
5900                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
5901                                     struct btrfs_inode_item);
5902                 mode = btrfs_inode_mode(path.nodes[0], ii);
5903                 if (imode_to_type(mode) != BTRFS_FT_DIR)
5904                         err |= INODE_ITEM_MISMATCH;
5905         }
5906
5907         /* lookup first inode ref */
5908         key.offset = BTRFS_FIRST_FREE_OBJECTID;
5909         key.type = BTRFS_INODE_REF_KEY;
5910         /* special index value */
5911         index = 0;
5912
5913         ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
5914         if (ret < 0)
5915                 goto out;
5916         err |= ret;
5917
5918 out:
5919         btrfs_release_path(&path);
5920
5921         if (err && repair)
5922                 err = repair_fs_first_inode(root, err);
5923
5924         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
5925                 error("root dir INODE_ITEM is %s",
5926                       err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5927         if (err & INODE_REF_MISSING)
5928                 error("root dir INODE_REF is missing");
5929
5930         return ret < 0 ? ret : err;
5931 }
5932
5933 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5934                                                 u64 parent, u64 root)
5935 {
5936         struct rb_node *node;
5937         struct tree_backref *back = NULL;
5938         struct tree_backref match = {
5939                 .node = {
5940                         .is_data = 0,
5941                 },
5942         };
5943
5944         if (parent) {
5945                 match.parent = parent;
5946                 match.node.full_backref = 1;
5947         } else {
5948                 match.root = root;
5949         }
5950
5951         node = rb_search(&rec->backref_tree, &match.node.node,
5952                          (rb_compare_keys)compare_extent_backref, NULL);
5953         if (node)
5954                 back = to_tree_backref(rb_node_to_extent_backref(node));
5955
5956         return back;
5957 }
5958
5959 static struct data_backref *find_data_backref(struct extent_record *rec,
5960                                                 u64 parent, u64 root,
5961                                                 u64 owner, u64 offset,
5962                                                 int found_ref,
5963                                                 u64 disk_bytenr, u64 bytes)
5964 {
5965         struct rb_node *node;
5966         struct data_backref *back = NULL;
5967         struct data_backref match = {
5968                 .node = {
5969                         .is_data = 1,
5970                 },
5971                 .owner = owner,
5972                 .offset = offset,
5973                 .bytes = bytes,
5974                 .found_ref = found_ref,
5975                 .disk_bytenr = disk_bytenr,
5976         };
5977
5978         if (parent) {
5979                 match.parent = parent;
5980                 match.node.full_backref = 1;
5981         } else {
5982                 match.root = root;
5983         }
5984
5985         node = rb_search(&rec->backref_tree, &match.node.node,
5986                          (rb_compare_keys)compare_extent_backref, NULL);
5987         if (node)
5988                 back = to_data_backref(rb_node_to_extent_backref(node));
5989
5990         return back;
5991 }
5992 /*
5993  * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
5994  * blocks and integrity of fs tree items.
5995  *
5996  * @root:         the root of the tree to be checked.
5997  * @ext_ref       feature EXTENDED_IREF is enable or not.
5998  * @account       if NOT 0 means check the tree (including tree)'s treeblocks.
5999  *                otherwise means check fs tree(s) items relationship and
6000  *                @root MUST be a fs tree root.
6001  * Returns 0      represents OK.
6002  * Returns not 0  represents error.
6003  */
6004 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6005                             struct btrfs_root *root, unsigned int ext_ref,
6006                             int check_all)
6007
6008 {
6009         struct btrfs_path path;
6010         struct node_refs nrefs;
6011         struct btrfs_root_item *root_item = &root->root_item;
6012         int ret;
6013         int level;
6014         int err = 0;
6015
6016         memset(&nrefs, 0, sizeof(nrefs));
6017         if (!check_all) {
6018                 /*
6019                  * We need to manually check the first inode item (256)
6020                  * As the following traversal function will only start from
6021                  * the first inode item in the leaf, if inode item (256) is
6022                  * missing we will skip it forever.
6023                  */
6024                 ret = check_fs_first_inode(root, ext_ref);
6025                 if (ret < 0)
6026                         return ret;
6027         }
6028
6029
6030         level = btrfs_header_level(root->node);
6031         btrfs_init_path(&path);
6032
6033         if (btrfs_root_refs(root_item) > 0 ||
6034             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6035                 path.nodes[level] = root->node;
6036                 path.slots[level] = 0;
6037                 extent_buffer_get(root->node);
6038         } else {
6039                 struct btrfs_key key;
6040
6041                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6042                 level = root_item->drop_level;
6043                 path.lowest_level = level;
6044                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6045                 if (ret < 0)
6046                         goto out;
6047                 ret = 0;
6048         }
6049
6050         while (1) {
6051                 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6052                                         ext_ref, check_all);
6053
6054                 err |= !!ret;
6055
6056                 /* if ret is negative, walk shall stop */
6057                 if (ret < 0) {
6058                         ret = err;
6059                         break;
6060                 }
6061
6062                 ret = walk_up_tree_v2(root, &path, &level);
6063                 if (ret != 0) {
6064                         /* Normal exit, reset ret to err */
6065                         ret = err;
6066                         break;
6067                 }
6068         }
6069
6070 out:
6071         btrfs_release_path(&path);
6072         return ret;
6073 }
6074
6075 /*
6076  * Iterate all items in the tree and call check_inode_item() to check.
6077  *
6078  * @root:       the root of the tree to be checked.
6079  * @ext_ref:    the EXTENDED_IREF feature
6080  *
6081  * Return 0 if no error found.
6082  * Return <0 for error.
6083  */
6084 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6085 {
6086         reset_cached_block_groups(root->fs_info);
6087         return check_btrfs_root(NULL, root, ext_ref, 0);
6088 }
6089
6090 /*
6091  * Find the relative ref for root_ref and root_backref.
6092  *
6093  * @root:       the root of the root tree.
6094  * @ref_key:    the key of the root ref.
6095  *
6096  * Return 0 if no error occurred.
6097  */
6098 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6099                           struct extent_buffer *node, int slot)
6100 {
6101         struct btrfs_path path;
6102         struct btrfs_key key;
6103         struct btrfs_root_ref *ref;
6104         struct btrfs_root_ref *backref;
6105         char ref_name[BTRFS_NAME_LEN] = {0};
6106         char backref_name[BTRFS_NAME_LEN] = {0};
6107         u64 ref_dirid;
6108         u64 ref_seq;
6109         u32 ref_namelen;
6110         u64 backref_dirid;
6111         u64 backref_seq;
6112         u32 backref_namelen;
6113         u32 len;
6114         int ret;
6115         int err = 0;
6116
6117         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6118         ref_dirid = btrfs_root_ref_dirid(node, ref);
6119         ref_seq = btrfs_root_ref_sequence(node, ref);
6120         ref_namelen = btrfs_root_ref_name_len(node, ref);
6121
6122         if (ref_namelen <= BTRFS_NAME_LEN) {
6123                 len = ref_namelen;
6124         } else {
6125                 len = BTRFS_NAME_LEN;
6126                 warning("%s[%llu %llu] ref_name too long",
6127                         ref_key->type == BTRFS_ROOT_REF_KEY ?
6128                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
6129                         ref_key->offset);
6130         }
6131         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6132
6133         /* Find relative root_ref */
6134         key.objectid = ref_key->offset;
6135         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6136         key.offset = ref_key->objectid;
6137
6138         btrfs_init_path(&path);
6139         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6140         if (ret) {
6141                 err |= ROOT_REF_MISSING;
6142                 error("%s[%llu %llu] couldn't find relative ref",
6143                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6144                       "ROOT_REF" : "ROOT_BACKREF",
6145                       ref_key->objectid, ref_key->offset);
6146                 goto out;
6147         }
6148
6149         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6150                                  struct btrfs_root_ref);
6151         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6152         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6153         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6154
6155         if (backref_namelen <= BTRFS_NAME_LEN) {
6156                 len = backref_namelen;
6157         } else {
6158                 len = BTRFS_NAME_LEN;
6159                 warning("%s[%llu %llu] ref_name too long",
6160                         key.type == BTRFS_ROOT_REF_KEY ?
6161                         "ROOT_REF" : "ROOT_BACKREF",
6162                         key.objectid, key.offset);
6163         }
6164         read_extent_buffer(path.nodes[0], backref_name,
6165                            (unsigned long)(backref + 1), len);
6166
6167         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6168             ref_namelen != backref_namelen ||
6169             strncmp(ref_name, backref_name, len)) {
6170                 err |= ROOT_REF_MISMATCH;
6171                 error("%s[%llu %llu] mismatch relative ref",
6172                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6173                       "ROOT_REF" : "ROOT_BACKREF",
6174                       ref_key->objectid, ref_key->offset);
6175         }
6176 out:
6177         btrfs_release_path(&path);
6178         return err;
6179 }
6180
6181 /*
6182  * Check all fs/file tree in low_memory mode.
6183  *
6184  * 1. for fs tree root item, call check_fs_root_v2()
6185  * 2. for fs tree root ref/backref, call check_root_ref()
6186  *
6187  * Return 0 if no error occurred.
6188  */
6189 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6190 {
6191         struct btrfs_root *tree_root = fs_info->tree_root;
6192         struct btrfs_root *cur_root = NULL;
6193         struct btrfs_path path;
6194         struct btrfs_key key;
6195         struct extent_buffer *node;
6196         unsigned int ext_ref;
6197         int slot;
6198         int ret;
6199         int err = 0;
6200
6201         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6202
6203         btrfs_init_path(&path);
6204         key.objectid = BTRFS_FS_TREE_OBJECTID;
6205         key.offset = 0;
6206         key.type = BTRFS_ROOT_ITEM_KEY;
6207
6208         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6209         if (ret < 0) {
6210                 err = ret;
6211                 goto out;
6212         } else if (ret > 0) {
6213                 err = -ENOENT;
6214                 goto out;
6215         }
6216
6217         while (1) {
6218                 node = path.nodes[0];
6219                 slot = path.slots[0];
6220                 btrfs_item_key_to_cpu(node, &key, slot);
6221                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6222                         goto out;
6223                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6224                     fs_root_objectid(key.objectid)) {
6225                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6226                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6227                                                                        &key);
6228                         } else {
6229                                 key.offset = (u64)-1;
6230                                 cur_root = btrfs_read_fs_root(fs_info, &key);
6231                         }
6232
6233                         if (IS_ERR(cur_root)) {
6234                                 error("Fail to read fs/subvol tree: %lld",
6235                                       key.objectid);
6236                                 err = -EIO;
6237                                 goto next;
6238                         }
6239
6240                         ret = check_fs_root_v2(cur_root, ext_ref);
6241                         err |= ret;
6242
6243                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6244                                 btrfs_free_fs_root(cur_root);
6245                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6246                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
6247                         ret = check_root_ref(tree_root, &key, node, slot);
6248                         err |= ret;
6249                 }
6250 next:
6251                 ret = btrfs_next_item(tree_root, &path);
6252                 if (ret > 0)
6253                         goto out;
6254                 if (ret < 0) {
6255                         err = ret;
6256                         goto out;
6257                 }
6258         }
6259
6260 out:
6261         btrfs_release_path(&path);
6262         return err;
6263 }
6264
6265 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6266                           struct cache_tree *root_cache)
6267 {
6268         int ret;
6269
6270         if (!ctx.progress_enabled)
6271                 fprintf(stderr, "checking fs roots\n");
6272         if (check_mode == CHECK_MODE_LOWMEM)
6273                 ret = check_fs_roots_v2(fs_info);
6274         else
6275                 ret = check_fs_roots(fs_info, root_cache);
6276
6277         return ret;
6278 }
6279
/*
 * Verify every backref attached to an extent record.
 *
 * For each backref in rec->backref_tree this checks that:
 * - it was found in the extent tree (found_extent_tree)
 * - a tree backref has been referenced back (found_ref)
 * - a data backref's found_ref equals its num_refs, and its disk_bytenr and
 *   bytes agree with rec->start and rec->nr
 * Finally the accumulated reference count (1 per tree backref, found_ref per
 * data backref) is compared against rec->refs.
 *
 * @rec:        extent record whose backrefs are examined
 * @print_errs: when zero, return at the first problem without printing;
 *              otherwise report every problem found on stderr
 *
 * Return 0 if everything matched, 1 if any problem was detected.
 */
static int all_backpointers_checked(struct extent_record *rec, int print_errs)
{
        struct extent_backref *back, *tmp;
        struct tree_backref *tback;
        struct data_backref *dback;
        u64 found = 0;
        int err = 0;

        rbtree_postorder_for_each_entry_safe(back, tmp,
                                             &rec->backref_tree, node) {
                /* Backref has no counterpart in the extent tree */
                if (!back->found_extent_tree) {
                        err = 1;
                        if (!print_errs)
                                goto out;
                        if (back->is_data) {
                                dback = to_data_backref(back);
                                fprintf(stderr, "Data backref %llu %s %llu"
                                        " owner %llu offset %llu num_refs %lu"
                                        " not found in extent tree\n",
                                        (unsigned long long)rec->start,
                                        back->full_backref ?
                                        "parent" : "root",
                                        back->full_backref ?
                                        (unsigned long long)dback->parent:
                                        (unsigned long long)dback->root,
                                        (unsigned long long)dback->owner,
                                        (unsigned long long)dback->offset,
                                        (unsigned long)dback->num_refs);
                        } else {
                                tback = to_tree_backref(back);
                                fprintf(stderr, "Tree backref %llu parent %llu"
                                        " root %llu not found in extent tree\n",
                                        (unsigned long long)rec->start,
                                        (unsigned long long)tback->parent,
                                        (unsigned long long)tback->root);
                        }
                }
                /* Tree backref recorded but never referenced back */
                if (!back->is_data && !back->found_ref) {
                        err = 1;
                        if (!print_errs)
                                goto out;
                        tback = to_tree_backref(back);
                        fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
                                (unsigned long long)rec->start,
                                back->full_backref ? "parent" : "root",
                                back->full_backref ?
                                (unsigned long long)tback->parent :
                                (unsigned long long)tback->root, back);
                }
                if (back->is_data) {
                        dback = to_data_backref(back);
                        /* Per-backref reference count mismatch */
                        if (dback->found_ref != dback->num_refs) {
                                err = 1;
                                if (!print_errs)
                                        goto out;
                                fprintf(stderr, "Incorrect local backref count"
                                        " on %llu %s %llu owner %llu"
                                        " offset %llu found %u wanted %u back %p\n",
                                        (unsigned long long)rec->start,
                                        back->full_backref ?
                                        "parent" : "root",
                                        back->full_backref ?
                                        (unsigned long long)dback->parent:
                                        (unsigned long long)dback->root,
                                        (unsigned long long)dback->owner,
                                        (unsigned long long)dback->offset,
                                        dback->found_ref, dback->num_refs, back);
                        }
                        /* Data backref must point at the start of the extent */
                        if (dback->disk_bytenr != rec->start) {
                                err = 1;
                                if (!print_errs)
                                        goto out;
                                fprintf(stderr, "Backref disk bytenr does not"
                                        " match extent record, bytenr=%llu, "
                                        "ref bytenr=%llu\n",
                                        (unsigned long long)rec->start,
                                        (unsigned long long)dback->disk_bytenr);
                        }

                        /* Data backref length must equal the extent length */
                        if (dback->bytes != rec->nr) {
                                err = 1;
                                if (!print_errs)
                                        goto out;
                                fprintf(stderr, "Backref bytes do not match "
                                        "extent backref, bytenr=%llu, ref "
                                        "bytes=%llu, backref bytes=%llu\n",
                                        (unsigned long long)rec->start,
                                        (unsigned long long)rec->nr,
                                        (unsigned long long)dback->bytes);
                        }
                }
                /* Accumulate the global count: 1 per tree ref, found_ref per data ref */
                if (!back->is_data) {
                        found += 1;
                } else {
                        dback = to_data_backref(back);
                        found += dback->found_ref;
                }
        }
        /* Sum over all backrefs must equal the record's reference count */
        if (found != rec->refs) {
                err = 1;
                if (!print_errs)
                        goto out;
                fprintf(stderr, "Incorrect global backref count "
                        "on %llu found %llu wanted %llu\n",
                        (unsigned long long)rec->start,
                        (unsigned long long)found,
                        (unsigned long long)rec->refs);
        }
out:
        return err;
}
6391
/* rb_free_nodes() callback: free the extent_backref that embeds @node. */
static void __free_one_backref(struct rb_node *node)
{
        free(rb_node_to_extent_backref(node));
}
6398
/*
 * Free every backref stored in rec->backref_tree, using __free_one_backref()
 * as the per-node destructor.  The record itself is not freed.
 */
static void free_all_extent_backrefs(struct extent_record *rec)
{
        rb_free_nodes(&rec->backref_tree, __free_one_backref);
}
6403
6404 static void free_extent_record_cache(struct cache_tree *extent_cache)
6405 {
6406         struct cache_extent *cache;
6407         struct extent_record *rec;
6408
6409         while (1) {
6410                 cache = first_cache_extent(extent_cache);
6411                 if (!cache)
6412                         break;
6413                 rec = container_of(cache, struct extent_record, cache);
6414                 remove_cache_extent(extent_cache, cache);
6415                 free_all_extent_backrefs(rec);
6416                 free(rec);
6417         }
6418 }
6419
6420 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6421                                  struct extent_record *rec)
6422 {
6423         if (rec->content_checked && rec->owner_ref_checked &&
6424             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6425             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6426             !rec->bad_full_backref && !rec->crossing_stripes &&
6427             !rec->wrong_chunk_type) {
6428                 remove_cache_extent(extent_cache, &rec->cache);
6429                 free_all_extent_backrefs(rec);
6430                 list_del_init(&rec->list);
6431                 free(rec);
6432         }
6433         return 0;
6434 }
6435
6436 static int check_owner_ref(struct btrfs_root *root,
6437                             struct extent_record *rec,
6438                             struct extent_buffer *buf)
6439 {
6440         struct extent_backref *node, *tmp;
6441         struct tree_backref *back;
6442         struct btrfs_root *ref_root;
6443         struct btrfs_key key;
6444         struct btrfs_path path;
6445         struct extent_buffer *parent;
6446         int level;
6447         int found = 0;
6448         int ret;
6449
6450         rbtree_postorder_for_each_entry_safe(node, tmp,
6451                                              &rec->backref_tree, node) {
6452                 if (node->is_data)
6453                         continue;
6454                 if (!node->found_ref)
6455                         continue;
6456                 if (node->full_backref)
6457                         continue;
6458                 back = to_tree_backref(node);
6459                 if (btrfs_header_owner(buf) == back->root)
6460                         return 0;
6461         }
6462         BUG_ON(rec->is_root);
6463
6464         /* try to find the block by search corresponding fs tree */
6465         key.objectid = btrfs_header_owner(buf);
6466         key.type = BTRFS_ROOT_ITEM_KEY;
6467         key.offset = (u64)-1;
6468
6469         ref_root = btrfs_read_fs_root(root->fs_info, &key);
6470         if (IS_ERR(ref_root))
6471                 return 1;
6472
6473         level = btrfs_header_level(buf);
6474         if (level == 0)
6475                 btrfs_item_key_to_cpu(buf, &key, 0);
6476         else
6477                 btrfs_node_key_to_cpu(buf, &key, 0);
6478
6479         btrfs_init_path(&path);
6480         path.lowest_level = level + 1;
6481         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6482         if (ret < 0)
6483                 return 0;
6484
6485         parent = path.nodes[level + 1];
6486         if (parent && buf->start == btrfs_node_blockptr(parent,
6487                                                         path.slots[level + 1]))
6488                 found = 1;
6489
6490         btrfs_release_path(&path);
6491         return found ? 0 : 1;
6492 }
6493
6494 static int is_extent_tree_record(struct extent_record *rec)
6495 {
6496         struct extent_backref *node, *tmp;
6497         struct tree_backref *back;
6498         int is_extent = 0;
6499
6500         rbtree_postorder_for_each_entry_safe(node, tmp,
6501                                              &rec->backref_tree, node) {
6502                 if (node->is_data)
6503                         return 0;
6504                 back = to_tree_backref(node);
6505                 if (node->full_backref)
6506                         return 0;
6507                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
6508                         is_extent = 1;
6509         }
6510         return is_extent;
6511 }
6512
6513
6514 static int record_bad_block_io(struct btrfs_fs_info *info,
6515                                struct cache_tree *extent_cache,
6516                                u64 start, u64 len)
6517 {
6518         struct extent_record *rec;
6519         struct cache_extent *cache;
6520         struct btrfs_key key;
6521
6522         cache = lookup_cache_extent(extent_cache, start, len);
6523         if (!cache)
6524                 return 0;
6525
6526         rec = container_of(cache, struct extent_record, cache);
6527         if (!is_extent_tree_record(rec))
6528                 return 0;
6529
6530         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6531         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
6532 }
6533
6534 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6535                        struct extent_buffer *buf, int slot)
6536 {
6537         if (btrfs_header_level(buf)) {
6538                 struct btrfs_key_ptr ptr1, ptr2;
6539
6540                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6541                                    sizeof(struct btrfs_key_ptr));
6542                 read_extent_buffer(buf, &ptr2,
6543                                    btrfs_node_key_ptr_offset(slot + 1),
6544                                    sizeof(struct btrfs_key_ptr));
6545                 write_extent_buffer(buf, &ptr1,
6546                                     btrfs_node_key_ptr_offset(slot + 1),
6547                                     sizeof(struct btrfs_key_ptr));
6548                 write_extent_buffer(buf, &ptr2,
6549                                     btrfs_node_key_ptr_offset(slot),
6550                                     sizeof(struct btrfs_key_ptr));
6551                 if (slot == 0) {
6552                         struct btrfs_disk_key key;
6553                         btrfs_node_key(buf, &key, 0);
6554                         btrfs_fixup_low_keys(root, path, &key,
6555                                              btrfs_header_level(buf) + 1);
6556                 }
6557         } else {
6558                 struct btrfs_item *item1, *item2;
6559                 struct btrfs_key k1, k2;
6560                 char *item1_data, *item2_data;
6561                 u32 item1_offset, item2_offset, item1_size, item2_size;
6562
6563                 item1 = btrfs_item_nr(slot);
6564                 item2 = btrfs_item_nr(slot + 1);
6565                 btrfs_item_key_to_cpu(buf, &k1, slot);
6566                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6567                 item1_offset = btrfs_item_offset(buf, item1);
6568                 item2_offset = btrfs_item_offset(buf, item2);
6569                 item1_size = btrfs_item_size(buf, item1);
6570                 item2_size = btrfs_item_size(buf, item2);
6571
6572                 item1_data = malloc(item1_size);
6573                 if (!item1_data)
6574                         return -ENOMEM;
6575                 item2_data = malloc(item2_size);
6576                 if (!item2_data) {
6577                         free(item1_data);
6578                         return -ENOMEM;
6579                 }
6580
6581                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6582                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
6583
6584                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6585                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6586                 free(item1_data);
6587                 free(item2_data);
6588
6589                 btrfs_set_item_offset(buf, item1, item2_offset);
6590                 btrfs_set_item_offset(buf, item2, item1_offset);
6591                 btrfs_set_item_size(buf, item1, item2_size);
6592                 btrfs_set_item_size(buf, item2, item1_size);
6593
6594                 path->slots[0] = slot;
6595                 btrfs_set_item_key_unsafe(root, path, &k2);
6596                 path->slots[0] = slot + 1;
6597                 btrfs_set_item_key_unsafe(root, path, &k1);
6598         }
6599         return 0;
6600 }
6601
6602 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6603 {
6604         struct extent_buffer *buf;
6605         struct btrfs_key k1, k2;
6606         int i;
6607         int level = path->lowest_level;
6608         int ret = -EIO;
6609
6610         buf = path->nodes[level];
6611         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6612                 if (level) {
6613                         btrfs_node_key_to_cpu(buf, &k1, i);
6614                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
6615                 } else {
6616                         btrfs_item_key_to_cpu(buf, &k1, i);
6617                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
6618                 }
6619                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6620                         continue;
6621                 ret = swap_values(root, path, buf, i);
6622                 if (ret)
6623                         break;
6624                 btrfs_mark_buffer_dirty(buf);
6625                 i = 0;
6626         }
6627         return ret;
6628 }
6629
6630 static int delete_bogus_item(struct btrfs_root *root,
6631                              struct btrfs_path *path,
6632                              struct extent_buffer *buf, int slot)
6633 {
6634         struct btrfs_key key;
6635         int nritems = btrfs_header_nritems(buf);
6636
6637         btrfs_item_key_to_cpu(buf, &key, slot);
6638
6639         /* These are all the keys we can deal with missing. */
6640         if (key.type != BTRFS_DIR_INDEX_KEY &&
6641             key.type != BTRFS_EXTENT_ITEM_KEY &&
6642             key.type != BTRFS_METADATA_ITEM_KEY &&
6643             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6644             key.type != BTRFS_EXTENT_DATA_REF_KEY)
6645                 return -1;
6646
6647         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6648                (unsigned long long)key.objectid, key.type,
6649                (unsigned long long)key.offset, slot, buf->start);
6650         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6651                               btrfs_item_nr_offset(slot + 1),
6652                               sizeof(struct btrfs_item) *
6653                               (nritems - slot - 1));
6654         btrfs_set_header_nritems(buf, nritems - 1);
6655         if (slot == 0) {
6656                 struct btrfs_disk_key disk_key;
6657
6658                 btrfs_item_key(buf, &disk_key, 0);
6659                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6660         }
6661         btrfs_mark_buffer_dirty(buf);
6662         return 0;
6663 }
6664
/*
 * Repair the item data offsets of the leaf at path->nodes[0].
 *
 * The expected layout is: the end of item 0 equals
 * BTRFS_LEAF_DATA_SIZE(), and the end of every later item equals the offset
 * of the item before it.  For each item that violates this:
 * - if its end lies beyond the expected position, try delete_bogus_item()
 *   and rescan the leaf from the start; if the item cannot be deleted the
 *   repair fails
 * - otherwise there is a gap, and the item data is shifted up by the size of
 *   the gap
 *
 * Returns 0 on success.  A failure (-EIO) never reaches the caller because
 * of the BUG_ON() at the end.
 */
static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
{
        struct extent_buffer *buf;
        int i;
        int ret = 0;

        /* We should only get this for leaves */
        BUG_ON(path->lowest_level);
        buf = path->nodes[0];
again:
        for (i = 0; i < btrfs_header_nritems(buf); i++) {
                unsigned int shift = 0, offset;

                /* First item: its data must end at the end of the leaf data area */
                if (i == 0 && btrfs_item_end_nr(buf, i) !=
                    BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
                        if (btrfs_item_end_nr(buf, i) >
                            BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
                                ret = delete_bogus_item(root, path, buf, i);
                                /* Item removed, slots changed: rescan the leaf */
                                if (!ret)
                                        goto again;
                                fprintf(stderr, "item is off the end of the "
                                        "leaf, can't fix\n");
                                ret = -EIO;
                                break;
                        }
                        shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
                                btrfs_item_end_nr(buf, i);
                /* Later items: data must end where the previous item's data starts */
                } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
                           btrfs_item_offset_nr(buf, i - 1)) {
                        if (btrfs_item_end_nr(buf, i) >
                            btrfs_item_offset_nr(buf, i - 1)) {
                                ret = delete_bogus_item(root, path, buf, i);
                                /* Item removed, slots changed: rescan the leaf */
                                if (!ret)
                                        goto again;
                                fprintf(stderr, "items overlap, can't fix\n");
                                ret = -EIO;
                                break;
                        }
                        shift = btrfs_item_offset_nr(buf, i - 1) -
                                btrfs_item_end_nr(buf, i);
                }
                if (!shift)
                        continue;

                printf("Shifting item nr %d by %u bytes in block %llu\n",
                       i, shift, (unsigned long long)buf->start);
                offset = btrfs_item_offset_nr(buf, i);
                /* Item offsets are relative to btrfs_leaf_data(buf) */
                memmove_extent_buffer(buf,
                                      btrfs_leaf_data(buf) + offset + shift,
                                      btrfs_leaf_data(buf) + offset,
                                      btrfs_item_size_nr(buf, i));
                btrfs_set_item_offset(buf, btrfs_item_nr(i),
                                      offset + shift);
                btrfs_mark_buffer_dirty(buf);
        }

        /*
         * We may have moved things, in which case we want to exit so we don't
         * write those changes out.  Once we have proper abort functionality in
         * progs this can be changed to something nicer.
         */
        BUG_ON(ret);
        return ret;
}
6729
6730 /*
6731  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
6732  * then just return -EIO.
6733  */
6734 static int try_to_fix_bad_block(struct btrfs_root *root,
6735                                 struct extent_buffer *buf,
6736                                 enum btrfs_tree_block_status status)
6737 {
6738         struct btrfs_trans_handle *trans;
6739         struct ulist *roots;
6740         struct ulist_node *node;
6741         struct btrfs_root *search_root;
6742         struct btrfs_path path;
6743         struct ulist_iterator iter;
6744         struct btrfs_key root_key, key;
6745         int ret;
6746
6747         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6748             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6749                 return -EIO;
6750
6751         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6752         if (ret)
6753                 return -EIO;
6754
6755         btrfs_init_path(&path);
6756         ULIST_ITER_INIT(&iter);
6757         while ((node = ulist_next(roots, &iter))) {
6758                 root_key.objectid = node->val;
6759                 root_key.type = BTRFS_ROOT_ITEM_KEY;
6760                 root_key.offset = (u64)-1;
6761
6762                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6763                 if (IS_ERR(root)) {
6764                         ret = -EIO;
6765                         break;
6766                 }
6767
6768
6769                 trans = btrfs_start_transaction(search_root, 0);
6770                 if (IS_ERR(trans)) {
6771                         ret = PTR_ERR(trans);
6772                         break;
6773                 }
6774
6775                 path.lowest_level = btrfs_header_level(buf);
6776                 path.skip_check_block = 1;
6777                 if (path.lowest_level)
6778                         btrfs_node_key_to_cpu(buf, &key, 0);
6779                 else
6780                         btrfs_item_key_to_cpu(buf, &key, 0);
6781                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6782                 if (ret) {
6783                         ret = -EIO;
6784                         btrfs_commit_transaction(trans, search_root);
6785                         break;
6786                 }
6787                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6788                         ret = fix_key_order(search_root, &path);
6789                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6790                         ret = fix_item_offset(search_root, &path);
6791                 if (ret) {
6792                         btrfs_commit_transaction(trans, search_root);
6793                         break;
6794                 }
6795                 btrfs_release_path(&path);
6796                 btrfs_commit_transaction(trans, search_root);
6797         }
6798         ulist_free(roots);
6799         btrfs_release_path(&path);
6800         return ret;
6801 }
6802
6803 static int check_block(struct btrfs_root *root,
6804                        struct cache_tree *extent_cache,
6805                        struct extent_buffer *buf, u64 flags)
6806 {
6807         struct extent_record *rec;
6808         struct cache_extent *cache;
6809         struct btrfs_key key;
6810         enum btrfs_tree_block_status status;
6811         int ret = 0;
6812         int level;
6813
6814         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6815         if (!cache)
6816                 return 1;
6817         rec = container_of(cache, struct extent_record, cache);
6818         rec->generation = btrfs_header_generation(buf);
6819
6820         level = btrfs_header_level(buf);
6821         if (btrfs_header_nritems(buf) > 0) {
6822
6823                 if (level == 0)
6824                         btrfs_item_key_to_cpu(buf, &key, 0);
6825                 else
6826                         btrfs_node_key_to_cpu(buf, &key, 0);
6827
6828                 rec->info_objectid = key.objectid;
6829         }
6830         rec->info_level = level;
6831
6832         if (btrfs_is_leaf(buf))
6833                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6834         else
6835                 status = btrfs_check_node(root, &rec->parent_key, buf);
6836
6837         if (status != BTRFS_TREE_BLOCK_CLEAN) {
6838                 if (repair)
6839                         status = try_to_fix_bad_block(root, buf, status);
6840                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6841                         ret = -EIO;
6842                         fprintf(stderr, "bad block %llu\n",
6843                                 (unsigned long long)buf->start);
6844                 } else {
6845                         /*
6846                          * Signal to callers we need to start the scan over
6847                          * again since we'll have cowed blocks.
6848                          */
6849                         ret = -EAGAIN;
6850                 }
6851         } else {
6852                 rec->content_checked = 1;
6853                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6854                         rec->owner_ref_checked = 1;
6855                 else {
6856                         ret = check_owner_ref(root, rec, buf);
6857                         if (!ret)
6858                                 rec->owner_ref_checked = 1;
6859                 }
6860         }
6861         if (!ret)
6862                 maybe_free_extent_rec(extent_cache, rec);
6863         return ret;
6864 }
6865
/*
 * Disabled (compiled out) lookup of a tree backref by parent or by root.
 * It walks rec->backrefs as a linked list, whereas the live code in this
 * file iterates rec->backref_tree; it would need porting before it could be
 * re-enabled.
 */
#if 0
static struct tree_backref *find_tree_backref(struct extent_record *rec,
                                                u64 parent, u64 root)
{
        struct list_head *cur = rec->backrefs.next;
        struct extent_backref *node;
        struct tree_backref *back;

        while(cur != &rec->backrefs) {
                node = to_extent_backref(cur);
                cur = cur->next;
                if (node->is_data)
                        continue;
                back = to_tree_backref(node);
                /* parent > 0 selects full backrefs, otherwise match by root */
                if (parent > 0) {
                        if (!node->full_backref)
                                continue;
                        if (parent == back->parent)
                                return back;
                } else {
                        if (node->full_backref)
                                continue;
                        if (back->root == root)
                                return back;
                }
        }
        return NULL;
}
#endif
6895
6896 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6897                                                 u64 parent, u64 root)
6898 {
6899         struct tree_backref *ref = malloc(sizeof(*ref));
6900
6901         if (!ref)
6902                 return NULL;
6903         memset(&ref->node, 0, sizeof(ref->node));
6904         if (parent > 0) {
6905                 ref->parent = parent;
6906                 ref->node.full_backref = 1;
6907         } else {
6908                 ref->root = root;
6909                 ref->node.full_backref = 0;
6910         }
6911
6912         return ref;
6913 }
6914
/*
 * Disabled (compiled out) lookup of a data backref, either by parent or by
 * the (root, owner, offset) triple.  It walks rec->backrefs as a linked
 * list, whereas the live code in this file iterates rec->backref_tree; it
 * would need porting before it could be re-enabled.
 */
#if 0
static struct data_backref *find_data_backref(struct extent_record *rec,
                                                u64 parent, u64 root,
                                                u64 owner, u64 offset,
                                                int found_ref,
                                                u64 disk_bytenr, u64 bytes)
{
        struct list_head *cur = rec->backrefs.next;
        struct extent_backref *node;
        struct data_backref *back;

        while(cur != &rec->backrefs) {
                node = to_extent_backref(cur);
                cur = cur->next;
                if (!node->is_data)
                        continue;
                back = to_data_backref(node);
                /* parent > 0 selects full backrefs, otherwise match by root/owner/offset */
                if (parent > 0) {
                        if (!node->full_backref)
                                continue;
                        if (parent == back->parent)
                                return back;
                } else {
                        if (node->full_backref)
                                continue;
                        if (back->root == root && back->owner == owner &&
                            back->offset == offset) {
                                /*
                                 * When both sides were already found, also
                                 * require matching bytes and disk_bytenr.
                                 */
                                if (found_ref && node->found_ref &&
                                    (back->bytes != bytes ||
                                    back->disk_bytenr != disk_bytenr))
                                        continue;
                                return back;
                        }
                }
        }
        return NULL;
}
#endif
6953
6954 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6955                                                 u64 parent, u64 root,
6956                                                 u64 owner, u64 offset,
6957                                                 u64 max_size)
6958 {
6959         struct data_backref *ref = malloc(sizeof(*ref));
6960
6961         if (!ref)
6962                 return NULL;
6963         memset(&ref->node, 0, sizeof(ref->node));
6964         ref->node.is_data = 1;
6965
6966         if (parent > 0) {
6967                 ref->parent = parent;
6968                 ref->owner = 0;
6969                 ref->offset = 0;
6970                 ref->node.full_backref = 1;
6971         } else {
6972                 ref->root = root;
6973                 ref->owner = owner;
6974                 ref->offset = offset;
6975                 ref->node.full_backref = 0;
6976         }
6977         ref->bytes = max_size;
6978         ref->found_ref = 0;
6979         ref->num_refs = 0;
6980         if (max_size > rec->max_size)
6981                 rec->max_size = max_size;
6982         return ref;
6983 }
6984
6985 /* Check if the type of extent matches with its chunk */
6986 static void check_extent_type(struct extent_record *rec)
6987 {
6988         struct btrfs_block_group_cache *bg_cache;
6989
6990         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6991         if (!bg_cache)
6992                 return;
6993
6994         /* data extent, check chunk directly*/
6995         if (!rec->metadata) {
6996                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6997                         rec->wrong_chunk_type = 1;
6998                 return;
6999         }
7000
7001         /* metadata extent, check the obvious case first */
7002         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7003                                  BTRFS_BLOCK_GROUP_METADATA))) {
7004                 rec->wrong_chunk_type = 1;
7005                 return;
7006         }
7007
7008         /*
7009          * Check SYSTEM extent, as it's also marked as metadata, we can only
7010          * make sure it's a SYSTEM extent by its backref
7011          */
7012         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7013                 struct extent_backref *node;
7014                 struct tree_backref *tback;
7015                 u64 bg_type;
7016
7017                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7018                 if (node->is_data) {
7019                         /* tree block shouldn't have data backref */
7020                         rec->wrong_chunk_type = 1;
7021                         return;
7022                 }
7023                 tback = container_of(node, struct tree_backref, node);
7024
7025                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7026                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7027                 else
7028                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
7029                 if (!(bg_cache->flags & bg_type))
7030                         rec->wrong_chunk_type = 1;
7031         }
7032 }
7033
7034 /*
7035  * Allocate a new extent record, fill default values from @tmpl and insert int
7036  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7037  * the cache, otherwise it fails.
7038  */
7039 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7040                 struct extent_record *tmpl)
7041 {
7042         struct extent_record *rec;
7043         int ret = 0;
7044
7045         BUG_ON(tmpl->max_size == 0);
7046         rec = malloc(sizeof(*rec));
7047         if (!rec)
7048                 return -ENOMEM;
7049         rec->start = tmpl->start;
7050         rec->max_size = tmpl->max_size;
7051         rec->nr = max(tmpl->nr, tmpl->max_size);
7052         rec->found_rec = tmpl->found_rec;
7053         rec->content_checked = tmpl->content_checked;
7054         rec->owner_ref_checked = tmpl->owner_ref_checked;
7055         rec->num_duplicates = 0;
7056         rec->metadata = tmpl->metadata;
7057         rec->flag_block_full_backref = FLAG_UNSET;
7058         rec->bad_full_backref = 0;
7059         rec->crossing_stripes = 0;
7060         rec->wrong_chunk_type = 0;
7061         rec->is_root = tmpl->is_root;
7062         rec->refs = tmpl->refs;
7063         rec->extent_item_refs = tmpl->extent_item_refs;
7064         rec->parent_generation = tmpl->parent_generation;
7065         INIT_LIST_HEAD(&rec->backrefs);
7066         INIT_LIST_HEAD(&rec->dups);
7067         INIT_LIST_HEAD(&rec->list);
7068         rec->backref_tree = RB_ROOT;
7069         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7070         rec->cache.start = tmpl->start;
7071         rec->cache.size = tmpl->nr;
7072         ret = insert_cache_extent(extent_cache, &rec->cache);
7073         if (ret) {
7074                 free(rec);
7075                 return ret;
7076         }
7077         bytes_used += rec->nr;
7078
7079         if (tmpl->metadata)
7080                 rec->crossing_stripes = check_crossing_stripes(global_info,
7081                                 rec->start, global_info->nodesize);
7082         check_extent_type(rec);
7083         return ret;
7084 }
7085
7086 /*
7087  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7088  * some are hints:
7089  * - refs              - if found, increase refs
7090  * - is_root           - if found, set
7091  * - content_checked   - if found, set
7092  * - owner_ref_checked - if found, set
7093  *
7094  * If not found, create a new one, initialize and insert.
7095  */
7096 static int add_extent_rec(struct cache_tree *extent_cache,
7097                 struct extent_record *tmpl)
7098 {
7099         struct extent_record *rec;
7100         struct cache_extent *cache;
7101         int ret = 0;
7102         int dup = 0;
7103
7104         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7105         if (cache) {
7106                 rec = container_of(cache, struct extent_record, cache);
7107                 if (tmpl->refs)
7108                         rec->refs++;
7109                 if (rec->nr == 1)
7110                         rec->nr = max(tmpl->nr, tmpl->max_size);
7111
7112                 /*
7113                  * We need to make sure to reset nr to whatever the extent
7114                  * record says was the real size, this way we can compare it to
7115                  * the backrefs.
7116                  */
7117                 if (tmpl->found_rec) {
7118                         if (tmpl->start != rec->start || rec->found_rec) {
7119                                 struct extent_record *tmp;
7120
7121                                 dup = 1;
7122                                 if (list_empty(&rec->list))
7123                                         list_add_tail(&rec->list,
7124                                                       &duplicate_extents);
7125
7126                                 /*
7127                                  * We have to do this song and dance in case we
7128                                  * find an extent record that falls inside of
7129                                  * our current extent record but does not have
7130                                  * the same objectid.
7131                                  */
7132                                 tmp = malloc(sizeof(*tmp));
7133                                 if (!tmp)
7134                                         return -ENOMEM;
7135                                 tmp->start = tmpl->start;
7136                                 tmp->max_size = tmpl->max_size;
7137                                 tmp->nr = tmpl->nr;
7138                                 tmp->found_rec = 1;
7139                                 tmp->metadata = tmpl->metadata;
7140                                 tmp->extent_item_refs = tmpl->extent_item_refs;
7141                                 INIT_LIST_HEAD(&tmp->list);
7142                                 list_add_tail(&tmp->list, &rec->dups);
7143                                 rec->num_duplicates++;
7144                         } else {
7145                                 rec->nr = tmpl->nr;
7146                                 rec->found_rec = 1;
7147                         }
7148                 }
7149
7150                 if (tmpl->extent_item_refs && !dup) {
7151                         if (rec->extent_item_refs) {
7152                                 fprintf(stderr, "block %llu rec "
7153                                         "extent_item_refs %llu, passed %llu\n",
7154                                         (unsigned long long)tmpl->start,
7155                                         (unsigned long long)
7156                                                         rec->extent_item_refs,
7157                                         (unsigned long long)tmpl->extent_item_refs);
7158                         }
7159                         rec->extent_item_refs = tmpl->extent_item_refs;
7160                 }
7161                 if (tmpl->is_root)
7162                         rec->is_root = 1;
7163                 if (tmpl->content_checked)
7164                         rec->content_checked = 1;
7165                 if (tmpl->owner_ref_checked)
7166                         rec->owner_ref_checked = 1;
7167                 memcpy(&rec->parent_key, &tmpl->parent_key,
7168                                 sizeof(tmpl->parent_key));
7169                 if (tmpl->parent_generation)
7170                         rec->parent_generation = tmpl->parent_generation;
7171                 if (rec->max_size < tmpl->max_size)
7172                         rec->max_size = tmpl->max_size;
7173
7174                 /*
7175                  * A metadata extent can't cross stripe_len boundary, otherwise
7176                  * kernel scrub won't be able to handle it.
7177                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7178                  * it.
7179                  */
7180                 if (tmpl->metadata)
7181                         rec->crossing_stripes = check_crossing_stripes(
7182                                         global_info, rec->start,
7183                                         global_info->nodesize);
7184                 check_extent_type(rec);
7185                 maybe_free_extent_rec(extent_cache, rec);
7186                 return ret;
7187         }
7188
7189         ret = add_extent_rec_nolookup(extent_cache, tmpl);
7190
7191         return ret;
7192 }
7193
7194 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7195                             u64 parent, u64 root, int found_ref)
7196 {
7197         struct extent_record *rec;
7198         struct tree_backref *back;
7199         struct cache_extent *cache;
7200         int ret;
7201         bool insert = false;
7202
7203         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7204         if (!cache) {
7205                 struct extent_record tmpl;
7206
7207                 memset(&tmpl, 0, sizeof(tmpl));
7208                 tmpl.start = bytenr;
7209                 tmpl.nr = 1;
7210                 tmpl.metadata = 1;
7211                 tmpl.max_size = 1;
7212
7213                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7214                 if (ret)
7215                         return ret;
7216
7217                 /* really a bug in cache_extent implement now */
7218                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7219                 if (!cache)
7220                         return -ENOENT;
7221         }
7222
7223         rec = container_of(cache, struct extent_record, cache);
7224         if (rec->start != bytenr) {
7225                 /*
7226                  * Several cause, from unaligned bytenr to over lapping extents
7227                  */
7228                 return -EEXIST;
7229         }
7230
7231         back = find_tree_backref(rec, parent, root);
7232         if (!back) {
7233                 back = alloc_tree_backref(rec, parent, root);
7234                 if (!back)
7235                         return -ENOMEM;
7236                 insert = true;
7237         }
7238
7239         if (found_ref) {
7240                 if (back->node.found_ref) {
7241                         fprintf(stderr, "Extent back ref already exists "
7242                                 "for %llu parent %llu root %llu \n",
7243                                 (unsigned long long)bytenr,
7244                                 (unsigned long long)parent,
7245                                 (unsigned long long)root);
7246                 }
7247                 back->node.found_ref = 1;
7248         } else {
7249                 if (back->node.found_extent_tree) {
7250                         fprintf(stderr, "Extent back ref already exists "
7251                                 "for %llu parent %llu root %llu \n",
7252                                 (unsigned long long)bytenr,
7253                                 (unsigned long long)parent,
7254                                 (unsigned long long)root);
7255                 }
7256                 back->node.found_extent_tree = 1;
7257         }
7258         if (insert)
7259                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7260                         compare_extent_backref));
7261         check_extent_type(rec);
7262         maybe_free_extent_rec(extent_cache, rec);
7263         return 0;
7264 }
7265
7266 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7267                             u64 parent, u64 root, u64 owner, u64 offset,
7268                             u32 num_refs, int found_ref, u64 max_size)
7269 {
7270         struct extent_record *rec;
7271         struct data_backref *back;
7272         struct cache_extent *cache;
7273         int ret;
7274         bool insert = false;
7275
7276         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7277         if (!cache) {
7278                 struct extent_record tmpl;
7279
7280                 memset(&tmpl, 0, sizeof(tmpl));
7281                 tmpl.start = bytenr;
7282                 tmpl.nr = 1;
7283                 tmpl.max_size = max_size;
7284
7285                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7286                 if (ret)
7287                         return ret;
7288
7289                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7290                 if (!cache)
7291                         abort();
7292         }
7293
7294         rec = container_of(cache, struct extent_record, cache);
7295         if (rec->max_size < max_size)
7296                 rec->max_size = max_size;
7297
7298         /*
7299          * If found_ref is set then max_size is the real size and must match the
7300          * existing refs.  So if we have already found a ref then we need to
7301          * make sure that this ref matches the existing one, otherwise we need
7302          * to add a new backref so we can notice that the backrefs don't match
7303          * and we need to figure out who is telling the truth.  This is to
7304          * account for that awful fsync bug I introduced where we'd end up with
7305          * a btrfs_file_extent_item that would have its length include multiple
7306          * prealloc extents or point inside of a prealloc extent.
7307          */
7308         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7309                                  bytenr, max_size);
7310         if (!back) {
7311                 back = alloc_data_backref(rec, parent, root, owner, offset,
7312                                           max_size);
7313                 BUG_ON(!back);
7314                 insert = true;
7315         }
7316
7317         if (found_ref) {
7318                 BUG_ON(num_refs != 1);
7319                 if (back->node.found_ref)
7320                         BUG_ON(back->bytes != max_size);
7321                 back->node.found_ref = 1;
7322                 back->found_ref += 1;
7323                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7324                         back->bytes = max_size;
7325                         back->disk_bytenr = bytenr;
7326
7327                         /* Need to reinsert if not already in the tree */
7328                         if (!insert) {
7329                                 rb_erase(&back->node.node, &rec->backref_tree);
7330                                 insert = true;
7331                         }
7332                 }
7333                 rec->refs += 1;
7334                 rec->content_checked = 1;
7335                 rec->owner_ref_checked = 1;
7336         } else {
7337                 if (back->node.found_extent_tree) {
7338                         fprintf(stderr, "Extent back ref already exists "
7339                                 "for %llu parent %llu root %llu "
7340                                 "owner %llu offset %llu num_refs %lu\n",
7341                                 (unsigned long long)bytenr,
7342                                 (unsigned long long)parent,
7343                                 (unsigned long long)root,
7344                                 (unsigned long long)owner,
7345                                 (unsigned long long)offset,
7346                                 (unsigned long)num_refs);
7347                 }
7348                 back->num_refs = num_refs;
7349                 back->node.found_extent_tree = 1;
7350         }
7351         if (insert)
7352                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7353                         compare_extent_backref));
7354
7355         maybe_free_extent_rec(extent_cache, rec);
7356         return 0;
7357 }
7358
7359 static int add_pending(struct cache_tree *pending,
7360                        struct cache_tree *seen, u64 bytenr, u32 size)
7361 {
7362         int ret;
7363         ret = add_cache_extent(seen, bytenr, size);
7364         if (ret)
7365                 return ret;
7366         add_cache_extent(pending, bytenr, size);
7367         return 0;
7368 }
7369
7370 static int pick_next_pending(struct cache_tree *pending,
7371                         struct cache_tree *reada,
7372                         struct cache_tree *nodes,
7373                         u64 last, struct block_info *bits, int bits_nr,
7374                         int *reada_bits)
7375 {
7376         unsigned long node_start = last;
7377         struct cache_extent *cache;
7378         int ret;
7379
7380         cache = search_cache_extent(reada, 0);
7381         if (cache) {
7382                 bits[0].start = cache->start;
7383                 bits[0].size = cache->size;
7384                 *reada_bits = 1;
7385                 return 1;
7386         }
7387         *reada_bits = 0;
7388         if (node_start > 32768)
7389                 node_start -= 32768;
7390
7391         cache = search_cache_extent(nodes, node_start);
7392         if (!cache)
7393                 cache = search_cache_extent(nodes, 0);
7394
7395         if (!cache) {
7396                  cache = search_cache_extent(pending, 0);
7397                  if (!cache)
7398                          return 0;
7399                  ret = 0;
7400                  do {
7401                          bits[ret].start = cache->start;
7402                          bits[ret].size = cache->size;
7403                          cache = next_cache_extent(cache);
7404                          ret++;
7405                  } while (cache && ret < bits_nr);
7406                  return ret;
7407         }
7408
7409         ret = 0;
7410         do {
7411                 bits[ret].start = cache->start;
7412                 bits[ret].size = cache->size;
7413                 cache = next_cache_extent(cache);
7414                 ret++;
7415         } while (cache && ret < bits_nr);
7416
7417         if (bits_nr - ret > 8) {
7418                 u64 lookup = bits[0].start + bits[0].size;
7419                 struct cache_extent *next;
7420                 next = search_cache_extent(pending, lookup);
7421                 while(next) {
7422                         if (next->start - lookup > 32768)
7423                                 break;
7424                         bits[ret].start = next->start;
7425                         bits[ret].size = next->size;
7426                         lookup = next->start + next->size;
7427                         ret++;
7428                         if (ret == bits_nr)
7429                                 break;
7430                         next = next_cache_extent(next);
7431                         if (!next)
7432                                 break;
7433                 }
7434         }
7435         return ret;
7436 }
7437
7438 static void free_chunk_record(struct cache_extent *cache)
7439 {
7440         struct chunk_record *rec;
7441
7442         rec = container_of(cache, struct chunk_record, cache);
7443         list_del_init(&rec->list);
7444         list_del_init(&rec->dextents);
7445         free(rec);
7446 }
7447
7448 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7449 {
7450         cache_tree_free_extents(chunk_cache, free_chunk_record);
7451 }
7452
7453 static void free_device_record(struct rb_node *node)
7454 {
7455         struct device_record *rec;
7456
7457         rec = container_of(node, struct device_record, node);
7458         free(rec);
7459 }
7460
7461 FREE_RB_BASED_TREE(device_cache, free_device_record);
7462
7463 int insert_block_group_record(struct block_group_tree *tree,
7464                               struct block_group_record *bg_rec)
7465 {
7466         int ret;
7467
7468         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7469         if (ret)
7470                 return ret;
7471
7472         list_add_tail(&bg_rec->list, &tree->block_groups);
7473         return 0;
7474 }
7475
7476 static void free_block_group_record(struct cache_extent *cache)
7477 {
7478         struct block_group_record *rec;
7479
7480         rec = container_of(cache, struct block_group_record, cache);
7481         list_del_init(&rec->list);
7482         free(rec);
7483 }
7484
7485 void free_block_group_tree(struct block_group_tree *tree)
7486 {
7487         cache_tree_free_extents(&tree->tree, free_block_group_record);
7488 }
7489
7490 int insert_device_extent_record(struct device_extent_tree *tree,
7491                                 struct device_extent_record *de_rec)
7492 {
7493         int ret;
7494
7495         /*
7496          * Device extent is a bit different from the other extents, because
7497          * the extents which belong to the different devices may have the
7498          * same start and size, so we need use the special extent cache
7499          * search/insert functions.
7500          */
7501         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7502         if (ret)
7503                 return ret;
7504
7505         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7506         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
7507         return 0;
7508 }
7509
7510 static void free_device_extent_record(struct cache_extent *cache)
7511 {
7512         struct device_extent_record *rec;
7513
7514         rec = container_of(cache, struct device_extent_record, cache);
7515         if (!list_empty(&rec->chunk_list))
7516                 list_del_init(&rec->chunk_list);
7517         if (!list_empty(&rec->device_list))
7518                 list_del_init(&rec->device_list);
7519         free(rec);
7520 }
7521
7522 void free_device_extent_tree(struct device_extent_tree *tree)
7523 {
7524         cache_tree_free_extents(&tree->tree, free_device_extent_record);
7525 }
7526
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref.
 *
 * The item key supplies the extent bytenr (objectid) and the parent
 * (offset).  A ref objectid below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref; anything else as a data backref carrying the item's ref
 * count.  In both cases the backref is registered as not found by a real
 * reference (found_ref == 0).
 *
 * Returns whatever add_tree_backref() or add_data_backref() returns.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, key.objectid,
				key.offset, 0, 0);

	return add_data_backref(extent_cache, key.objectid, key.offset,
			0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
7547
7548 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7549                                             struct btrfs_key *key,
7550                                             int slot)
7551 {
7552         struct btrfs_chunk *ptr;
7553         struct chunk_record *rec;
7554         int num_stripes, i;
7555
7556         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7557         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7558
7559         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7560         if (!rec) {
7561                 fprintf(stderr, "memory allocation failed\n");
7562                 exit(-1);
7563         }
7564
7565         INIT_LIST_HEAD(&rec->list);
7566         INIT_LIST_HEAD(&rec->dextents);
7567         rec->bg_rec = NULL;
7568
7569         rec->cache.start = key->offset;
7570         rec->cache.size = btrfs_chunk_length(leaf, ptr);
7571
7572         rec->generation = btrfs_header_generation(leaf);
7573
7574         rec->objectid = key->objectid;
7575         rec->type = key->type;
7576         rec->offset = key->offset;
7577
7578         rec->length = rec->cache.size;
7579         rec->owner = btrfs_chunk_owner(leaf, ptr);
7580         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7581         rec->type_flags = btrfs_chunk_type(leaf, ptr);
7582         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7583         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7584         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7585         rec->num_stripes = num_stripes;
7586         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
7587
7588         for (i = 0; i < rec->num_stripes; ++i) {
7589                 rec->stripes[i].devid =
7590                         btrfs_stripe_devid_nr(leaf, ptr, i);
7591                 rec->stripes[i].offset =
7592                         btrfs_stripe_offset_nr(leaf, ptr, i);
7593                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7594                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
7595                                 BTRFS_UUID_SIZE);
7596         }
7597
7598         return rec;
7599 }
7600
7601 static int process_chunk_item(struct cache_tree *chunk_cache,
7602                               struct btrfs_key *key, struct extent_buffer *eb,
7603                               int slot)
7604 {
7605         struct chunk_record *rec;
7606         struct btrfs_chunk *chunk;
7607         int ret = 0;
7608
7609         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7610         /*
7611          * Do extra check for this chunk item,
7612          *
7613          * It's still possible one can craft a leaf with CHUNK_ITEM, with
7614          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7615          * and owner<->key_type check.
7616          */
7617         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7618                                       key->offset);
7619         if (ret < 0) {
7620                 error("chunk(%llu, %llu) is not valid, ignore it",
7621                       key->offset, btrfs_chunk_length(eb, chunk));
7622                 return 0;
7623         }
7624         rec = btrfs_new_chunk_record(eb, key, slot);
7625         ret = insert_cache_extent(chunk_cache, &rec->cache);
7626         if (ret) {
7627                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7628                         rec->offset, rec->length);
7629                 free(rec);
7630         }
7631
7632         return ret;
7633 }
7634
7635 static int process_device_item(struct rb_root *dev_cache,
7636                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7637 {
7638         struct btrfs_dev_item *ptr;
7639         struct device_record *rec;
7640         int ret = 0;
7641
7642         ptr = btrfs_item_ptr(eb,
7643                 slot, struct btrfs_dev_item);
7644
7645         rec = malloc(sizeof(*rec));
7646         if (!rec) {
7647                 fprintf(stderr, "memory allocation failed\n");
7648                 return -ENOMEM;
7649         }
7650
7651         rec->devid = key->offset;
7652         rec->generation = btrfs_header_generation(eb);
7653
7654         rec->objectid = key->objectid;
7655         rec->type = key->type;
7656         rec->offset = key->offset;
7657
7658         rec->devid = btrfs_device_id(eb, ptr);
7659         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7660         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7661
7662         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7663         if (ret) {
7664                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
7665                 free(rec);
7666         }
7667
7668         return ret;
7669 }
7670
7671 struct block_group_record *
7672 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7673                              int slot)
7674 {
7675         struct btrfs_block_group_item *ptr;
7676         struct block_group_record *rec;
7677
7678         rec = calloc(1, sizeof(*rec));
7679         if (!rec) {
7680                 fprintf(stderr, "memory allocation failed\n");
7681                 exit(-1);
7682         }
7683
7684         rec->cache.start = key->objectid;
7685         rec->cache.size = key->offset;
7686
7687         rec->generation = btrfs_header_generation(leaf);
7688
7689         rec->objectid = key->objectid;
7690         rec->type = key->type;
7691         rec->offset = key->offset;
7692
7693         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7694         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7695
7696         INIT_LIST_HEAD(&rec->list);
7697
7698         return rec;
7699 }
7700
7701 static int process_block_group_item(struct block_group_tree *block_group_cache,
7702                                     struct btrfs_key *key,
7703                                     struct extent_buffer *eb, int slot)
7704 {
7705         struct block_group_record *rec;
7706         int ret = 0;
7707
7708         rec = btrfs_new_block_group_record(eb, key, slot);
7709         ret = insert_block_group_record(block_group_cache, rec);
7710         if (ret) {
7711                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7712                         rec->objectid, rec->offset);
7713                 free(rec);
7714         }
7715
7716         return ret;
7717 }
7718
7719 struct device_extent_record *
7720 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7721                                struct btrfs_key *key, int slot)
7722 {
7723         struct device_extent_record *rec;
7724         struct btrfs_dev_extent *ptr;
7725
7726         rec = calloc(1, sizeof(*rec));
7727         if (!rec) {
7728                 fprintf(stderr, "memory allocation failed\n");
7729                 exit(-1);
7730         }
7731
7732         rec->cache.objectid = key->objectid;
7733         rec->cache.start = key->offset;
7734
7735         rec->generation = btrfs_header_generation(leaf);
7736
7737         rec->objectid = key->objectid;
7738         rec->type = key->type;
7739         rec->offset = key->offset;
7740
7741         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7742         rec->chunk_objecteid =
7743                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7744         rec->chunk_offset =
7745                 btrfs_dev_extent_chunk_offset(leaf, ptr);
7746         rec->length = btrfs_dev_extent_length(leaf, ptr);
7747         rec->cache.size = rec->length;
7748
7749         INIT_LIST_HEAD(&rec->chunk_list);
7750         INIT_LIST_HEAD(&rec->device_list);
7751
7752         return rec;
7753 }
7754
7755 static int
7756 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7757                            struct btrfs_key *key, struct extent_buffer *eb,
7758                            int slot)
7759 {
7760         struct device_extent_record *rec;
7761         int ret;
7762
7763         rec = btrfs_new_device_extent_record(eb, key, slot);
7764         ret = insert_device_extent_record(dev_extent_cache, rec);
7765         if (ret) {
7766                 fprintf(stderr,
7767                         "Device extent[%llu, %llu, %llu] existed.\n",
7768                         rec->objectid, rec->offset, rec->length);
7769                 free(rec);
7770         }
7771
7772         return ret;
7773 }
7774
7775 static int process_extent_item(struct btrfs_root *root,
7776                                struct cache_tree *extent_cache,
7777                                struct extent_buffer *eb, int slot)
7778 {
7779         struct btrfs_extent_item *ei;
7780         struct btrfs_extent_inline_ref *iref;
7781         struct btrfs_extent_data_ref *dref;
7782         struct btrfs_shared_data_ref *sref;
7783         struct btrfs_key key;
7784         struct extent_record tmpl;
7785         unsigned long end;
7786         unsigned long ptr;
7787         int ret;
7788         int type;
7789         u32 item_size = btrfs_item_size_nr(eb, slot);
7790         u64 refs = 0;
7791         u64 offset;
7792         u64 num_bytes;
7793         int metadata = 0;
7794
7795         btrfs_item_key_to_cpu(eb, &key, slot);
7796
7797         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7798                 metadata = 1;
7799                 num_bytes = root->fs_info->nodesize;
7800         } else {
7801                 num_bytes = key.offset;
7802         }
7803
7804         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7805                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7806                       key.objectid, root->fs_info->sectorsize);
7807                 return -EIO;
7808         }
7809         if (item_size < sizeof(*ei)) {
7810 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7811                 struct btrfs_extent_item_v0 *ei0;
7812                 if (item_size != sizeof(*ei0)) {
7813                         error(
7814         "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
7815                                 key.objectid, key.type, key.offset,
7816                                 btrfs_header_bytenr(eb), slot);
7817                         BUG();
7818                 }
7819                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7820                 refs = btrfs_extent_refs_v0(eb, ei0);
7821 #else
7822                 BUG();
7823 #endif
7824                 memset(&tmpl, 0, sizeof(tmpl));
7825                 tmpl.start = key.objectid;
7826                 tmpl.nr = num_bytes;
7827                 tmpl.extent_item_refs = refs;
7828                 tmpl.metadata = metadata;
7829                 tmpl.found_rec = 1;
7830                 tmpl.max_size = num_bytes;
7831
7832                 return add_extent_rec(extent_cache, &tmpl);
7833         }
7834
7835         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7836         refs = btrfs_extent_refs(eb, ei);
7837         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7838                 metadata = 1;
7839         else
7840                 metadata = 0;
7841         if (metadata && num_bytes != root->fs_info->nodesize) {
7842                 error("ignore invalid metadata extent, length %llu does not equal to %u",
7843                       num_bytes, root->fs_info->nodesize);
7844                 return -EIO;
7845         }
7846         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7847                 error("ignore invalid data extent, length %llu is not aligned to %u",
7848                       num_bytes, root->fs_info->sectorsize);
7849                 return -EIO;
7850         }
7851
7852         memset(&tmpl, 0, sizeof(tmpl));
7853         tmpl.start = key.objectid;
7854         tmpl.nr = num_bytes;
7855         tmpl.extent_item_refs = refs;
7856         tmpl.metadata = metadata;
7857         tmpl.found_rec = 1;
7858         tmpl.max_size = num_bytes;
7859         add_extent_rec(extent_cache, &tmpl);
7860
7861         ptr = (unsigned long)(ei + 1);
7862         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7863             key.type == BTRFS_EXTENT_ITEM_KEY)
7864                 ptr += sizeof(struct btrfs_tree_block_info);
7865
7866         end = (unsigned long)ei + item_size;
7867         while (ptr < end) {
7868                 iref = (struct btrfs_extent_inline_ref *)ptr;
7869                 type = btrfs_extent_inline_ref_type(eb, iref);
7870                 offset = btrfs_extent_inline_ref_offset(eb, iref);
7871                 switch (type) {
7872                 case BTRFS_TREE_BLOCK_REF_KEY:
7873                         ret = add_tree_backref(extent_cache, key.objectid,
7874                                         0, offset, 0);
7875                         if (ret < 0)
7876                                 error(
7877                         "add_tree_backref failed (extent items tree block): %s",
7878                                       strerror(-ret));
7879                         break;
7880                 case BTRFS_SHARED_BLOCK_REF_KEY:
7881                         ret = add_tree_backref(extent_cache, key.objectid,
7882                                         offset, 0, 0);
7883                         if (ret < 0)
7884                                 error(
7885                         "add_tree_backref failed (extent items shared block): %s",
7886                                       strerror(-ret));
7887                         break;
7888                 case BTRFS_EXTENT_DATA_REF_KEY:
7889                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7890                         add_data_backref(extent_cache, key.objectid, 0,
7891                                         btrfs_extent_data_ref_root(eb, dref),
7892                                         btrfs_extent_data_ref_objectid(eb,
7893                                                                        dref),
7894                                         btrfs_extent_data_ref_offset(eb, dref),
7895                                         btrfs_extent_data_ref_count(eb, dref),
7896                                         0, num_bytes);
7897                         break;
7898                 case BTRFS_SHARED_DATA_REF_KEY:
7899                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
7900                         add_data_backref(extent_cache, key.objectid, offset,
7901                                         0, 0, 0,
7902                                         btrfs_shared_data_ref_count(eb, sref),
7903                                         0, num_bytes);
7904                         break;
7905                 default:
7906                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7907                                 key.objectid, key.type, num_bytes);
7908                         goto out;
7909                 }
7910                 ptr += btrfs_extent_inline_ref_size(type);
7911         }
7912         WARN_ON(ptr > end);
7913 out:
7914         return 0;
7915 }
7916
7917 static int check_cache_range(struct btrfs_root *root,
7918                              struct btrfs_block_group_cache *cache,
7919                              u64 offset, u64 bytes)
7920 {
7921         struct btrfs_free_space *entry;
7922         u64 *logical;
7923         u64 bytenr;
7924         int stripe_len;
7925         int i, nr, ret;
7926
7927         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7928                 bytenr = btrfs_sb_offset(i);
7929                 ret = btrfs_rmap_block(root->fs_info,
7930                                        cache->key.objectid, bytenr, 0,
7931                                        &logical, &nr, &stripe_len);
7932                 if (ret)
7933                         return ret;
7934
7935                 while (nr--) {
7936                         if (logical[nr] + stripe_len <= offset)
7937                                 continue;
7938                         if (offset + bytes <= logical[nr])
7939                                 continue;
7940                         if (logical[nr] == offset) {
7941                                 if (stripe_len >= bytes) {
7942                                         free(logical);
7943                                         return 0;
7944                                 }
7945                                 bytes -= stripe_len;
7946                                 offset += stripe_len;
7947                         } else if (logical[nr] < offset) {
7948                                 if (logical[nr] + stripe_len >=
7949                                     offset + bytes) {
7950                                         free(logical);
7951                                         return 0;
7952                                 }
7953                                 bytes = (offset + bytes) -
7954                                         (logical[nr] + stripe_len);
7955                                 offset = logical[nr] + stripe_len;
7956                         } else {
7957                                 /*
7958                                  * Could be tricky, the super may land in the
7959                                  * middle of the area we're checking.  First
7960                                  * check the easiest case, it's at the end.
7961                                  */
7962                                 if (logical[nr] + stripe_len >=
7963                                     bytes + offset) {
7964                                         bytes = logical[nr] - offset;
7965                                         continue;
7966                                 }
7967
7968                                 /* Check the left side */
7969                                 ret = check_cache_range(root, cache,
7970                                                         offset,
7971                                                         logical[nr] - offset);
7972                                 if (ret) {
7973                                         free(logical);
7974                                         return ret;
7975                                 }
7976
7977                                 /* Now we continue with the right side */
7978                                 bytes = (offset + bytes) -
7979                                         (logical[nr] + stripe_len);
7980                                 offset = logical[nr] + stripe_len;
7981                         }
7982                 }
7983
7984                 free(logical);
7985         }
7986
7987         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7988         if (!entry) {
7989                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7990                         offset, offset+bytes);
7991                 return -EINVAL;
7992         }
7993
7994         if (entry->offset != offset) {
7995                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7996                         entry->offset);
7997                 return -EINVAL;
7998         }
7999
8000         if (entry->bytes != bytes) {
8001                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8002                         bytes, entry->bytes, offset);
8003                 return -EINVAL;
8004         }
8005
8006         unlink_free_space(cache->free_space_ctl, entry);
8007         free(entry);
8008         return 0;
8009 }
8010
8011 static int verify_space_cache(struct btrfs_root *root,
8012                               struct btrfs_block_group_cache *cache)
8013 {
8014         struct btrfs_path path;
8015         struct extent_buffer *leaf;
8016         struct btrfs_key key;
8017         u64 last;
8018         int ret = 0;
8019
8020         root = root->fs_info->extent_root;
8021
8022         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8023
8024         btrfs_init_path(&path);
8025         key.objectid = last;
8026         key.offset = 0;
8027         key.type = BTRFS_EXTENT_ITEM_KEY;
8028         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8029         if (ret < 0)
8030                 goto out;
8031         ret = 0;
8032         while (1) {
8033                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8034                         ret = btrfs_next_leaf(root, &path);
8035                         if (ret < 0)
8036                                 goto out;
8037                         if (ret > 0) {
8038                                 ret = 0;
8039                                 break;
8040                         }
8041                 }
8042                 leaf = path.nodes[0];
8043                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8044                 if (key.objectid >= cache->key.offset + cache->key.objectid)
8045                         break;
8046                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8047                     key.type != BTRFS_METADATA_ITEM_KEY) {
8048                         path.slots[0]++;
8049                         continue;
8050                 }
8051
8052                 if (last == key.objectid) {
8053                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
8054                                 last = key.objectid + key.offset;
8055                         else
8056                                 last = key.objectid + root->fs_info->nodesize;
8057                         path.slots[0]++;
8058                         continue;
8059                 }
8060
8061                 ret = check_cache_range(root, cache, last,
8062                                         key.objectid - last);
8063                 if (ret)
8064                         break;
8065                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8066                         last = key.objectid + key.offset;
8067                 else
8068                         last = key.objectid + root->fs_info->nodesize;
8069                 path.slots[0]++;
8070         }
8071
8072         if (last < cache->key.objectid + cache->key.offset)
8073                 ret = check_cache_range(root, cache, last,
8074                                         cache->key.objectid +
8075                                         cache->key.offset - last);
8076
8077 out:
8078         btrfs_release_path(&path);
8079
8080         if (!ret &&
8081             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8082                 fprintf(stderr, "There are still entries left in the space "
8083                         "cache\n");
8084                 ret = -EINVAL;
8085         }
8086
8087         return ret;
8088 }
8089
8090 static int check_space_cache(struct btrfs_root *root)
8091 {
8092         struct btrfs_block_group_cache *cache;
8093         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8094         int ret;
8095         int error = 0;
8096
8097         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8098             btrfs_super_generation(root->fs_info->super_copy) !=
8099             btrfs_super_cache_generation(root->fs_info->super_copy)) {
8100                 printf("cache and super generation don't match, space cache "
8101                        "will be invalidated\n");
8102                 return 0;
8103         }
8104
8105         if (ctx.progress_enabled) {
8106                 ctx.tp = TASK_FREE_SPACE;
8107                 task_start(ctx.info);
8108         }
8109
8110         while (1) {
8111                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
8112                 if (!cache)
8113                         break;
8114
8115                 start = cache->key.objectid + cache->key.offset;
8116                 if (!cache->free_space_ctl) {
8117                         if (btrfs_init_free_space_ctl(cache,
8118                                                 root->fs_info->sectorsize)) {
8119                                 ret = -ENOMEM;
8120                                 break;
8121                         }
8122                 } else {
8123                         btrfs_remove_free_space_cache(cache);
8124                 }
8125
8126                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8127                         ret = exclude_super_stripes(root, cache);
8128                         if (ret) {
8129                                 fprintf(stderr, "could not exclude super stripes: %s\n",
8130                                         strerror(-ret));
8131                                 error++;
8132                                 continue;
8133                         }
8134                         ret = load_free_space_tree(root->fs_info, cache);
8135                         free_excluded_extents(root, cache);
8136                         if (ret < 0) {
8137                                 fprintf(stderr, "could not load free space tree: %s\n",
8138                                         strerror(-ret));
8139                                 error++;
8140                                 continue;
8141                         }
8142                         error += ret;
8143                 } else {
8144                         ret = load_free_space_cache(root->fs_info, cache);
8145                         if (!ret)
8146                                 continue;
8147                 }
8148
8149                 ret = verify_space_cache(root, cache);
8150                 if (ret) {
8151                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
8152                                 cache->key.objectid);
8153                         error++;
8154                 }
8155         }
8156
8157         task_stop(ctx.info);
8158
8159         return error ? -EINVAL : 0;
8160 }
8161
8162 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8163                         u64 num_bytes, unsigned long leaf_offset,
8164                         struct extent_buffer *eb) {
8165
8166         struct btrfs_fs_info *fs_info = root->fs_info;
8167         u64 offset = 0;
8168         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8169         char *data;
8170         unsigned long csum_offset;
8171         u32 csum;
8172         u32 csum_expected;
8173         u64 read_len;
8174         u64 data_checked = 0;
8175         u64 tmp;
8176         int ret = 0;
8177         int mirror;
8178         int num_copies;
8179
8180         if (num_bytes % fs_info->sectorsize)
8181                 return -EINVAL;
8182
8183         data = malloc(num_bytes);
8184         if (!data)
8185                 return -ENOMEM;
8186
8187         while (offset < num_bytes) {
8188                 mirror = 0;
8189 again:
8190                 read_len = num_bytes - offset;
8191                 /* read as much space once a time */
8192                 ret = read_extent_data(fs_info, data + offset,
8193                                 bytenr + offset, &read_len, mirror);
8194                 if (ret)
8195                         goto out;
8196                 data_checked = 0;
8197                 /* verify every 4k data's checksum */
8198                 while (data_checked < read_len) {
8199                         csum = ~(u32)0;
8200                         tmp = offset + data_checked;
8201
8202                         csum = btrfs_csum_data((char *)data + tmp,
8203                                                csum, fs_info->sectorsize);
8204                         btrfs_csum_final(csum, (u8 *)&csum);
8205
8206                         csum_offset = leaf_offset +
8207                                  tmp / fs_info->sectorsize * csum_size;
8208                         read_extent_buffer(eb, (char *)&csum_expected,
8209                                            csum_offset, csum_size);
8210                         /* try another mirror */
8211                         if (csum != csum_expected) {
8212                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8213                                                 mirror, bytenr + tmp,
8214                                                 csum, csum_expected);
8215                                 num_copies = btrfs_num_copies(root->fs_info,
8216                                                 bytenr, num_bytes);
8217                                 if (mirror < num_copies - 1) {
8218                                         mirror += 1;
8219                                         goto again;
8220                                 }
8221                         }
8222                         data_checked += fs_info->sectorsize;
8223                 }
8224                 offset += read_len;
8225         }
8226 out:
8227         free(data);
8228         return ret;
8229 }
8230
8231 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8232                                u64 num_bytes)
8233 {
8234         struct btrfs_path path;
8235         struct extent_buffer *leaf;
8236         struct btrfs_key key;
8237         int ret;
8238
8239         btrfs_init_path(&path);
8240         key.objectid = bytenr;
8241         key.type = BTRFS_EXTENT_ITEM_KEY;
8242         key.offset = (u64)-1;
8243
8244 again:
8245         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8246                                 0, 0);
8247         if (ret < 0) {
8248                 fprintf(stderr, "Error looking up extent record %d\n", ret);
8249                 btrfs_release_path(&path);
8250                 return ret;
8251         } else if (ret) {
8252                 if (path.slots[0] > 0) {
8253                         path.slots[0]--;
8254                 } else {
8255                         ret = btrfs_prev_leaf(root, &path);
8256                         if (ret < 0) {
8257                                 goto out;
8258                         } else if (ret > 0) {
8259                                 ret = 0;
8260                                 goto out;
8261                         }
8262                 }
8263         }
8264
8265         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8266
8267         /*
8268          * Block group items come before extent items if they have the same
8269          * bytenr, so walk back one more just in case.  Dear future traveller,
8270          * first congrats on mastering time travel.  Now if it's not too much
8271          * trouble could you go back to 2006 and tell Chris to make the
8272          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8273          * EXTENT_ITEM_KEY please?
8274          */
8275         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8276                 if (path.slots[0] > 0) {
8277                         path.slots[0]--;
8278                 } else {
8279                         ret = btrfs_prev_leaf(root, &path);
8280                         if (ret < 0) {
8281                                 goto out;
8282                         } else if (ret > 0) {
8283                                 ret = 0;
8284                                 goto out;
8285                         }
8286                 }
8287                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8288         }
8289
8290         while (num_bytes) {
8291                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8292                         ret = btrfs_next_leaf(root, &path);
8293                         if (ret < 0) {
8294                                 fprintf(stderr, "Error going to next leaf "
8295                                         "%d\n", ret);
8296                                 btrfs_release_path(&path);
8297                                 return ret;
8298                         } else if (ret) {
8299                                 break;
8300                         }
8301                 }
8302                 leaf = path.nodes[0];
8303                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8304                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8305                         path.slots[0]++;
8306                         continue;
8307                 }
8308                 if (key.objectid + key.offset < bytenr) {
8309                         path.slots[0]++;
8310                         continue;
8311                 }
8312                 if (key.objectid > bytenr + num_bytes)
8313                         break;
8314
8315                 if (key.objectid == bytenr) {
8316                         if (key.offset >= num_bytes) {
8317                                 num_bytes = 0;
8318                                 break;
8319                         }
8320                         num_bytes -= key.offset;
8321                         bytenr += key.offset;
8322                 } else if (key.objectid < bytenr) {
8323                         if (key.objectid + key.offset >= bytenr + num_bytes) {
8324                                 num_bytes = 0;
8325                                 break;
8326                         }
8327                         num_bytes = (bytenr + num_bytes) -
8328                                 (key.objectid + key.offset);
8329                         bytenr = key.objectid + key.offset;
8330                 } else {
8331                         if (key.objectid + key.offset < bytenr + num_bytes) {
8332                                 u64 new_start = key.objectid + key.offset;
8333                                 u64 new_bytes = bytenr + num_bytes - new_start;
8334
8335                                 /*
8336                                  * Weird case, the extent is in the middle of
8337                                  * our range, we'll have to search one side
8338                                  * and then the other.  Not sure if this happens
8339                                  * in real life, but no harm in coding it up
8340                                  * anyway just in case.
8341                                  */
8342                                 btrfs_release_path(&path);
8343                                 ret = check_extent_exists(root, new_start,
8344                                                           new_bytes);
8345                                 if (ret) {
8346                                         fprintf(stderr, "Right section didn't "
8347                                                 "have a record\n");
8348                                         break;
8349                                 }
8350                                 num_bytes = key.objectid - bytenr;
8351                                 goto again;
8352                         }
8353                         num_bytes = key.objectid - bytenr;
8354                 }
8355                 path.slots[0]++;
8356         }
8357         ret = 0;
8358
8359 out:
8360         if (num_bytes && !ret) {
8361                 fprintf(stderr, "There are no extents for csum range "
8362                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8363                 ret = 1;
8364         }
8365
8366         btrfs_release_path(&path);
8367         return ret;
8368 }
8369
8370 static int check_csums(struct btrfs_root *root)
8371 {
8372         struct btrfs_path path;
8373         struct extent_buffer *leaf;
8374         struct btrfs_key key;
8375         u64 offset = 0, num_bytes = 0;
8376         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8377         int errors = 0;
8378         int ret;
8379         u64 data_len;
8380         unsigned long leaf_offset;
8381
8382         root = root->fs_info->csum_root;
8383         if (!extent_buffer_uptodate(root->node)) {
8384                 fprintf(stderr, "No valid csum tree found\n");
8385                 return -ENOENT;
8386         }
8387
8388         btrfs_init_path(&path);
8389         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8390         key.type = BTRFS_EXTENT_CSUM_KEY;
8391         key.offset = 0;
8392         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8393         if (ret < 0) {
8394                 fprintf(stderr, "Error searching csum tree %d\n", ret);
8395                 btrfs_release_path(&path);
8396                 return ret;
8397         }
8398
8399         if (ret > 0 && path.slots[0])
8400                 path.slots[0]--;
8401         ret = 0;
8402
8403         while (1) {
8404                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8405                         ret = btrfs_next_leaf(root, &path);
8406                         if (ret < 0) {
8407                                 fprintf(stderr, "Error going to next leaf "
8408                                         "%d\n", ret);
8409                                 break;
8410                         }
8411                         if (ret)
8412                                 break;
8413                 }
8414                 leaf = path.nodes[0];
8415
8416                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8417                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
8418                         path.slots[0]++;
8419                         continue;
8420                 }
8421
8422                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8423                               csum_size) * root->fs_info->sectorsize;
8424                 if (!check_data_csum)
8425                         goto skip_csum_check;
8426                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8427                 ret = check_extent_csums(root, key.offset, data_len,
8428                                          leaf_offset, leaf);
8429                 if (ret)
8430                         break;
8431 skip_csum_check:
8432                 if (!num_bytes) {
8433                         offset = key.offset;
8434                 } else if (key.offset != offset + num_bytes) {
8435                         ret = check_extent_exists(root, offset, num_bytes);
8436                         if (ret) {
8437                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8438                                         "there is no extent record\n",
8439                                         offset, offset+num_bytes);
8440                                 errors++;
8441                         }
8442                         offset = key.offset;
8443                         num_bytes = 0;
8444                 }
8445                 num_bytes += data_len;
8446                 path.slots[0]++;
8447         }
8448
8449         btrfs_release_path(&path);
8450         return errors;
8451 }
8452
8453 static int is_dropped_key(struct btrfs_key *key,
8454                           struct btrfs_key *drop_key) {
8455         if (key->objectid < drop_key->objectid)
8456                 return 1;
8457         else if (key->objectid == drop_key->objectid) {
8458                 if (key->type < drop_key->type)
8459                         return 1;
8460                 else if (key->type == drop_key->type) {
8461                         if (key->offset < drop_key->offset)
8462                                 return 1;
8463                 }
8464         }
8465         return 0;
8466 }
8467
8468 /*
8469  * Here are the rules for FULL_BACKREF.
8470  *
8471  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8472  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8473  *      FULL_BACKREF set.
8474  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
8475  *    if it happened after the relocation occurred since we'll have dropped the
8476  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8477  *    have no real way to know for sure.
8478  *
8479  * We process the blocks one root at a time, and we start from the lowest root
8480  * objectid and go to the highest.  So we can just lookup the owner backref for
8481  * the record and if we don't find it then we know it doesn't exist and we have
8482  * a FULL BACKREF.
8483  *
8484  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
8485  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8486  * be set or not and then we can check later once we've gathered all the refs.
8487  */
8488 static int calc_extent_flag(struct cache_tree *extent_cache,
8489                            struct extent_buffer *buf,
8490                            struct root_item_record *ri,
8491                            u64 *flags)
8492 {
8493         struct extent_record *rec;
8494         struct cache_extent *cache;
8495         struct tree_backref *tback;
8496         u64 owner = 0;
8497
8498         cache = lookup_cache_extent(extent_cache, buf->start, 1);
8499         /* we have added this extent before */
8500         if (!cache)
8501                 return -ENOENT;
8502
8503         rec = container_of(cache, struct extent_record, cache);
8504
8505         /*
8506          * Except file/reloc tree, we can not have
8507          * FULL BACKREF MODE
8508          */
8509         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8510                 goto normal;
8511         /*
8512          * root node
8513          */
8514         if (buf->start == ri->bytenr)
8515                 goto normal;
8516
8517         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8518                 goto full_backref;
8519
8520         owner = btrfs_header_owner(buf);
8521         if (owner == ri->objectid)
8522                 goto normal;
8523
8524         tback = find_tree_backref(rec, 0, owner);
8525         if (!tback)
8526                 goto full_backref;
8527 normal:
8528         *flags = 0;
8529         if (rec->flag_block_full_backref != FLAG_UNSET &&
8530             rec->flag_block_full_backref != 0)
8531                 rec->bad_full_backref = 1;
8532         return 0;
8533 full_backref:
8534         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8535         if (rec->flag_block_full_backref != FLAG_UNSET &&
8536             rec->flag_block_full_backref != 1)
8537                 rec->bad_full_backref = 1;
8538         return 0;
8539 }
8540
8541 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8542 {
8543         fprintf(stderr, "Invalid key type(");
8544         print_key_type(stderr, 0, key_type);
8545         fprintf(stderr, ") found in root(");
8546         print_objectid(stderr, rootid, 0);
8547         fprintf(stderr, ")\n");
8548 }
8549
8550 /*
8551  * Check if the key is valid with its extent buffer.
8552  *
8553  * This is a early check in case invalid key exists in a extent buffer
8554  * This is not comprehensive yet, but should prevent wrong key/item passed
8555  * further
8556  */
8557 static int check_type_with_root(u64 rootid, u8 key_type)
8558 {
8559         switch (key_type) {
8560         /* Only valid in chunk tree */
8561         case BTRFS_DEV_ITEM_KEY:
8562         case BTRFS_CHUNK_ITEM_KEY:
8563                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8564                         goto err;
8565                 break;
8566         /* valid in csum and log tree */
8567         case BTRFS_CSUM_TREE_OBJECTID:
8568                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8569                       is_fstree(rootid)))
8570                         goto err;
8571                 break;
8572         case BTRFS_EXTENT_ITEM_KEY:
8573         case BTRFS_METADATA_ITEM_KEY:
8574         case BTRFS_BLOCK_GROUP_ITEM_KEY:
8575                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8576                         goto err;
8577                 break;
8578         case BTRFS_ROOT_ITEM_KEY:
8579                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8580                         goto err;
8581                 break;
8582         case BTRFS_DEV_EXTENT_KEY:
8583                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8584                         goto err;
8585                 break;
8586         }
8587         return 0;
8588 err:
8589         report_mismatch_key_root(key_type, rootid);
8590         return -EINVAL;
8591 }
8592
8593 static int run_next_block(struct btrfs_root *root,
8594                           struct block_info *bits,
8595                           int bits_nr,
8596                           u64 *last,
8597                           struct cache_tree *pending,
8598                           struct cache_tree *seen,
8599                           struct cache_tree *reada,
8600                           struct cache_tree *nodes,
8601                           struct cache_tree *extent_cache,
8602                           struct cache_tree *chunk_cache,
8603                           struct rb_root *dev_cache,
8604                           struct block_group_tree *block_group_cache,
8605                           struct device_extent_tree *dev_extent_cache,
8606                           struct root_item_record *ri)
8607 {
8608         struct btrfs_fs_info *fs_info = root->fs_info;
8609         struct extent_buffer *buf;
8610         struct extent_record *rec = NULL;
8611         u64 bytenr;
8612         u32 size;
8613         u64 parent;
8614         u64 owner;
8615         u64 flags;
8616         u64 ptr;
8617         u64 gen = 0;
8618         int ret = 0;
8619         int i;
8620         int nritems;
8621         struct btrfs_key key;
8622         struct cache_extent *cache;
8623         int reada_bits;
8624
8625         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8626                                     bits_nr, &reada_bits);
8627         if (nritems == 0)
8628                 return 1;
8629
8630         if (!reada_bits) {
8631                 for(i = 0; i < nritems; i++) {
8632                         ret = add_cache_extent(reada, bits[i].start,
8633                                                bits[i].size);
8634                         if (ret == -EEXIST)
8635                                 continue;
8636
8637                         /* fixme, get the parent transid */
8638                         readahead_tree_block(fs_info, bits[i].start, 0);
8639                 }
8640         }
8641         *last = bits[0].start;
8642         bytenr = bits[0].start;
8643         size = bits[0].size;
8644
8645         cache = lookup_cache_extent(pending, bytenr, size);
8646         if (cache) {
8647                 remove_cache_extent(pending, cache);
8648                 free(cache);
8649         }
8650         cache = lookup_cache_extent(reada, bytenr, size);
8651         if (cache) {
8652                 remove_cache_extent(reada, cache);
8653                 free(cache);
8654         }
8655         cache = lookup_cache_extent(nodes, bytenr, size);
8656         if (cache) {
8657                 remove_cache_extent(nodes, cache);
8658                 free(cache);
8659         }
8660         cache = lookup_cache_extent(extent_cache, bytenr, size);
8661         if (cache) {
8662                 rec = container_of(cache, struct extent_record, cache);
8663                 gen = rec->parent_generation;
8664         }
8665
8666         /* fixme, get the real parent transid */
8667         buf = read_tree_block(root->fs_info, bytenr, gen);
8668         if (!extent_buffer_uptodate(buf)) {
8669                 record_bad_block_io(root->fs_info,
8670                                     extent_cache, bytenr, size);
8671                 goto out;
8672         }
8673
8674         nritems = btrfs_header_nritems(buf);
8675
8676         flags = 0;
8677         if (!init_extent_tree) {
8678                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8679                                        btrfs_header_level(buf), 1, NULL,
8680                                        &flags);
8681                 if (ret < 0) {
8682                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8683                         if (ret < 0) {
8684                                 fprintf(stderr, "Couldn't calc extent flags\n");
8685                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8686                         }
8687                 }
8688         } else {
8689                 flags = 0;
8690                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8691                 if (ret < 0) {
8692                         fprintf(stderr, "Couldn't calc extent flags\n");
8693                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8694                 }
8695         }
8696
8697         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8698                 if (ri != NULL &&
8699                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8700                     ri->objectid == btrfs_header_owner(buf)) {
8701                         /*
8702                          * Ok we got to this block from it's original owner and
8703                          * we have FULL_BACKREF set.  Relocation can leave
8704                          * converted blocks over so this is altogether possible,
8705                          * however it's not possible if the generation > the
8706                          * last snapshot, so check for this case.
8707                          */
8708                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8709                             btrfs_header_generation(buf) > ri->last_snapshot) {
8710                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8711                                 rec->bad_full_backref = 1;
8712                         }
8713                 }
8714         } else {
8715                 if (ri != NULL &&
8716                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8717                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8718                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8719                         rec->bad_full_backref = 1;
8720                 }
8721         }
8722
8723         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8724                 rec->flag_block_full_backref = 1;
8725                 parent = bytenr;
8726                 owner = 0;
8727         } else {
8728                 rec->flag_block_full_backref = 0;
8729                 parent = 0;
8730                 owner = btrfs_header_owner(buf);
8731         }
8732
8733         ret = check_block(root, extent_cache, buf, flags);
8734         if (ret)
8735                 goto out;
8736
8737         if (btrfs_is_leaf(buf)) {
8738                 btree_space_waste += btrfs_leaf_free_space(root, buf);
8739                 for (i = 0; i < nritems; i++) {
8740                         struct btrfs_file_extent_item *fi;
8741                         btrfs_item_key_to_cpu(buf, &key, i);
8742                         /*
8743                          * Check key type against the leaf owner.
8744                          * Could filter quite a lot of early error if
8745                          * owner is correct
8746                          */
8747                         if (check_type_with_root(btrfs_header_owner(buf),
8748                                                  key.type)) {
8749                                 fprintf(stderr, "ignoring invalid key\n");
8750                                 continue;
8751                         }
8752                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8753                                 process_extent_item(root, extent_cache, buf,
8754                                                     i);
8755                                 continue;
8756                         }
8757                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8758                                 process_extent_item(root, extent_cache, buf,
8759                                                     i);
8760                                 continue;
8761                         }
8762                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8763                                 total_csum_bytes +=
8764                                         btrfs_item_size_nr(buf, i);
8765                                 continue;
8766                         }
8767                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8768                                 process_chunk_item(chunk_cache, &key, buf, i);
8769                                 continue;
8770                         }
8771                         if (key.type == BTRFS_DEV_ITEM_KEY) {
8772                                 process_device_item(dev_cache, &key, buf, i);
8773                                 continue;
8774                         }
8775                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8776                                 process_block_group_item(block_group_cache,
8777                                         &key, buf, i);
8778                                 continue;
8779                         }
8780                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
8781                                 process_device_extent_item(dev_extent_cache,
8782                                         &key, buf, i);
8783                                 continue;
8784
8785                         }
8786                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8787 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8788                                 process_extent_ref_v0(extent_cache, buf, i);
8789 #else
8790                                 BUG();
8791 #endif
8792                                 continue;
8793                         }
8794
8795                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8796                                 ret = add_tree_backref(extent_cache,
8797                                                 key.objectid, 0, key.offset, 0);
8798                                 if (ret < 0)
8799                                         error(
8800                                 "add_tree_backref failed (leaf tree block): %s",
8801                                               strerror(-ret));
8802                                 continue;
8803                         }
8804                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8805                                 ret = add_tree_backref(extent_cache,
8806                                                 key.objectid, key.offset, 0, 0);
8807                                 if (ret < 0)
8808                                         error(
8809                                 "add_tree_backref failed (leaf shared block): %s",
8810                                               strerror(-ret));
8811                                 continue;
8812                         }
8813                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8814                                 struct btrfs_extent_data_ref *ref;
8815                                 ref = btrfs_item_ptr(buf, i,
8816                                                 struct btrfs_extent_data_ref);
8817                                 add_data_backref(extent_cache,
8818                                         key.objectid, 0,
8819                                         btrfs_extent_data_ref_root(buf, ref),
8820                                         btrfs_extent_data_ref_objectid(buf,
8821                                                                        ref),
8822                                         btrfs_extent_data_ref_offset(buf, ref),
8823                                         btrfs_extent_data_ref_count(buf, ref),
8824                                         0, root->fs_info->sectorsize);
8825                                 continue;
8826                         }
8827                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8828                                 struct btrfs_shared_data_ref *ref;
8829                                 ref = btrfs_item_ptr(buf, i,
8830                                                 struct btrfs_shared_data_ref);
8831                                 add_data_backref(extent_cache,
8832                                         key.objectid, key.offset, 0, 0, 0,
8833                                         btrfs_shared_data_ref_count(buf, ref),
8834                                         0, root->fs_info->sectorsize);
8835                                 continue;
8836                         }
8837                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8838                                 struct bad_item *bad;
8839
8840                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8841                                         continue;
8842                                 if (!owner)
8843                                         continue;
8844                                 bad = malloc(sizeof(struct bad_item));
8845                                 if (!bad)
8846                                         continue;
8847                                 INIT_LIST_HEAD(&bad->list);
8848                                 memcpy(&bad->key, &key,
8849                                        sizeof(struct btrfs_key));
8850                                 bad->root_id = owner;
8851                                 list_add_tail(&bad->list, &delete_items);
8852                                 continue;
8853                         }
8854                         if (key.type != BTRFS_EXTENT_DATA_KEY)
8855                                 continue;
8856                         fi = btrfs_item_ptr(buf, i,
8857                                             struct btrfs_file_extent_item);
8858                         if (btrfs_file_extent_type(buf, fi) ==
8859                             BTRFS_FILE_EXTENT_INLINE)
8860                                 continue;
8861                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8862                                 continue;
8863
8864                         data_bytes_allocated +=
8865                                 btrfs_file_extent_disk_num_bytes(buf, fi);
8866                         if (data_bytes_allocated < root->fs_info->sectorsize) {
8867                                 abort();
8868                         }
8869                         data_bytes_referenced +=
8870                                 btrfs_file_extent_num_bytes(buf, fi);
8871                         add_data_backref(extent_cache,
8872                                 btrfs_file_extent_disk_bytenr(buf, fi),
8873                                 parent, owner, key.objectid, key.offset -
8874                                 btrfs_file_extent_offset(buf, fi), 1, 1,
8875                                 btrfs_file_extent_disk_num_bytes(buf, fi));
8876                 }
8877         } else {
8878                 int level;
8879                 struct btrfs_key first_key;
8880
8881                 first_key.objectid = 0;
8882
8883                 if (nritems > 0)
8884                         btrfs_item_key_to_cpu(buf, &first_key, 0);
8885                 level = btrfs_header_level(buf);
8886                 for (i = 0; i < nritems; i++) {
8887                         struct extent_record tmpl;
8888
8889                         ptr = btrfs_node_blockptr(buf, i);
8890                         size = root->fs_info->nodesize;
8891                         btrfs_node_key_to_cpu(buf, &key, i);
8892                         if (ri != NULL) {
8893                                 if ((level == ri->drop_level)
8894                                     && is_dropped_key(&key, &ri->drop_key)) {
8895                                         continue;
8896                                 }
8897                         }
8898
8899                         memset(&tmpl, 0, sizeof(tmpl));
8900                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8901                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8902                         tmpl.start = ptr;
8903                         tmpl.nr = size;
8904                         tmpl.refs = 1;
8905                         tmpl.metadata = 1;
8906                         tmpl.max_size = size;
8907                         ret = add_extent_rec(extent_cache, &tmpl);
8908                         if (ret < 0)
8909                                 goto out;
8910
8911                         ret = add_tree_backref(extent_cache, ptr, parent,
8912                                         owner, 1);
8913                         if (ret < 0) {
8914                                 error(
8915                                 "add_tree_backref failed (non-leaf block): %s",
8916                                       strerror(-ret));
8917                                 continue;
8918                         }
8919
8920                         if (level > 1) {
8921                                 add_pending(nodes, seen, ptr, size);
8922                         } else {
8923                                 add_pending(pending, seen, ptr, size);
8924                         }
8925                 }
8926                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
8927                                       nritems) * sizeof(struct btrfs_key_ptr);
8928         }
8929         total_btree_bytes += buf->len;
8930         if (fs_root_objectid(btrfs_header_owner(buf)))
8931                 total_fs_tree_bytes += buf->len;
8932         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8933                 total_extent_tree_bytes += buf->len;
8934 out:
8935         free_extent_buffer(buf);
8936         return ret;
8937 }
8938
8939 static int add_root_to_pending(struct extent_buffer *buf,
8940                                struct cache_tree *extent_cache,
8941                                struct cache_tree *pending,
8942                                struct cache_tree *seen,
8943                                struct cache_tree *nodes,
8944                                u64 objectid)
8945 {
8946         struct extent_record tmpl;
8947         int ret;
8948
8949         if (btrfs_header_level(buf) > 0)
8950                 add_pending(nodes, seen, buf->start, buf->len);
8951         else
8952                 add_pending(pending, seen, buf->start, buf->len);
8953
8954         memset(&tmpl, 0, sizeof(tmpl));
8955         tmpl.start = buf->start;
8956         tmpl.nr = buf->len;
8957         tmpl.is_root = 1;
8958         tmpl.refs = 1;
8959         tmpl.metadata = 1;
8960         tmpl.max_size = buf->len;
8961         add_extent_rec(extent_cache, &tmpl);
8962
8963         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8964             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8965                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8966                                 0, 1);
8967         else
8968                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8969                                 1);
8970         return ret;
8971 }
8972
8973 /* as we fix the tree, we might be deleting blocks that
8974  * we're tracking for repair.  This hook makes sure we
8975  * remove any backrefs for blocks as we are fixing them.
8976  */
8977 static int free_extent_hook(struct btrfs_trans_handle *trans,
8978                             struct btrfs_root *root,
8979                             u64 bytenr, u64 num_bytes, u64 parent,
8980                             u64 root_objectid, u64 owner, u64 offset,
8981                             int refs_to_drop)
8982 {
8983         struct extent_record *rec;
8984         struct cache_extent *cache;
8985         int is_data;
8986         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8987
8988         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8989         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8990         if (!cache)
8991                 return 0;
8992
8993         rec = container_of(cache, struct extent_record, cache);
8994         if (is_data) {
8995                 struct data_backref *back;
8996                 back = find_data_backref(rec, parent, root_objectid, owner,
8997                                          offset, 1, bytenr, num_bytes);
8998                 if (!back)
8999                         goto out;
9000                 if (back->node.found_ref) {
9001                         back->found_ref -= refs_to_drop;
9002                         if (rec->refs)
9003                                 rec->refs -= refs_to_drop;
9004                 }
9005                 if (back->node.found_extent_tree) {
9006                         back->num_refs -= refs_to_drop;
9007                         if (rec->extent_item_refs)
9008                                 rec->extent_item_refs -= refs_to_drop;
9009                 }
9010                 if (back->found_ref == 0)
9011                         back->node.found_ref = 0;
9012                 if (back->num_refs == 0)
9013                         back->node.found_extent_tree = 0;
9014
9015                 if (!back->node.found_extent_tree && back->node.found_ref) {
9016                         rb_erase(&back->node.node, &rec->backref_tree);
9017                         free(back);
9018                 }
9019         } else {
9020                 struct tree_backref *back;
9021                 back = find_tree_backref(rec, parent, root_objectid);
9022                 if (!back)
9023                         goto out;
9024                 if (back->node.found_ref) {
9025                         if (rec->refs)
9026                                 rec->refs--;
9027                         back->node.found_ref = 0;
9028                 }
9029                 if (back->node.found_extent_tree) {
9030                         if (rec->extent_item_refs)
9031                                 rec->extent_item_refs--;
9032                         back->node.found_extent_tree = 0;
9033                 }
9034                 if (!back->node.found_extent_tree && back->node.found_ref) {
9035                         rb_erase(&back->node.node, &rec->backref_tree);
9036                         free(back);
9037                 }
9038         }
9039         maybe_free_extent_rec(extent_cache, rec);
9040 out:
9041         return 0;
9042 }
9043
9044 static int delete_extent_records(struct btrfs_trans_handle *trans,
9045                                  struct btrfs_root *root,
9046                                  struct btrfs_path *path,
9047                                  u64 bytenr)
9048 {
9049         struct btrfs_key key;
9050         struct btrfs_key found_key;
9051         struct extent_buffer *leaf;
9052         int ret;
9053         int slot;
9054
9055
9056         key.objectid = bytenr;
9057         key.type = (u8)-1;
9058         key.offset = (u64)-1;
9059
9060         while(1) {
9061                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9062                                         &key, path, 0, 1);
9063                 if (ret < 0)
9064                         break;
9065
9066                 if (ret > 0) {
9067                         ret = 0;
9068                         if (path->slots[0] == 0)
9069                                 break;
9070                         path->slots[0]--;
9071                 }
9072                 ret = 0;
9073
9074                 leaf = path->nodes[0];
9075                 slot = path->slots[0];
9076
9077                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9078                 if (found_key.objectid != bytenr)
9079                         break;
9080
9081                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9082                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
9083                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9084                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9085                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9086                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9087                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9088                         btrfs_release_path(path);
9089                         if (found_key.type == 0) {
9090                                 if (found_key.offset == 0)
9091                                         break;
9092                                 key.offset = found_key.offset - 1;
9093                                 key.type = found_key.type;
9094                         }
9095                         key.type = found_key.type - 1;
9096                         key.offset = (u64)-1;
9097                         continue;
9098                 }
9099
9100                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9101                         found_key.objectid, found_key.type, found_key.offset);
9102
9103                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9104                 if (ret)
9105                         break;
9106                 btrfs_release_path(path);
9107
9108                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9109                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
9110                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9111                                 found_key.offset : root->fs_info->nodesize;
9112
9113                         ret = btrfs_update_block_group(root, bytenr,
9114                                                        bytes, 0, 0);
9115                         if (ret)
9116                                 break;
9117                 }
9118         }
9119
9120         btrfs_release_path(path);
9121         return ret;
9122 }
9123
9124 /*
9125  * for a single backref, this will allocate a new extent
9126  * and add the backref to it.
9127  */
9128 static int record_extent(struct btrfs_trans_handle *trans,
9129                          struct btrfs_fs_info *info,
9130                          struct btrfs_path *path,
9131                          struct extent_record *rec,
9132                          struct extent_backref *back,
9133                          int allocated, u64 flags)
9134 {
9135         int ret = 0;
9136         struct btrfs_root *extent_root = info->extent_root;
9137         struct extent_buffer *leaf;
9138         struct btrfs_key ins_key;
9139         struct btrfs_extent_item *ei;
9140         struct data_backref *dback;
9141         struct btrfs_tree_block_info *bi;
9142
9143         if (!back->is_data)
9144                 rec->max_size = max_t(u64, rec->max_size,
9145                                     info->nodesize);
9146
9147         if (!allocated) {
9148                 u32 item_size = sizeof(*ei);
9149
9150                 if (!back->is_data)
9151                         item_size += sizeof(*bi);
9152
9153                 ins_key.objectid = rec->start;
9154                 ins_key.offset = rec->max_size;
9155                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9156
9157                 ret = btrfs_insert_empty_item(trans, extent_root, path,
9158                                         &ins_key, item_size);
9159                 if (ret)
9160                         goto fail;
9161
9162                 leaf = path->nodes[0];
9163                 ei = btrfs_item_ptr(leaf, path->slots[0],
9164                                     struct btrfs_extent_item);
9165
9166                 btrfs_set_extent_refs(leaf, ei, 0);
9167                 btrfs_set_extent_generation(leaf, ei, rec->generation);
9168
9169                 if (back->is_data) {
9170                         btrfs_set_extent_flags(leaf, ei,
9171                                                BTRFS_EXTENT_FLAG_DATA);
9172                 } else {
9173                         struct btrfs_disk_key copy_key;;
9174
9175                         bi = (struct btrfs_tree_block_info *)(ei + 1);
9176                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
9177                                              sizeof(*bi));
9178
9179                         btrfs_set_disk_key_objectid(&copy_key,
9180                                                     rec->info_objectid);
9181                         btrfs_set_disk_key_type(&copy_key, 0);
9182                         btrfs_set_disk_key_offset(&copy_key, 0);
9183
9184                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9185                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
9186
9187                         btrfs_set_extent_flags(leaf, ei,
9188                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9189                 }
9190
9191                 btrfs_mark_buffer_dirty(leaf);
9192                 ret = btrfs_update_block_group(extent_root, rec->start,
9193                                                rec->max_size, 1, 0);
9194                 if (ret)
9195                         goto fail;
9196                 btrfs_release_path(path);
9197         }
9198
9199         if (back->is_data) {
9200                 u64 parent;
9201                 int i;
9202
9203                 dback = to_data_backref(back);
9204                 if (back->full_backref)
9205                         parent = dback->parent;
9206                 else
9207                         parent = 0;
9208
9209                 for (i = 0; i < dback->found_ref; i++) {
9210                         /* if parent != 0, we're doing a full backref
9211                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9212                          * just makes the backref allocator create a data
9213                          * backref
9214                          */
9215                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
9216                                                    rec->start, rec->max_size,
9217                                                    parent,
9218                                                    dback->root,
9219                                                    parent ?
9220                                                    BTRFS_FIRST_FREE_OBJECTID :
9221                                                    dback->owner,
9222                                                    dback->offset);
9223                         if (ret)
9224                                 break;
9225                 }
9226                 fprintf(stderr, "adding new data backref"
9227                                 " on %llu %s %llu owner %llu"
9228                                 " offset %llu found %d\n",
9229                                 (unsigned long long)rec->start,
9230                                 back->full_backref ?
9231                                 "parent" : "root",
9232                                 back->full_backref ?
9233                                 (unsigned long long)parent :
9234                                 (unsigned long long)dback->root,
9235                                 (unsigned long long)dback->owner,
9236                                 (unsigned long long)dback->offset,
9237                                 dback->found_ref);
9238         } else {
9239                 u64 parent;
9240                 struct tree_backref *tback;
9241
9242                 tback = to_tree_backref(back);
9243                 if (back->full_backref)
9244                         parent = tback->parent;
9245                 else
9246                         parent = 0;
9247
9248                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9249                                            rec->start, rec->max_size,
9250                                            parent, tback->root, 0, 0);
9251                 fprintf(stderr, "adding new tree backref on "
9252                         "start %llu len %llu parent %llu root %llu\n",
9253                         rec->start, rec->max_size, parent, tback->root);
9254         }
9255 fail:
9256         btrfs_release_path(path);
9257         return ret;
9258 }
9259
9260 static struct extent_entry *find_entry(struct list_head *entries,
9261                                        u64 bytenr, u64 bytes)
9262 {
9263         struct extent_entry *entry = NULL;
9264
9265         list_for_each_entry(entry, entries, list) {
9266                 if (entry->bytenr == bytenr && entry->bytes == bytes)
9267                         return entry;
9268         }
9269
9270         return NULL;
9271 }
9272
9273 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9274 {
9275         struct extent_entry *entry, *best = NULL, *prev = NULL;
9276
9277         list_for_each_entry(entry, entries, list) {
9278                 /*
9279                  * If there are as many broken entries as entries then we know
9280                  * not to trust this particular entry.
9281                  */
9282                 if (entry->broken == entry->count)
9283                         continue;
9284
9285                 /*
9286                  * Special case, when there are only two entries and 'best' is
9287                  * the first one
9288                  */
9289                 if (!prev) {
9290                         best = entry;
9291                         prev = entry;
9292                         continue;
9293                 }
9294
9295                 /*
9296                  * If our current entry == best then we can't be sure our best
9297                  * is really the best, so we need to keep searching.
9298                  */
9299                 if (best && best->count == entry->count) {
9300                         prev = entry;
9301                         best = NULL;
9302                         continue;
9303                 }
9304
9305                 /* Prev == entry, not good enough, have to keep searching */
9306                 if (!prev->broken && prev->count == entry->count)
9307                         continue;
9308
9309                 if (!best)
9310                         best = (prev->count > entry->count) ? prev : entry;
9311                 else if (best->count < entry->count)
9312                         best = entry;
9313                 prev = entry;
9314         }
9315
9316         return best;
9317 }
9318
9319 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9320                       struct data_backref *dback, struct extent_entry *entry)
9321 {
9322         struct btrfs_trans_handle *trans;
9323         struct btrfs_root *root;
9324         struct btrfs_file_extent_item *fi;
9325         struct extent_buffer *leaf;
9326         struct btrfs_key key;
9327         u64 bytenr, bytes;
9328         int ret, err;
9329
9330         key.objectid = dback->root;
9331         key.type = BTRFS_ROOT_ITEM_KEY;
9332         key.offset = (u64)-1;
9333         root = btrfs_read_fs_root(info, &key);
9334         if (IS_ERR(root)) {
9335                 fprintf(stderr, "Couldn't find root for our ref\n");
9336                 return -EINVAL;
9337         }
9338
9339         /*
9340          * The backref points to the original offset of the extent if it was
9341          * split, so we need to search down to the offset we have and then walk
9342          * forward until we find the backref we're looking for.
9343          */
9344         key.objectid = dback->owner;
9345         key.type = BTRFS_EXTENT_DATA_KEY;
9346         key.offset = dback->offset;
9347         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9348         if (ret < 0) {
9349                 fprintf(stderr, "Error looking up ref %d\n", ret);
9350                 return ret;
9351         }
9352
9353         while (1) {
9354                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9355                         ret = btrfs_next_leaf(root, path);
9356                         if (ret) {
9357                                 fprintf(stderr, "Couldn't find our ref, next\n");
9358                                 return -EINVAL;
9359                         }
9360                 }
9361                 leaf = path->nodes[0];
9362                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9363                 if (key.objectid != dback->owner ||
9364                     key.type != BTRFS_EXTENT_DATA_KEY) {
9365                         fprintf(stderr, "Couldn't find our ref, search\n");
9366                         return -EINVAL;
9367                 }
9368                 fi = btrfs_item_ptr(leaf, path->slots[0],
9369                                     struct btrfs_file_extent_item);
9370                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9371                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9372
9373                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9374                         break;
9375                 path->slots[0]++;
9376         }
9377
9378         btrfs_release_path(path);
9379
9380         trans = btrfs_start_transaction(root, 1);
9381         if (IS_ERR(trans))
9382                 return PTR_ERR(trans);
9383
9384         /*
9385          * Ok we have the key of the file extent we want to fix, now we can cow
9386          * down to the thing and fix it.
9387          */
9388         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9389         if (ret < 0) {
9390                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9391                         key.objectid, key.type, key.offset, ret);
9392                 goto out;
9393         }
9394         if (ret > 0) {
9395                 fprintf(stderr, "Well that's odd, we just found this key "
9396                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9397                         key.offset);
9398                 ret = -EINVAL;
9399                 goto out;
9400         }
9401         leaf = path->nodes[0];
9402         fi = btrfs_item_ptr(leaf, path->slots[0],
9403                             struct btrfs_file_extent_item);
9404
9405         if (btrfs_file_extent_compression(leaf, fi) &&
9406             dback->disk_bytenr != entry->bytenr) {
9407                 fprintf(stderr, "Ref doesn't match the record start and is "
9408                         "compressed, please take a btrfs-image of this file "
9409                         "system and send it to a btrfs developer so they can "
9410                         "complete this functionality for bytenr %Lu\n",
9411                         dback->disk_bytenr);
9412                 ret = -EINVAL;
9413                 goto out;
9414         }
9415
9416         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9417                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9418         } else if (dback->disk_bytenr > entry->bytenr) {
9419                 u64 off_diff, offset;
9420
9421                 off_diff = dback->disk_bytenr - entry->bytenr;
9422                 offset = btrfs_file_extent_offset(leaf, fi);
9423                 if (dback->disk_bytenr + offset +
9424                     btrfs_file_extent_num_bytes(leaf, fi) >
9425                     entry->bytenr + entry->bytes) {
9426                         fprintf(stderr, "Ref is past the entry end, please "
9427                                 "take a btrfs-image of this file system and "
9428                                 "send it to a btrfs developer, ref %Lu\n",
9429                                 dback->disk_bytenr);
9430                         ret = -EINVAL;
9431                         goto out;
9432                 }
9433                 offset += off_diff;
9434                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9435                 btrfs_set_file_extent_offset(leaf, fi, offset);
9436         } else if (dback->disk_bytenr < entry->bytenr) {
9437                 u64 offset;
9438
9439                 offset = btrfs_file_extent_offset(leaf, fi);
9440                 if (dback->disk_bytenr + offset < entry->bytenr) {
9441                         fprintf(stderr, "Ref is before the entry start, please"
9442                                 " take a btrfs-image of this file system and "
9443                                 "send it to a btrfs developer, ref %Lu\n",
9444                                 dback->disk_bytenr);
9445                         ret = -EINVAL;
9446                         goto out;
9447                 }
9448
9449                 offset += dback->disk_bytenr;
9450                 offset -= entry->bytenr;
9451                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9452                 btrfs_set_file_extent_offset(leaf, fi, offset);
9453         }
9454
9455         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9456
9457         /*
9458          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9459          * only do this if we aren't using compression, otherwise it's a
9460          * trickier case.
9461          */
9462         if (!btrfs_file_extent_compression(leaf, fi))
9463                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9464         else
9465                 printf("ram bytes may be wrong?\n");
9466         btrfs_mark_buffer_dirty(leaf);
9467 out:
9468         err = btrfs_commit_transaction(trans, root);
9469         btrfs_release_path(path);
9470         return ret ? ret : err;
9471 }
9472
9473 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9474                            struct extent_record *rec)
9475 {
9476         struct extent_backref *back, *tmp;
9477         struct data_backref *dback;
9478         struct extent_entry *entry, *best = NULL;
9479         LIST_HEAD(entries);
9480         int nr_entries = 0;
9481         int broken_entries = 0;
9482         int ret = 0;
9483         short mismatch = 0;
9484
9485         /*
9486          * Metadata is easy and the backrefs should always agree on bytenr and
9487          * size, if not we've got bigger issues.
9488          */
9489         if (rec->metadata)
9490                 return 0;
9491
9492         rbtree_postorder_for_each_entry_safe(back, tmp,
9493                                              &rec->backref_tree, node) {
9494                 if (back->full_backref || !back->is_data)
9495                         continue;
9496
9497                 dback = to_data_backref(back);
9498
9499                 /*
9500                  * We only pay attention to backrefs that we found a real
9501                  * backref for.
9502                  */
9503                 if (dback->found_ref == 0)
9504                         continue;
9505
9506                 /*
9507                  * For now we only catch when the bytes don't match, not the
9508                  * bytenr.  We can easily do this at the same time, but I want
9509                  * to have a fs image to test on before we just add repair
9510                  * functionality willy-nilly so we know we won't screw up the
9511                  * repair.
9512                  */
9513
9514                 entry = find_entry(&entries, dback->disk_bytenr,
9515                                    dback->bytes);
9516                 if (!entry) {
9517                         entry = malloc(sizeof(struct extent_entry));
9518                         if (!entry) {
9519                                 ret = -ENOMEM;
9520                                 goto out;
9521                         }
9522                         memset(entry, 0, sizeof(*entry));
9523                         entry->bytenr = dback->disk_bytenr;
9524                         entry->bytes = dback->bytes;
9525                         list_add_tail(&entry->list, &entries);
9526                         nr_entries++;
9527                 }
9528
9529                 /*
9530                  * If we only have on entry we may think the entries agree when
9531                  * in reality they don't so we have to do some extra checking.
9532                  */
9533                 if (dback->disk_bytenr != rec->start ||
9534                     dback->bytes != rec->nr || back->broken)
9535                         mismatch = 1;
9536
9537                 if (back->broken) {
9538                         entry->broken++;
9539                         broken_entries++;
9540                 }
9541
9542                 entry->count++;
9543         }
9544
9545         /* Yay all the backrefs agree, carry on good sir */
9546         if (nr_entries <= 1 && !mismatch)
9547                 goto out;
9548
9549         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9550                 "%Lu\n", rec->start);
9551
9552         /*
9553          * First we want to see if the backrefs can agree amongst themselves who
9554          * is right, so figure out which one of the entries has the highest
9555          * count.
9556          */
9557         best = find_most_right_entry(&entries);
9558
9559         /*
9560          * Ok so we may have an even split between what the backrefs think, so
9561          * this is where we use the extent ref to see what it thinks.
9562          */
9563         if (!best) {
9564                 entry = find_entry(&entries, rec->start, rec->nr);
9565                 if (!entry && (!broken_entries || !rec->found_rec)) {
9566                         fprintf(stderr, "Backrefs don't agree with each other "
9567                                 "and extent record doesn't agree with anybody,"
9568                                 " so we can't fix bytenr %Lu bytes %Lu\n",
9569                                 rec->start, rec->nr);
9570                         ret = -EINVAL;
9571                         goto out;
9572                 } else if (!entry) {
9573                         /*
9574                          * Ok our backrefs were broken, we'll assume this is the
9575                          * correct value and add an entry for this range.
9576                          */
9577                         entry = malloc(sizeof(struct extent_entry));
9578                         if (!entry) {
9579                                 ret = -ENOMEM;
9580                                 goto out;
9581                         }
9582                         memset(entry, 0, sizeof(*entry));
9583                         entry->bytenr = rec->start;
9584                         entry->bytes = rec->nr;
9585                         list_add_tail(&entry->list, &entries);
9586                         nr_entries++;
9587                 }
9588                 entry->count++;
9589                 best = find_most_right_entry(&entries);
9590                 if (!best) {
9591                         fprintf(stderr, "Backrefs and extent record evenly "
9592                                 "split on who is right, this is going to "
9593                                 "require user input to fix bytenr %Lu bytes "
9594                                 "%Lu\n", rec->start, rec->nr);
9595                         ret = -EINVAL;
9596                         goto out;
9597                 }
9598         }
9599
9600         /*
9601          * I don't think this can happen currently as we'll abort() if we catch
9602          * this case higher up, but in case somebody removes that we still can't
9603          * deal with it properly here yet, so just bail out of that's the case.
9604          */
9605         if (best->bytenr != rec->start) {
9606                 fprintf(stderr, "Extent start and backref starts don't match, "
9607                         "please use btrfs-image on this file system and send "
9608                         "it to a btrfs developer so they can make fsck fix "
9609                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
9610                         rec->start, rec->nr);
9611                 ret = -EINVAL;
9612                 goto out;
9613         }
9614
9615         /*
9616          * Ok great we all agreed on an extent record, let's go find the real
9617          * references and fix up the ones that don't match.
9618          */
9619         rbtree_postorder_for_each_entry_safe(back, tmp,
9620                                              &rec->backref_tree, node) {
9621                 if (back->full_backref || !back->is_data)
9622                         continue;
9623
9624                 dback = to_data_backref(back);
9625
9626                 /*
9627                  * Still ignoring backrefs that don't have a real ref attached
9628                  * to them.
9629                  */
9630                 if (dback->found_ref == 0)
9631                         continue;
9632
9633                 if (dback->bytes == best->bytes &&
9634                     dback->disk_bytenr == best->bytenr)
9635                         continue;
9636
9637                 ret = repair_ref(info, path, dback, best);
9638                 if (ret)
9639                         goto out;
9640         }
9641
9642         /*
9643          * Ok we messed with the actual refs, which means we need to drop our
9644          * entire cache and go back and rescan.  I know this is a huge pain and
9645          * adds a lot of extra work, but it's the only way to be safe.  Once all
9646          * the backrefs agree we may not need to do anything to the extent
9647          * record itself.
9648          */
9649         ret = -EAGAIN;
9650 out:
9651         while (!list_empty(&entries)) {
9652                 entry = list_entry(entries.next, struct extent_entry, list);
9653                 list_del_init(&entry->list);
9654                 free(entry);
9655         }
9656         return ret;
9657 }
9658
9659 static int process_duplicates(struct cache_tree *extent_cache,
9660                               struct extent_record *rec)
9661 {
9662         struct extent_record *good, *tmp;
9663         struct cache_extent *cache;
9664         int ret;
9665
9666         /*
9667          * If we found a extent record for this extent then return, or if we
9668          * have more than one duplicate we are likely going to need to delete
9669          * something.
9670          */
9671         if (rec->found_rec || rec->num_duplicates > 1)
9672                 return 0;
9673
9674         /* Shouldn't happen but just in case */
9675         BUG_ON(!rec->num_duplicates);
9676
9677         /*
9678          * So this happens if we end up with a backref that doesn't match the
9679          * actual extent entry.  So either the backref is bad or the extent
9680          * entry is bad.  Either way we want to have the extent_record actually
9681          * reflect what we found in the extent_tree, so we need to take the
9682          * duplicate out and use that as the extent_record since the only way we
9683          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9684          */
9685         remove_cache_extent(extent_cache, &rec->cache);
9686
9687         good = to_extent_record(rec->dups.next);
9688         list_del_init(&good->list);
9689         INIT_LIST_HEAD(&good->backrefs);
9690         INIT_LIST_HEAD(&good->dups);
9691         good->cache.start = good->start;
9692         good->cache.size = good->nr;
9693         good->content_checked = 0;
9694         good->owner_ref_checked = 0;
9695         good->num_duplicates = 0;
9696         good->refs = rec->refs;
9697         list_splice_init(&rec->backrefs, &good->backrefs);
9698         while (1) {
9699                 cache = lookup_cache_extent(extent_cache, good->start,
9700                                             good->nr);
9701                 if (!cache)
9702                         break;
9703                 tmp = container_of(cache, struct extent_record, cache);
9704
9705                 /*
9706                  * If we find another overlapping extent and it's found_rec is
9707                  * set then it's a duplicate and we need to try and delete
9708                  * something.
9709                  */
9710                 if (tmp->found_rec || tmp->num_duplicates > 0) {
9711                         if (list_empty(&good->list))
9712                                 list_add_tail(&good->list,
9713                                               &duplicate_extents);
9714                         good->num_duplicates += tmp->num_duplicates + 1;
9715                         list_splice_init(&tmp->dups, &good->dups);
9716                         list_del_init(&tmp->list);
9717                         list_add_tail(&tmp->list, &good->dups);
9718                         remove_cache_extent(extent_cache, &tmp->cache);
9719                         continue;
9720                 }
9721
9722                 /*
9723                  * Ok we have another non extent item backed extent rec, so lets
9724                  * just add it to this extent and carry on like we did above.
9725                  */
9726                 good->refs += tmp->refs;
9727                 list_splice_init(&tmp->backrefs, &good->backrefs);
9728                 remove_cache_extent(extent_cache, &tmp->cache);
9729                 free(tmp);
9730         }
9731         ret = insert_cache_extent(extent_cache, &good->cache);
9732         BUG_ON(ret);
9733         free(rec);
9734         return good->num_duplicates ? 0 : 1;
9735 }
9736
9737 static int delete_duplicate_records(struct btrfs_root *root,
9738                                     struct extent_record *rec)
9739 {
9740         struct btrfs_trans_handle *trans;
9741         LIST_HEAD(delete_list);
9742         struct btrfs_path path;
9743         struct extent_record *tmp, *good, *n;
9744         int nr_del = 0;
9745         int ret = 0, err;
9746         struct btrfs_key key;
9747
9748         btrfs_init_path(&path);
9749
9750         good = rec;
9751         /* Find the record that covers all of the duplicates. */
9752         list_for_each_entry(tmp, &rec->dups, list) {
9753                 if (good->start < tmp->start)
9754                         continue;
9755                 if (good->nr > tmp->nr)
9756                         continue;
9757
9758                 if (tmp->start + tmp->nr < good->start + good->nr) {
9759                         fprintf(stderr, "Ok we have overlapping extents that "
9760                                 "aren't completely covered by each other, this "
9761                                 "is going to require more careful thought.  "
9762                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9763                                 tmp->start, tmp->nr, good->start, good->nr);
9764                         abort();
9765                 }
9766                 good = tmp;
9767         }
9768
9769         if (good != rec)
9770                 list_add_tail(&rec->list, &delete_list);
9771
9772         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9773                 if (tmp == good)
9774                         continue;
9775                 list_move_tail(&tmp->list, &delete_list);
9776         }
9777
9778         root = root->fs_info->extent_root;
9779         trans = btrfs_start_transaction(root, 1);
9780         if (IS_ERR(trans)) {
9781                 ret = PTR_ERR(trans);
9782                 goto out;
9783         }
9784
9785         list_for_each_entry(tmp, &delete_list, list) {
9786                 if (tmp->found_rec == 0)
9787                         continue;
9788                 key.objectid = tmp->start;
9789                 key.type = BTRFS_EXTENT_ITEM_KEY;
9790                 key.offset = tmp->nr;
9791
9792                 /* Shouldn't happen but just in case */
9793                 if (tmp->metadata) {
9794                         fprintf(stderr, "Well this shouldn't happen, extent "
9795                                 "record overlaps but is metadata? "
9796                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9797                         abort();
9798                 }
9799
9800                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9801                 if (ret) {
9802                         if (ret > 0)
9803                                 ret = -EINVAL;
9804                         break;
9805                 }
9806                 ret = btrfs_del_item(trans, root, &path);
9807                 if (ret)
9808                         break;
9809                 btrfs_release_path(&path);
9810                 nr_del++;
9811         }
9812         err = btrfs_commit_transaction(trans, root);
9813         if (err && !ret)
9814                 ret = err;
9815 out:
9816         while (!list_empty(&delete_list)) {
9817                 tmp = to_extent_record(delete_list.next);
9818                 list_del_init(&tmp->list);
9819                 if (tmp == rec)
9820                         continue;
9821                 free(tmp);
9822         }
9823
9824         while (!list_empty(&rec->dups)) {
9825                 tmp = to_extent_record(rec->dups.next);
9826                 list_del_init(&tmp->list);
9827                 free(tmp);
9828         }
9829
9830         btrfs_release_path(&path);
9831
9832         if (!ret && !nr_del)
9833                 rec->num_duplicates = 0;
9834
9835         return ret ? ret : nr_del;
9836 }
9837
9838 static int find_possible_backrefs(struct btrfs_fs_info *info,
9839                                   struct btrfs_path *path,
9840                                   struct cache_tree *extent_cache,
9841                                   struct extent_record *rec)
9842 {
9843         struct btrfs_root *root;
9844         struct extent_backref *back, *tmp;
9845         struct data_backref *dback;
9846         struct cache_extent *cache;
9847         struct btrfs_file_extent_item *fi;
9848         struct btrfs_key key;
9849         u64 bytenr, bytes;
9850         int ret;
9851
9852         rbtree_postorder_for_each_entry_safe(back, tmp,
9853                                              &rec->backref_tree, node) {
9854                 /* Don't care about full backrefs (poor unloved backrefs) */
9855                 if (back->full_backref || !back->is_data)
9856                         continue;
9857
9858                 dback = to_data_backref(back);
9859
9860                 /* We found this one, we don't need to do a lookup */
9861                 if (dback->found_ref)
9862                         continue;
9863
9864                 key.objectid = dback->root;
9865                 key.type = BTRFS_ROOT_ITEM_KEY;
9866                 key.offset = (u64)-1;
9867
9868                 root = btrfs_read_fs_root(info, &key);
9869
9870                 /* No root, definitely a bad ref, skip */
9871                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9872                         continue;
9873                 /* Other err, exit */
9874                 if (IS_ERR(root))
9875                         return PTR_ERR(root);
9876
9877                 key.objectid = dback->owner;
9878                 key.type = BTRFS_EXTENT_DATA_KEY;
9879                 key.offset = dback->offset;
9880                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9881                 if (ret) {
9882                         btrfs_release_path(path);
9883                         if (ret < 0)
9884                                 return ret;
9885                         /* Didn't find it, we can carry on */
9886                         ret = 0;
9887                         continue;
9888                 }
9889
9890                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9891                                     struct btrfs_file_extent_item);
9892                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9893                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9894                 btrfs_release_path(path);
9895                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9896                 if (cache) {
9897                         struct extent_record *tmp;
9898                         tmp = container_of(cache, struct extent_record, cache);
9899
9900                         /*
9901                          * If we found an extent record for the bytenr for this
9902                          * particular backref then we can't add it to our
9903                          * current extent record.  We only want to add backrefs
9904                          * that don't have a corresponding extent item in the
9905                          * extent tree since they likely belong to this record
9906                          * and we need to fix it if it doesn't match bytenrs.
9907                          */
9908                         if  (tmp->found_rec)
9909                                 continue;
9910                 }
9911
9912                 dback->found_ref += 1;
9913                 dback->disk_bytenr = bytenr;
9914                 dback->bytes = bytes;
9915
9916                 /*
9917                  * Set this so the verify backref code knows not to trust the
9918                  * values in this backref.
9919                  */
9920                 back->broken = 1;
9921         }
9922
9923         return 0;
9924 }
9925
9926 /*
9927  * Record orphan data ref into corresponding root.
9928  *
9929  * Return 0 if the extent item contains data ref and recorded.
9930  * Return 1 if the extent item contains no useful data ref
9931  *   On that case, it may contains only shared_dataref or metadata backref
9932  *   or the file extent exists(this should be handled by the extent bytenr
9933  *   recovery routine)
9934  * Return <0 if something goes wrong.
9935  */
9936 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9937                                       struct extent_record *rec)
9938 {
9939         struct btrfs_key key;
9940         struct btrfs_root *dest_root;
9941         struct extent_backref *back, *tmp;
9942         struct data_backref *dback;
9943         struct orphan_data_extent *orphan;
9944         struct btrfs_path path;
9945         int recorded_data_ref = 0;
9946         int ret = 0;
9947
9948         if (rec->metadata)
9949                 return 1;
9950         btrfs_init_path(&path);
9951         rbtree_postorder_for_each_entry_safe(back, tmp,
9952                                              &rec->backref_tree, node) {
9953                 if (back->full_backref || !back->is_data ||
9954                     !back->found_extent_tree)
9955                         continue;
9956                 dback = to_data_backref(back);
9957                 if (dback->found_ref)
9958                         continue;
9959                 key.objectid = dback->root;
9960                 key.type = BTRFS_ROOT_ITEM_KEY;
9961                 key.offset = (u64)-1;
9962
9963                 dest_root = btrfs_read_fs_root(fs_info, &key);
9964
9965                 /* For non-exist root we just skip it */
9966                 if (IS_ERR(dest_root) || !dest_root)
9967                         continue;
9968
9969                 key.objectid = dback->owner;
9970                 key.type = BTRFS_EXTENT_DATA_KEY;
9971                 key.offset = dback->offset;
9972
9973                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9974                 btrfs_release_path(&path);
9975                 /*
9976                  * For ret < 0, it's OK since the fs-tree may be corrupted,
9977                  * we need to record it for inode/file extent rebuild.
9978                  * For ret > 0, we record it only for file extent rebuild.
9979                  * For ret == 0, the file extent exists but only bytenr
9980                  * mismatch, let the original bytenr fix routine to handle,
9981                  * don't record it.
9982                  */
9983                 if (ret == 0)
9984                         continue;
9985                 ret = 0;
9986                 orphan = malloc(sizeof(*orphan));
9987                 if (!orphan) {
9988                         ret = -ENOMEM;
9989                         goto out;
9990                 }
9991                 INIT_LIST_HEAD(&orphan->list);
9992                 orphan->root = dback->root;
9993                 orphan->objectid = dback->owner;
9994                 orphan->offset = dback->offset;
9995                 orphan->disk_bytenr = rec->cache.start;
9996                 orphan->disk_len = rec->cache.size;
9997                 list_add(&dest_root->orphan_data_extents, &orphan->list);
9998                 recorded_data_ref = 1;
9999         }
10000 out:
10001         btrfs_release_path(&path);
10002         if (!ret)
10003                 return !recorded_data_ref;
10004         else
10005                 return ret;
10006 }
10007
10008 /*
10009  * when an incorrect extent item is found, this will delete
10010  * all of the existing entries for it and recreate them
10011  * based on what the tree scan found.
10012  */
10013 static int fixup_extent_refs(struct btrfs_fs_info *info,
10014                              struct cache_tree *extent_cache,
10015                              struct extent_record *rec)
10016 {
10017         struct btrfs_trans_handle *trans = NULL;
10018         int ret;
10019         struct btrfs_path path;
10020         struct cache_extent *cache;
10021         struct extent_backref *back, *tmp;
10022         int allocated = 0;
10023         u64 flags = 0;
10024
10025         if (rec->flag_block_full_backref)
10026                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10027
10028         btrfs_init_path(&path);
10029         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10030                 /*
10031                  * Sometimes the backrefs themselves are so broken they don't
10032                  * get attached to any meaningful rec, so first go back and
10033                  * check any of our backrefs that we couldn't find and throw
10034                  * them into the list if we find the backref so that
10035                  * verify_backrefs can figure out what to do.
10036                  */
10037                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10038                 if (ret < 0)
10039                         goto out;
10040         }
10041
10042         /* step one, make sure all of the backrefs agree */
10043         ret = verify_backrefs(info, &path, rec);
10044         if (ret < 0)
10045                 goto out;
10046
10047         trans = btrfs_start_transaction(info->extent_root, 1);
10048         if (IS_ERR(trans)) {
10049                 ret = PTR_ERR(trans);
10050                 goto out;
10051         }
10052
10053         /* step two, delete all the existing records */
10054         ret = delete_extent_records(trans, info->extent_root, &path,
10055                                     rec->start);
10056
10057         if (ret < 0)
10058                 goto out;
10059
10060         /* was this block corrupt?  If so, don't add references to it */
10061         cache = lookup_cache_extent(info->corrupt_blocks,
10062                                     rec->start, rec->max_size);
10063         if (cache) {
10064                 ret = 0;
10065                 goto out;
10066         }
10067
10068         /* step three, recreate all the refs we did find */
10069         rbtree_postorder_for_each_entry_safe(back, tmp,
10070                                              &rec->backref_tree, node) {
10071                 /*
10072                  * if we didn't find any references, don't create a
10073                  * new extent record
10074                  */
10075                 if (!back->found_ref)
10076                         continue;
10077
10078                 rec->bad_full_backref = 0;
10079                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10080                 allocated = 1;
10081
10082                 if (ret)
10083                         goto out;
10084         }
10085 out:
10086         if (trans) {
10087                 int err = btrfs_commit_transaction(trans, info->extent_root);
10088                 if (!ret)
10089                         ret = err;
10090         }
10091
10092         if (!ret)
10093                 fprintf(stderr, "Repaired extent references for %llu\n",
10094                                 (unsigned long long)rec->start);
10095
10096         btrfs_release_path(&path);
10097         return ret;
10098 }
10099
10100 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10101                               struct extent_record *rec)
10102 {
10103         struct btrfs_trans_handle *trans;
10104         struct btrfs_root *root = fs_info->extent_root;
10105         struct btrfs_path path;
10106         struct btrfs_extent_item *ei;
10107         struct btrfs_key key;
10108         u64 flags;
10109         int ret = 0;
10110
10111         key.objectid = rec->start;
10112         if (rec->metadata) {
10113                 key.type = BTRFS_METADATA_ITEM_KEY;
10114                 key.offset = rec->info_level;
10115         } else {
10116                 key.type = BTRFS_EXTENT_ITEM_KEY;
10117                 key.offset = rec->max_size;
10118         }
10119
10120         trans = btrfs_start_transaction(root, 0);
10121         if (IS_ERR(trans))
10122                 return PTR_ERR(trans);
10123
10124         btrfs_init_path(&path);
10125         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10126         if (ret < 0) {
10127                 btrfs_release_path(&path);
10128                 btrfs_commit_transaction(trans, root);
10129                 return ret;
10130         } else if (ret) {
10131                 fprintf(stderr, "Didn't find extent for %llu\n",
10132                         (unsigned long long)rec->start);
10133                 btrfs_release_path(&path);
10134                 btrfs_commit_transaction(trans, root);
10135                 return -ENOENT;
10136         }
10137
10138         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10139                             struct btrfs_extent_item);
10140         flags = btrfs_extent_flags(path.nodes[0], ei);
10141         if (rec->flag_block_full_backref) {
10142                 fprintf(stderr, "setting full backref on %llu\n",
10143                         (unsigned long long)key.objectid);
10144                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10145         } else {
10146                 fprintf(stderr, "clearing full backref on %llu\n",
10147                         (unsigned long long)key.objectid);
10148                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10149         }
10150         btrfs_set_extent_flags(path.nodes[0], ei, flags);
10151         btrfs_mark_buffer_dirty(path.nodes[0]);
10152         btrfs_release_path(&path);
10153         ret = btrfs_commit_transaction(trans, root);
10154         if (!ret)
10155                 fprintf(stderr, "Repaired extent flags for %llu\n",
10156                                 (unsigned long long)rec->start);
10157
10158         return ret;
10159 }
10160
10161 /* right now we only prune from the extent allocation tree */
10162 static int prune_one_block(struct btrfs_trans_handle *trans,
10163                            struct btrfs_fs_info *info,
10164                            struct btrfs_corrupt_block *corrupt)
10165 {
10166         int ret;
10167         struct btrfs_path path;
10168         struct extent_buffer *eb;
10169         u64 found;
10170         int slot;
10171         int nritems;
10172         int level = corrupt->level + 1;
10173
10174         btrfs_init_path(&path);
10175 again:
10176         /* we want to stop at the parent to our busted block */
10177         path.lowest_level = level;
10178
10179         ret = btrfs_search_slot(trans, info->extent_root,
10180                                 &corrupt->key, &path, -1, 1);
10181
10182         if (ret < 0)
10183                 goto out;
10184
10185         eb = path.nodes[level];
10186         if (!eb) {
10187                 ret = -ENOENT;
10188                 goto out;
10189         }
10190
10191         /*
10192          * hopefully the search gave us the block we want to prune,
10193          * lets try that first
10194          */
10195         slot = path.slots[level];
10196         found =  btrfs_node_blockptr(eb, slot);
10197         if (found == corrupt->cache.start)
10198                 goto del_ptr;
10199
10200         nritems = btrfs_header_nritems(eb);
10201
10202         /* the search failed, lets scan this node and hope we find it */
10203         for (slot = 0; slot < nritems; slot++) {
10204                 found =  btrfs_node_blockptr(eb, slot);
10205                 if (found == corrupt->cache.start)
10206                         goto del_ptr;
10207         }
10208         /*
10209          * we couldn't find the bad block.  TODO, search all the nodes for pointers
10210          * to this block
10211          */
10212         if (eb == info->extent_root->node) {
10213                 ret = -ENOENT;
10214                 goto out;
10215         } else {
10216                 level++;
10217                 btrfs_release_path(&path);
10218                 goto again;
10219         }
10220
10221 del_ptr:
10222         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10223         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10224
10225 out:
10226         btrfs_release_path(&path);
10227         return ret;
10228 }
10229
10230 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10231 {
10232         struct btrfs_trans_handle *trans = NULL;
10233         struct cache_extent *cache;
10234         struct btrfs_corrupt_block *corrupt;
10235
10236         while (1) {
10237                 cache = search_cache_extent(info->corrupt_blocks, 0);
10238                 if (!cache)
10239                         break;
10240                 if (!trans) {
10241                         trans = btrfs_start_transaction(info->extent_root, 1);
10242                         if (IS_ERR(trans))
10243                                 return PTR_ERR(trans);
10244                 }
10245                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10246                 prune_one_block(trans, info, corrupt);
10247                 remove_cache_extent(info->corrupt_blocks, cache);
10248         }
10249         if (trans)
10250                 return btrfs_commit_transaction(trans, info->extent_root);
10251         return 0;
10252 }
10253
10254 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10255 {
10256         struct btrfs_block_group_cache *cache;
10257         u64 start, end;
10258         int ret;
10259
10260         while (1) {
10261                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10262                                             &start, &end, EXTENT_DIRTY);
10263                 if (ret)
10264                         break;
10265                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10266         }
10267
10268         start = 0;
10269         while (1) {
10270                 cache = btrfs_lookup_first_block_group(fs_info, start);
10271                 if (!cache)
10272                         break;
10273                 if (cache->cached)
10274                         cache->cached = 0;
10275                 start = cache->key.objectid + cache->key.offset;
10276         }
10277 }
10278
10279 static int check_extent_refs(struct btrfs_root *root,
10280                              struct cache_tree *extent_cache)
10281 {
10282         struct extent_record *rec;
10283         struct cache_extent *cache;
10284         int ret = 0;
10285         int had_dups = 0;
10286         int err = 0;
10287
10288         if (repair) {
10289                 /*
10290                  * if we're doing a repair, we have to make sure
10291                  * we don't allocate from the problem extents.
10292                  * In the worst case, this will be all the
10293                  * extents in the FS
10294                  */
10295                 cache = search_cache_extent(extent_cache, 0);
10296                 while(cache) {
10297                         rec = container_of(cache, struct extent_record, cache);
10298                         set_extent_dirty(root->fs_info->excluded_extents,
10299                                          rec->start,
10300                                          rec->start + rec->max_size - 1);
10301                         cache = next_cache_extent(cache);
10302                 }
10303
10304                 /* pin down all the corrupted blocks too */
10305                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10306                 while(cache) {
10307                         set_extent_dirty(root->fs_info->excluded_extents,
10308                                          cache->start,
10309                                          cache->start + cache->size - 1);
10310                         cache = next_cache_extent(cache);
10311                 }
10312                 prune_corrupt_blocks(root->fs_info);
10313                 reset_cached_block_groups(root->fs_info);
10314         }
10315
10316         reset_cached_block_groups(root->fs_info);
10317
10318         /*
10319          * We need to delete any duplicate entries we find first otherwise we
10320          * could mess up the extent tree when we have backrefs that actually
10321          * belong to a different extent item and not the weird duplicate one.
10322          */
10323         while (repair && !list_empty(&duplicate_extents)) {
10324                 rec = to_extent_record(duplicate_extents.next);
10325                 list_del_init(&rec->list);
10326
10327                 /* Sometimes we can find a backref before we find an actual
10328                  * extent, so we need to process it a little bit to see if there
10329                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10330                  * if this is a backref screwup.  If we need to delete stuff
10331                  * process_duplicates() will return 0, otherwise it will return
10332                  * 1 and we
10333                  */
10334                 if (process_duplicates(extent_cache, rec))
10335                         continue;
10336                 ret = delete_duplicate_records(root, rec);
10337                 if (ret < 0)
10338                         return ret;
10339                 /*
10340                  * delete_duplicate_records will return the number of entries
10341                  * deleted, so if it's greater than 0 then we know we actually
10342                  * did something and we need to remove.
10343                  */
10344                 if (ret)
10345                         had_dups = 1;
10346         }
10347
10348         if (had_dups)
10349                 return -EAGAIN;
10350
10351         while(1) {
10352                 int cur_err = 0;
10353                 int fix = 0;
10354
10355                 cache = search_cache_extent(extent_cache, 0);
10356                 if (!cache)
10357                         break;
10358                 rec = container_of(cache, struct extent_record, cache);
10359                 if (rec->num_duplicates) {
10360                         fprintf(stderr, "extent item %llu has multiple extent "
10361                                 "items\n", (unsigned long long)rec->start);
10362                         cur_err = 1;
10363                 }
10364
10365                 if (rec->refs != rec->extent_item_refs) {
10366                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
10367                                 (unsigned long long)rec->start,
10368                                 (unsigned long long)rec->nr);
10369                         fprintf(stderr, "extent item %llu, found %llu\n",
10370                                 (unsigned long long)rec->extent_item_refs,
10371                                 (unsigned long long)rec->refs);
10372                         ret = record_orphan_data_extents(root->fs_info, rec);
10373                         if (ret < 0)
10374                                 goto repair_abort;
10375                         fix = ret;
10376                         cur_err = 1;
10377                 }
10378                 if (all_backpointers_checked(rec, 1)) {
10379                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10380                                 (unsigned long long)rec->start,
10381                                 (unsigned long long)rec->nr);
10382                         fix = 1;
10383                         cur_err = 1;
10384                 }
10385                 if (!rec->owner_ref_checked) {
10386                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10387                                 (unsigned long long)rec->start,
10388                                 (unsigned long long)rec->nr);
10389                         fix = 1;
10390                         cur_err = 1;
10391                 }
10392
10393                 if (repair && fix) {
10394                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10395                         if (ret)
10396                                 goto repair_abort;
10397                 }
10398
10399
10400                 if (rec->bad_full_backref) {
10401                         fprintf(stderr, "bad full backref, on [%llu]\n",
10402                                 (unsigned long long)rec->start);
10403                         if (repair) {
10404                                 ret = fixup_extent_flags(root->fs_info, rec);
10405                                 if (ret)
10406                                         goto repair_abort;
10407                                 fix = 1;
10408                         }
10409                         cur_err = 1;
10410                 }
10411                 /*
10412                  * Although it's not a extent ref's problem, we reuse this
10413                  * routine for error reporting.
10414                  * No repair function yet.
10415                  */
10416                 if (rec->crossing_stripes) {
10417                         fprintf(stderr,
10418                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10419                                 rec->start, rec->start + rec->max_size);
10420                         cur_err = 1;
10421                 }
10422
10423                 if (rec->wrong_chunk_type) {
10424                         fprintf(stderr,
10425                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
10426                                 rec->start, rec->start + rec->max_size);
10427                         cur_err = 1;
10428                 }
10429
10430                 err = cur_err;
10431                 remove_cache_extent(extent_cache, cache);
10432                 free_all_extent_backrefs(rec);
10433                 if (!init_extent_tree && repair && (!cur_err || fix))
10434                         clear_extent_dirty(root->fs_info->excluded_extents,
10435                                            rec->start,
10436                                            rec->start + rec->max_size - 1);
10437                 free(rec);
10438         }
10439 repair_abort:
10440         if (repair) {
10441                 if (ret && ret != -EAGAIN) {
10442                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10443                         exit(1);
10444                 } else if (!ret) {
10445                         struct btrfs_trans_handle *trans;
10446
10447                         root = root->fs_info->extent_root;
10448                         trans = btrfs_start_transaction(root, 1);
10449                         if (IS_ERR(trans)) {
10450                                 ret = PTR_ERR(trans);
10451                                 goto repair_abort;
10452                         }
10453
10454                         ret = btrfs_fix_block_accounting(trans, root);
10455                         if (ret)
10456                                 goto repair_abort;
10457                         ret = btrfs_commit_transaction(trans, root);
10458                         if (ret)
10459                                 goto repair_abort;
10460                 }
10461                 return ret;
10462         }
10463
10464         if (err)
10465                 err = -EIO;
10466         return err;
10467 }
10468
10469 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10470 {
10471         u64 stripe_size;
10472
10473         if (type & BTRFS_BLOCK_GROUP_RAID0) {
10474                 stripe_size = length;
10475                 stripe_size /= num_stripes;
10476         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10477                 stripe_size = length * 2;
10478                 stripe_size /= num_stripes;
10479         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10480                 stripe_size = length;
10481                 stripe_size /= (num_stripes - 1);
10482         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10483                 stripe_size = length;
10484                 stripe_size /= (num_stripes - 2);
10485         } else {
10486                 stripe_size = length;
10487         }
10488         return stripe_size;
10489 }
10490
10491 /*
10492  * Check the chunk with its block group/dev list ref:
10493  * Return 0 if all refs seems valid.
10494  * Return 1 if part of refs seems valid, need later check for rebuild ref
10495  * like missing block group and needs to search extent tree to rebuild them.
10496  * Return -1 if essential refs are missing and unable to rebuild.
10497  */
10498 static int check_chunk_refs(struct chunk_record *chunk_rec,
10499                             struct block_group_tree *block_group_cache,
10500                             struct device_extent_tree *dev_extent_cache,
10501                             int silent)
10502 {
10503         struct cache_extent *block_group_item;
10504         struct block_group_record *block_group_rec;
10505         struct cache_extent *dev_extent_item;
10506         struct device_extent_record *dev_extent_rec;
10507         u64 devid;
10508         u64 offset;
10509         u64 length;
10510         int metadump_v2 = 0;
10511         int i;
10512         int ret = 0;
10513
10514         block_group_item = lookup_cache_extent(&block_group_cache->tree,
10515                                                chunk_rec->offset,
10516                                                chunk_rec->length);
10517         if (block_group_item) {
10518                 block_group_rec = container_of(block_group_item,
10519                                                struct block_group_record,
10520                                                cache);
10521                 if (chunk_rec->length != block_group_rec->offset ||
10522                     chunk_rec->offset != block_group_rec->objectid ||
10523                     (!metadump_v2 &&
10524                      chunk_rec->type_flags != block_group_rec->flags)) {
10525                         if (!silent)
10526                                 fprintf(stderr,
10527                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10528                                         chunk_rec->objectid,
10529                                         chunk_rec->type,
10530                                         chunk_rec->offset,
10531                                         chunk_rec->length,
10532                                         chunk_rec->offset,
10533                                         chunk_rec->type_flags,
10534                                         block_group_rec->objectid,
10535                                         block_group_rec->type,
10536                                         block_group_rec->offset,
10537                                         block_group_rec->offset,
10538                                         block_group_rec->objectid,
10539                                         block_group_rec->flags);
10540                         ret = -1;
10541                 } else {
10542                         list_del_init(&block_group_rec->list);
10543                         chunk_rec->bg_rec = block_group_rec;
10544                 }
10545         } else {
10546                 if (!silent)
10547                         fprintf(stderr,
10548                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10549                                 chunk_rec->objectid,
10550                                 chunk_rec->type,
10551                                 chunk_rec->offset,
10552                                 chunk_rec->length,
10553                                 chunk_rec->offset,
10554                                 chunk_rec->type_flags);
10555                 ret = 1;
10556         }
10557
10558         if (metadump_v2)
10559                 return ret;
10560
10561         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10562                                     chunk_rec->num_stripes);
10563         for (i = 0; i < chunk_rec->num_stripes; ++i) {
10564                 devid = chunk_rec->stripes[i].devid;
10565                 offset = chunk_rec->stripes[i].offset;
10566                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10567                                                        devid, offset, length);
10568                 if (dev_extent_item) {
10569                         dev_extent_rec = container_of(dev_extent_item,
10570                                                 struct device_extent_record,
10571                                                 cache);
10572                         if (dev_extent_rec->objectid != devid ||
10573                             dev_extent_rec->offset != offset ||
10574                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
10575                             dev_extent_rec->length != length) {
10576                                 if (!silent)
10577                                         fprintf(stderr,
10578                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10579                                                 chunk_rec->objectid,
10580                                                 chunk_rec->type,
10581                                                 chunk_rec->offset,
10582                                                 chunk_rec->stripes[i].devid,
10583                                                 chunk_rec->stripes[i].offset,
10584                                                 dev_extent_rec->objectid,
10585                                                 dev_extent_rec->offset,
10586                                                 dev_extent_rec->length);
10587                                 ret = -1;
10588                         } else {
10589                                 list_move(&dev_extent_rec->chunk_list,
10590                                           &chunk_rec->dextents);
10591                         }
10592                 } else {
10593                         if (!silent)
10594                                 fprintf(stderr,
10595                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10596                                         chunk_rec->objectid,
10597                                         chunk_rec->type,
10598                                         chunk_rec->offset,
10599                                         chunk_rec->stripes[i].devid,
10600                                         chunk_rec->stripes[i].offset);
10601                         ret = -1;
10602                 }
10603         }
10604         return ret;
10605 }
10606
10607 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
10608 int check_chunks(struct cache_tree *chunk_cache,
10609                  struct block_group_tree *block_group_cache,
10610                  struct device_extent_tree *dev_extent_cache,
10611                  struct list_head *good, struct list_head *bad,
10612                  struct list_head *rebuild, int silent)
10613 {
10614         struct cache_extent *chunk_item;
10615         struct chunk_record *chunk_rec;
10616         struct block_group_record *bg_rec;
10617         struct device_extent_record *dext_rec;
10618         int err;
10619         int ret = 0;
10620
10621         chunk_item = first_cache_extent(chunk_cache);
10622         while (chunk_item) {
10623                 chunk_rec = container_of(chunk_item, struct chunk_record,
10624                                          cache);
10625                 err = check_chunk_refs(chunk_rec, block_group_cache,
10626                                        dev_extent_cache, silent);
10627                 if (err < 0)
10628                         ret = err;
10629                 if (err == 0 && good)
10630                         list_add_tail(&chunk_rec->list, good);
10631                 if (err > 0 && rebuild)
10632                         list_add_tail(&chunk_rec->list, rebuild);
10633                 if (err < 0 && bad)
10634                         list_add_tail(&chunk_rec->list, bad);
10635                 chunk_item = next_cache_extent(chunk_item);
10636         }
10637
10638         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10639                 if (!silent)
10640                         fprintf(stderr,
10641                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10642                                 bg_rec->objectid,
10643                                 bg_rec->offset,
10644                                 bg_rec->flags);
10645                 if (!ret)
10646                         ret = 1;
10647         }
10648
10649         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10650                             chunk_list) {
10651                 if (!silent)
10652                         fprintf(stderr,
10653                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10654                                 dext_rec->objectid,
10655                                 dext_rec->offset,
10656                                 dext_rec->length);
10657                 if (!ret)
10658                         ret = 1;
10659         }
10660         return ret;
10661 }
10662
10663
10664 static int check_device_used(struct device_record *dev_rec,
10665                              struct device_extent_tree *dext_cache)
10666 {
10667         struct cache_extent *cache;
10668         struct device_extent_record *dev_extent_rec;
10669         u64 total_byte = 0;
10670
10671         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10672         while (cache) {
10673                 dev_extent_rec = container_of(cache,
10674                                               struct device_extent_record,
10675                                               cache);
10676                 if (dev_extent_rec->objectid != dev_rec->devid)
10677                         break;
10678
10679                 list_del_init(&dev_extent_rec->device_list);
10680                 total_byte += dev_extent_rec->length;
10681                 cache = next_cache_extent(cache);
10682         }
10683
10684         if (total_byte != dev_rec->byte_used) {
10685                 fprintf(stderr,
10686                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10687                         total_byte, dev_rec->byte_used, dev_rec->objectid,
10688                         dev_rec->type, dev_rec->offset);
10689                 return -1;
10690         } else {
10691                 return 0;
10692         }
10693 }
10694
10695 /*
10696  * Unlike device size alignment check above, some super total_bytes check
10697  * failure can lead to mount failure for newer kernel.
10698  *
10699  * So this function will return the error for a fatal super total_bytes problem.
10700  */
10701 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
10702 {
10703         struct btrfs_device *dev;
10704         struct list_head *dev_list = &fs_info->fs_devices->devices;
10705         u64 total_bytes = 0;
10706         u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
10707
10708         list_for_each_entry(dev, dev_list, dev_list)
10709                 total_bytes += dev->total_bytes;
10710
10711         /* Important check, which can cause unmountable fs */
10712         if (super_bytes < total_bytes) {
10713                 error("super total bytes %llu smaller than real device(s) size %llu",
10714                         super_bytes, total_bytes);
10715                 error("mounting this fs may fail for newer kernels");
10716                 error("this can be fixed by 'btrfs rescue fix-device-size'");
10717                 return false;
10718         }
10719
10720         /*
10721          * Optional check, just to make everything aligned and match with each
10722          * other.
10723          *
10724          * For a btrfs-image restored fs, we don't need to check it anyway.
10725          */
10726         if (btrfs_super_flags(fs_info->super_copy) &
10727             (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
10728                 return true;
10729         if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
10730             !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
10731             super_bytes != total_bytes) {
10732                 warning("minor unaligned/mismatch device size detected");
10733                 warning(
10734                 "recommended to use 'btrfs rescue fix-device-size' to fix it");
10735         }
10736         return true;
10737 }
10738
10739 /* check btrfs_dev_item -> btrfs_dev_extent */
10740 static int check_devices(struct rb_root *dev_cache,
10741                          struct device_extent_tree *dev_extent_cache)
10742 {
10743         struct rb_node *dev_node;
10744         struct device_record *dev_rec;
10745         struct device_extent_record *dext_rec;
10746         int err;
10747         int ret = 0;
10748
10749         dev_node = rb_first(dev_cache);
10750         while (dev_node) {
10751                 dev_rec = container_of(dev_node, struct device_record, node);
10752                 err = check_device_used(dev_rec, dev_extent_cache);
10753                 if (err)
10754                         ret = err;
10755
10756                 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
10757                                          global_info->sectorsize);
10758                 dev_node = rb_next(dev_node);
10759         }
10760         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10761                             device_list) {
10762                 fprintf(stderr,
10763                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10764                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
10765                 if (!ret)
10766                         ret = 1;
10767         }
10768         return ret;
10769 }
10770
10771 static int add_root_item_to_list(struct list_head *head,
10772                                   u64 objectid, u64 bytenr, u64 last_snapshot,
10773                                   u8 level, u8 drop_level,
10774                                   struct btrfs_key *drop_key)
10775 {
10776
10777         struct root_item_record *ri_rec;
10778         ri_rec = malloc(sizeof(*ri_rec));
10779         if (!ri_rec)
10780                 return -ENOMEM;
10781         ri_rec->bytenr = bytenr;
10782         ri_rec->objectid = objectid;
10783         ri_rec->level = level;
10784         ri_rec->drop_level = drop_level;
10785         ri_rec->last_snapshot = last_snapshot;
10786         if (drop_key)
10787                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10788         list_add_tail(&ri_rec->list, head);
10789
10790         return 0;
10791 }
10792
10793 static void free_root_item_list(struct list_head *list)
10794 {
10795         struct root_item_record *ri_rec;
10796
10797         while (!list_empty(list)) {
10798                 ri_rec = list_first_entry(list, struct root_item_record,
10799                                           list);
10800                 list_del_init(&ri_rec->list);
10801                 free(ri_rec);
10802         }
10803 }
10804
10805 static int deal_root_from_list(struct list_head *list,
10806                                struct btrfs_root *root,
10807                                struct block_info *bits,
10808                                int bits_nr,
10809                                struct cache_tree *pending,
10810                                struct cache_tree *seen,
10811                                struct cache_tree *reada,
10812                                struct cache_tree *nodes,
10813                                struct cache_tree *extent_cache,
10814                                struct cache_tree *chunk_cache,
10815                                struct rb_root *dev_cache,
10816                                struct block_group_tree *block_group_cache,
10817                                struct device_extent_tree *dev_extent_cache)
10818 {
10819         int ret = 0;
10820         u64 last;
10821
10822         while (!list_empty(list)) {
10823                 struct root_item_record *rec;
10824                 struct extent_buffer *buf;
10825                 rec = list_entry(list->next,
10826                                  struct root_item_record, list);
10827                 last = 0;
10828                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10829                 if (!extent_buffer_uptodate(buf)) {
10830                         free_extent_buffer(buf);
10831                         ret = -EIO;
10832                         break;
10833                 }
10834                 ret = add_root_to_pending(buf, extent_cache, pending,
10835                                     seen, nodes, rec->objectid);
10836                 if (ret < 0)
10837                         break;
10838                 /*
10839                  * To rebuild extent tree, we need deal with snapshot
10840                  * one by one, otherwise we deal with node firstly which
10841                  * can maximize readahead.
10842                  */
10843                 while (1) {
10844                         ret = run_next_block(root, bits, bits_nr, &last,
10845                                              pending, seen, reada, nodes,
10846                                              extent_cache, chunk_cache,
10847                                              dev_cache, block_group_cache,
10848                                              dev_extent_cache, rec);
10849                         if (ret != 0)
10850                                 break;
10851                 }
10852                 free_extent_buffer(buf);
10853                 list_del(&rec->list);
10854                 free(rec);
10855                 if (ret < 0)
10856                         break;
10857         }
10858         while (ret >= 0) {
10859                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10860                                      reada, nodes, extent_cache, chunk_cache,
10861                                      dev_cache, block_group_cache,
10862                                      dev_extent_cache, NULL);
10863                 if (ret != 0) {
10864                         if (ret > 0)
10865                                 ret = 0;
10866                         break;
10867                 }
10868         }
10869         return ret;
10870 }
10871
10872 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10873 {
10874         struct rb_root dev_cache;
10875         struct cache_tree chunk_cache;
10876         struct block_group_tree block_group_cache;
10877         struct device_extent_tree dev_extent_cache;
10878         struct cache_tree extent_cache;
10879         struct cache_tree seen;
10880         struct cache_tree pending;
10881         struct cache_tree reada;
10882         struct cache_tree nodes;
10883         struct extent_io_tree excluded_extents;
10884         struct cache_tree corrupt_blocks;
10885         struct btrfs_path path;
10886         struct btrfs_key key;
10887         struct btrfs_key found_key;
10888         int ret, err = 0;
10889         struct block_info *bits;
10890         int bits_nr;
10891         struct extent_buffer *leaf;
10892         int slot;
10893         struct btrfs_root_item ri;
10894         struct list_head dropping_trees;
10895         struct list_head normal_trees;
10896         struct btrfs_root *root1;
10897         struct btrfs_root *root;
10898         u64 objectid;
10899         u8 level;
10900
10901         root = fs_info->fs_root;
10902         dev_cache = RB_ROOT;
10903         cache_tree_init(&chunk_cache);
10904         block_group_tree_init(&block_group_cache);
10905         device_extent_tree_init(&dev_extent_cache);
10906
10907         cache_tree_init(&extent_cache);
10908         cache_tree_init(&seen);
10909         cache_tree_init(&pending);
10910         cache_tree_init(&nodes);
10911         cache_tree_init(&reada);
10912         cache_tree_init(&corrupt_blocks);
10913         extent_io_tree_init(&excluded_extents);
10914         INIT_LIST_HEAD(&dropping_trees);
10915         INIT_LIST_HEAD(&normal_trees);
10916
10917         if (repair) {
10918                 fs_info->excluded_extents = &excluded_extents;
10919                 fs_info->fsck_extent_cache = &extent_cache;
10920                 fs_info->free_extent_hook = free_extent_hook;
10921                 fs_info->corrupt_blocks = &corrupt_blocks;
10922         }
10923
10924         bits_nr = 1024;
10925         bits = malloc(bits_nr * sizeof(struct block_info));
10926         if (!bits) {
10927                 perror("malloc");
10928                 exit(1);
10929         }
10930
10931         if (ctx.progress_enabled) {
10932                 ctx.tp = TASK_EXTENTS;
10933                 task_start(ctx.info);
10934         }
10935
10936 again:
10937         root1 = fs_info->tree_root;
10938         level = btrfs_header_level(root1->node);
10939         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10940                                     root1->node->start, 0, level, 0, NULL);
10941         if (ret < 0)
10942                 goto out;
10943         root1 = fs_info->chunk_root;
10944         level = btrfs_header_level(root1->node);
10945         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10946                                     root1->node->start, 0, level, 0, NULL);
10947         if (ret < 0)
10948                 goto out;
10949         btrfs_init_path(&path);
10950         key.offset = 0;
10951         key.objectid = 0;
10952         key.type = BTRFS_ROOT_ITEM_KEY;
10953         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10954         if (ret < 0)
10955                 goto out;
10956         while(1) {
10957                 leaf = path.nodes[0];
10958                 slot = path.slots[0];
10959                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10960                         ret = btrfs_next_leaf(root, &path);
10961                         if (ret != 0)
10962                                 break;
10963                         leaf = path.nodes[0];
10964                         slot = path.slots[0];
10965                 }
10966                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10967                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10968                         unsigned long offset;
10969                         u64 last_snapshot;
10970
10971                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10972                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10973                         last_snapshot = btrfs_root_last_snapshot(&ri);
10974                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10975                                 level = btrfs_root_level(&ri);
10976                                 ret = add_root_item_to_list(&normal_trees,
10977                                                 found_key.objectid,
10978                                                 btrfs_root_bytenr(&ri),
10979                                                 last_snapshot, level,
10980                                                 0, NULL);
10981                                 if (ret < 0)
10982                                         goto out;
10983                         } else {
10984                                 level = btrfs_root_level(&ri);
10985                                 objectid = found_key.objectid;
10986                                 btrfs_disk_key_to_cpu(&found_key,
10987                                                       &ri.drop_progress);
10988                                 ret = add_root_item_to_list(&dropping_trees,
10989                                                 objectid,
10990                                                 btrfs_root_bytenr(&ri),
10991                                                 last_snapshot, level,
10992                                                 ri.drop_level, &found_key);
10993                                 if (ret < 0)
10994                                         goto out;
10995                         }
10996                 }
10997                 path.slots[0]++;
10998         }
10999         btrfs_release_path(&path);
11000
11001         /*
11002          * check_block can return -EAGAIN if it fixes something, please keep
11003          * this in mind when dealing with return values from these functions, if
11004          * we get -EAGAIN we want to fall through and restart the loop.
11005          */
11006         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11007                                   &seen, &reada, &nodes, &extent_cache,
11008                                   &chunk_cache, &dev_cache, &block_group_cache,
11009                                   &dev_extent_cache);
11010         if (ret < 0) {
11011                 if (ret == -EAGAIN)
11012                         goto loop;
11013                 goto out;
11014         }
11015         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11016                                   &pending, &seen, &reada, &nodes,
11017                                   &extent_cache, &chunk_cache, &dev_cache,
11018                                   &block_group_cache, &dev_extent_cache);
11019         if (ret < 0) {
11020                 if (ret == -EAGAIN)
11021                         goto loop;
11022                 goto out;
11023         }
11024
11025         ret = check_chunks(&chunk_cache, &block_group_cache,
11026                            &dev_extent_cache, NULL, NULL, NULL, 0);
11027         if (ret) {
11028                 if (ret == -EAGAIN)
11029                         goto loop;
11030                 err = ret;
11031         }
11032
11033         ret = check_extent_refs(root, &extent_cache);
11034         if (ret < 0) {
11035                 if (ret == -EAGAIN)
11036                         goto loop;
11037                 goto out;
11038         }
11039
11040         ret = check_devices(&dev_cache, &dev_extent_cache);
11041         if (ret && err)
11042                 ret = err;
11043
11044 out:
11045         task_stop(ctx.info);
11046         if (repair) {
11047                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11048                 extent_io_tree_cleanup(&excluded_extents);
11049                 fs_info->fsck_extent_cache = NULL;
11050                 fs_info->free_extent_hook = NULL;
11051                 fs_info->corrupt_blocks = NULL;
11052                 fs_info->excluded_extents = NULL;
11053         }
11054         free(bits);
11055         free_chunk_cache_tree(&chunk_cache);
11056         free_device_cache_tree(&dev_cache);
11057         free_block_group_tree(&block_group_cache);
11058         free_device_extent_tree(&dev_extent_cache);
11059         free_extent_cache_tree(&seen);
11060         free_extent_cache_tree(&pending);
11061         free_extent_cache_tree(&reada);
11062         free_extent_cache_tree(&nodes);
11063         free_root_item_list(&normal_trees);
11064         free_root_item_list(&dropping_trees);
11065         return ret;
11066 loop:
11067         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11068         free_extent_cache_tree(&seen);
11069         free_extent_cache_tree(&pending);
11070         free_extent_cache_tree(&reada);
11071         free_extent_cache_tree(&nodes);
11072         free_chunk_cache_tree(&chunk_cache);
11073         free_block_group_tree(&block_group_cache);
11074         free_device_cache_tree(&dev_cache);
11075         free_device_extent_tree(&dev_extent_cache);
11076         free_extent_record_cache(&extent_cache);
11077         free_root_item_list(&normal_trees);
11078         free_root_item_list(&dropping_trees);
11079         extent_io_tree_cleanup(&excluded_extents);
11080         goto again;
11081 }
11082
11083 static int check_extent_inline_ref(struct extent_buffer *eb,
11084                    struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11085 {
11086         int ret;
11087         u8 type = btrfs_extent_inline_ref_type(eb, iref);
11088
11089         switch (type) {
11090         case BTRFS_TREE_BLOCK_REF_KEY:
11091         case BTRFS_EXTENT_DATA_REF_KEY:
11092         case BTRFS_SHARED_BLOCK_REF_KEY:
11093         case BTRFS_SHARED_DATA_REF_KEY:
11094                 ret = 0;
11095                 break;
11096         default:
11097                 error("extent[%llu %u %llu] has unknown ref type: %d",
11098                       key->objectid, key->type, key->offset, type);
11099                 ret = UNKNOWN_TYPE;
11100                 break;
11101         }
11102
11103         return ret;
11104 }
11105
11106 /*
11107  * Check backrefs of a tree block given by @bytenr or @eb.
11108  *
11109  * @root:       the root containing the @bytenr or @eb
11110  * @eb:         tree block extent buffer, can be NULL
11111  * @bytenr:     bytenr of the tree block to search
11112  * @level:      tree level of the tree block
11113  * @owner:      owner of the tree block
11114  *
11115  * Return >0 for any error found and output error message
11116  * Return 0 for no error found
11117  */
11118 static int check_tree_block_ref(struct btrfs_root *root,
11119                                 struct extent_buffer *eb, u64 bytenr,
11120                                 int level, u64 owner, struct node_refs *nrefs)
11121 {
11122         struct btrfs_key key;
11123         struct btrfs_root *extent_root = root->fs_info->extent_root;
11124         struct btrfs_path path;
11125         struct btrfs_extent_item *ei;
11126         struct btrfs_extent_inline_ref *iref;
11127         struct extent_buffer *leaf;
11128         unsigned long end;
11129         unsigned long ptr;
11130         int slot;
11131         int skinny_level;
11132         int root_level = btrfs_header_level(root->node);
11133         int type;
11134         u32 nodesize = root->fs_info->nodesize;
11135         u32 item_size;
11136         u64 offset;
11137         int found_ref = 0;
11138         int err = 0;
11139         int ret;
11140         int strict = 1;
11141         int parent = 0;
11142
11143         btrfs_init_path(&path);
11144         key.objectid = bytenr;
11145         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11146                 key.type = BTRFS_METADATA_ITEM_KEY;
11147         else
11148                 key.type = BTRFS_EXTENT_ITEM_KEY;
11149         key.offset = (u64)-1;
11150
11151         /* Search for the backref in extent tree */
11152         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11153         if (ret < 0) {
11154                 err |= BACKREF_MISSING;
11155                 goto out;
11156         }
11157         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11158         if (ret) {
11159                 err |= BACKREF_MISSING;
11160                 goto out;
11161         }
11162
11163         leaf = path.nodes[0];
11164         slot = path.slots[0];
11165         btrfs_item_key_to_cpu(leaf, &key, slot);
11166
11167         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11168
11169         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11170                 skinny_level = (int)key.offset;
11171                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11172         } else {
11173                 struct btrfs_tree_block_info *info;
11174
11175                 info = (struct btrfs_tree_block_info *)(ei + 1);
11176                 skinny_level = btrfs_tree_block_level(leaf, info);
11177                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11178         }
11179
11180
11181         if (eb) {
11182                 u64 header_gen;
11183                 u64 extent_gen;
11184
11185                 /*
11186                  * Due to the feature of shared tree blocks, if the upper node
11187                  * is a fs root or shared node, the extent of checked node may
11188                  * not be updated until the next CoW.
11189                  */
11190                 if (nrefs)
11191                         strict = should_check_extent_strictly(root, nrefs,
11192                                         level);
11193                 if (!(btrfs_extent_flags(leaf, ei) &
11194                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11195                         error(
11196                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11197                                 key.objectid, nodesize,
11198                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11199                         err = BACKREF_MISMATCH;
11200                 }
11201                 header_gen = btrfs_header_generation(eb);
11202                 extent_gen = btrfs_extent_generation(leaf, ei);
11203                 if (header_gen != extent_gen) {
11204                         error(
11205         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11206                                 key.objectid, nodesize, header_gen,
11207                                 extent_gen);
11208                         err = BACKREF_MISMATCH;
11209                 }
11210                 if (level != skinny_level) {
11211                         error(
11212                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11213                                 key.objectid, nodesize, level, skinny_level);
11214                         err = BACKREF_MISMATCH;
11215                 }
11216                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11217                         error(
11218                         "extent[%llu %u] is referred by other roots than %llu",
11219                                 key.objectid, nodesize, root->objectid);
11220                         err = BACKREF_MISMATCH;
11221                 }
11222         }
11223
11224         /*
11225          * Iterate the extent/metadata item to find the exact backref
11226          */
11227         item_size = btrfs_item_size_nr(leaf, slot);
11228         ptr = (unsigned long)iref;
11229         end = (unsigned long)ei + item_size;
11230
11231         while (ptr < end) {
11232                 iref = (struct btrfs_extent_inline_ref *)ptr;
11233                 type = btrfs_extent_inline_ref_type(leaf, iref);
11234                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11235
11236                 ret = check_extent_inline_ref(leaf, &key, iref);
11237                 if (ret) {
11238                         err |= ret;
11239                         break;
11240                 }
11241                 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11242                         if (offset == root->objectid)
11243                                 found_ref = 1;
11244                         if (!strict && owner == offset)
11245                                 found_ref = 1;
11246                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11247                         /*
11248                          * Backref of tree reloc root points to itself, no need
11249                          * to check backref any more.
11250                          *
11251                          * This may be an error of loop backref, but extent tree
11252                          * checker should have already handled it.
11253                          * Here we only need to avoid infinite iteration.
11254                          */
11255                         if (offset == bytenr) {
11256                                 found_ref = 1;
11257                         } else {
11258                                 /*
11259                                  * Check if the backref points to valid
11260                                  * referencer
11261                                  */
11262                                 found_ref = !check_tree_block_ref( root, NULL,
11263                                                 offset, level + 1, owner,
11264                                                 NULL);
11265                         }
11266                 }
11267
11268                 if (found_ref)
11269                         break;
11270                 ptr += btrfs_extent_inline_ref_size(type);
11271         }
11272
11273         /*
11274          * Inlined extent item doesn't have what we need, check
11275          * TREE_BLOCK_REF_KEY
11276          */
11277         if (!found_ref) {
11278                 btrfs_release_path(&path);
11279                 key.objectid = bytenr;
11280                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11281                 key.offset = root->objectid;
11282
11283                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11284                 if (!ret)
11285                         found_ref = 1;
11286         }
11287         /*
11288          * Finally check SHARED BLOCK REF, any found will be good
11289          * Here we're not doing comprehensive extent backref checking,
11290          * only need to ensure there is some extent referring to this
11291          * tree block.
11292          */
11293         if (!found_ref) {
11294                 btrfs_release_path(&path);
11295                 key.objectid = bytenr;
11296                 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
11297                 key.offset = (u64)-1;
11298
11299                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11300                 if (ret < 0) {
11301                         err |= BACKREF_MISSING;
11302                         goto out;
11303                 }
11304                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11305                 if (ret) {
11306                         err |= BACKREF_MISSING;
11307                         goto out;
11308                 }
11309                 found_ref = 1;
11310         }
11311         if (!found_ref)
11312                 err |= BACKREF_MISSING;
11313 out:
11314         btrfs_release_path(&path);
11315         if (nrefs && strict &&
11316             level < root_level && nrefs->full_backref[level + 1])
11317                 parent = nrefs->bytenr[level + 1];
11318         if (eb && (err & BACKREF_MISSING))
11319                 error(
11320         "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11321                       bytenr, nodesize, owner, level,
11322                       parent ? "parent" : "root",
11323                       parent ? parent : root->objectid);
11324         return err;
11325 }
11326
11327 /*
11328  * If @err contains BACKREF_MISSING then add extent of the
11329  * file_extent_data_item.
11330  *
11331  * Returns error bits after reapir.
11332  */
11333 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11334                                    struct btrfs_root *root,
11335                                    struct btrfs_path *pathp,
11336                                    struct node_refs *nrefs,
11337                                    int err)
11338 {
11339         struct btrfs_file_extent_item *fi;
11340         struct btrfs_key fi_key;
11341         struct btrfs_key key;
11342         struct btrfs_extent_item *ei;
11343         struct btrfs_path path;
11344         struct btrfs_root *extent_root = root->fs_info->extent_root;
11345         struct extent_buffer *eb;
11346         u64 size;
11347         u64 disk_bytenr;
11348         u64 num_bytes;
11349         u64 parent;
11350         u64 offset;
11351         u64 extent_offset;
11352         u64 file_offset;
11353         int generation;
11354         int slot;
11355         int ret = 0;
11356
11357         eb = pathp->nodes[0];
11358         slot = pathp->slots[0];
11359         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11360         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11361
11362         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11363             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11364                 return err;
11365
11366         file_offset = fi_key.offset;
11367         generation = btrfs_file_extent_generation(eb, fi);
11368         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11369         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11370         extent_offset = btrfs_file_extent_offset(eb, fi);
11371         offset = file_offset - extent_offset;
11372
11373         /* now repair only adds backref */
11374         if ((err & BACKREF_MISSING) == 0)
11375                 return err;
11376
11377         /* search extent item */
11378         key.objectid = disk_bytenr;
11379         key.type = BTRFS_EXTENT_ITEM_KEY;
11380         key.offset = num_bytes;
11381
11382         btrfs_init_path(&path);
11383         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11384         if (ret < 0) {
11385                 ret = -EIO;
11386                 goto out;
11387         }
11388
11389         /* insert an extent item */
11390         if (ret > 0) {
11391                 key.objectid = disk_bytenr;
11392                 key.type = BTRFS_EXTENT_ITEM_KEY;
11393                 key.offset = num_bytes;
11394                 size = sizeof(*ei);
11395
11396                 btrfs_release_path(&path);
11397                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11398                                               size);
11399                 if (ret)
11400                         goto out;
11401                 eb = path.nodes[0];
11402                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
11403
11404                 btrfs_set_extent_refs(eb, ei, 0);
11405                 btrfs_set_extent_generation(eb, ei, generation);
11406                 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11407
11408                 btrfs_mark_buffer_dirty(eb);
11409                 ret = btrfs_update_block_group(extent_root, disk_bytenr,
11410                                                num_bytes, 1, 0);
11411                 btrfs_release_path(&path);
11412         }
11413
11414         if (nrefs->full_backref[0])
11415                 parent = btrfs_header_bytenr(eb);
11416         else
11417                 parent = 0;
11418
11419         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11420                                    root->objectid,
11421                    parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11422                                    offset);
11423         if (ret) {
11424                 error(
11425                 "failed to increase extent data backref[%llu %llu] root %llu",
11426                       disk_bytenr, num_bytes, root->objectid);
11427                 goto out;
11428         } else {
11429                 printf("Add one extent data backref [%llu %llu]\n",
11430                        disk_bytenr, num_bytes);
11431         }
11432
11433         err &= ~BACKREF_MISSING;
11434 out:
11435         if (ret)
11436                 error("can't repair root %llu extent data item[%llu %llu]",
11437                       root->objectid, disk_bytenr, num_bytes);
11438         return err;
11439 }
11440
11441 /*
11442  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11443  *
11444  * Return >0 any error found and output error message
11445  * Return 0 for no error found
11446  */
11447 static int check_extent_data_item(struct btrfs_root *root,
11448                                   struct btrfs_path *pathp,
11449                                   struct node_refs *nrefs,  int account_bytes)
11450 {
11451         struct btrfs_file_extent_item *fi;
11452         struct extent_buffer *eb = pathp->nodes[0];
11453         struct btrfs_path path;
11454         struct btrfs_root *extent_root = root->fs_info->extent_root;
11455         struct btrfs_key fi_key;
11456         struct btrfs_key dbref_key;
11457         struct extent_buffer *leaf;
11458         struct btrfs_extent_item *ei;
11459         struct btrfs_extent_inline_ref *iref;
11460         struct btrfs_extent_data_ref *dref;
11461         u64 owner;
11462         u64 disk_bytenr;
11463         u64 disk_num_bytes;
11464         u64 extent_num_bytes;
11465         u64 extent_flags;
11466         u64 offset;
11467         u32 item_size;
11468         unsigned long end;
11469         unsigned long ptr;
11470         int type;
11471         int found_dbackref = 0;
11472         int slot = pathp->slots[0];
11473         int err = 0;
11474         int ret;
11475         int strict;
11476
11477         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11478         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11479
11480         /* Nothing to check for hole and inline data extents */
11481         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11482             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11483                 return 0;
11484
11485         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11486         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11487         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11488         offset = btrfs_file_extent_offset(eb, fi);
11489
11490         /* Check unaligned disk_num_bytes and num_bytes */
11491         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11492                 error(
11493 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11494                         fi_key.objectid, fi_key.offset, disk_num_bytes,
11495                         root->fs_info->sectorsize);
11496                 err |= BYTES_UNALIGNED;
11497         } else if (account_bytes) {
11498                 data_bytes_allocated += disk_num_bytes;
11499         }
11500         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11501                 error(
11502 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11503                         fi_key.objectid, fi_key.offset, extent_num_bytes,
11504                         root->fs_info->sectorsize);
11505                 err |= BYTES_UNALIGNED;
11506         } else if (account_bytes) {
11507                 data_bytes_referenced += extent_num_bytes;
11508         }
11509         owner = btrfs_header_owner(eb);
11510
11511         /* Check the extent item of the file extent in extent tree */
11512         btrfs_init_path(&path);
11513         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11514         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11515         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11516
11517         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11518         if (ret)
11519                 goto out;
11520
11521         leaf = path.nodes[0];
11522         slot = path.slots[0];
11523         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11524
11525         extent_flags = btrfs_extent_flags(leaf, ei);
11526
11527         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11528                 error(
11529                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11530                     disk_bytenr, disk_num_bytes,
11531                     BTRFS_EXTENT_FLAG_DATA);
11532                 err |= BACKREF_MISMATCH;
11533         }
11534
11535         /* Check data backref inside that extent item */
11536         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11537         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11538         ptr = (unsigned long)iref;
11539         end = (unsigned long)ei + item_size;
11540         strict = should_check_extent_strictly(root, nrefs, -1);
11541
11542         while (ptr < end) {
11543                 u64 ref_root;
11544                 u64 ref_objectid;
11545                 u64 ref_offset;
11546                 bool match = false;
11547
11548                 iref = (struct btrfs_extent_inline_ref *)ptr;
11549                 type = btrfs_extent_inline_ref_type(leaf, iref);
11550                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11551
11552                 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
11553                 if (ret) {
11554                         err |= ret;
11555                         break;
11556                 }
11557                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11558                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
11559                         ref_objectid = btrfs_extent_data_ref_objectid(leaf, dref);
11560                         ref_offset = btrfs_extent_data_ref_offset(leaf, dref);
11561
11562                         if (ref_objectid == fi_key.objectid &&
11563                             ref_offset == fi_key.offset - offset)
11564                                 match = true;
11565                         if (ref_root == root->objectid && match)
11566                                 found_dbackref = 1;
11567                         else if (!strict && owner == ref_root && match)
11568                                 found_dbackref = 1;
11569                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
11570                         found_dbackref = !check_tree_block_ref(root, NULL,
11571                                 btrfs_extent_inline_ref_offset(leaf, iref),
11572                                 0, owner, NULL);
11573                 }
11574
11575                 if (found_dbackref)
11576                         break;
11577                 ptr += btrfs_extent_inline_ref_size(type);
11578         }
11579
11580         if (!found_dbackref) {
11581                 btrfs_release_path(&path);
11582
11583                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11584                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11585                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
11586                 dbref_key.offset = hash_extent_data_ref(root->objectid,
11587                                 fi_key.objectid, fi_key.offset - offset);
11588
11589                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11590                                         &dbref_key, &path, 0, 0);
11591                 if (!ret) {
11592                         found_dbackref = 1;
11593                         goto out;
11594                 }
11595
11596                 btrfs_release_path(&path);
11597
11598                 /*
11599                  * Neither inlined nor EXTENT_DATA_REF found, try
11600                  * SHARED_DATA_REF as last chance.
11601                  */
11602                 dbref_key.objectid = disk_bytenr;
11603                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11604                 dbref_key.offset = eb->start;
11605
11606                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11607                                         &dbref_key, &path, 0, 0);
11608                 if (!ret) {
11609                         found_dbackref = 1;
11610                         goto out;
11611                 }
11612         }
11613
11614 out:
11615         if (!found_dbackref)
11616                 err |= BACKREF_MISSING;
11617         btrfs_release_path(&path);
11618         if (err & BACKREF_MISSING) {
11619                 error("data extent[%llu %llu] backref lost",
11620                       disk_bytenr, disk_num_bytes);
11621         }
11622         return err;
11623 }
11624
11625 /*
11626  * Get real tree block level for the case like shared block
11627  * Return >= 0 as tree level
11628  * Return <0 for error
11629  */
11630 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11631 {
11632         struct extent_buffer *eb;
11633         struct btrfs_path path;
11634         struct btrfs_key key;
11635         struct btrfs_extent_item *ei;
11636         u64 flags;
11637         u64 transid;
11638         u8 backref_level;
11639         u8 header_level;
11640         int ret;
11641
11642         /* Search extent tree for extent generation and level */
11643         key.objectid = bytenr;
11644         key.type = BTRFS_METADATA_ITEM_KEY;
11645         key.offset = (u64)-1;
11646
11647         btrfs_init_path(&path);
11648         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11649         if (ret < 0)
11650                 goto release_out;
11651         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11652         if (ret < 0)
11653                 goto release_out;
11654         if (ret > 0) {
11655                 ret = -ENOENT;
11656                 goto release_out;
11657         }
11658
11659         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11660         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11661                             struct btrfs_extent_item);
11662         flags = btrfs_extent_flags(path.nodes[0], ei);
11663         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11664                 ret = -ENOENT;
11665                 goto release_out;
11666         }
11667
11668         /* Get transid for later read_tree_block() check */
11669         transid = btrfs_extent_generation(path.nodes[0], ei);
11670
11671         /* Get backref level as one source */
11672         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11673                 backref_level = key.offset;
11674         } else {
11675                 struct btrfs_tree_block_info *info;
11676
11677                 info = (struct btrfs_tree_block_info *)(ei + 1);
11678                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11679         }
11680         btrfs_release_path(&path);
11681
11682         /* Get level from tree block as an alternative source */
11683         eb = read_tree_block(fs_info, bytenr, transid);
11684         if (!extent_buffer_uptodate(eb)) {
11685                 free_extent_buffer(eb);
11686                 return -EIO;
11687         }
11688         header_level = btrfs_header_level(eb);
11689         free_extent_buffer(eb);
11690
11691         if (header_level != backref_level)
11692                 return -EIO;
11693         return header_level;
11694
11695 release_out:
11696         btrfs_release_path(&path);
11697         return ret;
11698 }
11699
11700 /*
11701  * Check if a tree block backref is valid (points to a valid tree block)
11702  * if level == -1, level will be resolved
11703  * Return >0 for any error found and print error message
11704  */
11705 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11706                                     u64 bytenr, int level)
11707 {
11708         struct btrfs_root *root;
11709         struct btrfs_key key;
11710         struct btrfs_path path;
11711         struct extent_buffer *eb;
11712         struct extent_buffer *node;
11713         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11714         int err = 0;
11715         int ret;
11716
11717         /* Query level for level == -1 special case */
11718         if (level == -1)
11719                 level = query_tree_block_level(fs_info, bytenr);
11720         if (level < 0) {
11721                 err |= REFERENCER_MISSING;
11722                 goto out;
11723         }
11724
11725         key.objectid = root_id;
11726         key.type = BTRFS_ROOT_ITEM_KEY;
11727         key.offset = (u64)-1;
11728
11729         root = btrfs_read_fs_root(fs_info, &key);
11730         if (IS_ERR(root)) {
11731                 err |= REFERENCER_MISSING;
11732                 goto out;
11733         }
11734
11735         /* Read out the tree block to get item/node key */
11736         eb = read_tree_block(fs_info, bytenr, 0);
11737         if (!extent_buffer_uptodate(eb)) {
11738                 err |= REFERENCER_MISSING;
11739                 free_extent_buffer(eb);
11740                 goto out;
11741         }
11742
11743         /* Empty tree, no need to check key */
11744         if (!btrfs_header_nritems(eb) && !level) {
11745                 free_extent_buffer(eb);
11746                 goto out;
11747         }
11748
11749         if (level)
11750                 btrfs_node_key_to_cpu(eb, &key, 0);
11751         else
11752                 btrfs_item_key_to_cpu(eb, &key, 0);
11753
11754         free_extent_buffer(eb);
11755
11756         btrfs_init_path(&path);
11757         path.lowest_level = level;
11758         /* Search with the first key, to ensure we can reach it */
11759         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11760         if (ret < 0) {
11761                 err |= REFERENCER_MISSING;
11762                 goto release_out;
11763         }
11764
11765         node = path.nodes[level];
11766         if (btrfs_header_bytenr(node) != bytenr) {
11767                 error(
11768         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11769                         bytenr, nodesize, bytenr,
11770                         btrfs_header_bytenr(node));
11771                 err |= REFERENCER_MISMATCH;
11772         }
11773         if (btrfs_header_level(node) != level) {
11774                 error(
11775         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11776                         bytenr, nodesize, level,
11777                         btrfs_header_level(node));
11778                 err |= REFERENCER_MISMATCH;
11779         }
11780
11781 release_out:
11782         btrfs_release_path(&path);
11783 out:
11784         if (err & REFERENCER_MISSING) {
11785                 if (level < 0)
11786                         error("extent [%llu %d] lost referencer (owner: %llu)",
11787                                 bytenr, nodesize, root_id);
11788                 else
11789                         error(
11790                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11791                                 bytenr, nodesize, root_id, level);
11792         }
11793
11794         return err;
11795 }
11796
11797 /*
11798  * Check if tree block @eb is tree reloc root.
11799  * Return 0 if it's not or any problem happens
11800  * Return 1 if it's a tree reloc root
11801  */
11802 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11803                                  struct extent_buffer *eb)
11804 {
11805         struct btrfs_root *tree_reloc_root;
11806         struct btrfs_key key;
11807         u64 bytenr = btrfs_header_bytenr(eb);
11808         u64 owner = btrfs_header_owner(eb);
11809         int ret = 0;
11810
11811         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11812         key.offset = owner;
11813         key.type = BTRFS_ROOT_ITEM_KEY;
11814
11815         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11816         if (IS_ERR(tree_reloc_root))
11817                 return 0;
11818
11819         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
11820                 ret = 1;
11821         btrfs_free_fs_root(tree_reloc_root);
11822         return ret;
11823 }
11824
11825 /*
11826  * Check referencer for shared block backref
11827  * If level == -1, this function will resolve the level.
11828  */
11829 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11830                                      u64 parent, u64 bytenr, int level)
11831 {
11832         struct extent_buffer *eb;
11833         u32 nr;
11834         int found_parent = 0;
11835         int i;
11836
11837         eb = read_tree_block(fs_info, parent, 0);
11838         if (!extent_buffer_uptodate(eb))
11839                 goto out;
11840
11841         if (level == -1)
11842                 level = query_tree_block_level(fs_info, bytenr);
11843         if (level < 0)
11844                 goto out;
11845
11846         /* It's possible it's a tree reloc root */
11847         if (parent == bytenr) {
11848                 if (is_tree_reloc_root(fs_info, eb))
11849                         found_parent = 1;
11850                 goto out;
11851         }
11852
11853         if (level + 1 != btrfs_header_level(eb))
11854                 goto out;
11855
11856         nr = btrfs_header_nritems(eb);
11857         for (i = 0; i < nr; i++) {
11858                 if (bytenr == btrfs_node_blockptr(eb, i)) {
11859                         found_parent = 1;
11860                         break;
11861                 }
11862         }
11863 out:
11864         free_extent_buffer(eb);
11865         if (!found_parent) {
11866                 error(
11867         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
11868                         bytenr, fs_info->nodesize, parent, level);
11869                 return REFERENCER_MISSING;
11870         }
11871         return 0;
11872 }
11873
11874 /*
11875  * Check referencer for normal (inlined) data ref
11876  * If len == 0, it will be resolved by searching in extent tree
11877  */
11878 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
11879                                      u64 root_id, u64 objectid, u64 offset,
11880                                      u64 bytenr, u64 len, u32 count)
11881 {
11882         struct btrfs_root *root;
11883         struct btrfs_root *extent_root = fs_info->extent_root;
11884         struct btrfs_key key;
11885         struct btrfs_path path;
11886         struct extent_buffer *leaf;
11887         struct btrfs_file_extent_item *fi;
11888         u32 found_count = 0;
11889         int slot;
11890         int ret = 0;
11891
11892         if (!len) {
11893                 key.objectid = bytenr;
11894                 key.type = BTRFS_EXTENT_ITEM_KEY;
11895                 key.offset = (u64)-1;
11896
11897                 btrfs_init_path(&path);
11898                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11899                 if (ret < 0)
11900                         goto out;
11901                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11902                 if (ret)
11903                         goto out;
11904                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11905                 if (key.objectid != bytenr ||
11906                     key.type != BTRFS_EXTENT_ITEM_KEY)
11907                         goto out;
11908                 len = key.offset;
11909                 btrfs_release_path(&path);
11910         }
11911         key.objectid = root_id;
11912         key.type = BTRFS_ROOT_ITEM_KEY;
11913         key.offset = (u64)-1;
11914         btrfs_init_path(&path);
11915
11916         root = btrfs_read_fs_root(fs_info, &key);
11917         if (IS_ERR(root))
11918                 goto out;
11919
11920         key.objectid = objectid;
11921         key.type = BTRFS_EXTENT_DATA_KEY;
11922         /*
11923          * It can be nasty as data backref offset is
11924          * file offset - file extent offset, which is smaller or
11925          * equal to original backref offset.  The only special case is
11926          * overflow.  So we need to special check and do further search.
11927          */
11928         key.offset = offset & (1ULL << 63) ? 0 : offset;
11929
11930         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11931         if (ret < 0)
11932                 goto out;
11933
11934         /*
11935          * Search afterwards to get correct one
11936          * NOTE: As we must do a comprehensive check on the data backref to
11937          * make sure the dref count also matches, we must iterate all file
11938          * extents for that inode.
11939          */
11940         while (1) {
11941                 leaf = path.nodes[0];
11942                 slot = path.slots[0];
11943
11944                 if (slot >= btrfs_header_nritems(leaf) ||
11945                     btrfs_header_owner(leaf) != root_id)
11946                         goto next;
11947                 btrfs_item_key_to_cpu(leaf, &key, slot);
11948                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11949                         break;
11950                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11951                 /*
11952                  * Except normal disk bytenr and disk num bytes, we still
11953                  * need to do extra check on dbackref offset as
11954                  * dbackref offset = file_offset - file_extent_offset
11955                  *
11956                  * Also, we must check the leaf owner.
11957                  * In case of shared tree blocks (snapshots) we can inherit
11958                  * leaves from source snapshot.
11959                  * In that case, reference from source snapshot should not
11960                  * count.
11961                  */
11962                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11963                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11964                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11965                     offset && btrfs_header_owner(leaf) == root_id)
11966                         found_count++;
11967
11968 next:
11969                 ret = btrfs_next_item(root, &path);
11970                 if (ret)
11971                         break;
11972         }
11973 out:
11974         btrfs_release_path(&path);
11975         if (found_count != count) {
11976                 error(
11977 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11978                         bytenr, len, root_id, objectid, offset, count, found_count);
11979                 return REFERENCER_MISSING;
11980         }
11981         return 0;
11982 }
11983
11984 /*
11985  * Check if the referencer of a shared data backref exists
11986  */
11987 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11988                                      u64 parent, u64 bytenr)
11989 {
11990         struct extent_buffer *eb;
11991         struct btrfs_key key;
11992         struct btrfs_file_extent_item *fi;
11993         u32 nr;
11994         int found_parent = 0;
11995         int i;
11996
11997         eb = read_tree_block(fs_info, parent, 0);
11998         if (!extent_buffer_uptodate(eb))
11999                 goto out;
12000
12001         nr = btrfs_header_nritems(eb);
12002         for (i = 0; i < nr; i++) {
12003                 btrfs_item_key_to_cpu(eb, &key, i);
12004                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12005                         continue;
12006
12007                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
12008                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12009                         continue;
12010
12011                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12012                         found_parent = 1;
12013                         break;
12014                 }
12015         }
12016
12017 out:
12018         free_extent_buffer(eb);
12019         if (!found_parent) {
12020                 error("shared extent %llu referencer lost (parent: %llu)",
12021                         bytenr, parent);
12022                 return REFERENCER_MISSING;
12023         }
12024         return 0;
12025 }
12026
12027 /*
12028  * Only delete backref if REFERENCER_MISSING now
12029  *
12030  * Returns <0   the extent was deleted
12031  * Returns >0   the backref was deleted but extent still exists, returned value
12032  *               means error after repair
12033  * Returns  0   nothing happened
12034  */
12035 static int repair_extent_item(struct btrfs_trans_handle *trans,
12036                       struct btrfs_root *root, struct btrfs_path *path,
12037                       u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12038                       u64 owner, u64 offset, int err)
12039 {
12040         struct btrfs_key old_key;
12041         int freed = 0;
12042         int ret;
12043
12044         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
12045
12046         if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12047                 /* delete the backref */
12048                 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12049                           num_bytes, parent, root_objectid, owner, offset);
12050                 if (!ret) {
12051                         freed = 1;
12052                         err &= ~REFERENCER_MISSING;
12053                         printf("Delete backref in extent [%llu %llu]\n",
12054                                bytenr, num_bytes);
12055                 } else {
12056                         error("fail to delete backref in extent [%llu %llu]",
12057                                bytenr, num_bytes);
12058                 }
12059         }
12060
12061         /* btrfs_free_extent may delete the extent */
12062         btrfs_release_path(path);
12063         ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12064
12065         if (ret)
12066                 ret = -ENOENT;
12067         else if (freed)
12068                 ret = err;
12069         return ret;
12070 }
12071
12072 /*
12073  * This function will check a given extent item, including its backref and
12074  * itself (like crossing stripe boundary and type)
12075  *
12076  * Since we don't use extent_record anymore, introduce new error bit
12077  */
12078 static int check_extent_item(struct btrfs_trans_handle *trans,
12079                              struct btrfs_fs_info *fs_info,
12080                              struct btrfs_path *path)
12081 {
12082         struct btrfs_extent_item *ei;
12083         struct btrfs_extent_inline_ref *iref;
12084         struct btrfs_extent_data_ref *dref;
12085         struct extent_buffer *eb = path->nodes[0];
12086         unsigned long end;
12087         unsigned long ptr;
12088         int slot = path->slots[0];
12089         int type;
12090         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12091         u32 item_size = btrfs_item_size_nr(eb, slot);
12092         u64 flags;
12093         u64 offset;
12094         u64 parent;
12095         u64 num_bytes;
12096         u64 root_objectid;
12097         u64 owner;
12098         u64 owner_offset;
12099         int metadata = 0;
12100         int level;
12101         struct btrfs_key key;
12102         int ret;
12103         int err = 0;
12104
12105         btrfs_item_key_to_cpu(eb, &key, slot);
12106         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12107                 bytes_used += key.offset;
12108                 num_bytes = key.offset;
12109         } else {
12110                 bytes_used += nodesize;
12111                 num_bytes = nodesize;
12112         }
12113
12114         if (item_size < sizeof(*ei)) {
12115                 /*
12116                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12117                  * old thing when on disk format is still un-determined.
12118                  * No need to care about it anymore
12119                  */
12120                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12121                 return -ENOTTY;
12122         }
12123
12124         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12125         flags = btrfs_extent_flags(eb, ei);
12126
12127         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12128                 metadata = 1;
12129         if (metadata && check_crossing_stripes(global_info, key.objectid,
12130                                                eb->len)) {
12131                 error("bad metadata [%llu, %llu) crossing stripe boundary",
12132                       key.objectid, key.objectid + nodesize);
12133                 err |= CROSSING_STRIPE_BOUNDARY;
12134         }
12135
12136         ptr = (unsigned long)(ei + 1);
12137
12138         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12139                 /* Old EXTENT_ITEM metadata */
12140                 struct btrfs_tree_block_info *info;
12141
12142                 info = (struct btrfs_tree_block_info *)ptr;
12143                 level = btrfs_tree_block_level(eb, info);
12144                 ptr += sizeof(struct btrfs_tree_block_info);
12145         } else {
12146                 /* New METADATA_ITEM */
12147                 level = key.offset;
12148         }
12149         end = (unsigned long)ei + item_size;
12150
12151 next:
12152         /* Reached extent item end normally */
12153         if (ptr == end)
12154                 goto out;
12155
12156         /* Beyond extent item end, wrong item size */
12157         if (ptr > end) {
12158                 err |= ITEM_SIZE_MISMATCH;
12159                 error("extent item at bytenr %llu slot %d has wrong size",
12160                         eb->start, slot);
12161                 goto out;
12162         }
12163
12164         parent = 0;
12165         root_objectid = 0;
12166         owner = 0;
12167         owner_offset = 0;
12168         /* Now check every backref in this extent item */
12169         iref = (struct btrfs_extent_inline_ref *)ptr;
12170         type = btrfs_extent_inline_ref_type(eb, iref);
12171         offset = btrfs_extent_inline_ref_offset(eb, iref);
12172         switch (type) {
12173         case BTRFS_TREE_BLOCK_REF_KEY:
12174                 root_objectid = offset;
12175                 owner = level;
12176                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12177                                                level);
12178                 err |= ret;
12179                 break;
12180         case BTRFS_SHARED_BLOCK_REF_KEY:
12181                 parent = offset;
12182                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12183                                                  level);
12184                 err |= ret;
12185                 break;
12186         case BTRFS_EXTENT_DATA_REF_KEY:
12187                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12188                 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12189                 owner = btrfs_extent_data_ref_objectid(eb, dref);
12190                 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12191                 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12192                                         owner_offset, key.objectid, key.offset,
12193                                         btrfs_extent_data_ref_count(eb, dref));
12194                 err |= ret;
12195                 break;
12196         case BTRFS_SHARED_DATA_REF_KEY:
12197                 parent = offset;
12198                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12199                 err |= ret;
12200                 break;
12201         default:
12202                 error("extent[%llu %d %llu] has unknown ref type: %d",
12203                         key.objectid, key.type, key.offset, type);
12204                 ret = UNKNOWN_TYPE;
12205                 err |= ret;
12206                 goto out;
12207         }
12208
12209         if (err && repair) {
12210                 ret = repair_extent_item(trans, fs_info->extent_root, path,
12211                          key.objectid, num_bytes, parent, root_objectid,
12212                          owner, owner_offset, ret);
12213                 if (ret < 0)
12214                         goto out;
12215                 if (ret) {
12216                         goto next;
12217                         err = ret;
12218                 }
12219         }
12220
12221         ptr += btrfs_extent_inline_ref_size(type);
12222         goto next;
12223
12224 out:
12225         return err;
12226 }
12227
12228 /*
12229  * Check if a dev extent item is referred correctly by its chunk
12230  */
12231 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12232                                  struct extent_buffer *eb, int slot)
12233 {
12234         struct btrfs_root *chunk_root = fs_info->chunk_root;
12235         struct btrfs_dev_extent *ptr;
12236         struct btrfs_path path;
12237         struct btrfs_key chunk_key;
12238         struct btrfs_key devext_key;
12239         struct btrfs_chunk *chunk;
12240         struct extent_buffer *l;
12241         int num_stripes;
12242         u64 length;
12243         int i;
12244         int found_chunk = 0;
12245         int ret;
12246
12247         btrfs_item_key_to_cpu(eb, &devext_key, slot);
12248         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12249         length = btrfs_dev_extent_length(eb, ptr);
12250
12251         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12252         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12253         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12254
12255         btrfs_init_path(&path);
12256         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12257         if (ret)
12258                 goto out;
12259
12260         l = path.nodes[0];
12261         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12262         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12263                                       chunk_key.offset);
12264         if (ret < 0)
12265                 goto out;
12266
12267         if (btrfs_stripe_length(fs_info, l, chunk) != length)
12268                 goto out;
12269
12270         num_stripes = btrfs_chunk_num_stripes(l, chunk);
12271         for (i = 0; i < num_stripes; i++) {
12272                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12273                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12274
12275                 if (devid == devext_key.objectid &&
12276                     offset == devext_key.offset) {
12277                         found_chunk = 1;
12278                         break;
12279                 }
12280         }
12281 out:
12282         btrfs_release_path(&path);
12283         if (!found_chunk) {
12284                 error(
12285                 "device extent[%llu, %llu, %llu] did not find the related chunk",
12286                         devext_key.objectid, devext_key.offset, length);
12287                 return REFERENCER_MISSING;
12288         }
12289         return 0;
12290 }
12291
12292 /*
12293  * Check if the used space is correct with the dev item
12294  */
12295 static int check_dev_item(struct btrfs_fs_info *fs_info,
12296                           struct extent_buffer *eb, int slot)
12297 {
12298         struct btrfs_root *dev_root = fs_info->dev_root;
12299         struct btrfs_dev_item *dev_item;
12300         struct btrfs_path path;
12301         struct btrfs_key key;
12302         struct btrfs_dev_extent *ptr;
12303         u64 total_bytes;
12304         u64 dev_id;
12305         u64 used;
12306         u64 total = 0;
12307         int ret;
12308
12309         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12310         dev_id = btrfs_device_id(eb, dev_item);
12311         used = btrfs_device_bytes_used(eb, dev_item);
12312         total_bytes = btrfs_device_total_bytes(eb, dev_item);
12313
12314         key.objectid = dev_id;
12315         key.type = BTRFS_DEV_EXTENT_KEY;
12316         key.offset = 0;
12317
12318         btrfs_init_path(&path);
12319         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
12320         if (ret < 0) {
12321                 btrfs_item_key_to_cpu(eb, &key, slot);
12322                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12323                         key.objectid, key.type, key.offset);
12324                 btrfs_release_path(&path);
12325                 return REFERENCER_MISSING;
12326         }
12327
12328         /* Iterate dev_extents to calculate the used space of a device */
12329         while (1) {
12330                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12331                         goto next;
12332
12333                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12334                 if (key.objectid > dev_id)
12335                         break;
12336                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12337                         goto next;
12338
12339                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12340                                      struct btrfs_dev_extent);
12341                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12342 next:
12343                 ret = btrfs_next_item(dev_root, &path);
12344                 if (ret)
12345                         break;
12346         }
12347         btrfs_release_path(&path);
12348
12349         if (used != total) {
12350                 btrfs_item_key_to_cpu(eb, &key, slot);
12351                 error(
12352 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12353                         total, used, BTRFS_ROOT_TREE_OBJECTID,
12354                         BTRFS_DEV_EXTENT_KEY, dev_id);
12355                 return ACCOUNTING_MISMATCH;
12356         }
12357         check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize);
12358
12359         return 0;
12360 }
12361
12362 /*
12363  * Check a block group item with its referener (chunk) and its used space
12364  * with extent/metadata item
12365  */
12366 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12367                                   struct extent_buffer *eb, int slot)
12368 {
12369         struct btrfs_root *extent_root = fs_info->extent_root;
12370         struct btrfs_root *chunk_root = fs_info->chunk_root;
12371         struct btrfs_block_group_item *bi;
12372         struct btrfs_block_group_item bg_item;
12373         struct btrfs_path path;
12374         struct btrfs_key bg_key;
12375         struct btrfs_key chunk_key;
12376         struct btrfs_key extent_key;
12377         struct btrfs_chunk *chunk;
12378         struct extent_buffer *leaf;
12379         struct btrfs_extent_item *ei;
12380         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12381         u64 flags;
12382         u64 bg_flags;
12383         u64 used;
12384         u64 total = 0;
12385         int ret;
12386         int err = 0;
12387
12388         btrfs_item_key_to_cpu(eb, &bg_key, slot);
12389         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
12390         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12391         used = btrfs_block_group_used(&bg_item);
12392         bg_flags = btrfs_block_group_flags(&bg_item);
12393
12394         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12395         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12396         chunk_key.offset = bg_key.objectid;
12397
12398         btrfs_init_path(&path);
12399         /* Search for the referencer chunk */
12400         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12401         if (ret) {
12402                 error(
12403                 "block group[%llu %llu] did not find the related chunk item",
12404                         bg_key.objectid, bg_key.offset);
12405                 err |= REFERENCER_MISSING;
12406         } else {
12407                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12408                                         struct btrfs_chunk);
12409                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12410                                                 bg_key.offset) {
12411                         error(
12412         "block group[%llu %llu] related chunk item length does not match",
12413                                 bg_key.objectid, bg_key.offset);
12414                         err |= REFERENCER_MISMATCH;
12415                 }
12416         }
12417         btrfs_release_path(&path);
12418
12419         /* Search from the block group bytenr */
12420         extent_key.objectid = bg_key.objectid;
12421         extent_key.type = 0;
12422         extent_key.offset = 0;
12423
12424         btrfs_init_path(&path);
12425         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12426         if (ret < 0)
12427                 goto out;
12428
12429         /* Iterate extent tree to account used space */
12430         while (1) {
12431                 leaf = path.nodes[0];
12432
12433                 /* Search slot can point to the last item beyond leaf nritems */
12434                 if (path.slots[0] >= btrfs_header_nritems(leaf))
12435                         goto next;
12436
12437                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
12438                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12439                         break;
12440
12441                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12442                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12443                         goto next;
12444                 if (extent_key.objectid < bg_key.objectid)
12445                         goto next;
12446
12447                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12448                         total += nodesize;
12449                 else
12450                         total += extent_key.offset;
12451
12452                 ei = btrfs_item_ptr(leaf, path.slots[0],
12453                                     struct btrfs_extent_item);
12454                 flags = btrfs_extent_flags(leaf, ei);
12455                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12456                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12457                                 error(
12458                         "bad extent[%llu, %llu) type mismatch with chunk",
12459                                         extent_key.objectid,
12460                                         extent_key.objectid + extent_key.offset);
12461                                 err |= CHUNK_TYPE_MISMATCH;
12462                         }
12463                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12464                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12465                                     BTRFS_BLOCK_GROUP_METADATA))) {
12466                                 error(
12467                         "bad extent[%llu, %llu) type mismatch with chunk",
12468                                         extent_key.objectid,
12469                                         extent_key.objectid + nodesize);
12470                                 err |= CHUNK_TYPE_MISMATCH;
12471                         }
12472                 }
12473 next:
12474                 ret = btrfs_next_item(extent_root, &path);
12475                 if (ret)
12476                         break;
12477         }
12478
12479 out:
12480         btrfs_release_path(&path);
12481
12482         if (total != used) {
12483                 error(
12484                 "block group[%llu %llu] used %llu but extent items used %llu",
12485                         bg_key.objectid, bg_key.offset, used, total);
12486                 err |= BG_ACCOUNTING_ERROR;
12487         }
12488         return err;
12489 }
12490
12491 /*
12492  * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
12493  * FIXME: We still need to repair error of dev_item.
12494  *
12495  * Returns error after repair.
12496  */
12497 static int repair_chunk_item(struct btrfs_trans_handle *trans,
12498                              struct btrfs_root *chunk_root,
12499                              struct btrfs_path *path, int err)
12500 {
12501         struct btrfs_chunk *chunk;
12502         struct btrfs_key chunk_key;
12503         struct extent_buffer *eb = path->nodes[0];
12504         u64 length;
12505         int slot = path->slots[0];
12506         u64 type;
12507         int ret = 0;
12508
12509         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12510         if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
12511                 return err;
12512         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12513         type = btrfs_chunk_type(path->nodes[0], chunk);
12514         length = btrfs_chunk_length(eb, chunk);
12515
12516         if (err & REFERENCER_MISSING) {
12517                 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
12518                                              type, chunk_key.offset, length);
12519                 if (ret) {
12520                         error("fail to add block group item[%llu %llu]",
12521                               chunk_key.offset, length);
12522                         goto out;
12523                 } else {
12524                         err &= ~REFERENCER_MISSING;
12525                         printf("Added block group item[%llu %llu]\n",
12526                                chunk_key.offset, length);
12527                 }
12528         }
12529
12530 out:
12531         return err;
12532 }
12533
12534 /*
12535  * Check a chunk item.
12536  * Including checking all referred dev_extents and block group
12537  */
12538 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12539                             struct extent_buffer *eb, int slot)
12540 {
12541         struct btrfs_root *extent_root = fs_info->extent_root;
12542         struct btrfs_root *dev_root = fs_info->dev_root;
12543         struct btrfs_path path;
12544         struct btrfs_key chunk_key;
12545         struct btrfs_key bg_key;
12546         struct btrfs_key devext_key;
12547         struct btrfs_chunk *chunk;
12548         struct extent_buffer *leaf;
12549         struct btrfs_block_group_item *bi;
12550         struct btrfs_block_group_item bg_item;
12551         struct btrfs_dev_extent *ptr;
12552         u64 length;
12553         u64 chunk_end;
12554         u64 stripe_len;
12555         u64 type;
12556         int num_stripes;
12557         u64 offset;
12558         u64 objectid;
12559         int i;
12560         int ret;
12561         int err = 0;
12562
12563         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12564         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12565         length = btrfs_chunk_length(eb, chunk);
12566         chunk_end = chunk_key.offset + length;
12567         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12568                                       chunk_key.offset);
12569         if (ret < 0) {
12570                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12571                         chunk_end);
12572                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12573                 goto out;
12574         }
12575         type = btrfs_chunk_type(eb, chunk);
12576
12577         bg_key.objectid = chunk_key.offset;
12578         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12579         bg_key.offset = length;
12580
12581         btrfs_init_path(&path);
12582         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12583         if (ret) {
12584                 error(
12585                 "chunk[%llu %llu) did not find the related block group item",
12586                         chunk_key.offset, chunk_end);
12587                 err |= REFERENCER_MISSING;
12588         } else{
12589                 leaf = path.nodes[0];
12590                 bi = btrfs_item_ptr(leaf, path.slots[0],
12591                                     struct btrfs_block_group_item);
12592                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
12593                                    sizeof(bg_item));
12594                 if (btrfs_block_group_flags(&bg_item) != type) {
12595                         error(
12596 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12597                                 chunk_key.offset, chunk_end, type,
12598                                 btrfs_block_group_flags(&bg_item));
12599                         err |= REFERENCER_MISSING;
12600                 }
12601         }
12602
12603         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12604         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
12605         for (i = 0; i < num_stripes; i++) {
12606                 btrfs_release_path(&path);
12607                 btrfs_init_path(&path);
12608                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12609                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12610                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12611
12612                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12613                                         0, 0);
12614                 if (ret)
12615                         goto not_match_dev;
12616
12617                 leaf = path.nodes[0];
12618                 ptr = btrfs_item_ptr(leaf, path.slots[0],
12619                                      struct btrfs_dev_extent);
12620                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12621                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
12622                 if (objectid != chunk_key.objectid ||
12623                     offset != chunk_key.offset ||
12624                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12625                         goto not_match_dev;
12626                 continue;
12627 not_match_dev:
12628                 err |= BACKREF_MISSING;
12629                 error(
12630                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12631                         chunk_key.objectid, chunk_end, i);
12632                 continue;
12633         }
12634         btrfs_release_path(&path);
12635 out:
12636         return err;
12637 }
12638
12639 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
12640                                    struct btrfs_root *root,
12641                                    struct btrfs_path *path)
12642 {
12643         struct btrfs_key key;
12644         int ret = 0;
12645
12646         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
12647         btrfs_release_path(path);
12648         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
12649         if (ret) {
12650                 ret = -ENOENT;
12651                 goto out;
12652         }
12653
12654         ret = btrfs_del_item(trans, root, path);
12655         if (ret)
12656                 goto out;
12657
12658         if (path->slots[0] == 0)
12659                 btrfs_prev_leaf(root, path);
12660         else
12661                 path->slots[0]--;
12662 out:
12663         if (ret)
12664                 error("failed to delete root %llu item[%llu, %u, %llu]",
12665                       root->objectid, key.objectid, key.type, key.offset);
12666         else
12667                 printf("Deleted root %llu item[%llu, %u, %llu]\n",
12668                        root->objectid, key.objectid, key.type, key.offset);
12669         return ret;
12670 }
12671
12672 /*
12673  * Main entry function to check known items and update related accounting info
12674  */
12675 static int check_leaf_items(struct btrfs_trans_handle *trans,
12676                             struct btrfs_root *root, struct btrfs_path *path,
12677                             struct node_refs *nrefs, int account_bytes)
12678 {
12679         struct btrfs_fs_info *fs_info = root->fs_info;
12680         struct btrfs_key key;
12681         struct extent_buffer *eb;
12682         int slot;
12683         int type;
12684         struct btrfs_extent_data_ref *dref;
12685         int ret = 0;
12686         int err = 0;
12687
12688 again:
12689         eb = path->nodes[0];
12690         slot = path->slots[0];
12691         if (slot >= btrfs_header_nritems(eb)) {
12692                 if (slot == 0) {
12693                         error("empty leaf [%llu %u] root %llu", eb->start,
12694                                 root->fs_info->nodesize, root->objectid);
12695                         err |= EIO;
12696                 }
12697                 goto out;
12698         }
12699
12700         btrfs_item_key_to_cpu(eb, &key, slot);
12701         type = key.type;
12702
12703         switch (type) {
12704         case BTRFS_EXTENT_DATA_KEY:
12705                 ret = check_extent_data_item(root, path, nrefs, account_bytes);
12706                 if (repair && ret)
12707                         ret = repair_extent_data_item(trans, root, path, nrefs,
12708                                                       ret);
12709                 err |= ret;
12710                 break;
12711         case BTRFS_BLOCK_GROUP_ITEM_KEY:
12712                 ret = check_block_group_item(fs_info, eb, slot);
12713                 if (repair &&
12714                     ret & REFERENCER_MISSING)
12715                         ret = delete_extent_tree_item(trans, root, path);
12716                 err |= ret;
12717                 break;
12718         case BTRFS_DEV_ITEM_KEY:
12719                 ret = check_dev_item(fs_info, eb, slot);
12720                 err |= ret;
12721                 break;
12722         case BTRFS_CHUNK_ITEM_KEY:
12723                 ret = check_chunk_item(fs_info, eb, slot);
12724                 if (repair && ret)
12725                         ret = repair_chunk_item(trans, root, path, ret);
12726                 err |= ret;
12727                 break;
12728         case BTRFS_DEV_EXTENT_KEY:
12729                 ret = check_dev_extent_item(fs_info, eb, slot);
12730                 err |= ret;
12731                 break;
12732         case BTRFS_EXTENT_ITEM_KEY:
12733         case BTRFS_METADATA_ITEM_KEY:
12734                 ret = check_extent_item(trans, fs_info, path);
12735                 err |= ret;
12736                 break;
12737         case BTRFS_EXTENT_CSUM_KEY:
12738                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
12739                 err |= ret;
12740                 break;
12741         case BTRFS_TREE_BLOCK_REF_KEY:
12742                 ret = check_tree_block_backref(fs_info, key.offset,
12743                                                key.objectid, -1);
12744                 if (repair &&
12745                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12746                         ret = delete_extent_tree_item(trans, root, path);
12747                 err |= ret;
12748                 break;
12749         case BTRFS_EXTENT_DATA_REF_KEY:
12750                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
12751                 ret = check_extent_data_backref(fs_info,
12752                                 btrfs_extent_data_ref_root(eb, dref),
12753                                 btrfs_extent_data_ref_objectid(eb, dref),
12754                                 btrfs_extent_data_ref_offset(eb, dref),
12755                                 key.objectid, 0,
12756                                 btrfs_extent_data_ref_count(eb, dref));
12757                 if (repair &&
12758                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12759                         ret = delete_extent_tree_item(trans, root, path);
12760                 err |= ret;
12761                 break;
12762         case BTRFS_SHARED_BLOCK_REF_KEY:
12763                 ret = check_shared_block_backref(fs_info, key.offset,
12764                                                  key.objectid, -1);
12765                 if (repair &&
12766                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12767                         ret = delete_extent_tree_item(trans, root, path);
12768                 err |= ret;
12769                 break;
12770         case BTRFS_SHARED_DATA_REF_KEY:
12771                 ret = check_shared_data_backref(fs_info, key.offset,
12772                                                 key.objectid);
12773                 if (repair &&
12774                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12775                         ret = delete_extent_tree_item(trans, root, path);
12776                 err |= ret;
12777                 break;
12778         default:
12779                 break;
12780         }
12781
12782         ++path->slots[0];
12783         goto again;
12784 out:
12785         return err;
12786 }
12787
12788 /*
12789  * Low memory usage version check_chunks_and_extents.
12790  */
12791 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12792 {
12793         struct btrfs_trans_handle *trans = NULL;
12794         struct btrfs_path path;
12795         struct btrfs_key old_key;
12796         struct btrfs_key key;
12797         struct btrfs_root *root1;
12798         struct btrfs_root *root;
12799         struct btrfs_root *cur_root;
12800         int err = 0;
12801         int ret;
12802
12803         root = fs_info->fs_root;
12804
12805         if (repair) {
12806                 trans = btrfs_start_transaction(fs_info->extent_root, 1);
12807                 if (IS_ERR(trans)) {
12808                         error("failed to start transaction before check");
12809                         return PTR_ERR(trans);
12810                 }
12811         }
12812
12813         root1 = root->fs_info->chunk_root;
12814         ret = check_btrfs_root(trans, root1, 0, 1);
12815         err |= ret;
12816
12817         root1 = root->fs_info->tree_root;
12818         ret = check_btrfs_root(trans, root1, 0, 1);
12819         err |= ret;
12820
12821         btrfs_init_path(&path);
12822         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12823         key.offset = 0;
12824         key.type = BTRFS_ROOT_ITEM_KEY;
12825
12826         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
12827         if (ret) {
12828                 error("cannot find extent tree in tree_root");
12829                 goto out;
12830         }
12831
12832         while (1) {
12833                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12834                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12835                         goto next;
12836                 old_key = key;
12837                 key.offset = (u64)-1;
12838
12839                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12840                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
12841                                         &key);
12842                 else
12843                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
12844                 if (IS_ERR(cur_root) || !cur_root) {
12845                         error("failed to read tree: %lld", key.objectid);
12846                         goto next;
12847                 }
12848
12849                 ret = check_btrfs_root(trans, cur_root, 0, 1);
12850                 err |= ret;
12851
12852                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12853                         btrfs_free_fs_root(cur_root);
12854
12855                 btrfs_release_path(&path);
12856                 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
12857                                         &old_key, &path, 0, 0);
12858                 if (ret)
12859                         goto out;
12860 next:
12861                 ret = btrfs_next_item(root1, &path);
12862                 if (ret)
12863                         goto out;
12864         }
12865 out:
12866
12867         /* if repair, update block accounting */
12868         if (repair) {
12869                 ret = btrfs_fix_block_accounting(trans, root);
12870                 if (ret)
12871                         err |= ret;
12872                 else
12873                         err &= ~BG_ACCOUNTING_ERROR;
12874         }
12875
12876         if (trans)
12877                 btrfs_commit_transaction(trans, root->fs_info->extent_root);
12878
12879         btrfs_release_path(&path);
12880
12881         return err;
12882 }
12883
12884 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
12885 {
12886         int ret;
12887
12888         if (!ctx.progress_enabled)
12889                 fprintf(stderr, "checking extents\n");
12890         if (check_mode == CHECK_MODE_LOWMEM)
12891                 ret = check_chunks_and_extents_v2(fs_info);
12892         else
12893                 ret = check_chunks_and_extents(fs_info);
12894
12895         /* Also repair device size related problems */
12896         if (repair && !ret) {
12897                 ret = btrfs_fix_device_and_super_size(fs_info);
12898                 if (ret > 0)
12899                         ret = 0;
12900         }
12901         return ret;
12902 }
12903
12904 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
12905                            struct btrfs_root *root, int overwrite)
12906 {
12907         struct extent_buffer *c;
12908         struct extent_buffer *old = root->node;
12909         int level;
12910         int ret;
12911         struct btrfs_disk_key disk_key = {0,0,0};
12912
12913         level = 0;
12914
12915         if (overwrite) {
12916                 c = old;
12917                 extent_buffer_get(c);
12918                 goto init;
12919         }
12920         c = btrfs_alloc_free_block(trans, root,
12921                                    root->fs_info->nodesize,
12922                                    root->root_key.objectid,
12923                                    &disk_key, level, 0, 0);
12924         if (IS_ERR(c)) {
12925                 c = old;
12926                 extent_buffer_get(c);
12927                 overwrite = 1;
12928         }
12929 init:
12930         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
12931         btrfs_set_header_level(c, level);
12932         btrfs_set_header_bytenr(c, c->start);
12933         btrfs_set_header_generation(c, trans->transid);
12934         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
12935         btrfs_set_header_owner(c, root->root_key.objectid);
12936
12937         write_extent_buffer(c, root->fs_info->fsid,
12938                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
12939
12940         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
12941                             btrfs_header_chunk_tree_uuid(c),
12942                             BTRFS_UUID_SIZE);
12943
12944         btrfs_mark_buffer_dirty(c);
12945         /*
12946          * this case can happen in the following case:
12947          *
12948          * 1.overwrite previous root.
12949          *
12950          * 2.reinit reloc data root, this is because we skip pin
12951          * down reloc data tree before which means we can allocate
12952          * same block bytenr here.
12953          */
12954         if (old->start == c->start) {
12955                 btrfs_set_root_generation(&root->root_item,
12956                                           trans->transid);
12957                 root->root_item.level = btrfs_header_level(root->node);
12958                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12959                                         &root->root_key, &root->root_item);
12960                 if (ret) {
12961                         free_extent_buffer(c);
12962                         return ret;
12963                 }
12964         }
12965         free_extent_buffer(old);
12966         root->node = c;
12967         add_root_to_dirty_list(root);
12968         return 0;
12969 }
12970
12971 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12972                                 struct extent_buffer *eb, int tree_root)
12973 {
12974         struct extent_buffer *tmp;
12975         struct btrfs_root_item *ri;
12976         struct btrfs_key key;
12977         u64 bytenr;
12978         int level = btrfs_header_level(eb);
12979         int nritems;
12980         int ret;
12981         int i;
12982
12983         /*
12984          * If we have pinned this block before, don't pin it again.
12985          * This can not only avoid forever loop with broken filesystem
12986          * but also give us some speedups.
12987          */
12988         if (test_range_bit(&fs_info->pinned_extents, eb->start,
12989                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12990                 return 0;
12991
12992         btrfs_pin_extent(fs_info, eb->start, eb->len);
12993
12994         nritems = btrfs_header_nritems(eb);
12995         for (i = 0; i < nritems; i++) {
12996                 if (level == 0) {
12997                         btrfs_item_key_to_cpu(eb, &key, i);
12998                         if (key.type != BTRFS_ROOT_ITEM_KEY)
12999                                 continue;
13000                         /* Skip the extent root and reloc roots */
13001                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13002                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13003                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
13004                                 continue;
13005                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13006                         bytenr = btrfs_disk_root_bytenr(eb, ri);
13007
13008                         /*
13009                          * If at any point we start needing the real root we
13010                          * will have to build a stump root for the root we are
13011                          * in, but for now this doesn't actually use the root so
13012                          * just pass in extent_root.
13013                          */
13014                         tmp = read_tree_block(fs_info, bytenr, 0);
13015                         if (!extent_buffer_uptodate(tmp)) {
13016                                 fprintf(stderr, "Error reading root block\n");
13017                                 return -EIO;
13018                         }
13019                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
13020                         free_extent_buffer(tmp);
13021                         if (ret)
13022                                 return ret;
13023                 } else {
13024                         bytenr = btrfs_node_blockptr(eb, i);
13025
13026                         /* If we aren't the tree root don't read the block */
13027                         if (level == 1 && !tree_root) {
13028                                 btrfs_pin_extent(fs_info, bytenr,
13029                                                 fs_info->nodesize);
13030                                 continue;
13031                         }
13032
13033                         tmp = read_tree_block(fs_info, bytenr, 0);
13034                         if (!extent_buffer_uptodate(tmp)) {
13035                                 fprintf(stderr, "Error reading tree block\n");
13036                                 return -EIO;
13037                         }
13038                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13039                         free_extent_buffer(tmp);
13040                         if (ret)
13041                                 return ret;
13042                 }
13043         }
13044
13045         return 0;
13046 }
13047
13048 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13049 {
13050         int ret;
13051
13052         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13053         if (ret)
13054                 return ret;
13055
13056         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
13057 }
13058
13059 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13060 {
13061         struct btrfs_block_group_cache *cache;
13062         struct btrfs_path path;
13063         struct extent_buffer *leaf;
13064         struct btrfs_chunk *chunk;
13065         struct btrfs_key key;
13066         int ret;
13067         u64 start;
13068
13069         btrfs_init_path(&path);
13070         key.objectid = 0;
13071         key.type = BTRFS_CHUNK_ITEM_KEY;
13072         key.offset = 0;
13073         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13074         if (ret < 0) {
13075                 btrfs_release_path(&path);
13076                 return ret;
13077         }
13078
13079         /*
13080          * We do this in case the block groups were screwed up and had alloc
13081          * bits that aren't actually set on the chunks.  This happens with
13082          * restored images every time and could happen in real life I guess.
13083          */
13084         fs_info->avail_data_alloc_bits = 0;
13085         fs_info->avail_metadata_alloc_bits = 0;
13086         fs_info->avail_system_alloc_bits = 0;
13087
13088         /* First we need to create the in-memory block groups */
13089         while (1) {
13090                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13091                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13092                         if (ret < 0) {
13093                                 btrfs_release_path(&path);
13094                                 return ret;
13095                         }
13096                         if (ret) {
13097                                 ret = 0;
13098                                 break;
13099                         }
13100                 }
13101                 leaf = path.nodes[0];
13102                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13103                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13104                         path.slots[0]++;
13105                         continue;
13106                 }
13107
13108                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13109                 btrfs_add_block_group(fs_info, 0,
13110                                       btrfs_chunk_type(leaf, chunk), key.offset,
13111                                       btrfs_chunk_length(leaf, chunk));
13112                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13113                                  key.offset + btrfs_chunk_length(leaf, chunk));
13114                 path.slots[0]++;
13115         }
13116         start = 0;
13117         while (1) {
13118                 cache = btrfs_lookup_first_block_group(fs_info, start);
13119                 if (!cache)
13120                         break;
13121                 cache->cached = 1;
13122                 start = cache->key.objectid + cache->key.offset;
13123         }
13124
13125         btrfs_release_path(&path);
13126         return 0;
13127 }
13128
13129 static int reset_balance(struct btrfs_trans_handle *trans,
13130                          struct btrfs_fs_info *fs_info)
13131 {
13132         struct btrfs_root *root = fs_info->tree_root;
13133         struct btrfs_path path;
13134         struct extent_buffer *leaf;
13135         struct btrfs_key key;
13136         int del_slot, del_nr = 0;
13137         int ret;
13138         int found = 0;
13139
13140         btrfs_init_path(&path);
13141         key.objectid = BTRFS_BALANCE_OBJECTID;
13142         key.type = BTRFS_BALANCE_ITEM_KEY;
13143         key.offset = 0;
13144         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13145         if (ret) {
13146                 if (ret > 0)
13147                         ret = 0;
13148                 if (!ret)
13149                         goto reinit_data_reloc;
13150                 else
13151                         goto out;
13152         }
13153
13154         ret = btrfs_del_item(trans, root, &path);
13155         if (ret)
13156                 goto out;
13157         btrfs_release_path(&path);
13158
13159         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13160         key.type = BTRFS_ROOT_ITEM_KEY;
13161         key.offset = 0;
13162         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13163         if (ret < 0)
13164                 goto out;
13165         while (1) {
13166                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13167                         if (!found)
13168                                 break;
13169
13170                         if (del_nr) {
13171                                 ret = btrfs_del_items(trans, root, &path,
13172                                                       del_slot, del_nr);
13173                                 del_nr = 0;
13174                                 if (ret)
13175                                         goto out;
13176                         }
13177                         key.offset++;
13178                         btrfs_release_path(&path);
13179
13180                         found = 0;
13181                         ret = btrfs_search_slot(trans, root, &key, &path,
13182                                                 -1, 1);
13183                         if (ret < 0)
13184                                 goto out;
13185                         continue;
13186                 }
13187                 found = 1;
13188                 leaf = path.nodes[0];
13189                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13190                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13191                         break;
13192                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13193                         path.slots[0]++;
13194                         continue;
13195                 }
13196                 if (!del_nr) {
13197                         del_slot = path.slots[0];
13198                         del_nr = 1;
13199                 } else {
13200                         del_nr++;
13201                 }
13202                 path.slots[0]++;
13203         }
13204
13205         if (del_nr) {
13206                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13207                 if (ret)
13208                         goto out;
13209         }
13210         btrfs_release_path(&path);
13211
13212 reinit_data_reloc:
13213         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13214         key.type = BTRFS_ROOT_ITEM_KEY;
13215         key.offset = (u64)-1;
13216         root = btrfs_read_fs_root(fs_info, &key);
13217         if (IS_ERR(root)) {
13218                 fprintf(stderr, "Error reading data reloc tree\n");
13219                 ret = PTR_ERR(root);
13220                 goto out;
13221         }
13222         record_root_in_trans(trans, root);
13223         ret = btrfs_fsck_reinit_root(trans, root, 0);
13224         if (ret)
13225                 goto out;
13226         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13227 out:
13228         btrfs_release_path(&path);
13229         return ret;
13230 }
13231
13232 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13233                               struct btrfs_fs_info *fs_info)
13234 {
13235         u64 start = 0;
13236         int ret;
13237
13238         /*
13239          * The only reason we don't do this is because right now we're just
13240          * walking the trees we find and pinning down their bytes, we don't look
13241          * at any of the leaves.  In order to do mixed groups we'd have to check
13242          * the leaves of any fs roots and pin down the bytes for any file
13243          * extents we find.  Not hard but why do it if we don't have to?
13244          */
13245         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13246                 fprintf(stderr, "We don't support re-initing the extent tree "
13247                         "for mixed block groups yet, please notify a btrfs "
13248                         "developer you want to do this so they can add this "
13249                         "functionality.\n");
13250                 return -EINVAL;
13251         }
13252
13253         /*
13254          * first we need to walk all of the trees except the extent tree and pin
13255          * down the bytes that are in use so we don't overwrite any existing
13256          * metadata.
13257          */
13258         ret = pin_metadata_blocks(fs_info);
13259         if (ret) {
13260                 fprintf(stderr, "error pinning down used bytes\n");
13261                 return ret;
13262         }
13263
13264         /*
13265          * Need to drop all the block groups since we're going to recreate all
13266          * of them again.
13267          */
13268         btrfs_free_block_groups(fs_info);
13269         ret = reset_block_groups(fs_info);
13270         if (ret) {
13271                 fprintf(stderr, "error resetting the block groups\n");
13272                 return ret;
13273         }
13274
13275         /* Ok we can allocate now, reinit the extent root */
13276         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13277         if (ret) {
13278                 fprintf(stderr, "extent root initialization failed\n");
13279                 /*
13280                  * When the transaction code is updated we should end the
13281                  * transaction, but for now progs only knows about commit so
13282                  * just return an error.
13283                  */
13284                 return ret;
13285         }
13286
13287         /*
13288          * Now we have all the in-memory block groups setup so we can make
13289          * allocations properly, and the metadata we care about is safe since we
13290          * pinned all of it above.
13291          */
13292         while (1) {
13293                 struct btrfs_block_group_cache *cache;
13294
13295                 cache = btrfs_lookup_first_block_group(fs_info, start);
13296                 if (!cache)
13297                         break;
13298                 start = cache->key.objectid + cache->key.offset;
13299                 ret = btrfs_insert_item(trans, fs_info->extent_root,
13300                                         &cache->key, &cache->item,
13301                                         sizeof(cache->item));
13302                 if (ret) {
13303                         fprintf(stderr, "Error adding block group\n");
13304                         return ret;
13305                 }
13306                 btrfs_extent_post_op(trans, fs_info->extent_root);
13307         }
13308
13309         ret = reset_balance(trans, fs_info);
13310         if (ret)
13311                 fprintf(stderr, "error resetting the pending balance\n");
13312
13313         return ret;
13314 }
13315
13316 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13317 {
13318         struct btrfs_path path;
13319         struct btrfs_trans_handle *trans;
13320         struct btrfs_key key;
13321         int ret;
13322
13323         printf("Recowing metadata block %llu\n", eb->start);
13324         key.objectid = btrfs_header_owner(eb);
13325         key.type = BTRFS_ROOT_ITEM_KEY;
13326         key.offset = (u64)-1;
13327
13328         root = btrfs_read_fs_root(root->fs_info, &key);
13329         if (IS_ERR(root)) {
13330                 fprintf(stderr, "Couldn't find owner root %llu\n",
13331                         key.objectid);
13332                 return PTR_ERR(root);
13333         }
13334
13335         trans = btrfs_start_transaction(root, 1);
13336         if (IS_ERR(trans))
13337                 return PTR_ERR(trans);
13338
13339         btrfs_init_path(&path);
13340         path.lowest_level = btrfs_header_level(eb);
13341         if (path.lowest_level)
13342                 btrfs_node_key_to_cpu(eb, &key, 0);
13343         else
13344                 btrfs_item_key_to_cpu(eb, &key, 0);
13345
13346         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13347         btrfs_commit_transaction(trans, root);
13348         btrfs_release_path(&path);
13349         return ret;
13350 }
13351
13352 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13353 {
13354         struct btrfs_path path;
13355         struct btrfs_trans_handle *trans;
13356         struct btrfs_key key;
13357         int ret;
13358
13359         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13360                bad->key.type, bad->key.offset);
13361         key.objectid = bad->root_id;
13362         key.type = BTRFS_ROOT_ITEM_KEY;
13363         key.offset = (u64)-1;
13364
13365         root = btrfs_read_fs_root(root->fs_info, &key);
13366         if (IS_ERR(root)) {
13367                 fprintf(stderr, "Couldn't find owner root %llu\n",
13368                         key.objectid);
13369                 return PTR_ERR(root);
13370         }
13371
13372         trans = btrfs_start_transaction(root, 1);
13373         if (IS_ERR(trans))
13374                 return PTR_ERR(trans);
13375
13376         btrfs_init_path(&path);
13377         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13378         if (ret) {
13379                 if (ret > 0)
13380                         ret = 0;
13381                 goto out;
13382         }
13383         ret = btrfs_del_item(trans, root, &path);
13384 out:
13385         btrfs_commit_transaction(trans, root);
13386         btrfs_release_path(&path);
13387         return ret;
13388 }
13389
13390 static int zero_log_tree(struct btrfs_root *root)
13391 {
13392         struct btrfs_trans_handle *trans;
13393         int ret;
13394
13395         trans = btrfs_start_transaction(root, 1);
13396         if (IS_ERR(trans)) {
13397                 ret = PTR_ERR(trans);
13398                 return ret;
13399         }
13400         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13401         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13402         ret = btrfs_commit_transaction(trans, root);
13403         return ret;
13404 }
13405
13406 static int populate_csum(struct btrfs_trans_handle *trans,
13407                          struct btrfs_root *csum_root, char *buf, u64 start,
13408                          u64 len)
13409 {
13410         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13411         u64 offset = 0;
13412         u64 sectorsize;
13413         int ret = 0;
13414
13415         while (offset < len) {
13416                 sectorsize = fs_info->sectorsize;
13417                 ret = read_extent_data(fs_info, buf, start + offset,
13418                                        &sectorsize, 0);
13419                 if (ret)
13420                         break;
13421                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13422                                             start + offset, buf, sectorsize);
13423                 if (ret)
13424                         break;
13425                 offset += sectorsize;
13426         }
13427         return ret;
13428 }
13429
13430 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13431                                       struct btrfs_root *csum_root,
13432                                       struct btrfs_root *cur_root)
13433 {
13434         struct btrfs_path path;
13435         struct btrfs_key key;
13436         struct extent_buffer *node;
13437         struct btrfs_file_extent_item *fi;
13438         char *buf = NULL;
13439         u64 start = 0;
13440         u64 len = 0;
13441         int slot = 0;
13442         int ret = 0;
13443
13444         buf = malloc(cur_root->fs_info->sectorsize);
13445         if (!buf)
13446                 return -ENOMEM;
13447
13448         btrfs_init_path(&path);
13449         key.objectid = 0;
13450         key.offset = 0;
13451         key.type = 0;
13452         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13453         if (ret < 0)
13454                 goto out;
13455         /* Iterate all regular file extents and fill its csum */
13456         while (1) {
13457                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13458
13459                 if (key.type != BTRFS_EXTENT_DATA_KEY)
13460                         goto next;
13461                 node = path.nodes[0];
13462                 slot = path.slots[0];
13463                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13464                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
13465                         goto next;
13466                 start = btrfs_file_extent_disk_bytenr(node, fi);
13467                 len = btrfs_file_extent_disk_num_bytes(node, fi);
13468
13469                 ret = populate_csum(trans, csum_root, buf, start, len);
13470                 if (ret == -EEXIST)
13471                         ret = 0;
13472                 if (ret < 0)
13473                         goto out;
13474 next:
13475                 /*
13476                  * TODO: if next leaf is corrupted, jump to nearest next valid
13477                  * leaf.
13478                  */
13479                 ret = btrfs_next_item(cur_root, &path);
13480                 if (ret < 0)
13481                         goto out;
13482                 if (ret > 0) {
13483                         ret = 0;
13484                         goto out;
13485                 }
13486         }
13487
13488 out:
13489         btrfs_release_path(&path);
13490         free(buf);
13491         return ret;
13492 }
13493
13494 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13495                                   struct btrfs_root *csum_root)
13496 {
13497         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13498         struct btrfs_path path;
13499         struct btrfs_root *tree_root = fs_info->tree_root;
13500         struct btrfs_root *cur_root;
13501         struct extent_buffer *node;
13502         struct btrfs_key key;
13503         int slot = 0;
13504         int ret = 0;
13505
13506         btrfs_init_path(&path);
13507         key.objectid = BTRFS_FS_TREE_OBJECTID;
13508         key.offset = 0;
13509         key.type = BTRFS_ROOT_ITEM_KEY;
13510         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13511         if (ret < 0)
13512                 goto out;
13513         if (ret > 0) {
13514                 ret = -ENOENT;
13515                 goto out;
13516         }
13517
13518         while (1) {
13519                 node = path.nodes[0];
13520                 slot = path.slots[0];
13521                 btrfs_item_key_to_cpu(node, &key, slot);
13522                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13523                         goto out;
13524                 if (key.type != BTRFS_ROOT_ITEM_KEY)
13525                         goto next;
13526                 if (!is_fstree(key.objectid))
13527                         goto next;
13528                 key.offset = (u64)-1;
13529
13530                 cur_root = btrfs_read_fs_root(fs_info, &key);
13531                 if (IS_ERR(cur_root) || !cur_root) {
13532                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13533                                 key.objectid);
13534                         goto out;
13535                 }
13536                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13537                                 cur_root);
13538                 if (ret < 0)
13539                         goto out;
13540 next:
13541                 ret = btrfs_next_item(tree_root, &path);
13542                 if (ret > 0) {
13543                         ret = 0;
13544                         goto out;
13545                 }
13546                 if (ret < 0)
13547                         goto out;
13548         }
13549
13550 out:
13551         btrfs_release_path(&path);
13552         return ret;
13553 }
13554
13555 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13556                                       struct btrfs_root *csum_root)
13557 {
13558         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13559         struct btrfs_path path;
13560         struct btrfs_extent_item *ei;
13561         struct extent_buffer *leaf;
13562         char *buf;
13563         struct btrfs_key key;
13564         int ret;
13565
13566         btrfs_init_path(&path);
13567         key.objectid = 0;
13568         key.type = BTRFS_EXTENT_ITEM_KEY;
13569         key.offset = 0;
13570         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13571         if (ret < 0) {
13572                 btrfs_release_path(&path);
13573                 return ret;
13574         }
13575
13576         buf = malloc(csum_root->fs_info->sectorsize);
13577         if (!buf) {
13578                 btrfs_release_path(&path);
13579                 return -ENOMEM;
13580         }
13581
13582         while (1) {
13583                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13584                         ret = btrfs_next_leaf(extent_root, &path);
13585                         if (ret < 0)
13586                                 break;
13587                         if (ret) {
13588                                 ret = 0;
13589                                 break;
13590                         }
13591                 }
13592                 leaf = path.nodes[0];
13593
13594                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13595                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13596                         path.slots[0]++;
13597                         continue;
13598                 }
13599
13600                 ei = btrfs_item_ptr(leaf, path.slots[0],
13601                                     struct btrfs_extent_item);
13602                 if (!(btrfs_extent_flags(leaf, ei) &
13603                       BTRFS_EXTENT_FLAG_DATA)) {
13604                         path.slots[0]++;
13605                         continue;
13606                 }
13607
13608                 ret = populate_csum(trans, csum_root, buf, key.objectid,
13609                                     key.offset);
13610                 if (ret)
13611                         break;
13612                 path.slots[0]++;
13613         }
13614
13615         btrfs_release_path(&path);
13616         free(buf);
13617         return ret;
13618 }
13619
/*
 * Recalculate the data checksums and store them in the csum tree.
 *
 * Initializing the extent tree wipes all extent info, so in that case the
 * extent tree cannot be used as the source and the fs/subvolume trees are
 * walked instead.  A non-zero search_fs_tree selects the fs/subvolume tree
 * walk; zero selects the extent tree walk.
 *
 * Returns whatever the selected helper returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        if (!search_fs_tree)
                return fill_csum_tree_from_extent(trans, csum_root);

        return fill_csum_tree_from_fs(trans, csum_root);
}
13636
13637 static void free_roots_info_cache(void)
13638 {
13639         if (!roots_info_cache)
13640                 return;
13641
13642         while (!cache_tree_empty(roots_info_cache)) {
13643                 struct cache_extent *entry;
13644                 struct root_item_info *rii;
13645
13646                 entry = first_cache_extent(roots_info_cache);
13647                 if (!entry)
13648                         break;
13649                 remove_cache_extent(roots_info_cache, entry);
13650                 rii = container_of(entry, struct root_item_info, cache_extent);
13651                 free(rii);
13652         }
13653
13654         free(roots_info_cache);
13655         roots_info_cache = NULL;
13656 }
13657
13658 static int build_roots_info_cache(struct btrfs_fs_info *info)
13659 {
13660         int ret = 0;
13661         struct btrfs_key key;
13662         struct extent_buffer *leaf;
13663         struct btrfs_path path;
13664
13665         if (!roots_info_cache) {
13666                 roots_info_cache = malloc(sizeof(*roots_info_cache));
13667                 if (!roots_info_cache)
13668                         return -ENOMEM;
13669                 cache_tree_init(roots_info_cache);
13670         }
13671
13672         btrfs_init_path(&path);
13673         key.objectid = 0;
13674         key.type = BTRFS_EXTENT_ITEM_KEY;
13675         key.offset = 0;
13676         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13677         if (ret < 0)
13678                 goto out;
13679         leaf = path.nodes[0];
13680
13681         while (1) {
13682                 struct btrfs_key found_key;
13683                 struct btrfs_extent_item *ei;
13684                 struct btrfs_extent_inline_ref *iref;
13685                 int slot = path.slots[0];
13686                 int type;
13687                 u64 flags;
13688                 u64 root_id;
13689                 u8 level;
13690                 struct cache_extent *entry;
13691                 struct root_item_info *rii;
13692
13693                 if (slot >= btrfs_header_nritems(leaf)) {
13694                         ret = btrfs_next_leaf(info->extent_root, &path);
13695                         if (ret < 0) {
13696                                 break;
13697                         } else if (ret) {
13698                                 ret = 0;
13699                                 break;
13700                         }
13701                         leaf = path.nodes[0];
13702                         slot = path.slots[0];
13703                 }
13704
13705                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13706
13707                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13708                     found_key.type != BTRFS_METADATA_ITEM_KEY)
13709                         goto next;
13710
13711                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13712                 flags = btrfs_extent_flags(leaf, ei);
13713
13714                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13715                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
13716                         goto next;
13717
13718                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13719                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13720                         level = found_key.offset;
13721                 } else {
13722                         struct btrfs_tree_block_info *binfo;
13723
13724                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
13725                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13726                         level = btrfs_tree_block_level(leaf, binfo);
13727                 }
13728
13729                 /*
13730                  * For a root extent, it must be of the following type and the
13731                  * first (and only one) iref in the item.
13732                  */
13733                 type = btrfs_extent_inline_ref_type(leaf, iref);
13734                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
13735                         goto next;
13736
13737                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13738                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13739                 if (!entry) {
13740                         rii = malloc(sizeof(struct root_item_info));
13741                         if (!rii) {
13742                                 ret = -ENOMEM;
13743                                 goto out;
13744                         }
13745                         rii->cache_extent.start = root_id;
13746                         rii->cache_extent.size = 1;
13747                         rii->level = (u8)-1;
13748                         entry = &rii->cache_extent;
13749                         ret = insert_cache_extent(roots_info_cache, entry);
13750                         ASSERT(ret == 0);
13751                 } else {
13752                         rii = container_of(entry, struct root_item_info,
13753                                            cache_extent);
13754                 }
13755
13756                 ASSERT(rii->cache_extent.start == root_id);
13757                 ASSERT(rii->cache_extent.size == 1);
13758
13759                 if (level > rii->level || rii->level == (u8)-1) {
13760                         rii->level = level;
13761                         rii->bytenr = found_key.objectid;
13762                         rii->gen = btrfs_extent_generation(leaf, ei);
13763                         rii->node_count = 1;
13764                 } else if (level == rii->level) {
13765                         rii->node_count++;
13766                 }
13767 next:
13768                 path.slots[0]++;
13769         }
13770
13771 out:
13772         btrfs_release_path(&path);
13773
13774         return ret;
13775 }
13776
13777 static int maybe_repair_root_item(struct btrfs_path *path,
13778                                   const struct btrfs_key *root_key,
13779                                   const int read_only_mode)
13780 {
13781         const u64 root_id = root_key->objectid;
13782         struct cache_extent *entry;
13783         struct root_item_info *rii;
13784         struct btrfs_root_item ri;
13785         unsigned long offset;
13786
13787         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13788         if (!entry) {
13789                 fprintf(stderr,
13790                         "Error: could not find extent items for root %llu\n",
13791                         root_key->objectid);
13792                 return -ENOENT;
13793         }
13794
13795         rii = container_of(entry, struct root_item_info, cache_extent);
13796         ASSERT(rii->cache_extent.start == root_id);
13797         ASSERT(rii->cache_extent.size == 1);
13798
13799         if (rii->node_count != 1) {
13800                 fprintf(stderr,
13801                         "Error: could not find btree root extent for root %llu\n",
13802                         root_id);
13803                 return -ENOENT;
13804         }
13805
13806         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13807         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
13808
13809         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13810             btrfs_root_level(&ri) != rii->level ||
13811             btrfs_root_generation(&ri) != rii->gen) {
13812
13813                 /*
13814                  * If we're in repair mode but our caller told us to not update
13815                  * the root item, i.e. just check if it needs to be updated, don't
13816                  * print this message, since the caller will call us again shortly
13817                  * for the same root item without read only mode (the caller will
13818                  * open a transaction first).
13819                  */
13820                 if (!(read_only_mode && repair))
13821                         fprintf(stderr,
13822                                 "%sroot item for root %llu,"
13823                                 " current bytenr %llu, current gen %llu, current level %u,"
13824                                 " new bytenr %llu, new gen %llu, new level %u\n",
13825                                 (read_only_mode ? "" : "fixing "),
13826                                 root_id,
13827                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13828                                 btrfs_root_level(&ri),
13829                                 rii->bytenr, rii->gen, rii->level);
13830
13831                 if (btrfs_root_generation(&ri) > rii->gen) {
13832                         fprintf(stderr,
13833                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13834                                 root_id, btrfs_root_generation(&ri), rii->gen);
13835                         return -EINVAL;
13836                 }
13837
13838                 if (!read_only_mode) {
13839                         btrfs_set_root_bytenr(&ri, rii->bytenr);
13840                         btrfs_set_root_level(&ri, rii->level);
13841                         btrfs_set_root_generation(&ri, rii->gen);
13842                         write_extent_buffer(path->nodes[0], &ri,
13843                                             offset, sizeof(ri));
13844                 }
13845
13846                 return 1;
13847         }
13848
13849         return 0;
13850 }
13851
13852 /*
13853  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
13854  * caused read-only snapshots to be corrupted if they were created at a moment
13855  * when the source subvolume/snapshot had orphan items. The issue was that the
13856  * on-disk root items became incorrect, referring to the pre orphan cleanup root
13857  * node instead of the post orphan cleanup root node.
13858  * So this function, and its callees, just detects and fixes those cases. Even
13859  * though the regression was for read-only snapshots, this function applies to
13860  * any snapshot/subvolume root.
13861  * This must be run before any other repair code - not doing it so, makes other
13862  * repair code delete or modify backrefs in the extent tree for example, which
13863  * will result in an inconsistent fs after repairing the root items.
13864  */
13865 static int repair_root_items(struct btrfs_fs_info *info)
13866 {
13867         struct btrfs_path path;
13868         struct btrfs_key key;
13869         struct extent_buffer *leaf;
13870         struct btrfs_trans_handle *trans = NULL;
13871         int ret = 0;
13872         int bad_roots = 0;
13873         int need_trans = 0;
13874
13875         btrfs_init_path(&path);
13876
13877         ret = build_roots_info_cache(info);
13878         if (ret)
13879                 goto out;
13880
13881         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
13882         key.type = BTRFS_ROOT_ITEM_KEY;
13883         key.offset = 0;
13884
13885 again:
13886         /*
13887          * Avoid opening and committing transactions if a leaf doesn't have
13888          * any root items that need to be fixed, so that we avoid rotating
13889          * backup roots unnecessarily.
13890          */
13891         if (need_trans) {
13892                 trans = btrfs_start_transaction(info->tree_root, 1);
13893                 if (IS_ERR(trans)) {
13894                         ret = PTR_ERR(trans);
13895                         goto out;
13896                 }
13897         }
13898
13899         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
13900                                 0, trans ? 1 : 0);
13901         if (ret < 0)
13902                 goto out;
13903         leaf = path.nodes[0];
13904
13905         while (1) {
13906                 struct btrfs_key found_key;
13907
13908                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
13909                         int no_more_keys = find_next_key(&path, &key);
13910
13911                         btrfs_release_path(&path);
13912                         if (trans) {
13913                                 ret = btrfs_commit_transaction(trans,
13914                                                                info->tree_root);
13915                                 trans = NULL;
13916                                 if (ret < 0)
13917                                         goto out;
13918                         }
13919                         need_trans = 0;
13920                         if (no_more_keys)
13921                                 break;
13922                         goto again;
13923                 }
13924
13925                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13926
13927                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
13928                         goto next;
13929                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13930                         goto next;
13931
13932                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
13933                 if (ret < 0)
13934                         goto out;
13935                 if (ret) {
13936                         if (!trans && repair) {
13937                                 need_trans = 1;
13938                                 key = found_key;
13939                                 btrfs_release_path(&path);
13940                                 goto again;
13941                         }
13942                         bad_roots++;
13943                 }
13944 next:
13945                 path.slots[0]++;
13946         }
13947         ret = 0;
13948 out:
13949         free_roots_info_cache();
13950         btrfs_release_path(&path);
13951         if (trans)
13952                 btrfs_commit_transaction(trans, info->tree_root);
13953         if (ret < 0)
13954                 return ret;
13955
13956         return bad_roots;
13957 }
13958
13959 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13960 {
13961         struct btrfs_trans_handle *trans;
13962         struct btrfs_block_group_cache *bg_cache;
13963         u64 current = 0;
13964         int ret = 0;
13965
13966         /* Clear all free space cache inodes and its extent data */
13967         while (1) {
13968                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13969                 if (!bg_cache)
13970                         break;
13971                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
13972                 if (ret < 0)
13973                         return ret;
13974                 current = bg_cache->key.objectid + bg_cache->key.offset;
13975         }
13976
13977         /* Don't forget to set cache_generation to -1 */
13978         trans = btrfs_start_transaction(fs_info->tree_root, 0);
13979         if (IS_ERR(trans)) {
13980                 error("failed to update super block cache generation");
13981                 return PTR_ERR(trans);
13982         }
13983         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
13984         btrfs_commit_transaction(trans, fs_info->tree_root);
13985
13986         return ret;
13987 }
13988
13989 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13990                 int clear_version)
13991 {
13992         int ret = 0;
13993
13994         if (clear_version == 1) {
13995                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13996                         error(
13997                 "free space cache v2 detected, use --clear-space-cache v2");
13998                         ret = 1;
13999                         goto close_out;
14000                 }
14001                 printf("Clearing free space cache\n");
14002                 ret = clear_free_space_cache(fs_info);
14003                 if (ret) {
14004                         error("failed to clear free space cache");
14005                         ret = 1;
14006                 } else {
14007                         printf("Free space cache cleared\n");
14008                 }
14009         } else if (clear_version == 2) {
14010                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14011                         printf("no free space cache v2 to clear\n");
14012                         ret = 0;
14013                         goto close_out;
14014                 }
14015                 printf("Clear free space cache v2\n");
14016                 ret = btrfs_clear_free_space_tree(fs_info);
14017                 if (ret) {
14018                         error("failed to clear free space cache v2: %d", ret);
14019                         ret = 1;
14020                 } else {
14021                         printf("free space cache v2 cleared\n");
14022                 }
14023         }
14024 close_out:
14025         return ret;
14026 }
14027
/*
 * Help text for "btrfs check": a NULL-terminated list of lines that
 * cmd_check() passes to usage().  The entries are the synopsis, a short and a
 * longer description, and then one or more lines per supported option.
 * NOTE(review): how usage() treats the individual entries (e.g. the empty
 * string separating description and options) is not visible here - confirm
 * against the help code before reordering entries.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--force                     skip mount checks, repair is not possible",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
14059
14060 int cmd_check(int argc, char **argv)
14061 {
14062         struct cache_tree root_cache;
14063         struct btrfs_root *root;
14064         struct btrfs_fs_info *info;
14065         u64 bytenr = 0;
14066         u64 subvolid = 0;
14067         u64 tree_root_bytenr = 0;
14068         u64 chunk_root_bytenr = 0;
14069         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14070         int ret = 0;
14071         int err = 0;
14072         u64 num;
14073         int init_csum_tree = 0;
14074         int readonly = 0;
14075         int clear_space_cache = 0;
14076         int qgroup_report = 0;
14077         int qgroups_repaired = 0;
14078         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
14079         int force = 0;
14080
14081         while(1) {
14082                 int c;
14083                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14084                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14085                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14086                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14087                         GETOPT_VAL_FORCE };
14088                 static const struct option long_options[] = {
14089                         { "super", required_argument, NULL, 's' },
14090                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14091                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14092                         { "init-csum-tree", no_argument, NULL,
14093                                 GETOPT_VAL_INIT_CSUM },
14094                         { "init-extent-tree", no_argument, NULL,
14095                                 GETOPT_VAL_INIT_EXTENT },
14096                         { "check-data-csum", no_argument, NULL,
14097                                 GETOPT_VAL_CHECK_CSUM },
14098                         { "backup", no_argument, NULL, 'b' },
14099                         { "subvol-extents", required_argument, NULL, 'E' },
14100                         { "qgroup-report", no_argument, NULL, 'Q' },
14101                         { "tree-root", required_argument, NULL, 'r' },
14102                         { "chunk-root", required_argument, NULL,
14103                                 GETOPT_VAL_CHUNK_TREE },
14104                         { "progress", no_argument, NULL, 'p' },
14105                         { "mode", required_argument, NULL,
14106                                 GETOPT_VAL_MODE },
14107                         { "clear-space-cache", required_argument, NULL,
14108                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
14109                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14110                         { NULL, 0, NULL, 0}
14111                 };
14112
14113                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14114                 if (c < 0)
14115                         break;
14116                 switch(c) {
14117                         case 'a': /* ignored */ break;
14118                         case 'b':
14119                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14120                                 break;
14121                         case 's':
14122                                 num = arg_strtou64(optarg);
14123                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14124                                         error(
14125                                         "super mirror should be less than %d",
14126                                                 BTRFS_SUPER_MIRROR_MAX);
14127                                         exit(1);
14128                                 }
14129                                 bytenr = btrfs_sb_offset(((int)num));
14130                                 printf("using SB copy %llu, bytenr %llu\n", num,
14131                                        (unsigned long long)bytenr);
14132                                 break;
14133                         case 'Q':
14134                                 qgroup_report = 1;
14135                                 break;
14136                         case 'E':
14137                                 subvolid = arg_strtou64(optarg);
14138                                 break;
14139                         case 'r':
14140                                 tree_root_bytenr = arg_strtou64(optarg);
14141                                 break;
14142                         case GETOPT_VAL_CHUNK_TREE:
14143                                 chunk_root_bytenr = arg_strtou64(optarg);
14144                                 break;
14145                         case 'p':
14146                                 ctx.progress_enabled = true;
14147                                 break;
14148                         case '?':
14149                         case 'h':
14150                                 usage(cmd_check_usage);
14151                         case GETOPT_VAL_REPAIR:
14152                                 printf("enabling repair mode\n");
14153                                 repair = 1;
14154                                 ctree_flags |= OPEN_CTREE_WRITES;
14155                                 break;
14156                         case GETOPT_VAL_READONLY:
14157                                 readonly = 1;
14158                                 break;
14159                         case GETOPT_VAL_INIT_CSUM:
14160                                 printf("Creating a new CRC tree\n");
14161                                 init_csum_tree = 1;
14162                                 repair = 1;
14163                                 ctree_flags |= OPEN_CTREE_WRITES;
14164                                 break;
14165                         case GETOPT_VAL_INIT_EXTENT:
14166                                 init_extent_tree = 1;
14167                                 ctree_flags |= (OPEN_CTREE_WRITES |
14168                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
14169                                 repair = 1;
14170                                 break;
14171                         case GETOPT_VAL_CHECK_CSUM:
14172                                 check_data_csum = 1;
14173                                 break;
14174                         case GETOPT_VAL_MODE:
14175                                 check_mode = parse_check_mode(optarg);
14176                                 if (check_mode == CHECK_MODE_UNKNOWN) {
14177                                         error("unknown mode: %s", optarg);
14178                                         exit(1);
14179                                 }
14180                                 break;
14181                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
14182                                 if (strcmp(optarg, "v1") == 0) {
14183                                         clear_space_cache = 1;
14184                                 } else if (strcmp(optarg, "v2") == 0) {
14185                                         clear_space_cache = 2;
14186                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14187                                 } else {
14188                                         error(
14189                 "invalid argument to --clear-space-cache, must be v1 or v2");
14190                                         exit(1);
14191                                 }
14192                                 ctree_flags |= OPEN_CTREE_WRITES;
14193                                 break;
14194                         case GETOPT_VAL_FORCE:
14195                                 force = 1;
14196                                 break;
14197                 }
14198         }
14199
14200         if (check_argc_exact(argc - optind, 1))
14201                 usage(cmd_check_usage);
14202
14203         if (ctx.progress_enabled) {
14204                 ctx.tp = TASK_NOTHING;
14205                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14206         }
14207
14208         /* This check is the only reason for --readonly to exist */
14209         if (readonly && repair) {
14210                 error("repair options are not compatible with --readonly");
14211                 exit(1);
14212         }
14213
14214         /*
14215          * experimental and dangerous
14216          */
14217         if (repair && check_mode == CHECK_MODE_LOWMEM)
14218                 warning("low-memory mode repair support is only partial");
14219
14220         radix_tree_init();
14221         cache_tree_init(&root_cache);
14222
14223         ret = check_mounted(argv[optind]);
14224         if (!force) {
14225                 if (ret < 0) {
14226                         error("could not check mount status: %s",
14227                                         strerror(-ret));
14228                         err |= !!ret;
14229                         goto err_out;
14230                 } else if (ret) {
14231                         error(
14232 "%s is currently mounted, use --force if you really intend to check the filesystem",
14233                                 argv[optind]);
14234                         ret = -EBUSY;
14235                         err |= !!ret;
14236                         goto err_out;
14237                 }
14238         } else {
14239                 if (repair) {
14240                         error("repair and --force is not yet supported");
14241                         ret = 1;
14242                         err |= !!ret;
14243                         goto err_out;
14244                 }
14245                 if (ret < 0) {
14246                         warning(
14247 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14248                                 argv[optind]);
14249                 } else if (ret) {
14250                         warning(
14251                         "filesystem mounted, continuing because of --force");
14252                 }
14253                 /* A block device is mounted in exclusive mode by kernel */
14254                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14255         }
14256
14257         /* only allow partial opening under repair mode */
14258         if (repair)
14259                 ctree_flags |= OPEN_CTREE_PARTIAL;
14260
14261         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14262                                   chunk_root_bytenr, ctree_flags);
14263         if (!info) {
14264                 error("cannot open file system");
14265                 ret = -EIO;
14266                 err |= !!ret;
14267                 goto err_out;
14268         }
14269
14270         global_info = info;
14271         root = info->fs_root;
14272         uuid_unparse(info->super_copy->fsid, uuidbuf);
14273
14274         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14275
14276         /*
14277          * Check the bare minimum before starting anything else that could rely
14278          * on it, namely the tree roots, any local consistency checks
14279          */
14280         if (!extent_buffer_uptodate(info->tree_root->node) ||
14281             !extent_buffer_uptodate(info->dev_root->node) ||
14282             !extent_buffer_uptodate(info->chunk_root->node)) {
14283                 error("critical roots corrupted, unable to check the filesystem");
14284                 err |= !!ret;
14285                 ret = -EIO;
14286                 goto close_out;
14287         }
14288
14289         if (clear_space_cache) {
14290                 ret = do_clear_free_space_cache(info, clear_space_cache);
14291                 err |= !!ret;
14292                 goto close_out;
14293         }
14294
14295         /*
14296          * repair mode will force us to commit transaction which
14297          * will make us fail to load log tree when mounting.
14298          */
14299         if (repair && btrfs_super_log_root(info->super_copy)) {
14300                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14301                 if (!ret) {
14302                         ret = 1;
14303                         err |= !!ret;
14304                         goto close_out;
14305                 }
14306                 ret = zero_log_tree(root);
14307                 err |= !!ret;
14308                 if (ret) {
14309                         error("failed to zero log tree: %d", ret);
14310                         goto close_out;
14311                 }
14312         }
14313
14314         if (qgroup_report) {
14315                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14316                        uuidbuf);
14317                 ret = qgroup_verify_all(info);
14318                 err |= !!ret;
14319                 if (ret == 0)
14320                         report_qgroups(1);
14321                 goto close_out;
14322         }
14323         if (subvolid) {
14324                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14325                        subvolid, argv[optind], uuidbuf);
14326                 ret = print_extent_state(info, subvolid);
14327                 err |= !!ret;
14328                 goto close_out;
14329         }
14330
14331         if (init_extent_tree || init_csum_tree) {
14332                 struct btrfs_trans_handle *trans;
14333
14334                 trans = btrfs_start_transaction(info->extent_root, 0);
14335                 if (IS_ERR(trans)) {
14336                         error("error starting transaction");
14337                         ret = PTR_ERR(trans);
14338                         err |= !!ret;
14339                         goto close_out;
14340                 }
14341
14342                 if (init_extent_tree) {
14343                         printf("Creating a new extent tree\n");
14344                         ret = reinit_extent_tree(trans, info);
14345                         err |= !!ret;
14346                         if (ret)
14347                                 goto close_out;
14348                 }
14349
14350                 if (init_csum_tree) {
14351                         printf("Reinitialize checksum tree\n");
14352                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14353                         if (ret) {
14354                                 error("checksum tree initialization failed: %d",
14355                                                 ret);
14356                                 ret = -EIO;
14357                                 err |= !!ret;
14358                                 goto close_out;
14359                         }
14360
14361                         ret = fill_csum_tree(trans, info->csum_root,
14362                                              init_extent_tree);
14363                         err |= !!ret;
14364                         if (ret) {
14365                                 error("checksum tree refilling failed: %d", ret);
14366                                 return -EIO;
14367                         }
14368                 }
14369                 /*
14370                  * Ok now we commit and run the normal fsck, which will add
14371                  * extent entries for all of the items it finds.
14372                  */
14373                 ret = btrfs_commit_transaction(trans, info->extent_root);
14374                 err |= !!ret;
14375                 if (ret)
14376                         goto close_out;
14377         }
14378         if (!extent_buffer_uptodate(info->extent_root->node)) {
14379                 error("critical: extent_root, unable to check the filesystem");
14380                 ret = -EIO;
14381                 err |= !!ret;
14382                 goto close_out;
14383         }
14384         if (!extent_buffer_uptodate(info->csum_root->node)) {
14385                 error("critical: csum_root, unable to check the filesystem");
14386                 ret = -EIO;
14387                 err |= !!ret;
14388                 goto close_out;
14389         }
14390
14391         if (!init_extent_tree) {
14392                 ret = repair_root_items(info);
14393                 if (ret < 0) {
14394                         err = !!ret;
14395                         error("failed to repair root items: %s", strerror(-ret));
14396                         goto close_out;
14397                 }
14398                 if (repair) {
14399                         fprintf(stderr, "Fixed %d roots.\n", ret);
14400                         ret = 0;
14401                 } else if (ret > 0) {
14402                         fprintf(stderr,
14403                                 "Found %d roots with an outdated root item.\n",
14404                                 ret);
14405                         fprintf(stderr,
14406         "Please run a filesystem check with the option --repair to fix them.\n");
14407                         ret = 1;
14408                         err |= ret;
14409                         goto close_out;
14410                 }
14411         }
14412
14413         ret = do_check_chunks_and_extents(info);
14414         err |= !!ret;
14415         if (ret)
14416                 error(
14417                 "errors found in extent allocation tree or chunk allocation");
14418
14419         /* Only re-check super size after we checked and repaired the fs */
14420         err |= !is_super_size_valid(info);
14421
14422         if (!ctx.progress_enabled) {
14423                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14424                         fprintf(stderr, "checking free space tree\n");
14425                 else
14426                         fprintf(stderr, "checking free space cache\n");
14427         }
14428         ret = check_space_cache(root);
14429         err |= !!ret;
14430         if (ret) {
14431                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14432                         error("errors found in free space tree");
14433                 else
14434                         error("errors found in free space cache");
14435                 goto out;
14436         }
14437
14438         /*
14439          * We used to have to have these hole extents in between our real
14440          * extents so if we don't have this flag set we need to make sure there
14441          * are no gaps in the file extents for inodes, otherwise we can just
14442          * ignore it when this happens.
14443          */
14444         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14445         ret = do_check_fs_roots(info, &root_cache);
14446         err |= !!ret;
14447         if (ret) {
14448                 error("errors found in fs roots");
14449                 goto out;
14450         }
14451
14452         fprintf(stderr, "checking csums\n");
14453         ret = check_csums(root);
14454         err |= !!ret;
14455         if (ret) {
14456                 error("errors found in csum tree");
14457                 goto out;
14458         }
14459
14460         fprintf(stderr, "checking root refs\n");
14461         /* For low memory mode, check_fs_roots_v2 handles root refs */
14462         if (check_mode != CHECK_MODE_LOWMEM) {
14463                 ret = check_root_refs(root, &root_cache);
14464                 err |= !!ret;
14465                 if (ret) {
14466                         error("errors found in root refs");
14467                         goto out;
14468                 }
14469         }
14470
14471         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14472                 struct extent_buffer *eb;
14473
14474                 eb = list_first_entry(&root->fs_info->recow_ebs,
14475                                       struct extent_buffer, recow);
14476                 list_del_init(&eb->recow);
14477                 ret = recow_extent_buffer(root, eb);
14478                 err |= !!ret;
14479                 if (ret) {
14480                         error("fails to fix transid errors");
14481                         break;
14482                 }
14483         }
14484
14485         while (!list_empty(&delete_items)) {
14486                 struct bad_item *bad;
14487
14488                 bad = list_first_entry(&delete_items, struct bad_item, list);
14489                 list_del_init(&bad->list);
14490                 if (repair) {
14491                         ret = delete_bad_item(root, bad);
14492                         err |= !!ret;
14493                 }
14494                 free(bad);
14495         }
14496
14497         if (info->quota_enabled) {
14498                 fprintf(stderr, "checking quota groups\n");
14499                 ret = qgroup_verify_all(info);
14500                 err |= !!ret;
14501                 if (ret) {
14502                         error("failed to check quota groups");
14503                         goto out;
14504                 }
14505                 report_qgroups(0);
14506                 ret = repair_qgroups(info, &qgroups_repaired);
14507                 err |= !!ret;
14508                 if (err) {
14509                         error("failed to repair quota groups");
14510                         goto out;
14511                 }
14512                 ret = 0;
14513         }
14514
14515         if (!list_empty(&root->fs_info->recow_ebs)) {
14516                 error("transid errors in file system");
14517                 ret = 1;
14518                 err |= !!ret;
14519         }
14520 out:
14521         printf("found %llu bytes used, ",
14522                (unsigned long long)bytes_used);
14523         if (err)
14524                 printf("error(s) found\n");
14525         else
14526                 printf("no error found\n");
14527         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14528         printf("total tree bytes: %llu\n",
14529                (unsigned long long)total_btree_bytes);
14530         printf("total fs tree bytes: %llu\n",
14531                (unsigned long long)total_fs_tree_bytes);
14532         printf("total extent tree bytes: %llu\n",
14533                (unsigned long long)total_extent_tree_bytes);
14534         printf("btree space waste bytes: %llu\n",
14535                (unsigned long long)btree_space_waste);
14536         printf("file data blocks allocated: %llu\n referenced %llu\n",
14537                 (unsigned long long)data_bytes_allocated,
14538                 (unsigned long long)data_bytes_referenced);
14539
14540         free_qgroup_counts();
14541         free_root_recs_tree(&root_cache);
14542 close_out:
14543         close_ctree(root);
14544 err_out:
14545         if (ctx.progress_enabled)
14546                 task_deinit(ctx.info);
14547
14548         return err;
14549 }