7619c8e2f0d5c8e7793aae00be150a4f1899023f
[platform/upstream/btrfs-progs.git] / check / main.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46 #include "check/original.h"
47 #include "check/lowmem.h"
48 #include "check/common.h"
49
/*
 * Phase of the check that the progress indicator reports.
 *
 * print_status_check() uses the value as an index into its message table,
 * so the order of the real phases has to match that table.  TASK_NOTHING
 * has no message; print_status_check() returns early when it sees it.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* have to be the last element */
};
56
/* State handed to the progress-indicator callbacks (print_status_check()). */
struct task_ctx {
        /* NOTE(review): presumably non-zero when progress output is wanted - confirm with the users of this field */
        int progress_enabled;
        /* Phase whose message print_status_check() displays */
        enum task_position tp;

        /* Handle passed to task_period_start() and task_period_wait() */
        struct task_info *info;
};
63
/*
 * Checker-wide state.  None of these objects are static, so they have
 * external linkage.
 * NOTE(review): the u64 counters look like running totals filled in while
 * the trees are walked - confirm against the code that updates them.
 */
u64 bytes_used = 0;
u64 total_csum_bytes = 0;
u64 total_btree_bytes = 0;
u64 total_fs_tree_bytes = 0;
u64 total_extent_tree_bytes = 0;
u64 btree_space_waste = 0;
u64 data_bytes_allocated = 0;
u64 data_bytes_referenced = 0;
LIST_HEAD(duplicate_extents);
LIST_HEAD(delete_items);
/* When non-zero, maybe_free_inode_rec() skips its file extent gap check */
int no_holes = 0;
int init_extent_tree = 0;
int check_data_csum = 0;
struct btrfs_fs_info *global_info;
/* Progress-indicator state, zero-initialized */
struct task_ctx ctx = { 0 };
struct cache_tree *roots_info_cache = NULL;
80
/*
 * Checker implementation to run.
 *
 * CHECK_MODE_UNKNOWN is what parse_check_mode() returns for a string it
 * does not recognize.  CHECK_MODE_DEFAULT is an alias for the original mode.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Selected mode; starts out as the default (original) mode */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
89
90 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
91 {
92         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
93         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
94         struct data_backref *back1 = to_data_backref(ext1);
95         struct data_backref *back2 = to_data_backref(ext2);
96
97         WARN_ON(!ext1->is_data);
98         WARN_ON(!ext2->is_data);
99
100         /* parent and root are a union, so this covers both */
101         if (back1->parent > back2->parent)
102                 return 1;
103         if (back1->parent < back2->parent)
104                 return -1;
105
106         /* This is a full backref and the parents match. */
107         if (back1->node.full_backref)
108                 return 0;
109
110         if (back1->owner > back2->owner)
111                 return 1;
112         if (back1->owner < back2->owner)
113                 return -1;
114
115         if (back1->offset > back2->offset)
116                 return 1;
117         if (back1->offset < back2->offset)
118                 return -1;
119
120         if (back1->found_ref && back2->found_ref) {
121                 if (back1->disk_bytenr > back2->disk_bytenr)
122                         return 1;
123                 if (back1->disk_bytenr < back2->disk_bytenr)
124                         return -1;
125
126                 if (back1->bytes > back2->bytes)
127                         return 1;
128                 if (back1->bytes < back2->bytes)
129                         return -1;
130         }
131
132         return 0;
133 }
134
135 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
136 {
137         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
138         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
139         struct tree_backref *back1 = to_tree_backref(ext1);
140         struct tree_backref *back2 = to_tree_backref(ext2);
141
142         WARN_ON(ext1->is_data);
143         WARN_ON(ext2->is_data);
144
145         /* parent and root are a union, so this covers both */
146         if (back1->parent > back2->parent)
147                 return 1;
148         if (back1->parent < back2->parent)
149                 return -1;
150
151         return 0;
152 }
153
154 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
155 {
156         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
157         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
158
159         if (ext1->is_data > ext2->is_data)
160                 return 1;
161
162         if (ext1->is_data < ext2->is_data)
163                 return -1;
164
165         if (ext1->full_backref > ext2->full_backref)
166                 return 1;
167         if (ext1->full_backref < ext2->full_backref)
168                 return -1;
169
170         if (ext1->is_data)
171                 return compare_data_backref(node1, node2);
172         else
173                 return compare_tree_backref(node1, node2);
174 }
175
176
177 static void *print_status_check(void *p)
178 {
179         struct task_ctx *priv = p;
180         const char work_indicator[] = { '.', 'o', 'O', 'o' };
181         uint32_t count = 0;
182         static char *task_position_string[] = {
183                 "checking extents",
184                 "checking free space cache",
185                 "checking fs roots",
186         };
187
188         task_period_start(priv->info, 1000 /* 1s */);
189
190         if (priv->tp == TASK_NOTHING)
191                 return NULL;
192
193         while (1) {
194                 printf("%s [%c]\r", task_position_string[priv->tp],
195                                 work_indicator[count % 4]);
196                 count++;
197                 fflush(stdout);
198                 task_period_wait(priv->info);
199         }
200         return NULL;
201 }
202
/*
 * Progress-indicator callback: end the status line with a newline and
 * flush stdout.
 *
 * @p is not used.  Always returns 0.
 */
static int print_status_return(void *p)
{
        (void)p;

        printf("\n");
        fflush(stdout);
        return 0;
}
210
211 static enum btrfs_check_mode parse_check_mode(const char *str)
212 {
213         if (strcmp(str, "lowmem") == 0)
214                 return CHECK_MODE_LOWMEM;
215         if (strcmp(str, "orig") == 0)
216                 return CHECK_MODE_ORIGINAL;
217         if (strcmp(str, "original") == 0)
218                 return CHECK_MODE_ORIGINAL;
219
220         return CHECK_MODE_UNKNOWN;
221 }
222
223 /* Compatible function to allow reuse of old codes */
224 static u64 first_extent_gap(struct rb_root *holes)
225 {
226         struct file_extent_hole *hole;
227
228         if (RB_EMPTY_ROOT(holes))
229                 return (u64)-1;
230
231         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
232         return hole->start;
233 }
234
235 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
236 {
237         struct file_extent_hole *hole1;
238         struct file_extent_hole *hole2;
239
240         hole1 = rb_entry(node1, struct file_extent_hole, node);
241         hole2 = rb_entry(node2, struct file_extent_hole, node);
242
243         if (hole1->start > hole2->start)
244                 return -1;
245         if (hole1->start < hole2->start)
246                 return 1;
247         /* Now hole1->start == hole2->start */
248         if (hole1->len >= hole2->len)
249                 /*
250                  * Hole 1 will be merge center
251                  * Same hole will be merged later
252                  */
253                 return -1;
254         /* Hole 2 will be merge center */
255         return 1;
256 }
257
258 /*
259  * Add a hole to the record
260  *
261  * This will do hole merge for copy_file_extent_holes(),
262  * which will ensure there won't be continuous holes.
263  */
264 static int add_file_extent_hole(struct rb_root *holes,
265                                 u64 start, u64 len)
266 {
267         struct file_extent_hole *hole;
268         struct file_extent_hole *prev = NULL;
269         struct file_extent_hole *next = NULL;
270
271         hole = malloc(sizeof(*hole));
272         if (!hole)
273                 return -ENOMEM;
274         hole->start = start;
275         hole->len = len;
276         /* Since compare will not return 0, no -EEXIST will happen */
277         rb_insert(holes, &hole->node, compare_hole);
278
279         /* simple merge with previous hole */
280         if (rb_prev(&hole->node))
281                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
282                                 node);
283         if (prev && prev->start + prev->len >= hole->start) {
284                 hole->len = hole->start + hole->len - prev->start;
285                 hole->start = prev->start;
286                 rb_erase(&prev->node, holes);
287                 free(prev);
288                 prev = NULL;
289         }
290
291         /* iterate merge with next holes */
292         while (1) {
293                 if (!rb_next(&hole->node))
294                         break;
295                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
296                                         node);
297                 if (hole->start + hole->len >= next->start) {
298                         if (hole->start + hole->len <= next->start + next->len)
299                                 hole->len = next->start + next->len -
300                                             hole->start;
301                         rb_erase(&next->node, holes);
302                         free(next);
303                         next = NULL;
304                 } else
305                         break;
306         }
307         return 0;
308 }
309
310 static int compare_hole_range(struct rb_node *node, void *data)
311 {
312         struct file_extent_hole *hole;
313         u64 start;
314
315         hole = (struct file_extent_hole *)data;
316         start = hole->start;
317
318         hole = rb_entry(node, struct file_extent_hole, node);
319         if (start < hole->start)
320                 return -1;
321         if (start >= hole->start && start < hole->start + hole->len)
322                 return 0;
323         return 1;
324 }
325
326 /*
327  * Delete a hole in the record
328  *
329  * This will do the hole split and is much restrict than add.
330  */
331 static int del_file_extent_hole(struct rb_root *holes,
332                                 u64 start, u64 len)
333 {
334         struct file_extent_hole *hole;
335         struct file_extent_hole tmp;
336         u64 prev_start = 0;
337         u64 prev_len = 0;
338         u64 next_start = 0;
339         u64 next_len = 0;
340         struct rb_node *node;
341         int have_prev = 0;
342         int have_next = 0;
343         int ret = 0;
344
345         tmp.start = start;
346         tmp.len = len;
347         node = rb_search(holes, &tmp, compare_hole_range, NULL);
348         if (!node)
349                 return -EEXIST;
350         hole = rb_entry(node, struct file_extent_hole, node);
351         if (start + len > hole->start + hole->len)
352                 return -EEXIST;
353
354         /*
355          * Now there will be no overlap, delete the hole and re-add the
356          * split(s) if they exists.
357          */
358         if (start > hole->start) {
359                 prev_start = hole->start;
360                 prev_len = start - hole->start;
361                 have_prev = 1;
362         }
363         if (hole->start + hole->len > start + len) {
364                 next_start = start + len;
365                 next_len = hole->start + hole->len - start - len;
366                 have_next = 1;
367         }
368         rb_erase(node, holes);
369         free(hole);
370         if (have_prev) {
371                 ret = add_file_extent_hole(holes, prev_start, prev_len);
372                 if (ret < 0)
373                         return ret;
374         }
375         if (have_next) {
376                 ret = add_file_extent_hole(holes, next_start, next_len);
377                 if (ret < 0)
378                         return ret;
379         }
380         return 0;
381 }
382
383 static int copy_file_extent_holes(struct rb_root *dst,
384                                   struct rb_root *src)
385 {
386         struct file_extent_hole *hole;
387         struct rb_node *node;
388         int ret = 0;
389
390         node = rb_first(src);
391         while (node) {
392                 hole = rb_entry(node, struct file_extent_hole, node);
393                 ret = add_file_extent_hole(dst, hole->start, hole->len);
394                 if (ret)
395                         break;
396                 node = rb_next(node);
397         }
398         return ret;
399 }
400
401 static void free_file_extent_holes(struct rb_root *holes)
402 {
403         struct rb_node *node;
404         struct file_extent_hole *hole;
405
406         node = rb_first(holes);
407         while (node) {
408                 hole = rb_entry(node, struct file_extent_hole, node);
409                 rb_erase(node, holes);
410                 free(hole);
411                 node = rb_first(holes);
412         }
413 }
414
/* Forward declaration so that code ahead of the definition can call it. */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
416
417 static void record_root_in_trans(struct btrfs_trans_handle *trans,
418                                  struct btrfs_root *root)
419 {
420         if (root->last_trans != trans->transid) {
421                 root->track_dirty = 1;
422                 root->last_trans = trans->transid;
423                 root->commit_root = root->node;
424                 extent_buffer_get(root->node);
425         }
426 }
427
428 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
429 {
430         struct device_record *rec1;
431         struct device_record *rec2;
432
433         rec1 = rb_entry(node1, struct device_record, node);
434         rec2 = rb_entry(node2, struct device_record, node);
435         if (rec1->devid > rec2->devid)
436                 return -1;
437         else if (rec1->devid < rec2->devid)
438                 return 1;
439         else
440                 return 0;
441 }
442
443 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
444 {
445         struct inode_record *rec;
446         struct inode_backref *backref;
447         struct inode_backref *orig;
448         struct inode_backref *tmp;
449         struct orphan_data_extent *src_orphan;
450         struct orphan_data_extent *dst_orphan;
451         struct rb_node *rb;
452         size_t size;
453         int ret;
454
455         rec = malloc(sizeof(*rec));
456         if (!rec)
457                 return ERR_PTR(-ENOMEM);
458         memcpy(rec, orig_rec, sizeof(*rec));
459         rec->refs = 1;
460         INIT_LIST_HEAD(&rec->backrefs);
461         INIT_LIST_HEAD(&rec->orphan_extents);
462         rec->holes = RB_ROOT;
463
464         list_for_each_entry(orig, &orig_rec->backrefs, list) {
465                 size = sizeof(*orig) + orig->namelen + 1;
466                 backref = malloc(size);
467                 if (!backref) {
468                         ret = -ENOMEM;
469                         goto cleanup;
470                 }
471                 memcpy(backref, orig, size);
472                 list_add_tail(&backref->list, &rec->backrefs);
473         }
474         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
475                 dst_orphan = malloc(sizeof(*dst_orphan));
476                 if (!dst_orphan) {
477                         ret = -ENOMEM;
478                         goto cleanup;
479                 }
480                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
481                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
482         }
483         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
484         if (ret < 0)
485                 goto cleanup_rb;
486
487         return rec;
488
489 cleanup_rb:
490         rb = rb_first(&rec->holes);
491         while (rb) {
492                 struct file_extent_hole *hole;
493
494                 hole = rb_entry(rb, struct file_extent_hole, node);
495                 rb = rb_next(rb);
496                 free(hole);
497         }
498
499 cleanup:
500         if (!list_empty(&rec->backrefs))
501                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
502                         list_del(&orig->list);
503                         free(orig);
504                 }
505
506         if (!list_empty(&rec->orphan_extents))
507                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
508                         list_del(&orig->list);
509                         free(orig);
510                 }
511
512         free(rec);
513
514         return ERR_PTR(ret);
515 }
516
517 static void print_orphan_data_extents(struct list_head *orphan_extents,
518                                       u64 objectid)
519 {
520         struct orphan_data_extent *orphan;
521
522         if (list_empty(orphan_extents))
523                 return;
524         printf("The following data extent is lost in tree %llu:\n",
525                objectid);
526         list_for_each_entry(orphan, orphan_extents, list) {
527                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
528                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
529                        orphan->disk_len);
530         }
531 }
532
533 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
534 {
535         u64 root_objectid = root->root_key.objectid;
536         int errors = rec->errors;
537
538         if (!errors)
539                 return;
540         /* reloc root errors, we print its corresponding fs root objectid*/
541         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
542                 root_objectid = root->root_key.offset;
543                 fprintf(stderr, "reloc");
544         }
545         fprintf(stderr, "root %llu inode %llu errors %x",
546                 (unsigned long long) root_objectid,
547                 (unsigned long long) rec->ino, rec->errors);
548
549         if (errors & I_ERR_NO_INODE_ITEM)
550                 fprintf(stderr, ", no inode item");
551         if (errors & I_ERR_NO_ORPHAN_ITEM)
552                 fprintf(stderr, ", no orphan item");
553         if (errors & I_ERR_DUP_INODE_ITEM)
554                 fprintf(stderr, ", dup inode item");
555         if (errors & I_ERR_DUP_DIR_INDEX)
556                 fprintf(stderr, ", dup dir index");
557         if (errors & I_ERR_ODD_DIR_ITEM)
558                 fprintf(stderr, ", odd dir item");
559         if (errors & I_ERR_ODD_FILE_EXTENT)
560                 fprintf(stderr, ", odd file extent");
561         if (errors & I_ERR_BAD_FILE_EXTENT)
562                 fprintf(stderr, ", bad file extent");
563         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
564                 fprintf(stderr, ", file extent overlap");
565         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
566                 fprintf(stderr, ", file extent discount");
567         if (errors & I_ERR_DIR_ISIZE_WRONG)
568                 fprintf(stderr, ", dir isize wrong");
569         if (errors & I_ERR_FILE_NBYTES_WRONG)
570                 fprintf(stderr, ", nbytes wrong");
571         if (errors & I_ERR_ODD_CSUM_ITEM)
572                 fprintf(stderr, ", odd csum item");
573         if (errors & I_ERR_SOME_CSUM_MISSING)
574                 fprintf(stderr, ", some csum missing");
575         if (errors & I_ERR_LINK_COUNT_WRONG)
576                 fprintf(stderr, ", link count wrong");
577         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
578                 fprintf(stderr, ", orphan file extent");
579         fprintf(stderr, "\n");
580         /* Print the orphan extents if needed */
581         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
582                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
583
584         /* Print the holes if needed */
585         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
586                 struct file_extent_hole *hole;
587                 struct rb_node *node;
588                 int found = 0;
589
590                 node = rb_first(&rec->holes);
591                 fprintf(stderr, "Found file extent holes:\n");
592                 while (node) {
593                         found = 1;
594                         hole = rb_entry(node, struct file_extent_hole, node);
595                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
596                                 hole->start, hole->len);
597                         node = rb_next(node);
598                 }
599                 if (!found)
600                         fprintf(stderr, "\tstart: 0, len: %llu\n",
601                                 round_up(rec->isize,
602                                          root->fs_info->sectorsize));
603         }
604 }
605
606 static void print_ref_error(int errors)
607 {
608         if (errors & REF_ERR_NO_DIR_ITEM)
609                 fprintf(stderr, ", no dir item");
610         if (errors & REF_ERR_NO_DIR_INDEX)
611                 fprintf(stderr, ", no dir index");
612         if (errors & REF_ERR_NO_INODE_REF)
613                 fprintf(stderr, ", no inode ref");
614         if (errors & REF_ERR_DUP_DIR_ITEM)
615                 fprintf(stderr, ", dup dir item");
616         if (errors & REF_ERR_DUP_DIR_INDEX)
617                 fprintf(stderr, ", dup dir index");
618         if (errors & REF_ERR_DUP_INODE_REF)
619                 fprintf(stderr, ", dup inode ref");
620         if (errors & REF_ERR_INDEX_UNMATCH)
621                 fprintf(stderr, ", index mismatch");
622         if (errors & REF_ERR_FILETYPE_UNMATCH)
623                 fprintf(stderr, ", filetype mismatch");
624         if (errors & REF_ERR_NAME_TOO_LONG)
625                 fprintf(stderr, ", name too long");
626         if (errors & REF_ERR_NO_ROOT_REF)
627                 fprintf(stderr, ", no root ref");
628         if (errors & REF_ERR_NO_ROOT_BACKREF)
629                 fprintf(stderr, ", no root backref");
630         if (errors & REF_ERR_DUP_ROOT_REF)
631                 fprintf(stderr, ", dup root ref");
632         if (errors & REF_ERR_DUP_ROOT_BACKREF)
633                 fprintf(stderr, ", dup root backref");
634         fprintf(stderr, "\n");
635 }
636
637 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
638                                           u64 ino, int mod)
639 {
640         struct ptr_node *node;
641         struct cache_extent *cache;
642         struct inode_record *rec = NULL;
643         int ret;
644
645         cache = lookup_cache_extent(inode_cache, ino, 1);
646         if (cache) {
647                 node = container_of(cache, struct ptr_node, cache);
648                 rec = node->data;
649                 if (mod && rec->refs > 1) {
650                         node->data = clone_inode_rec(rec);
651                         if (IS_ERR(node->data))
652                                 return node->data;
653                         rec->refs--;
654                         rec = node->data;
655                 }
656         } else if (mod) {
657                 rec = calloc(1, sizeof(*rec));
658                 if (!rec)
659                         return ERR_PTR(-ENOMEM);
660                 rec->ino = ino;
661                 rec->extent_start = (u64)-1;
662                 rec->refs = 1;
663                 INIT_LIST_HEAD(&rec->backrefs);
664                 INIT_LIST_HEAD(&rec->orphan_extents);
665                 rec->holes = RB_ROOT;
666
667                 node = malloc(sizeof(*node));
668                 if (!node) {
669                         free(rec);
670                         return ERR_PTR(-ENOMEM);
671                 }
672                 node->cache.start = ino;
673                 node->cache.size = 1;
674                 node->data = rec;
675
676                 if (ino == BTRFS_FREE_INO_OBJECTID)
677                         rec->found_link = 1;
678
679                 ret = insert_cache_extent(inode_cache, &node->cache);
680                 if (ret)
681                         return ERR_PTR(-EEXIST);
682         }
683         return rec;
684 }
685
686 static void free_orphan_data_extents(struct list_head *orphan_extents)
687 {
688         struct orphan_data_extent *orphan;
689
690         while (!list_empty(orphan_extents)) {
691                 orphan = list_entry(orphan_extents->next,
692                                     struct orphan_data_extent, list);
693                 list_del(&orphan->list);
694                 free(orphan);
695         }
696 }
697
698 static void free_inode_rec(struct inode_record *rec)
699 {
700         struct inode_backref *backref;
701
702         if (--rec->refs > 0)
703                 return;
704
705         while (!list_empty(&rec->backrefs)) {
706                 backref = to_inode_backref(rec->backrefs.next);
707                 list_del(&backref->list);
708                 free(backref);
709         }
710         free_orphan_data_extents(&rec->orphan_extents);
711         free_file_extent_holes(&rec->holes);
712         free(rec);
713 }
714
715 static int can_free_inode_rec(struct inode_record *rec)
716 {
717         if (!rec->errors && rec->checked && rec->found_inode_item &&
718             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
719                 return 1;
720         return 0;
721 }
722
723 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
724                                  struct inode_record *rec)
725 {
726         struct cache_extent *cache;
727         struct inode_backref *tmp, *backref;
728         struct ptr_node *node;
729         u8 filetype;
730
731         if (!rec->found_inode_item)
732                 return;
733
734         filetype = imode_to_type(rec->imode);
735         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
736                 if (backref->found_dir_item && backref->found_dir_index) {
737                         if (backref->filetype != filetype)
738                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
739                         if (!backref->errors && backref->found_inode_ref &&
740                             rec->nlink == rec->found_link) {
741                                 list_del(&backref->list);
742                                 free(backref);
743                         }
744                 }
745         }
746
747         if (!rec->checked || rec->merging)
748                 return;
749
750         if (S_ISDIR(rec->imode)) {
751                 if (rec->found_size != rec->isize)
752                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
753                 if (rec->found_file_extent)
754                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
755         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
756                 if (rec->found_dir_item)
757                         rec->errors |= I_ERR_ODD_DIR_ITEM;
758                 if (rec->found_size != rec->nbytes)
759                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
760                 if (rec->nlink > 0 && !no_holes &&
761                     (rec->extent_end < rec->isize ||
762                      first_extent_gap(&rec->holes) < rec->isize))
763                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
764         }
765
766         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
767                 if (rec->found_csum_item && rec->nodatasum)
768                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
769                 if (rec->some_csum_missing && !rec->nodatasum)
770                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
771         }
772
773         BUG_ON(rec->refs != 1);
774         if (can_free_inode_rec(rec)) {
775                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
776                 node = container_of(cache, struct ptr_node, cache);
777                 BUG_ON(node->data != rec);
778                 remove_cache_extent(inode_cache, &node->cache);
779                 free(node);
780                 free_inode_rec(rec);
781         }
782 }
783
784 static int check_orphan_item(struct btrfs_root *root, u64 ino)
785 {
786         struct btrfs_path path;
787         struct btrfs_key key;
788         int ret;
789
790         key.objectid = BTRFS_ORPHAN_OBJECTID;
791         key.type = BTRFS_ORPHAN_ITEM_KEY;
792         key.offset = ino;
793
794         btrfs_init_path(&path);
795         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
796         btrfs_release_path(&path);
797         if (ret > 0)
798                 ret = -ENOENT;
799         return ret;
800 }
801
802 static int process_inode_item(struct extent_buffer *eb,
803                               int slot, struct btrfs_key *key,
804                               struct shared_node *active_node)
805 {
806         struct inode_record *rec;
807         struct btrfs_inode_item *item;
808
809         rec = active_node->current;
810         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
811         if (rec->found_inode_item) {
812                 rec->errors |= I_ERR_DUP_INODE_ITEM;
813                 return 1;
814         }
815         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
816         rec->nlink = btrfs_inode_nlink(eb, item);
817         rec->isize = btrfs_inode_size(eb, item);
818         rec->nbytes = btrfs_inode_nbytes(eb, item);
819         rec->imode = btrfs_inode_mode(eb, item);
820         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
821                 rec->nodatasum = 1;
822         rec->found_inode_item = 1;
823         if (rec->nlink == 0)
824                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
825         maybe_free_inode_rec(&active_node->inode_cache, rec);
826         return 0;
827 }
828
829 static struct inode_backref *get_inode_backref(struct inode_record *rec,
830                                                 const char *name,
831                                                 int namelen, u64 dir)
832 {
833         struct inode_backref *backref;
834
835         list_for_each_entry(backref, &rec->backrefs, list) {
836                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
837                         break;
838                 if (backref->dir != dir || backref->namelen != namelen)
839                         continue;
840                 if (memcmp(name, backref->name, namelen))
841                         continue;
842                 return backref;
843         }
844
845         backref = malloc(sizeof(*backref) + namelen + 1);
846         if (!backref)
847                 return NULL;
848         memset(backref, 0, sizeof(*backref));
849         backref->dir = dir;
850         backref->namelen = namelen;
851         memcpy(backref->name, name, namelen);
852         backref->name[namelen] = '\0';
853         list_add_tail(&backref->list, &rec->backrefs);
854         return backref;
855 }
856
857 static int add_inode_backref(struct cache_tree *inode_cache,
858                              u64 ino, u64 dir, u64 index,
859                              const char *name, int namelen,
860                              u8 filetype, u8 itemtype, int errors)
861 {
862         struct inode_record *rec;
863         struct inode_backref *backref;
864
865         rec = get_inode_rec(inode_cache, ino, 1);
866         BUG_ON(IS_ERR(rec));
867         backref = get_inode_backref(rec, name, namelen, dir);
868         BUG_ON(!backref);
869         if (errors)
870                 backref->errors |= errors;
871         if (itemtype == BTRFS_DIR_INDEX_KEY) {
872                 if (backref->found_dir_index)
873                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
874                 if (backref->found_inode_ref && backref->index != index)
875                         backref->errors |= REF_ERR_INDEX_UNMATCH;
876                 if (backref->found_dir_item && backref->filetype != filetype)
877                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
878
879                 backref->index = index;
880                 backref->filetype = filetype;
881                 backref->found_dir_index = 1;
882         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
883                 rec->found_link++;
884                 if (backref->found_dir_item)
885                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
886                 if (backref->found_dir_index && backref->filetype != filetype)
887                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
888
889                 backref->filetype = filetype;
890                 backref->found_dir_item = 1;
891         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
892                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
893                 if (backref->found_inode_ref)
894                         backref->errors |= REF_ERR_DUP_INODE_REF;
895                 if (backref->found_dir_index && backref->index != index)
896                         backref->errors |= REF_ERR_INDEX_UNMATCH;
897                 else
898                         backref->index = index;
899
900                 backref->ref_type = itemtype;
901                 backref->found_inode_ref = 1;
902         } else {
903                 BUG_ON(1);
904         }
905
906         maybe_free_inode_rec(inode_cache, rec);
907         return 0;
908 }
909
910 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
911                             struct cache_tree *dst_cache)
912 {
913         struct inode_backref *backref;
914         u32 dir_count = 0;
915         int ret = 0;
916
917         dst->merging = 1;
918         list_for_each_entry(backref, &src->backrefs, list) {
919                 if (backref->found_dir_index) {
920                         add_inode_backref(dst_cache, dst->ino, backref->dir,
921                                         backref->index, backref->name,
922                                         backref->namelen, backref->filetype,
923                                         BTRFS_DIR_INDEX_KEY, backref->errors);
924                 }
925                 if (backref->found_dir_item) {
926                         dir_count++;
927                         add_inode_backref(dst_cache, dst->ino,
928                                         backref->dir, 0, backref->name,
929                                         backref->namelen, backref->filetype,
930                                         BTRFS_DIR_ITEM_KEY, backref->errors);
931                 }
932                 if (backref->found_inode_ref) {
933                         add_inode_backref(dst_cache, dst->ino,
934                                         backref->dir, backref->index,
935                                         backref->name, backref->namelen, 0,
936                                         backref->ref_type, backref->errors);
937                 }
938         }
939
940         if (src->found_dir_item)
941                 dst->found_dir_item = 1;
942         if (src->found_file_extent)
943                 dst->found_file_extent = 1;
944         if (src->found_csum_item)
945                 dst->found_csum_item = 1;
946         if (src->some_csum_missing)
947                 dst->some_csum_missing = 1;
948         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
949                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
950                 if (ret < 0)
951                         return ret;
952         }
953
954         BUG_ON(src->found_link < dir_count);
955         dst->found_link += src->found_link - dir_count;
956         dst->found_size += src->found_size;
957         if (src->extent_start != (u64)-1) {
958                 if (dst->extent_start == (u64)-1) {
959                         dst->extent_start = src->extent_start;
960                         dst->extent_end = src->extent_end;
961                 } else {
962                         if (dst->extent_end > src->extent_start)
963                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
964                         else if (dst->extent_end < src->extent_start) {
965                                 ret = add_file_extent_hole(&dst->holes,
966                                         dst->extent_end,
967                                         src->extent_start - dst->extent_end);
968                         }
969                         if (dst->extent_end < src->extent_end)
970                                 dst->extent_end = src->extent_end;
971                 }
972         }
973
974         dst->errors |= src->errors;
975         if (src->found_inode_item) {
976                 if (!dst->found_inode_item) {
977                         dst->nlink = src->nlink;
978                         dst->isize = src->isize;
979                         dst->nbytes = src->nbytes;
980                         dst->imode = src->imode;
981                         dst->nodatasum = src->nodatasum;
982                         dst->found_inode_item = 1;
983                 } else {
984                         dst->errors |= I_ERR_DUP_INODE_ITEM;
985                 }
986         }
987         dst->merging = 0;
988
989         return 0;
990 }
991
992 static int splice_shared_node(struct shared_node *src_node,
993                               struct shared_node *dst_node)
994 {
995         struct cache_extent *cache;
996         struct ptr_node *node, *ins;
997         struct cache_tree *src, *dst;
998         struct inode_record *rec, *conflict;
999         u64 current_ino = 0;
1000         int splice = 0;
1001         int ret;
1002
1003         if (--src_node->refs == 0)
1004                 splice = 1;
1005         if (src_node->current)
1006                 current_ino = src_node->current->ino;
1007
1008         src = &src_node->root_cache;
1009         dst = &dst_node->root_cache;
1010 again:
1011         cache = search_cache_extent(src, 0);
1012         while (cache) {
1013                 node = container_of(cache, struct ptr_node, cache);
1014                 rec = node->data;
1015                 cache = next_cache_extent(cache);
1016
1017                 if (splice) {
1018                         remove_cache_extent(src, &node->cache);
1019                         ins = node;
1020                 } else {
1021                         ins = malloc(sizeof(*ins));
1022                         BUG_ON(!ins);
1023                         ins->cache.start = node->cache.start;
1024                         ins->cache.size = node->cache.size;
1025                         ins->data = rec;
1026                         rec->refs++;
1027                 }
1028                 ret = insert_cache_extent(dst, &ins->cache);
1029                 if (ret == -EEXIST) {
1030                         conflict = get_inode_rec(dst, rec->ino, 1);
1031                         BUG_ON(IS_ERR(conflict));
1032                         merge_inode_recs(rec, conflict, dst);
1033                         if (rec->checked) {
1034                                 conflict->checked = 1;
1035                                 if (dst_node->current == conflict)
1036                                         dst_node->current = NULL;
1037                         }
1038                         maybe_free_inode_rec(dst, conflict);
1039                         free_inode_rec(rec);
1040                         free(ins);
1041                 } else {
1042                         BUG_ON(ret);
1043                 }
1044         }
1045
1046         if (src == &src_node->root_cache) {
1047                 src = &src_node->inode_cache;
1048                 dst = &dst_node->inode_cache;
1049                 goto again;
1050         }
1051
1052         if (current_ino > 0 && (!dst_node->current ||
1053             current_ino > dst_node->current->ino)) {
1054                 if (dst_node->current) {
1055                         dst_node->current->checked = 1;
1056                         maybe_free_inode_rec(dst, dst_node->current);
1057                 }
1058                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1059                 BUG_ON(IS_ERR(dst_node->current));
1060         }
1061         return 0;
1062 }
1063
1064 static void free_inode_ptr(struct cache_extent *cache)
1065 {
1066         struct ptr_node *node;
1067         struct inode_record *rec;
1068
1069         node = container_of(cache, struct ptr_node, cache);
1070         rec = node->data;
1071         free_inode_rec(rec);
1072         free(node);
1073 }
1074
1075 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1076
1077 static struct shared_node *find_shared_node(struct cache_tree *shared,
1078                                             u64 bytenr)
1079 {
1080         struct cache_extent *cache;
1081         struct shared_node *node;
1082
1083         cache = lookup_cache_extent(shared, bytenr, 1);
1084         if (cache) {
1085                 node = container_of(cache, struct shared_node, cache);
1086                 return node;
1087         }
1088         return NULL;
1089 }
1090
1091 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1092 {
1093         int ret;
1094         struct shared_node *node;
1095
1096         node = calloc(1, sizeof(*node));
1097         if (!node)
1098                 return -ENOMEM;
1099         node->cache.start = bytenr;
1100         node->cache.size = 1;
1101         cache_tree_init(&node->root_cache);
1102         cache_tree_init(&node->inode_cache);
1103         node->refs = refs;
1104
1105         ret = insert_cache_extent(shared, &node->cache);
1106
1107         return ret;
1108 }
1109
1110 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1111                              struct walk_control *wc, int level)
1112 {
1113         struct shared_node *node;
1114         struct shared_node *dest;
1115         int ret;
1116
1117         if (level == wc->active_node)
1118                 return 0;
1119
1120         BUG_ON(wc->active_node <= level);
1121         node = find_shared_node(&wc->shared, bytenr);
1122         if (!node) {
1123                 ret = add_shared_node(&wc->shared, bytenr, refs);
1124                 BUG_ON(ret);
1125                 node = find_shared_node(&wc->shared, bytenr);
1126                 wc->nodes[level] = node;
1127                 wc->active_node = level;
1128                 return 0;
1129         }
1130
1131         if (wc->root_level == wc->active_node &&
1132             btrfs_root_refs(&root->root_item) == 0) {
1133                 if (--node->refs == 0) {
1134                         free_inode_recs_tree(&node->root_cache);
1135                         free_inode_recs_tree(&node->inode_cache);
1136                         remove_cache_extent(&wc->shared, &node->cache);
1137                         free(node);
1138                 }
1139                 return 1;
1140         }
1141
1142         dest = wc->nodes[wc->active_node];
1143         splice_shared_node(node, dest);
1144         if (node->refs == 0) {
1145                 remove_cache_extent(&wc->shared, &node->cache);
1146                 free(node);
1147         }
1148         return 1;
1149 }
1150
1151 static int leave_shared_node(struct btrfs_root *root,
1152                              struct walk_control *wc, int level)
1153 {
1154         struct shared_node *node;
1155         struct shared_node *dest;
1156         int i;
1157
1158         if (level == wc->root_level)
1159                 return 0;
1160
1161         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1162                 if (wc->nodes[i])
1163                         break;
1164         }
1165         BUG_ON(i >= BTRFS_MAX_LEVEL);
1166
1167         node = wc->nodes[wc->active_node];
1168         wc->nodes[wc->active_node] = NULL;
1169         wc->active_node = i;
1170
1171         dest = wc->nodes[wc->active_node];
1172         if (wc->active_node < wc->root_level ||
1173             btrfs_root_refs(&root->root_item) > 0) {
1174                 BUG_ON(node->refs <= 1);
1175                 splice_shared_node(node, dest);
1176         } else {
1177                 BUG_ON(node->refs < 2);
1178                 node->refs--;
1179         }
1180         return 0;
1181 }
1182
1183 /*
1184  * Returns:
1185  * < 0 - on error
1186  * 1   - if the root with id child_root_id is a child of root parent_root_id
1187  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1188  *       has other root(s) as parent(s)
1189  * 2   - if the root child_root_id doesn't have any parent roots
1190  */
1191 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1192                          u64 child_root_id)
1193 {
1194         struct btrfs_path path;
1195         struct btrfs_key key;
1196         struct extent_buffer *leaf;
1197         int has_parent = 0;
1198         int ret;
1199
1200         btrfs_init_path(&path);
1201
1202         key.objectid = parent_root_id;
1203         key.type = BTRFS_ROOT_REF_KEY;
1204         key.offset = child_root_id;
1205         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1206                                 0, 0);
1207         if (ret < 0)
1208                 return ret;
1209         btrfs_release_path(&path);
1210         if (!ret)
1211                 return 1;
1212
1213         key.objectid = child_root_id;
1214         key.type = BTRFS_ROOT_BACKREF_KEY;
1215         key.offset = 0;
1216         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1217                                 0, 0);
1218         if (ret < 0)
1219                 goto out;
1220
1221         while (1) {
1222                 leaf = path.nodes[0];
1223                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1224                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1225                         if (ret)
1226                                 break;
1227                         leaf = path.nodes[0];
1228                 }
1229
1230                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1231                 if (key.objectid != child_root_id ||
1232                     key.type != BTRFS_ROOT_BACKREF_KEY)
1233                         break;
1234
1235                 has_parent = 1;
1236
1237                 if (key.offset == parent_root_id) {
1238                         btrfs_release_path(&path);
1239                         return 1;
1240                 }
1241
1242                 path.slots[0]++;
1243         }
1244 out:
1245         btrfs_release_path(&path);
1246         if (ret < 0)
1247                 return ret;
1248         return has_parent ? 0 : 2;
1249 }
1250
1251 static int process_dir_item(struct extent_buffer *eb,
1252                             int slot, struct btrfs_key *key,
1253                             struct shared_node *active_node)
1254 {
1255         u32 total;
1256         u32 cur = 0;
1257         u32 len;
1258         u32 name_len;
1259         u32 data_len;
1260         int error;
1261         int nritems = 0;
1262         u8 filetype;
1263         struct btrfs_dir_item *di;
1264         struct inode_record *rec;
1265         struct cache_tree *root_cache;
1266         struct cache_tree *inode_cache;
1267         struct btrfs_key location;
1268         char namebuf[BTRFS_NAME_LEN];
1269
1270         root_cache = &active_node->root_cache;
1271         inode_cache = &active_node->inode_cache;
1272         rec = active_node->current;
1273         rec->found_dir_item = 1;
1274
1275         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1276         total = btrfs_item_size_nr(eb, slot);
1277         while (cur < total) {
1278                 nritems++;
1279                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1280                 name_len = btrfs_dir_name_len(eb, di);
1281                 data_len = btrfs_dir_data_len(eb, di);
1282                 filetype = btrfs_dir_type(eb, di);
1283
1284                 rec->found_size += name_len;
1285                 if (cur + sizeof(*di) + name_len > total ||
1286                     name_len > BTRFS_NAME_LEN) {
1287                         error = REF_ERR_NAME_TOO_LONG;
1288
1289                         if (cur + sizeof(*di) > total)
1290                                 break;
1291                         len = min_t(u32, total - cur - sizeof(*di),
1292                                     BTRFS_NAME_LEN);
1293                 } else {
1294                         len = name_len;
1295                         error = 0;
1296                 }
1297
1298                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1299
1300                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1301                     key->offset != btrfs_name_hash(namebuf, len)) {
1302                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1303                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1304                         key->objectid, key->offset, namebuf, len, filetype,
1305                         key->offset, btrfs_name_hash(namebuf, len));
1306                 }
1307
1308                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1309                         add_inode_backref(inode_cache, location.objectid,
1310                                           key->objectid, key->offset, namebuf,
1311                                           len, filetype, key->type, error);
1312                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1313                         add_inode_backref(root_cache, location.objectid,
1314                                           key->objectid, key->offset,
1315                                           namebuf, len, filetype,
1316                                           key->type, error);
1317                 } else {
1318                         fprintf(stderr,
1319                                 "unknown location type %d in DIR_ITEM[%llu %llu]\n",
1320                                 location.type, key->objectid, key->offset);
1321                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1322                                           key->objectid, key->offset, namebuf,
1323                                           len, filetype, key->type, error);
1324                 }
1325
1326                 len = sizeof(*di) + name_len + data_len;
1327                 di = (struct btrfs_dir_item *)((char *)di + len);
1328                 cur += len;
1329         }
1330         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1331                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1332
1333         return 0;
1334 }
1335
1336 static int process_inode_ref(struct extent_buffer *eb,
1337                              int slot, struct btrfs_key *key,
1338                              struct shared_node *active_node)
1339 {
1340         u32 total;
1341         u32 cur = 0;
1342         u32 len;
1343         u32 name_len;
1344         u64 index;
1345         int error;
1346         struct cache_tree *inode_cache;
1347         struct btrfs_inode_ref *ref;
1348         char namebuf[BTRFS_NAME_LEN];
1349
1350         inode_cache = &active_node->inode_cache;
1351
1352         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1353         total = btrfs_item_size_nr(eb, slot);
1354         while (cur < total) {
1355                 name_len = btrfs_inode_ref_name_len(eb, ref);
1356                 index = btrfs_inode_ref_index(eb, ref);
1357
1358                 /* inode_ref + namelen should not cross item boundary */
1359                 if (cur + sizeof(*ref) + name_len > total ||
1360                     name_len > BTRFS_NAME_LEN) {
1361                         if (total < cur + sizeof(*ref))
1362                                 break;
1363
1364                         /* Still try to read out the remaining part */
1365                         len = min_t(u32, total - cur - sizeof(*ref),
1366                                     BTRFS_NAME_LEN);
1367                         error = REF_ERR_NAME_TOO_LONG;
1368                 } else {
1369                         len = name_len;
1370                         error = 0;
1371                 }
1372
1373                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1374                 add_inode_backref(inode_cache, key->objectid, key->offset,
1375                                   index, namebuf, len, 0, key->type, error);
1376
1377                 len = sizeof(*ref) + name_len;
1378                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1379                 cur += len;
1380         }
1381         return 0;
1382 }
1383
1384 static int process_inode_extref(struct extent_buffer *eb,
1385                                 int slot, struct btrfs_key *key,
1386                                 struct shared_node *active_node)
1387 {
1388         u32 total;
1389         u32 cur = 0;
1390         u32 len;
1391         u32 name_len;
1392         u64 index;
1393         u64 parent;
1394         int error;
1395         struct cache_tree *inode_cache;
1396         struct btrfs_inode_extref *extref;
1397         char namebuf[BTRFS_NAME_LEN];
1398
1399         inode_cache = &active_node->inode_cache;
1400
1401         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1402         total = btrfs_item_size_nr(eb, slot);
1403         while (cur < total) {
1404                 name_len = btrfs_inode_extref_name_len(eb, extref);
1405                 index = btrfs_inode_extref_index(eb, extref);
1406                 parent = btrfs_inode_extref_parent(eb, extref);
1407                 if (name_len <= BTRFS_NAME_LEN) {
1408                         len = name_len;
1409                         error = 0;
1410                 } else {
1411                         len = BTRFS_NAME_LEN;
1412                         error = REF_ERR_NAME_TOO_LONG;
1413                 }
1414                 read_extent_buffer(eb, namebuf,
1415                                    (unsigned long)(extref + 1), len);
1416                 add_inode_backref(inode_cache, key->objectid, parent,
1417                                   index, namebuf, len, 0, key->type, error);
1418
1419                 len = sizeof(*extref) + name_len;
1420                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1421                 cur += len;
1422         }
1423         return 0;
1424
1425 }
1426
1427 static int process_file_extent(struct btrfs_root *root,
1428                                 struct extent_buffer *eb,
1429                                 int slot, struct btrfs_key *key,
1430                                 struct shared_node *active_node)
1431 {
1432         struct inode_record *rec;
1433         struct btrfs_file_extent_item *fi;
1434         u64 num_bytes = 0;
1435         u64 disk_bytenr = 0;
1436         u64 extent_offset = 0;
1437         u64 mask = root->fs_info->sectorsize - 1;
1438         int extent_type;
1439         int ret;
1440
1441         rec = active_node->current;
1442         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1443         rec->found_file_extent = 1;
1444
1445         if (rec->extent_start == (u64)-1) {
1446                 rec->extent_start = key->offset;
1447                 rec->extent_end = key->offset;
1448         }
1449
1450         if (rec->extent_end > key->offset)
1451                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1452         else if (rec->extent_end < key->offset) {
1453                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1454                                            key->offset - rec->extent_end);
1455                 if (ret < 0)
1456                         return ret;
1457         }
1458
1459         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1460         extent_type = btrfs_file_extent_type(eb, fi);
1461
1462         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1463                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1464                 if (num_bytes == 0)
1465                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1466                 rec->found_size += num_bytes;
1467                 num_bytes = (num_bytes + mask) & ~mask;
1468         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1469                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1470                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1471                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1472                 extent_offset = btrfs_file_extent_offset(eb, fi);
1473                 if (num_bytes == 0 || (num_bytes & mask))
1474                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1475                 if (num_bytes + extent_offset >
1476                     btrfs_file_extent_ram_bytes(eb, fi))
1477                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1478                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1479                     (btrfs_file_extent_compression(eb, fi) ||
1480                      btrfs_file_extent_encryption(eb, fi) ||
1481                      btrfs_file_extent_other_encoding(eb, fi)))
1482                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1483                 if (disk_bytenr > 0)
1484                         rec->found_size += num_bytes;
1485         } else {
1486                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1487         }
1488         rec->extent_end = key->offset + num_bytes;
1489
1490         /*
1491          * The data reloc tree will copy full extents into its inode and then
1492          * copy the corresponding csums.  Because the extent it copied could be
1493          * a preallocated extent that hasn't been written to yet there may be no
1494          * csums to copy, ergo we won't have csums for our file extent.  This is
1495          * ok so just don't bother checking csums if the inode belongs to the
1496          * data reloc tree.
1497          */
1498         if (disk_bytenr > 0 &&
1499             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1500                 u64 found;
1501                 if (btrfs_file_extent_compression(eb, fi))
1502                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1503                 else
1504                         disk_bytenr += extent_offset;
1505
1506                 ret = count_csum_range(root->fs_info, disk_bytenr, num_bytes,
1507                                        &found);
1508                 if (ret < 0)
1509                         return ret;
1510                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1511                         if (found > 0)
1512                                 rec->found_csum_item = 1;
1513                         if (found < num_bytes)
1514                                 rec->some_csum_missing = 1;
1515                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1516                         if (found > 0)
1517                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1518                 }
1519         }
1520         return 0;
1521 }
1522
1523 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1524                             struct walk_control *wc)
1525 {
1526         struct btrfs_key key;
1527         u32 nritems;
1528         int i;
1529         int ret = 0;
1530         struct cache_tree *inode_cache;
1531         struct shared_node *active_node;
1532
1533         if (wc->root_level == wc->active_node &&
1534             btrfs_root_refs(&root->root_item) == 0)
1535                 return 0;
1536
1537         active_node = wc->nodes[wc->active_node];
1538         inode_cache = &active_node->inode_cache;
1539         nritems = btrfs_header_nritems(eb);
1540         for (i = 0; i < nritems; i++) {
1541                 btrfs_item_key_to_cpu(eb, &key, i);
1542
1543                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1544                         continue;
1545                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1546                         continue;
1547
1548                 if (active_node->current == NULL ||
1549                     active_node->current->ino < key.objectid) {
1550                         if (active_node->current) {
1551                                 active_node->current->checked = 1;
1552                                 maybe_free_inode_rec(inode_cache,
1553                                                      active_node->current);
1554                         }
1555                         active_node->current = get_inode_rec(inode_cache,
1556                                                              key.objectid, 1);
1557                         BUG_ON(IS_ERR(active_node->current));
1558                 }
1559                 switch (key.type) {
1560                 case BTRFS_DIR_ITEM_KEY:
1561                 case BTRFS_DIR_INDEX_KEY:
1562                         ret = process_dir_item(eb, i, &key, active_node);
1563                         break;
1564                 case BTRFS_INODE_REF_KEY:
1565                         ret = process_inode_ref(eb, i, &key, active_node);
1566                         break;
1567                 case BTRFS_INODE_EXTREF_KEY:
1568                         ret = process_inode_extref(eb, i, &key, active_node);
1569                         break;
1570                 case BTRFS_INODE_ITEM_KEY:
1571                         ret = process_inode_item(eb, i, &key, active_node);
1572                         break;
1573                 case BTRFS_EXTENT_DATA_KEY:
1574                         ret = process_file_extent(root, eb, i, &key,
1575                                                   active_node);
1576                         break;
1577                 default:
1578                         break;
1579                 };
1580         }
1581         return ret;
1582 }
1583
1584 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1585                              struct extent_buffer *eb, struct node_refs *nrefs,
1586                              u64 level, int check_all);
1587 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1588                             unsigned int ext_ref);
1589
1590 /*
1591  * Returns >0  Found error, not fatal, should continue
1592  * Returns <0  Fatal error, must exit the whole check
1593  * Returns 0   No errors found
1594  */
1595 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1596                                struct node_refs *nrefs, int *level, int ext_ref)
1597 {
1598         struct extent_buffer *cur = path->nodes[0];
1599         struct btrfs_key key;
1600         u64 cur_bytenr;
1601         u32 nritems;
1602         u64 first_ino = 0;
1603         int root_level = btrfs_header_level(root->node);
1604         int i;
1605         int ret = 0; /* Final return value */
1606         int err = 0; /* Positive error bitmap */
1607
1608         cur_bytenr = cur->start;
1609
1610         /* skip to first inode item or the first inode number change */
1611         nritems = btrfs_header_nritems(cur);
1612         for (i = 0; i < nritems; i++) {
1613                 btrfs_item_key_to_cpu(cur, &key, i);
1614                 if (i == 0)
1615                         first_ino = key.objectid;
1616                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1617                     (first_ino && first_ino != key.objectid))
1618                         break;
1619         }
1620         if (i == nritems) {
1621                 path->slots[0] = nritems;
1622                 return 0;
1623         }
1624         path->slots[0] = i;
1625
1626 again:
1627         err |= check_inode_item(root, path, ext_ref);
1628
1629         /* modify cur since check_inode_item may change path */
1630         cur = path->nodes[0];
1631
1632         if (err & LAST_ITEM)
1633                 goto out;
1634
1635         /* still have inode items in thie leaf */
1636         if (cur->start == cur_bytenr)
1637                 goto again;
1638
1639         /*
1640          * we have switched to another leaf, above nodes may
1641          * have changed, here walk down the path, if a node
1642          * or leaf is shared, check whether we can skip this
1643          * node or leaf.
1644          */
1645         for (i = root_level; i >= 0; i--) {
1646                 if (path->nodes[i]->start == nrefs->bytenr[i])
1647                         continue;
1648
1649                 ret = update_nodes_refs(root, path->nodes[i]->start,
1650                                 path->nodes[i], nrefs, i, 0);
1651                 if (ret)
1652                         goto out;
1653
1654                 if (!nrefs->need_check[i]) {
1655                         *level += 1;
1656                         break;
1657                 }
1658         }
1659
1660         for (i = 0; i < *level; i++) {
1661                 free_extent_buffer(path->nodes[i]);
1662                 path->nodes[i] = NULL;
1663         }
1664 out:
1665         err &= ~LAST_ITEM;
1666         if (err && !ret)
1667                 ret = err;
1668         return ret;
1669 }
1670
1671 static void reada_walk_down(struct btrfs_root *root,
1672                             struct extent_buffer *node, int slot)
1673 {
1674         struct btrfs_fs_info *fs_info = root->fs_info;
1675         u64 bytenr;
1676         u64 ptr_gen;
1677         u32 nritems;
1678         int i;
1679         int level;
1680
1681         level = btrfs_header_level(node);
1682         if (level != 1)
1683                 return;
1684
1685         nritems = btrfs_header_nritems(node);
1686         for (i = slot; i < nritems; i++) {
1687                 bytenr = btrfs_node_blockptr(node, i);
1688                 ptr_gen = btrfs_node_ptr_generation(node, i);
1689                 readahead_tree_block(fs_info, bytenr, ptr_gen);
1690         }
1691 }
1692
1693 /*
1694  * Check the child node/leaf by the following condition:
1695  * 1. the first item key of the node/leaf should be the same with the one
1696  *    in parent.
1697  * 2. block in parent node should match the child node/leaf.
1698  * 3. generation of parent node and child's header should be consistent.
1699  *
1700  * Or the child node/leaf pointed by the key in parent is not valid.
1701  *
1702  * We hope to check leaf owner too, but since subvol may share leaves,
1703  * which makes leaf owner check not so strong, key check should be
1704  * sufficient enough for that case.
1705  */
1706 static int check_child_node(struct extent_buffer *parent, int slot,
1707                             struct extent_buffer *child)
1708 {
1709         struct btrfs_key parent_key;
1710         struct btrfs_key child_key;
1711         int ret = 0;
1712
1713         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1714         if (btrfs_header_level(child) == 0)
1715                 btrfs_item_key_to_cpu(child, &child_key, 0);
1716         else
1717                 btrfs_node_key_to_cpu(child, &child_key, 0);
1718
1719         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1720                 ret = -EINVAL;
1721                 fprintf(stderr,
1722                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1723                         parent_key.objectid, parent_key.type, parent_key.offset,
1724                         child_key.objectid, child_key.type, child_key.offset);
1725         }
1726         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1727                 ret = -EINVAL;
1728                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1729                         btrfs_node_blockptr(parent, slot),
1730                         btrfs_header_bytenr(child));
1731         }
1732         if (btrfs_node_ptr_generation(parent, slot) !=
1733             btrfs_header_generation(child)) {
1734                 ret = -EINVAL;
1735                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1736                         btrfs_header_generation(child),
1737                         btrfs_node_ptr_generation(parent, slot));
1738         }
1739         return ret;
1740 }
1741
1742 /*
1743  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
1744  * in every fs or file tree check. Here we find its all root ids, and only check
1745  * it in the fs or file tree which has the smallest root id.
1746  */
1747 static int need_check(struct btrfs_root *root, struct ulist *roots)
1748 {
1749         struct rb_node *node;
1750         struct ulist_node *u;
1751
1752         /*
1753          * @roots can be empty if it belongs to tree reloc tree
1754          * In that case, we should always check the leaf, as we can't use
1755          * the tree owner to ensure some other root will check it.
1756          */
1757         if (roots->nnodes == 1 || roots->nnodes == 0)
1758                 return 1;
1759
1760         node = rb_first(&roots->root);
1761         u = rb_entry(node, struct ulist_node, rb_node);
1762         /*
1763          * current root id is not smallest, we skip it and let it be checked
1764          * in the fs or file tree who hash the smallest root id.
1765          */
1766         if (root->objectid != u->val)
1767                 return 0;
1768
1769         return 1;
1770 }
1771
1772 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
1773                                u64 *flags_ret)
1774 {
1775         struct btrfs_root *extent_root = root->fs_info->extent_root;
1776         struct btrfs_root_item *ri = &root->root_item;
1777         struct btrfs_extent_inline_ref *iref;
1778         struct btrfs_extent_item *ei;
1779         struct btrfs_key key;
1780         struct btrfs_path *path = NULL;
1781         unsigned long ptr;
1782         unsigned long end;
1783         u64 flags;
1784         u64 owner = 0;
1785         u64 offset;
1786         int slot;
1787         int type;
1788         int ret = 0;
1789
1790         /*
1791          * Except file/reloc tree, we can not have FULL BACKREF MODE
1792          */
1793         if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
1794                 goto normal;
1795
1796         /* root node */
1797         if (eb->start == btrfs_root_bytenr(ri))
1798                 goto normal;
1799
1800         if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
1801                 goto full_backref;
1802
1803         owner = btrfs_header_owner(eb);
1804         if (owner == root->objectid)
1805                 goto normal;
1806
1807         path = btrfs_alloc_path();
1808         if (!path)
1809                 return -ENOMEM;
1810
1811         key.objectid = btrfs_header_bytenr(eb);
1812         key.type = (u8)-1;
1813         key.offset = (u64)-1;
1814
1815         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
1816         if (ret <= 0) {
1817                 ret = -EIO;
1818                 goto out;
1819         }
1820
1821         if (ret > 0) {
1822                 ret = btrfs_previous_extent_item(extent_root, path,
1823                                                  key.objectid);
1824                 if (ret)
1825                         goto full_backref;
1826
1827         }
1828         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1829
1830         eb = path->nodes[0];
1831         slot = path->slots[0];
1832         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
1833
1834         flags = btrfs_extent_flags(eb, ei);
1835         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
1836                 goto full_backref;
1837
1838         ptr = (unsigned long)(ei + 1);
1839         end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
1840
1841         if (key.type == BTRFS_EXTENT_ITEM_KEY)
1842                 ptr += sizeof(struct btrfs_tree_block_info);
1843
1844 next:
1845         /* Reached extent item ends normally */
1846         if (ptr == end)
1847                 goto full_backref;
1848
1849         /* Beyond extent item end, wrong item size */
1850         if (ptr > end) {
1851                 error("extent item at bytenr %llu slot %d has wrong size",
1852                         eb->start, slot);
1853                 goto full_backref;
1854         }
1855
1856         iref = (struct btrfs_extent_inline_ref *)ptr;
1857         offset = btrfs_extent_inline_ref_offset(eb, iref);
1858         type = btrfs_extent_inline_ref_type(eb, iref);
1859
1860         if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
1861                 goto normal;
1862         ptr += btrfs_extent_inline_ref_size(type);
1863         goto next;
1864
1865 normal:
1866         *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
1867         goto out;
1868
1869 full_backref:
1870         *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
1871 out:
1872         btrfs_free_path(path);
1873         return ret;
1874 }
1875
1876 /*
1877  * for a tree node or leaf, we record its reference count, so later if we still
1878  * process this node or leaf, don't need to compute its reference count again.
1879  *
1880  * @bytenr  if @bytenr == (u64)-1, only update nrefs->full_backref[level]
1881  */
1882 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1883                              struct extent_buffer *eb, struct node_refs *nrefs,
1884                              u64 level, int check_all)
1885 {
1886         struct ulist *roots;
1887         u64 refs = 0;
1888         u64 flags = 0;
1889         int root_level = btrfs_header_level(root->node);
1890         int check;
1891         int ret;
1892
1893         if (nrefs->bytenr[level] == bytenr)
1894                 return 0;
1895
1896         if (bytenr != (u64)-1) {
1897                 /* the return value of this function seems a mistake */
1898                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1899                                        level, 1, &refs, &flags);
1900                 /* temporary fix */
1901                 if (ret < 0 && !check_all)
1902                         return ret;
1903
1904                 nrefs->bytenr[level] = bytenr;
1905                 nrefs->refs[level] = refs;
1906                 nrefs->full_backref[level] = 0;
1907                 nrefs->checked[level] = 0;
1908
1909                 if (refs > 1) {
1910                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
1911                                                    0, &roots);
1912                         if (ret)
1913                                 return -EIO;
1914
1915                         check = need_check(root, roots);
1916                         ulist_free(roots);
1917                         nrefs->need_check[level] = check;
1918                 } else {
1919                         if (!check_all) {
1920                                 nrefs->need_check[level] = 1;
1921                         } else {
1922                                 if (level == root_level) {
1923                                         nrefs->need_check[level] = 1;
1924                                 } else {
1925                                         /*
1926                                          * The node refs may have not been
1927                                          * updated if upper needs checking (the
1928                                          * lowest root_objectid) the node can
1929                                          * be checked.
1930                                          */
1931                                         nrefs->need_check[level] =
1932                                                 nrefs->need_check[level + 1];
1933                                 }
1934                         }
1935                 }
1936         }
1937
1938         if (check_all && eb) {
1939                 calc_extent_flag_v2(root, eb, &flags);
1940                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
1941                         nrefs->full_backref[level] = 1;
1942         }
1943
1944         return 0;
1945 }
1946
1947 /*
1948  * @level           if @level == -1 means extent data item
1949  *                  else normal treeblocl.
1950  */
1951 static int should_check_extent_strictly(struct btrfs_root *root,
1952                                         struct node_refs *nrefs, int level)
1953 {
1954         int root_level = btrfs_header_level(root->node);
1955
1956         if (level > root_level || level < -1)
1957                 return 1;
1958         if (level == root_level)
1959                 return 1;
1960         /*
1961          * if the upper node is marked full backref, it should contain shared
1962          * backref of the parent (except owner == root->objectid).
1963          */
1964         while (++level <= root_level)
1965                 if (nrefs->refs[level] > 1)
1966                         return 0;
1967
1968         return 1;
1969 }
1970
1971 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1972                           struct walk_control *wc, int *level,
1973                           struct node_refs *nrefs)
1974 {
1975         enum btrfs_tree_block_status status;
1976         u64 bytenr;
1977         u64 ptr_gen;
1978         struct btrfs_fs_info *fs_info = root->fs_info;
1979         struct extent_buffer *next;
1980         struct extent_buffer *cur;
1981         int ret, err = 0;
1982         u64 refs;
1983
1984         WARN_ON(*level < 0);
1985         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1986
1987         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1988                 refs = nrefs->refs[*level];
1989                 ret = 0;
1990         } else {
1991                 ret = btrfs_lookup_extent_info(NULL, root,
1992                                        path->nodes[*level]->start,
1993                                        *level, 1, &refs, NULL);
1994                 if (ret < 0) {
1995                         err = ret;
1996                         goto out;
1997                 }
1998                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1999                 nrefs->refs[*level] = refs;
2000         }
2001
2002         if (refs > 1) {
2003                 ret = enter_shared_node(root, path->nodes[*level]->start,
2004                                         refs, wc, *level);
2005                 if (ret > 0) {
2006                         err = ret;
2007                         goto out;
2008                 }
2009         }
2010
2011         while (*level >= 0) {
2012                 WARN_ON(*level < 0);
2013                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2014                 cur = path->nodes[*level];
2015
2016                 if (btrfs_header_level(cur) != *level)
2017                         WARN_ON(1);
2018
2019                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2020                         break;
2021                 if (*level == 0) {
2022                         ret = process_one_leaf(root, cur, wc);
2023                         if (ret < 0)
2024                                 err = ret;
2025                         break;
2026                 }
2027                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2028                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2029
2030                 if (bytenr == nrefs->bytenr[*level - 1]) {
2031                         refs = nrefs->refs[*level - 1];
2032                 } else {
2033                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2034                                         *level - 1, 1, &refs, NULL);
2035                         if (ret < 0) {
2036                                 refs = 0;
2037                         } else {
2038                                 nrefs->bytenr[*level - 1] = bytenr;
2039                                 nrefs->refs[*level - 1] = refs;
2040                         }
2041                 }
2042
2043                 if (refs > 1) {
2044                         ret = enter_shared_node(root, bytenr, refs,
2045                                                 wc, *level - 1);
2046                         if (ret > 0) {
2047                                 path->slots[*level]++;
2048                                 continue;
2049                         }
2050                 }
2051
2052                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2053                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2054                         free_extent_buffer(next);
2055                         reada_walk_down(root, cur, path->slots[*level]);
2056                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2057                         if (!extent_buffer_uptodate(next)) {
2058                                 struct btrfs_key node_key;
2059
2060                                 btrfs_node_key_to_cpu(path->nodes[*level],
2061                                                       &node_key,
2062                                                       path->slots[*level]);
2063                                 btrfs_add_corrupt_extent_record(root->fs_info,
2064                                                 &node_key,
2065                                                 path->nodes[*level]->start,
2066                                                 root->fs_info->nodesize,
2067                                                 *level);
2068                                 err = -EIO;
2069                                 goto out;
2070                         }
2071                 }
2072
2073                 ret = check_child_node(cur, path->slots[*level], next);
2074                 if (ret) {
2075                         free_extent_buffer(next);
2076                         err = ret;
2077                         goto out;
2078                 }
2079
2080                 if (btrfs_is_leaf(next))
2081                         status = btrfs_check_leaf(root, NULL, next);
2082                 else
2083                         status = btrfs_check_node(root, NULL, next);
2084                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2085                         free_extent_buffer(next);
2086                         err = -EIO;
2087                         goto out;
2088                 }
2089
2090                 *level = *level - 1;
2091                 free_extent_buffer(path->nodes[*level]);
2092                 path->nodes[*level] = next;
2093                 path->slots[*level] = 0;
2094         }
2095 out:
2096         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2097         return err;
2098 }
2099
2100 /*
2101  * Update global fs information.
2102  */
2103 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2104                          int level)
2105 {
2106         u32 free_nrs;
2107         struct extent_buffer *eb = path->nodes[level];
2108
2109         total_btree_bytes += eb->len;
2110         if (fs_root_objectid(root->objectid))
2111                 total_fs_tree_bytes += eb->len;
2112         if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2113                 total_extent_tree_bytes += eb->len;
2114
2115         if (level == 0) {
2116                 btree_space_waste += btrfs_leaf_free_space(root, eb);
2117         } else {
2118                 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root->fs_info) -
2119                             btrfs_header_nritems(eb));
2120                 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2121         }
2122 }
2123
2124 /*
2125  * This function only handles BACKREF_MISSING,
2126  * If corresponding extent item exists, increase the ref, else insert an extent
2127  * item and backref.
2128  *
2129  * Returns error bits after repair.
2130  */
2131 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2132                                  struct btrfs_root *root,
2133                                  struct extent_buffer *node,
2134                                  struct node_refs *nrefs, int level, int err)
2135 {
2136         struct btrfs_fs_info *fs_info = root->fs_info;
2137         struct btrfs_root *extent_root = fs_info->extent_root;
2138         struct btrfs_path path;
2139         struct btrfs_extent_item *ei;
2140         struct btrfs_tree_block_info *bi;
2141         struct btrfs_key key;
2142         struct extent_buffer *eb;
2143         u32 size = sizeof(*ei);
2144         u32 node_size = root->fs_info->nodesize;
2145         int insert_extent = 0;
2146         int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2147         int root_level = btrfs_header_level(root->node);
2148         int generation;
2149         int ret;
2150         u64 owner;
2151         u64 bytenr;
2152         u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
2153         u64 parent = 0;
2154
2155         if ((err & BACKREF_MISSING) == 0)
2156                 return err;
2157
2158         WARN_ON(level > BTRFS_MAX_LEVEL);
2159         WARN_ON(level < 0);
2160
2161         btrfs_init_path(&path);
2162         bytenr = btrfs_header_bytenr(node);
2163         owner = btrfs_header_owner(node);
2164         generation = btrfs_header_generation(node);
2165
2166         key.objectid = bytenr;
2167         key.type = (u8)-1;
2168         key.offset = (u64)-1;
2169
2170         /* Search for the extent item */
2171         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2172         if (ret <= 0) {
2173                 ret = -EIO;
2174                 goto out;
2175         }
2176
2177         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2178         if (ret)
2179                 insert_extent = 1;
2180
2181         /* calculate if the extent item flag is full backref or not */
2182         if (nrefs->full_backref[level] != 0)
2183                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2184
2185         /* insert an extent item */
2186         if (insert_extent) {
2187                 struct btrfs_disk_key copy_key;
2188
2189                 generation = btrfs_header_generation(node);
2190
2191                 if (level < root_level && nrefs->full_backref[level + 1] &&
2192                     owner != root->objectid) {
2193                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2194                 }
2195
2196                 key.objectid = bytenr;
2197                 if (!skinny_metadata) {
2198                         key.type = BTRFS_EXTENT_ITEM_KEY;
2199                         key.offset = node_size;
2200                         size += sizeof(*bi);
2201                 } else {
2202                         key.type = BTRFS_METADATA_ITEM_KEY;
2203                         key.offset = level;
2204                 }
2205
2206                 btrfs_release_path(&path);
2207                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2208                                               size);
2209                 if (ret)
2210                         goto out;
2211
2212                 eb = path.nodes[0];
2213                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
2214
2215                 btrfs_set_extent_refs(eb, ei, 0);
2216                 btrfs_set_extent_generation(eb, ei, generation);
2217                 btrfs_set_extent_flags(eb, ei, flags);
2218
2219                 if (!skinny_metadata) {
2220                         bi = (struct btrfs_tree_block_info *)(ei + 1);
2221                         memset_extent_buffer(eb, 0, (unsigned long)bi,
2222                                              sizeof(*bi));
2223                         btrfs_set_disk_key_objectid(&copy_key, root->objectid);
2224                         btrfs_set_disk_key_type(&copy_key, 0);
2225                         btrfs_set_disk_key_offset(&copy_key, 0);
2226
2227                         btrfs_set_tree_block_level(eb, bi, level);
2228                         btrfs_set_tree_block_key(eb, bi, &copy_key);
2229                 }
2230                 btrfs_mark_buffer_dirty(eb);
2231                 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2232                 btrfs_update_block_group(extent_root, bytenr, node_size, 1, 0);
2233
2234                 nrefs->refs[level] = 0;
2235                 nrefs->full_backref[level] =
2236                         flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2237                 btrfs_release_path(&path);
2238         }
2239
2240         if (level < root_level && nrefs->full_backref[level + 1] &&
2241             owner != root->objectid)
2242                 parent = nrefs->bytenr[level + 1];
2243
2244         /* increase the ref */
2245         ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2246                         parent, root->objectid, level, 0);
2247
2248         nrefs->refs[level]++;
2249 out:
2250         btrfs_release_path(&path);
2251         if (ret) {
2252                 error(
2253         "failed to repair tree block ref start %llu root %llu due to %s",
2254                       bytenr, root->objectid, strerror(-ret));
2255         } else {
2256                 printf("Added one tree block ref start %llu %s %llu\n",
2257                        bytenr, parent ? "parent" : "root",
2258                        parent ? parent : root->objectid);
2259                 err &= ~BACKREF_MISSING;
2260         }
2261
2262         return err;
2263 }
2264
2265 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2266                             unsigned int ext_ref);
2267 static int check_tree_block_ref(struct btrfs_root *root,
2268                                 struct extent_buffer *eb, u64 bytenr,
2269                                 int level, u64 owner, struct node_refs *nrefs);
2270 static int check_leaf_items(struct btrfs_trans_handle *trans,
2271                             struct btrfs_root *root, struct btrfs_path *path,
2272                             struct node_refs *nrefs, int account_bytes);
2273
2274 /*
2275  * @trans      just for lowmem repair mode
2276  * @check all  if not 0 then check all tree block backrefs and items
2277  *             0 then just check relationship of items in fs tree(s)
2278  *
2279  * Returns >0  Found error, should continue
2280  * Returns <0  Fatal error, must exit the whole check
2281  * Returns 0   No errors found
2282  */
2283 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2284                              struct btrfs_root *root, struct btrfs_path *path,
2285                              int *level, struct node_refs *nrefs, int ext_ref,
2286                              int check_all)
2287
2288 {
2289         enum btrfs_tree_block_status status;
2290         u64 bytenr;
2291         u64 ptr_gen;
2292         struct btrfs_fs_info *fs_info = root->fs_info;
2293         struct extent_buffer *next;
2294         struct extent_buffer *cur;
2295         int ret;
2296         int err = 0;
2297         int check;
2298         int account_file_data = 0;
2299
2300         WARN_ON(*level < 0);
2301         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2302
2303         ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2304                                 path->nodes[*level], nrefs, *level, check_all);
2305         if (ret < 0)
2306                 return ret;
2307
2308         while (*level >= 0) {
2309                 WARN_ON(*level < 0);
2310                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2311                 cur = path->nodes[*level];
2312                 bytenr = btrfs_header_bytenr(cur);
2313                 check = nrefs->need_check[*level];
2314
2315                 if (btrfs_header_level(cur) != *level)
2316                         WARN_ON(1);
2317                /*
2318                 * Update bytes accounting and check tree block ref
2319                 * NOTE: Doing accounting and check before checking nritems
2320                 * is necessary because of empty node/leaf.
2321                 */
2322                 if ((check_all && !nrefs->checked[*level]) ||
2323                     (!check_all && nrefs->need_check[*level])) {
2324                         ret = check_tree_block_ref(root, cur,
2325                            btrfs_header_bytenr(cur), btrfs_header_level(cur),
2326                            btrfs_header_owner(cur), nrefs);
2327
2328                         if (repair && ret)
2329                                 ret = repair_tree_block_ref(trans, root,
2330                                     path->nodes[*level], nrefs, *level, ret);
2331                         err |= ret;
2332
2333                         if (check_all && nrefs->need_check[*level] &&
2334                                 nrefs->refs[*level]) {
2335                                 account_bytes(root, path, *level);
2336                                 account_file_data = 1;
2337                         }
2338                         nrefs->checked[*level] = 1;
2339                 }
2340
2341                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2342                         break;
2343
2344                 /* Don't forgot to check leaf/node validation */
2345                 if (*level == 0) {
2346                         /* skip duplicate check */
2347                         if (check || !check_all) {
2348                                 ret = btrfs_check_leaf(root, NULL, cur);
2349                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2350                                         err |= -EIO;
2351                                         break;
2352                                 }
2353                         }
2354
2355                         ret = 0;
2356                         if (!check_all)
2357                                 ret = process_one_leaf_v2(root, path, nrefs,
2358                                                           level, ext_ref);
2359                         else
2360                                 ret = check_leaf_items(trans, root, path,
2361                                                nrefs, account_file_data);
2362                         err |= ret;
2363                         break;
2364                 } else {
2365                         if (check || !check_all) {
2366                                 ret = btrfs_check_node(root, NULL, cur);
2367                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2368                                         err |= -EIO;
2369                                         break;
2370                                 }
2371                         }
2372                 }
2373
2374                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2375                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2376
2377                 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2378                                         check_all);
2379                 if (ret < 0)
2380                         break;
2381                 /*
2382                  * check all trees in check_chunks_and_extent_v2
2383                  * check shared node once in check_fs_roots
2384                  */
2385                 if (!check_all && !nrefs->need_check[*level - 1]) {
2386                         path->slots[*level]++;
2387                         continue;
2388                 }
2389
2390                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2391                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2392                         free_extent_buffer(next);
2393                         reada_walk_down(root, cur, path->slots[*level]);
2394                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2395                         if (!extent_buffer_uptodate(next)) {
2396                                 struct btrfs_key node_key;
2397
2398                                 btrfs_node_key_to_cpu(path->nodes[*level],
2399                                                       &node_key,
2400                                                       path->slots[*level]);
2401                                 btrfs_add_corrupt_extent_record(fs_info,
2402                                         &node_key, path->nodes[*level]->start,
2403                                         fs_info->nodesize, *level);
2404                                 err |= -EIO;
2405                                 break;
2406                         }
2407                 }
2408
2409                 ret = check_child_node(cur, path->slots[*level], next);
2410                 err |= ret;
2411                 if (ret < 0) 
2412                         break;
2413
2414                 if (btrfs_is_leaf(next))
2415                         status = btrfs_check_leaf(root, NULL, next);
2416                 else
2417                         status = btrfs_check_node(root, NULL, next);
2418                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2419                         free_extent_buffer(next);
2420                         err |= -EIO;
2421                         break;
2422                 }
2423
2424                 *level = *level - 1;
2425                 free_extent_buffer(path->nodes[*level]);
2426                 path->nodes[*level] = next;
2427                 path->slots[*level] = 0;
2428                 account_file_data = 0;
2429
2430                 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
2431         }
2432         return err;
2433 }
2434
2435 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2436                         struct walk_control *wc, int *level)
2437 {
2438         int i;
2439         struct extent_buffer *leaf;
2440
2441         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2442                 leaf = path->nodes[i];
2443                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2444                         path->slots[i]++;
2445                         *level = i;
2446                         return 0;
2447                 } else {
2448                         free_extent_buffer(path->nodes[*level]);
2449                         path->nodes[*level] = NULL;
2450                         BUG_ON(*level > wc->active_node);
2451                         if (*level == wc->active_node)
2452                                 leave_shared_node(root, wc, *level);
2453                         *level = i + 1;
2454                 }
2455         }
2456         return 1;
2457 }
2458
2459 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2460                            int *level)
2461 {
2462         int i;
2463         struct extent_buffer *leaf;
2464
2465         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2466                 leaf = path->nodes[i];
2467                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2468                         path->slots[i]++;
2469                         *level = i;
2470                         return 0;
2471                 } else {
2472                         free_extent_buffer(path->nodes[*level]);
2473                         path->nodes[*level] = NULL;
2474                         *level = i + 1;
2475                 }
2476         }
2477         return 1;
2478 }
2479
2480 static int check_root_dir(struct inode_record *rec)
2481 {
2482         struct inode_backref *backref;
2483         int ret = -1;
2484
2485         if (!rec->found_inode_item || rec->errors)
2486                 goto out;
2487         if (rec->nlink != 1 || rec->found_link != 0)
2488                 goto out;
2489         if (list_empty(&rec->backrefs))
2490                 goto out;
2491         backref = to_inode_backref(rec->backrefs.next);
2492         if (!backref->found_inode_ref)
2493                 goto out;
2494         if (backref->index != 0 || backref->namelen != 2 ||
2495             memcmp(backref->name, "..", 2))
2496                 goto out;
2497         if (backref->found_dir_index || backref->found_dir_item)
2498                 goto out;
2499         ret = 0;
2500 out:
2501         return ret;
2502 }
2503
2504 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2505                               struct btrfs_root *root, struct btrfs_path *path,
2506                               struct inode_record *rec)
2507 {
2508         struct btrfs_inode_item *ei;
2509         struct btrfs_key key;
2510         int ret;
2511
2512         key.objectid = rec->ino;
2513         key.type = BTRFS_INODE_ITEM_KEY;
2514         key.offset = (u64)-1;
2515
2516         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2517         if (ret < 0)
2518                 goto out;
2519         if (ret) {
2520                 if (!path->slots[0]) {
2521                         ret = -ENOENT;
2522                         goto out;
2523                 }
2524                 path->slots[0]--;
2525                 ret = 0;
2526         }
2527         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2528         if (key.objectid != rec->ino) {
2529                 ret = -ENOENT;
2530                 goto out;
2531         }
2532
2533         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2534                             struct btrfs_inode_item);
2535         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2536         btrfs_mark_buffer_dirty(path->nodes[0]);
2537         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2538         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2539                root->root_key.objectid);
2540 out:
2541         btrfs_release_path(path);
2542         return ret;
2543 }
2544
2545 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2546                                     struct btrfs_root *root,
2547                                     struct btrfs_path *path,
2548                                     struct inode_record *rec)
2549 {
2550         int ret;
2551
2552         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2553         btrfs_release_path(path);
2554         if (!ret)
2555                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2556         return ret;
2557 }
2558
2559 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2560                                struct btrfs_root *root,
2561                                struct btrfs_path *path,
2562                                struct inode_record *rec)
2563 {
2564         struct btrfs_inode_item *ei;
2565         struct btrfs_key key;
2566         int ret = 0;
2567
2568         key.objectid = rec->ino;
2569         key.type = BTRFS_INODE_ITEM_KEY;
2570         key.offset = 0;
2571
2572         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2573         if (ret) {
2574                 if (ret > 0)
2575                         ret = -ENOENT;
2576                 goto out;
2577         }
2578
2579         /* Since ret == 0, no need to check anything */
2580         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2581                             struct btrfs_inode_item);
2582         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2583         btrfs_mark_buffer_dirty(path->nodes[0]);
2584         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2585         printf("reset nbytes for ino %llu root %llu\n",
2586                rec->ino, root->root_key.objectid);
2587 out:
2588         btrfs_release_path(path);
2589         return ret;
2590 }
2591
2592 static int add_missing_dir_index(struct btrfs_root *root,
2593                                  struct cache_tree *inode_cache,
2594                                  struct inode_record *rec,
2595                                  struct inode_backref *backref)
2596 {
2597         struct btrfs_path path;
2598         struct btrfs_trans_handle *trans;
2599         struct btrfs_dir_item *dir_item;
2600         struct extent_buffer *leaf;
2601         struct btrfs_key key;
2602         struct btrfs_disk_key disk_key;
2603         struct inode_record *dir_rec;
2604         unsigned long name_ptr;
2605         u32 data_size = sizeof(*dir_item) + backref->namelen;
2606         int ret;
2607
2608         trans = btrfs_start_transaction(root, 1);
2609         if (IS_ERR(trans))
2610                 return PTR_ERR(trans);
2611
2612         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2613                 (unsigned long long)rec->ino);
2614
2615         btrfs_init_path(&path);
2616         key.objectid = backref->dir;
2617         key.type = BTRFS_DIR_INDEX_KEY;
2618         key.offset = backref->index;
2619         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2620         BUG_ON(ret);
2621
2622         leaf = path.nodes[0];
2623         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2624
2625         disk_key.objectid = cpu_to_le64(rec->ino);
2626         disk_key.type = BTRFS_INODE_ITEM_KEY;
2627         disk_key.offset = 0;
2628
2629         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2630         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2631         btrfs_set_dir_data_len(leaf, dir_item, 0);
2632         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2633         name_ptr = (unsigned long)(dir_item + 1);
2634         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2635         btrfs_mark_buffer_dirty(leaf);
2636         btrfs_release_path(&path);
2637         btrfs_commit_transaction(trans, root);
2638
2639         backref->found_dir_index = 1;
2640         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2641         BUG_ON(IS_ERR(dir_rec));
2642         if (!dir_rec)
2643                 return 0;
2644         dir_rec->found_size += backref->namelen;
2645         if (dir_rec->found_size == dir_rec->isize &&
2646             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2647                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2648         if (dir_rec->found_size != dir_rec->isize)
2649                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2650
2651         return 0;
2652 }
2653
2654 static int delete_dir_index(struct btrfs_root *root,
2655                             struct inode_backref *backref)
2656 {
2657         struct btrfs_trans_handle *trans;
2658         struct btrfs_dir_item *di;
2659         struct btrfs_path path;
2660         int ret = 0;
2661
2662         trans = btrfs_start_transaction(root, 1);
2663         if (IS_ERR(trans))
2664                 return PTR_ERR(trans);
2665
2666         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2667                 (unsigned long long)backref->dir,
2668                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2669                 (unsigned long long)root->objectid);
2670
2671         btrfs_init_path(&path);
2672         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2673                                     backref->name, backref->namelen,
2674                                     backref->index, -1);
2675         if (IS_ERR(di)) {
2676                 ret = PTR_ERR(di);
2677                 btrfs_release_path(&path);
2678                 btrfs_commit_transaction(trans, root);
2679                 if (ret == -ENOENT)
2680                         return 0;
2681                 return ret;
2682         }
2683
2684         if (!di)
2685                 ret = btrfs_del_item(trans, root, &path);
2686         else
2687                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2688         BUG_ON(ret);
2689         btrfs_release_path(&path);
2690         btrfs_commit_transaction(trans, root);
2691         return ret;
2692 }
2693
2694 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2695                                     struct btrfs_root *root, u64 ino,
2696                                     u8 filetype)
2697 {
2698         u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2699
2700         return insert_inode_item(trans, root, ino, 0, 0, 0, mode);
2701 }
2702
2703 static int create_inode_item(struct btrfs_root *root,
2704                              struct inode_record *rec, int root_dir)
2705 {
2706         struct btrfs_trans_handle *trans;
2707         u64 nlink = 0;
2708         u32 mode = 0;
2709         u64 size = 0;
2710         int ret;
2711
2712         trans = btrfs_start_transaction(root, 1);
2713         if (IS_ERR(trans)) {
2714                 ret = PTR_ERR(trans);
2715                 return ret;
2716         }
2717
2718         nlink = root_dir ? 1 : rec->found_link;
2719         if (rec->found_dir_item) {
2720                 if (rec->found_file_extent)
2721                         fprintf(stderr, "root %llu inode %llu has both a dir "
2722                                 "item and extents, unsure if it is a dir or a "
2723                                 "regular file so setting it as a directory\n",
2724                                 (unsigned long long)root->objectid,
2725                                 (unsigned long long)rec->ino);
2726                 mode = S_IFDIR | 0755;
2727                 size = rec->found_size;
2728         } else if (!rec->found_dir_item) {
2729                 size = rec->extent_end;
2730                 mode =  S_IFREG | 0755;
2731         }
2732
2733         ret = insert_inode_item(trans, root, rec->ino, size, rec->nbytes,
2734                                   nlink, mode);
2735         btrfs_commit_transaction(trans, root);
2736         return 0;
2737 }
2738
2739 static int repair_inode_backrefs(struct btrfs_root *root,
2740                                  struct inode_record *rec,
2741                                  struct cache_tree *inode_cache,
2742                                  int delete)
2743 {
2744         struct inode_backref *tmp, *backref;
2745         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2746         int ret = 0;
2747         int repaired = 0;
2748
2749         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2750                 if (!delete && rec->ino == root_dirid) {
2751                         if (!rec->found_inode_item) {
2752                                 ret = create_inode_item(root, rec, 1);
2753                                 if (ret)
2754                                         break;
2755                                 repaired++;
2756                         }
2757                 }
2758
2759                 /* Index 0 for root dir's are special, don't mess with it */
2760                 if (rec->ino == root_dirid && backref->index == 0)
2761                         continue;
2762
2763                 if (delete &&
2764                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2765                      (backref->found_dir_index && backref->found_inode_ref &&
2766                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2767                         ret = delete_dir_index(root, backref);
2768                         if (ret)
2769                                 break;
2770                         repaired++;
2771                         list_del(&backref->list);
2772                         free(backref);
2773                         continue;
2774                 }
2775
2776                 if (!delete && !backref->found_dir_index &&
2777                     backref->found_dir_item && backref->found_inode_ref) {
2778                         ret = add_missing_dir_index(root, inode_cache, rec,
2779                                                     backref);
2780                         if (ret)
2781                                 break;
2782                         repaired++;
2783                         if (backref->found_dir_item &&
2784                             backref->found_dir_index) {
2785                                 if (!backref->errors &&
2786                                     backref->found_inode_ref) {
2787                                         list_del(&backref->list);
2788                                         free(backref);
2789                                         continue;
2790                                 }
2791                         }
2792                 }
2793
2794                 if (!delete && (!backref->found_dir_index &&
2795                                 !backref->found_dir_item &&
2796                                 backref->found_inode_ref)) {
2797                         struct btrfs_trans_handle *trans;
2798                         struct btrfs_key location;
2799
2800                         ret = check_dir_conflict(root, backref->name,
2801                                                  backref->namelen,
2802                                                  backref->dir,
2803                                                  backref->index);
2804                         if (ret) {
2805                                 /*
2806                                  * let nlink fixing routine to handle it,
2807                                  * which can do it better.
2808                                  */
2809                                 ret = 0;
2810                                 break;
2811                         }
2812                         location.objectid = rec->ino;
2813                         location.type = BTRFS_INODE_ITEM_KEY;
2814                         location.offset = 0;
2815
2816                         trans = btrfs_start_transaction(root, 1);
2817                         if (IS_ERR(trans)) {
2818                                 ret = PTR_ERR(trans);
2819                                 break;
2820                         }
2821                         fprintf(stderr, "adding missing dir index/item pair "
2822                                 "for inode %llu\n",
2823                                 (unsigned long long)rec->ino);
2824                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2825                                                     backref->namelen,
2826                                                     backref->dir, &location,
2827                                                     imode_to_type(rec->imode),
2828                                                     backref->index);
2829                         BUG_ON(ret);
2830                         btrfs_commit_transaction(trans, root);
2831                         repaired++;
2832                 }
2833
2834                 if (!delete && (backref->found_inode_ref &&
2835                                 backref->found_dir_index &&
2836                                 backref->found_dir_item &&
2837                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2838                                 !rec->found_inode_item)) {
2839                         ret = create_inode_item(root, rec, 0);
2840                         if (ret)
2841                                 break;
2842                         repaired++;
2843                 }
2844
2845         }
2846         return ret ? ret : repaired;
2847 }
2848
2849 /*
2850  * To determine the file type for nlink/inode_item repair
2851  *
2852  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2853  * Return -ENOENT if file type is not found.
2854  */
2855 static int find_file_type(struct inode_record *rec, u8 *type)
2856 {
2857         struct inode_backref *backref;
2858
2859         /* For inode item recovered case */
2860         if (rec->found_inode_item) {
2861                 *type = imode_to_type(rec->imode);
2862                 return 0;
2863         }
2864
2865         list_for_each_entry(backref, &rec->backrefs, list) {
2866                 if (backref->found_dir_index || backref->found_dir_item) {
2867                         *type = backref->filetype;
2868                         return 0;
2869                 }
2870         }
2871         return -ENOENT;
2872 }
2873
2874 /*
2875  * To determine the file name for nlink repair
2876  *
2877  * Return 0 if file name is found, set name and namelen.
2878  * Return -ENOENT if file name is not found.
2879  */
2880 static int find_file_name(struct inode_record *rec,
2881                           char *name, int *namelen)
2882 {
2883         struct inode_backref *backref;
2884
2885         list_for_each_entry(backref, &rec->backrefs, list) {
2886                 if (backref->found_dir_index || backref->found_dir_item ||
2887                     backref->found_inode_ref) {
2888                         memcpy(name, backref->name, backref->namelen);
2889                         *namelen = backref->namelen;
2890                         return 0;
2891                 }
2892         }
2893         return -ENOENT;
2894 }
2895
2896 /* Reset the nlink of the inode to the correct one */
2897 static int reset_nlink(struct btrfs_trans_handle *trans,
2898                        struct btrfs_root *root,
2899                        struct btrfs_path *path,
2900                        struct inode_record *rec)
2901 {
2902         struct inode_backref *backref;
2903         struct inode_backref *tmp;
2904         struct btrfs_key key;
2905         struct btrfs_inode_item *inode_item;
2906         int ret = 0;
2907
2908         /* We don't believe this either, reset it and iterate backref */
2909         rec->found_link = 0;
2910
2911         /* Remove all backref including the valid ones */
2912         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2913                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2914                                    backref->index, backref->name,
2915                                    backref->namelen, 0);
2916                 if (ret < 0)
2917                         goto out;
2918
2919                 /* remove invalid backref, so it won't be added back */
2920                 if (!(backref->found_dir_index &&
2921                       backref->found_dir_item &&
2922                       backref->found_inode_ref)) {
2923                         list_del(&backref->list);
2924                         free(backref);
2925                 } else {
2926                         rec->found_link++;
2927                 }
2928         }
2929
2930         /* Set nlink to 0 */
2931         key.objectid = rec->ino;
2932         key.type = BTRFS_INODE_ITEM_KEY;
2933         key.offset = 0;
2934         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2935         if (ret < 0)
2936                 goto out;
2937         if (ret > 0) {
2938                 ret = -ENOENT;
2939                 goto out;
2940         }
2941         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2942                                     struct btrfs_inode_item);
2943         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2944         btrfs_mark_buffer_dirty(path->nodes[0]);
2945         btrfs_release_path(path);
2946
2947         /*
2948          * Add back valid inode_ref/dir_item/dir_index,
2949          * add_link() will handle the nlink inc, so new nlink must be correct
2950          */
2951         list_for_each_entry(backref, &rec->backrefs, list) {
2952                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2953                                      backref->name, backref->namelen,
2954                                      backref->filetype, &backref->index, 1, 0);
2955                 if (ret < 0)
2956                         goto out;
2957         }
2958 out:
2959         btrfs_release_path(path);
2960         return ret;
2961 }
2962
2963 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2964                                struct btrfs_root *root,
2965                                struct btrfs_path *path,
2966                                struct inode_record *rec)
2967 {
2968         char namebuf[BTRFS_NAME_LEN] = {0};
2969         u8 type = 0;
2970         int namelen = 0;
2971         int name_recovered = 0;
2972         int type_recovered = 0;
2973         int ret = 0;
2974
2975         /*
2976          * Get file name and type first before these invalid inode ref
2977          * are deleted by remove_all_invalid_backref()
2978          */
2979         name_recovered = !find_file_name(rec, namebuf, &namelen);
2980         type_recovered = !find_file_type(rec, &type);
2981
2982         if (!name_recovered) {
2983                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2984                        rec->ino, rec->ino);
2985                 namelen = count_digits(rec->ino);
2986                 sprintf(namebuf, "%llu", rec->ino);
2987                 name_recovered = 1;
2988         }
2989         if (!type_recovered) {
2990                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2991                        rec->ino);
2992                 type = BTRFS_FT_REG_FILE;
2993                 type_recovered = 1;
2994         }
2995
2996         ret = reset_nlink(trans, root, path, rec);
2997         if (ret < 0) {
2998                 fprintf(stderr,
2999                         "Failed to reset nlink for inode %llu: %s\n",
3000                         rec->ino, strerror(-ret));
3001                 goto out;
3002         }
3003
3004         if (rec->found_link == 0) {
3005                 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3006                                               namebuf, namelen, type,
3007                                               (u64 *)&rec->found_link);
3008                 if (ret)
3009                         goto out;
3010         }
3011         printf("Fixed the nlink of inode %llu\n", rec->ino);
3012 out:
3013         /*
3014          * Clear the flag anyway, or we will loop forever for the same inode
3015          * as it will not be removed from the bad inode list and the dead loop
3016          * happens.
3017          */
3018         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3019         btrfs_release_path(path);
3020         return ret;
3021 }
3022
3023 /*
3024  * Check if there is any normal(reg or prealloc) file extent for given
3025  * ino.
3026  * This is used to determine the file type when neither its dir_index/item or
3027  * inode_item exists.
3028  *
3029  * This will *NOT* report error, if any error happens, just consider it does
3030  * not have any normal file extent.
3031  */
3032 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3033 {
3034         struct btrfs_path path;
3035         struct btrfs_key key;
3036         struct btrfs_key found_key;
3037         struct btrfs_file_extent_item *fi;
3038         u8 type;
3039         int ret = 0;
3040
3041         btrfs_init_path(&path);
3042         key.objectid = ino;
3043         key.type = BTRFS_EXTENT_DATA_KEY;
3044         key.offset = 0;
3045
3046         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3047         if (ret < 0) {
3048                 ret = 0;
3049                 goto out;
3050         }
3051         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3052                 ret = btrfs_next_leaf(root, &path);
3053                 if (ret) {
3054                         ret = 0;
3055                         goto out;
3056                 }
3057         }
3058         while (1) {
3059                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3060                                       path.slots[0]);
3061                 if (found_key.objectid != ino ||
3062                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3063                         break;
3064                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3065                                     struct btrfs_file_extent_item);
3066                 type = btrfs_file_extent_type(path.nodes[0], fi);
3067                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3068                         ret = 1;
3069                         goto out;
3070                 }
3071         }
3072 out:
3073         btrfs_release_path(&path);
3074         return ret;
3075 }
3076
3077 static u32 btrfs_type_to_imode(u8 type)
3078 {
3079         static u32 imode_by_btrfs_type[] = {
3080                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3081                 [BTRFS_FT_DIR]          = S_IFDIR,
3082                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3083                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3084                 [BTRFS_FT_FIFO]         = S_IFIFO,
3085                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3086                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3087         };
3088
3089         return imode_by_btrfs_type[(type)];
3090 }
3091
3092 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3093                                 struct btrfs_root *root,
3094                                 struct btrfs_path *path,
3095                                 struct inode_record *rec)
3096 {
3097         u8 filetype;
3098         u32 mode = 0700;
3099         int type_recovered = 0;
3100         int ret = 0;
3101
3102         printf("Trying to rebuild inode:%llu\n", rec->ino);
3103
3104         type_recovered = !find_file_type(rec, &filetype);
3105
3106         /*
3107          * Try to determine inode type if type not found.
3108          *
3109          * For found regular file extent, it must be FILE.
3110          * For found dir_item/index, it must be DIR.
3111          *
3112          * For undetermined one, use FILE as fallback.
3113          *
3114          * TODO:
3115          * 1. If found backref(inode_index/item is already handled) to it,
3116          *    it must be DIR.
3117          *    Need new inode-inode ref structure to allow search for that.
3118          */
3119         if (!type_recovered) {
3120                 if (rec->found_file_extent &&
3121                     find_normal_file_extent(root, rec->ino)) {
3122                         type_recovered = 1;
3123                         filetype = BTRFS_FT_REG_FILE;
3124                 } else if (rec->found_dir_item) {
3125                         type_recovered = 1;
3126                         filetype = BTRFS_FT_DIR;
3127                 } else if (!list_empty(&rec->orphan_extents)) {
3128                         type_recovered = 1;
3129                         filetype = BTRFS_FT_REG_FILE;
3130                 } else{
3131                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3132                                rec->ino);
3133                         type_recovered = 1;
3134                         filetype = BTRFS_FT_REG_FILE;
3135                 }
3136         }
3137
3138         ret = btrfs_new_inode(trans, root, rec->ino,
3139                               mode | btrfs_type_to_imode(filetype));
3140         if (ret < 0)
3141                 goto out;
3142
3143         /*
3144          * Here inode rebuild is done, we only rebuild the inode item,
3145          * don't repair the nlink(like move to lost+found).
3146          * That is the job of nlink repair.
3147          *
3148          * We just fill the record and return
3149          */
3150         rec->found_dir_item = 1;
3151         rec->imode = mode | btrfs_type_to_imode(filetype);
3152         rec->nlink = 0;
3153         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3154         /* Ensure the inode_nlinks repair function will be called */
3155         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3156 out:
3157         return ret;
3158 }
3159
3160 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3161                                       struct btrfs_root *root,
3162                                       struct btrfs_path *path,
3163                                       struct inode_record *rec)
3164 {
3165         struct orphan_data_extent *orphan;
3166         struct orphan_data_extent *tmp;
3167         int ret = 0;
3168
3169         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3170                 /*
3171                  * Check for conflicting file extents
3172                  *
3173                  * Here we don't know whether the extents is compressed or not,
3174                  * so we can only assume it not compressed nor data offset,
3175                  * and use its disk_len as extent length.
3176                  */
3177                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3178                                        orphan->offset, orphan->disk_len, 0);
3179                 btrfs_release_path(path);
3180                 if (ret < 0)
3181                         goto out;
3182                 if (!ret) {
3183                         fprintf(stderr,
3184                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3185                                 orphan->disk_bytenr, orphan->disk_len);
3186                         ret = btrfs_free_extent(trans,
3187                                         root->fs_info->extent_root,
3188                                         orphan->disk_bytenr, orphan->disk_len,
3189                                         0, root->objectid, orphan->objectid,
3190                                         orphan->offset);
3191                         if (ret < 0)
3192                                 goto out;
3193                 }
3194                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3195                                 orphan->offset, orphan->disk_bytenr,
3196                                 orphan->disk_len, orphan->disk_len);
3197                 if (ret < 0)
3198                         goto out;
3199
3200                 /* Update file size info */
3201                 rec->found_size += orphan->disk_len;
3202                 if (rec->found_size == rec->nbytes)
3203                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3204
3205                 /* Update the file extent hole info too */
3206                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3207                                            orphan->disk_len);
3208                 if (ret < 0)
3209                         goto out;
3210                 if (RB_EMPTY_ROOT(&rec->holes))
3211                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3212
3213                 list_del(&orphan->list);
3214                 free(orphan);
3215         }
3216         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3217 out:
3218         return ret;
3219 }
3220
3221 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3222                                         struct btrfs_root *root,
3223                                         struct btrfs_path *path,
3224                                         struct inode_record *rec)
3225 {
3226         struct rb_node *node;
3227         struct file_extent_hole *hole;
3228         int found = 0;
3229         int ret = 0;
3230
3231         node = rb_first(&rec->holes);
3232
3233         while (node) {
3234                 found = 1;
3235                 hole = rb_entry(node, struct file_extent_hole, node);
3236                 ret = btrfs_punch_hole(trans, root, rec->ino,
3237                                        hole->start, hole->len);
3238                 if (ret < 0)
3239                         goto out;
3240                 ret = del_file_extent_hole(&rec->holes, hole->start,
3241                                            hole->len);
3242                 if (ret < 0)
3243                         goto out;
3244                 if (RB_EMPTY_ROOT(&rec->holes))
3245                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3246                 node = rb_first(&rec->holes);
3247         }
3248         /* special case for a file losing all its file extent */
3249         if (!found) {
3250                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3251                                        round_up(rec->isize,
3252                                                 root->fs_info->sectorsize));
3253                 if (ret < 0)
3254                         goto out;
3255         }
3256         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3257                rec->ino, root->objectid);
3258 out:
3259         return ret;
3260 }
3261
3262 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3263 {
3264         struct btrfs_trans_handle *trans;
3265         struct btrfs_path path;
3266         int ret = 0;
3267
3268         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3269                              I_ERR_NO_ORPHAN_ITEM |
3270                              I_ERR_LINK_COUNT_WRONG |
3271                              I_ERR_NO_INODE_ITEM |
3272                              I_ERR_FILE_EXTENT_ORPHAN |
3273                              I_ERR_FILE_EXTENT_DISCOUNT|
3274                              I_ERR_FILE_NBYTES_WRONG)))
3275                 return rec->errors;
3276
3277         /*
3278          * For nlink repair, it may create a dir and add link, so
3279          * 2 for parent(256)'s dir_index and dir_item
3280          * 2 for lost+found dir's inode_item and inode_ref
3281          * 1 for the new inode_ref of the file
3282          * 2 for lost+found dir's dir_index and dir_item for the file
3283          */
3284         trans = btrfs_start_transaction(root, 7);
3285         if (IS_ERR(trans))
3286                 return PTR_ERR(trans);
3287
3288         btrfs_init_path(&path);
3289         if (rec->errors & I_ERR_NO_INODE_ITEM)
3290                 ret = repair_inode_no_item(trans, root, &path, rec);
3291         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3292                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3293         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3294                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3295         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3296                 ret = repair_inode_isize(trans, root, &path, rec);
3297         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3298                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3299         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3300                 ret = repair_inode_nlinks(trans, root, &path, rec);
3301         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3302                 ret = repair_inode_nbytes(trans, root, &path, rec);
3303         btrfs_commit_transaction(trans, root);
3304         btrfs_release_path(&path);
3305         return ret;
3306 }
3307
3308 static int check_inode_recs(struct btrfs_root *root,
3309                             struct cache_tree *inode_cache)
3310 {
3311         struct cache_extent *cache;
3312         struct ptr_node *node;
3313         struct inode_record *rec;
3314         struct inode_backref *backref;
3315         int stage = 0;
3316         int ret = 0;
3317         int err = 0;
3318         u64 error = 0;
3319         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3320
3321         if (btrfs_root_refs(&root->root_item) == 0) {
3322                 if (!cache_tree_empty(inode_cache))
3323                         fprintf(stderr, "warning line %d\n", __LINE__);
3324                 return 0;
3325         }
3326
3327         /*
3328          * We need to repair backrefs first because we could change some of the
3329          * errors in the inode recs.
3330          *
3331          * We also need to go through and delete invalid backrefs first and then
3332          * add the correct ones second.  We do this because we may get EEXIST
3333          * when adding back the correct index because we hadn't yet deleted the
3334          * invalid index.
3335          *
3336          * For example, if we were missing a dir index then the directories
3337          * isize would be wrong, so if we fixed the isize to what we thought it
3338          * would be and then fixed the backref we'd still have a invalid fs, so
3339          * we need to add back the dir index and then check to see if the isize
3340          * is still wrong.
3341          */
3342         while (stage < 3) {
3343                 stage++;
3344                 if (stage == 3 && !err)
3345                         break;
3346
3347                 cache = search_cache_extent(inode_cache, 0);
3348                 while (repair && cache) {
3349                         node = container_of(cache, struct ptr_node, cache);
3350                         rec = node->data;
3351                         cache = next_cache_extent(cache);
3352
3353                         /* Need to free everything up and rescan */
3354                         if (stage == 3) {
3355                                 remove_cache_extent(inode_cache, &node->cache);
3356                                 free(node);
3357                                 free_inode_rec(rec);
3358                                 continue;
3359                         }
3360
3361                         if (list_empty(&rec->backrefs))
3362                                 continue;
3363
3364                         ret = repair_inode_backrefs(root, rec, inode_cache,
3365                                                     stage == 1);
3366                         if (ret < 0) {
3367                                 err = ret;
3368                                 stage = 2;
3369                                 break;
3370                         } if (ret > 0) {
3371                                 err = -EAGAIN;
3372                         }
3373                 }
3374         }
3375         if (err)
3376                 return err;
3377
3378         rec = get_inode_rec(inode_cache, root_dirid, 0);
3379         BUG_ON(IS_ERR(rec));
3380         if (rec) {
3381                 ret = check_root_dir(rec);
3382                 if (ret) {
3383                         fprintf(stderr, "root %llu root dir %llu error\n",
3384                                 (unsigned long long)root->root_key.objectid,
3385                                 (unsigned long long)root_dirid);
3386                         print_inode_error(root, rec);
3387                         error++;
3388                 }
3389         } else {
3390                 if (repair) {
3391                         struct btrfs_trans_handle *trans;
3392
3393                         trans = btrfs_start_transaction(root, 1);
3394                         if (IS_ERR(trans)) {
3395                                 err = PTR_ERR(trans);
3396                                 return err;
3397                         }
3398
3399                         fprintf(stderr,
3400                                 "root %llu missing its root dir, recreating\n",
3401                                 (unsigned long long)root->objectid);
3402
3403                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3404                         BUG_ON(ret);
3405
3406                         btrfs_commit_transaction(trans, root);
3407                         return -EAGAIN;
3408                 }
3409
3410                 fprintf(stderr, "root %llu root dir %llu not found\n",
3411                         (unsigned long long)root->root_key.objectid,
3412                         (unsigned long long)root_dirid);
3413         }
3414
3415         while (1) {
3416                 cache = search_cache_extent(inode_cache, 0);
3417                 if (!cache)
3418                         break;
3419                 node = container_of(cache, struct ptr_node, cache);
3420                 rec = node->data;
3421                 remove_cache_extent(inode_cache, &node->cache);
3422                 free(node);
3423                 if (rec->ino == root_dirid ||
3424                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3425                         free_inode_rec(rec);
3426                         continue;
3427                 }
3428
3429                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3430                         ret = check_orphan_item(root, rec->ino);
3431                         if (ret == 0)
3432                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3433                         if (can_free_inode_rec(rec)) {
3434                                 free_inode_rec(rec);
3435                                 continue;
3436                         }
3437                 }
3438
3439                 if (!rec->found_inode_item)
3440                         rec->errors |= I_ERR_NO_INODE_ITEM;
3441                 if (rec->found_link != rec->nlink)
3442                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3443                 if (repair) {
3444                         ret = try_repair_inode(root, rec);
3445                         if (ret == 0 && can_free_inode_rec(rec)) {
3446                                 free_inode_rec(rec);
3447                                 continue;
3448                         }
3449                         ret = 0;
3450                 }
3451
3452                 if (!(repair && ret == 0))
3453                         error++;
3454                 print_inode_error(root, rec);
3455                 list_for_each_entry(backref, &rec->backrefs, list) {
3456                         if (!backref->found_dir_item)
3457                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3458                         if (!backref->found_dir_index)
3459                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3460                         if (!backref->found_inode_ref)
3461                                 backref->errors |= REF_ERR_NO_INODE_REF;
3462                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3463                                 " namelen %u name %s filetype %d errors %x",
3464                                 (unsigned long long)backref->dir,
3465                                 (unsigned long long)backref->index,
3466                                 backref->namelen, backref->name,
3467                                 backref->filetype, backref->errors);
3468                         print_ref_error(backref->errors);
3469                 }
3470                 free_inode_rec(rec);
3471         }
3472         return (error > 0) ? -1 : 0;
3473 }
3474
3475 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3476                                         u64 objectid)
3477 {
3478         struct cache_extent *cache;
3479         struct root_record *rec = NULL;
3480         int ret;
3481
3482         cache = lookup_cache_extent(root_cache, objectid, 1);
3483         if (cache) {
3484                 rec = container_of(cache, struct root_record, cache);
3485         } else {
3486                 rec = calloc(1, sizeof(*rec));
3487                 if (!rec)
3488                         return ERR_PTR(-ENOMEM);
3489                 rec->objectid = objectid;
3490                 INIT_LIST_HEAD(&rec->backrefs);
3491                 rec->cache.start = objectid;
3492                 rec->cache.size = 1;
3493
3494                 ret = insert_cache_extent(root_cache, &rec->cache);
3495                 if (ret)
3496                         return ERR_PTR(-EEXIST);
3497         }
3498         return rec;
3499 }
3500
3501 static struct root_backref *get_root_backref(struct root_record *rec,
3502                                              u64 ref_root, u64 dir, u64 index,
3503                                              const char *name, int namelen)
3504 {
3505         struct root_backref *backref;
3506
3507         list_for_each_entry(backref, &rec->backrefs, list) {
3508                 if (backref->ref_root != ref_root || backref->dir != dir ||
3509                     backref->namelen != namelen)
3510                         continue;
3511                 if (memcmp(name, backref->name, namelen))
3512                         continue;
3513                 return backref;
3514         }
3515
3516         backref = calloc(1, sizeof(*backref) + namelen + 1);
3517         if (!backref)
3518                 return NULL;
3519         backref->ref_root = ref_root;
3520         backref->dir = dir;
3521         backref->index = index;
3522         backref->namelen = namelen;
3523         memcpy(backref->name, name, namelen);
3524         backref->name[namelen] = '\0';
3525         list_add_tail(&backref->list, &rec->backrefs);
3526         return backref;
3527 }
3528
3529 static void free_root_record(struct cache_extent *cache)
3530 {
3531         struct root_record *rec;
3532         struct root_backref *backref;
3533
3534         rec = container_of(cache, struct root_record, cache);
3535         while (!list_empty(&rec->backrefs)) {
3536                 backref = to_root_backref(rec->backrefs.next);
3537                 list_del(&backref->list);
3538                 free(backref);
3539         }
3540
3541         free(rec);
3542 }
3543
3544 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3545
3546 static int add_root_backref(struct cache_tree *root_cache,
3547                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3548                             const char *name, int namelen,
3549                             int item_type, int errors)
3550 {
3551         struct root_record *rec;
3552         struct root_backref *backref;
3553
3554         rec = get_root_rec(root_cache, root_id);
3555         BUG_ON(IS_ERR(rec));
3556         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3557         BUG_ON(!backref);
3558
3559         backref->errors |= errors;
3560
3561         if (item_type != BTRFS_DIR_ITEM_KEY) {
3562                 if (backref->found_dir_index || backref->found_back_ref ||
3563                     backref->found_forward_ref) {
3564                         if (backref->index != index)
3565                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3566                 } else {
3567                         backref->index = index;
3568                 }
3569         }
3570
3571         if (item_type == BTRFS_DIR_ITEM_KEY) {
3572                 if (backref->found_forward_ref)
3573                         rec->found_ref++;
3574                 backref->found_dir_item = 1;
3575         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3576                 backref->found_dir_index = 1;
3577         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3578                 if (backref->found_forward_ref)
3579                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3580                 else if (backref->found_dir_item)
3581                         rec->found_ref++;
3582                 backref->found_forward_ref = 1;
3583         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3584                 if (backref->found_back_ref)
3585                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3586                 backref->found_back_ref = 1;
3587         } else {
3588                 BUG_ON(1);
3589         }
3590
3591         if (backref->found_forward_ref && backref->found_dir_item)
3592                 backref->reachable = 1;
3593         return 0;
3594 }
3595
3596 static int merge_root_recs(struct btrfs_root *root,
3597                            struct cache_tree *src_cache,
3598                            struct cache_tree *dst_cache)
3599 {
3600         struct cache_extent *cache;
3601         struct ptr_node *node;
3602         struct inode_record *rec;
3603         struct inode_backref *backref;
3604         int ret = 0;
3605
3606         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3607                 free_inode_recs_tree(src_cache);
3608                 return 0;
3609         }
3610
3611         while (1) {
3612                 cache = search_cache_extent(src_cache, 0);
3613                 if (!cache)
3614                         break;
3615                 node = container_of(cache, struct ptr_node, cache);
3616                 rec = node->data;
3617                 remove_cache_extent(src_cache, &node->cache);
3618                 free(node);
3619
3620                 ret = is_child_root(root, root->objectid, rec->ino);
3621                 if (ret < 0)
3622                         break;
3623                 else if (ret == 0)
3624                         goto skip;
3625
3626                 list_for_each_entry(backref, &rec->backrefs, list) {
3627                         BUG_ON(backref->found_inode_ref);
3628                         if (backref->found_dir_item)
3629                                 add_root_backref(dst_cache, rec->ino,
3630                                         root->root_key.objectid, backref->dir,
3631                                         backref->index, backref->name,
3632                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3633                                         backref->errors);
3634                         if (backref->found_dir_index)
3635                                 add_root_backref(dst_cache, rec->ino,
3636                                         root->root_key.objectid, backref->dir,
3637                                         backref->index, backref->name,
3638                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3639                                         backref->errors);
3640                 }
3641 skip:
3642                 free_inode_rec(rec);
3643         }
3644         if (ret < 0)
3645                 return ret;
3646         return 0;
3647 }
3648
3649 static int check_root_refs(struct btrfs_root *root,
3650                            struct cache_tree *root_cache)
3651 {
3652         struct root_record *rec;
3653         struct root_record *ref_root;
3654         struct root_backref *backref;
3655         struct cache_extent *cache;
3656         int loop = 1;
3657         int ret;
3658         int error;
3659         int errors = 0;
3660
3661         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3662         BUG_ON(IS_ERR(rec));
3663         rec->found_ref = 1;
3664
3665         /* fixme: this can not detect circular references */
3666         while (loop) {
3667                 loop = 0;
3668                 cache = search_cache_extent(root_cache, 0);
3669                 while (1) {
3670                         if (!cache)
3671                                 break;
3672                         rec = container_of(cache, struct root_record, cache);
3673                         cache = next_cache_extent(cache);
3674
3675                         if (rec->found_ref == 0)
3676                                 continue;
3677
3678                         list_for_each_entry(backref, &rec->backrefs, list) {
3679                                 if (!backref->reachable)
3680                                         continue;
3681
3682                                 ref_root = get_root_rec(root_cache,
3683                                                         backref->ref_root);
3684                                 BUG_ON(IS_ERR(ref_root));
3685                                 if (ref_root->found_ref > 0)
3686                                         continue;
3687
3688                                 backref->reachable = 0;
3689                                 rec->found_ref--;
3690                                 if (rec->found_ref == 0)
3691                                         loop = 1;
3692                         }
3693                 }
3694         }
3695
3696         cache = search_cache_extent(root_cache, 0);
3697         while (1) {
3698                 if (!cache)
3699                         break;
3700                 rec = container_of(cache, struct root_record, cache);
3701                 cache = next_cache_extent(cache);
3702
3703                 if (rec->found_ref == 0 &&
3704                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3705                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3706                         ret = check_orphan_item(root->fs_info->tree_root,
3707                                                 rec->objectid);
3708                         if (ret == 0)
3709                                 continue;
3710
3711                         /*
3712                          * If we don't have a root item then we likely just have
3713                          * a dir item in a snapshot for this root but no actual
3714                          * ref key or anything so it's meaningless.
3715                          */
3716                         if (!rec->found_root_item)
3717                                 continue;
3718                         errors++;
3719                         fprintf(stderr, "fs tree %llu not referenced\n",
3720                                 (unsigned long long)rec->objectid);
3721                 }
3722
3723                 error = 0;
3724                 if (rec->found_ref > 0 && !rec->found_root_item)
3725                         error = 1;
3726                 list_for_each_entry(backref, &rec->backrefs, list) {
3727                         if (!backref->found_dir_item)
3728                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3729                         if (!backref->found_dir_index)
3730                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3731                         if (!backref->found_back_ref)
3732                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3733                         if (!backref->found_forward_ref)
3734                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3735                         if (backref->reachable && backref->errors)
3736                                 error = 1;
3737                 }
3738                 if (!error)
3739                         continue;
3740
3741                 errors++;
3742                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3743                         (unsigned long long)rec->objectid, rec->found_ref,
3744                          rec->found_root_item ? "" : "not found");
3745
3746                 list_for_each_entry(backref, &rec->backrefs, list) {
3747                         if (!backref->reachable)
3748                                 continue;
3749                         if (!backref->errors && rec->found_root_item)
3750                                 continue;
3751                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3752                                 " index %llu namelen %u name %s errors %x\n",
3753                                 (unsigned long long)backref->ref_root,
3754                                 (unsigned long long)backref->dir,
3755                                 (unsigned long long)backref->index,
3756                                 backref->namelen, backref->name,
3757                                 backref->errors);
3758                         print_ref_error(backref->errors);
3759                 }
3760         }
3761         return errors > 0 ? 1 : 0;
3762 }
3763
3764 static int process_root_ref(struct extent_buffer *eb, int slot,
3765                             struct btrfs_key *key,
3766                             struct cache_tree *root_cache)
3767 {
3768         u64 dirid;
3769         u64 index;
3770         u32 len;
3771         u32 name_len;
3772         struct btrfs_root_ref *ref;
3773         char namebuf[BTRFS_NAME_LEN];
3774         int error;
3775
3776         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3777
3778         dirid = btrfs_root_ref_dirid(eb, ref);
3779         index = btrfs_root_ref_sequence(eb, ref);
3780         name_len = btrfs_root_ref_name_len(eb, ref);
3781
3782         if (name_len <= BTRFS_NAME_LEN) {
3783                 len = name_len;
3784                 error = 0;
3785         } else {
3786                 len = BTRFS_NAME_LEN;
3787                 error = REF_ERR_NAME_TOO_LONG;
3788         }
3789         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3790
3791         if (key->type == BTRFS_ROOT_REF_KEY) {
3792                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3793                                  index, namebuf, len, key->type, error);
3794         } else {
3795                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3796                                  index, namebuf, len, key->type, error);
3797         }
3798         return 0;
3799 }
3800
3801 static void free_corrupt_block(struct cache_extent *cache)
3802 {
3803         struct btrfs_corrupt_block *corrupt;
3804
3805         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3806         free(corrupt);
3807 }
3808
3809 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3810
3811 /*
3812  * Repair the btree of the given root.
3813  *
3814  * The fix is to remove the node key in corrupt_blocks cache_tree.
3815  * and rebalance the tree.
3816  * After the fix, the btree should be writeable.
3817  */
3818 static int repair_btree(struct btrfs_root *root,
3819                         struct cache_tree *corrupt_blocks)
3820 {
3821         struct btrfs_trans_handle *trans;
3822         struct btrfs_path path;
3823         struct btrfs_corrupt_block *corrupt;
3824         struct cache_extent *cache;
3825         struct btrfs_key key;
3826         u64 offset;
3827         int level;
3828         int ret = 0;
3829
3830         if (cache_tree_empty(corrupt_blocks))
3831                 return 0;
3832
3833         trans = btrfs_start_transaction(root, 1);
3834         if (IS_ERR(trans)) {
3835                 ret = PTR_ERR(trans);
3836                 fprintf(stderr, "Error starting transaction: %s\n",
3837                         strerror(-ret));
3838                 return ret;
3839         }
3840         btrfs_init_path(&path);
3841         cache = first_cache_extent(corrupt_blocks);
3842         while (cache) {
3843                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3844                                        cache);
3845                 level = corrupt->level;
3846                 path.lowest_level = level;
3847                 key.objectid = corrupt->key.objectid;
3848                 key.type = corrupt->key.type;
3849                 key.offset = corrupt->key.offset;
3850
3851                 /*
3852                  * Here we don't want to do any tree balance, since it may
3853                  * cause a balance with corrupted brother leaf/node,
3854                  * so ins_len set to 0 here.
3855                  * Balance will be done after all corrupt node/leaf is deleted.
3856                  */
3857                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3858                 if (ret < 0)
3859                         goto out;
3860                 offset = btrfs_node_blockptr(path.nodes[level],
3861                                              path.slots[level]);
3862
3863                 /* Remove the ptr */
3864                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3865                 if (ret < 0)
3866                         goto out;
3867                 /*
3868                  * Remove the corresponding extent
3869                  * return value is not concerned.
3870                  */
3871                 btrfs_release_path(&path);
3872                 ret = btrfs_free_extent(trans, root, offset,
3873                                 root->fs_info->nodesize, 0,
3874                                 root->root_key.objectid, level - 1, 0);
3875                 cache = next_cache_extent(cache);
3876         }
3877
3878         /* Balance the btree using btrfs_search_slot() */
3879         cache = first_cache_extent(corrupt_blocks);
3880         while (cache) {
3881                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3882                                        cache);
3883                 memcpy(&key, &corrupt->key, sizeof(key));
3884                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3885                 if (ret < 0)
3886                         goto out;
3887                 /* return will always >0 since it won't find the item */
3888                 ret = 0;
3889                 btrfs_release_path(&path);
3890                 cache = next_cache_extent(cache);
3891         }
3892 out:
3893         btrfs_commit_transaction(trans, root);
3894         btrfs_release_path(&path);
3895         return ret;
3896 }
3897
3898 static int check_fs_root(struct btrfs_root *root,
3899                          struct cache_tree *root_cache,
3900                          struct walk_control *wc)
3901 {
3902         int ret = 0;
3903         int err = 0;
3904         int wret;
3905         int level;
3906         struct btrfs_path path;
3907         struct shared_node root_node;
3908         struct root_record *rec;
3909         struct btrfs_root_item *root_item = &root->root_item;
3910         struct cache_tree corrupt_blocks;
3911         struct orphan_data_extent *orphan;
3912         struct orphan_data_extent *tmp;
3913         enum btrfs_tree_block_status status;
3914         struct node_refs nrefs;
3915
3916         /*
3917          * Reuse the corrupt_block cache tree to record corrupted tree block
3918          *
3919          * Unlike the usage in extent tree check, here we do it in a per
3920          * fs/subvol tree base.
3921          */
3922         cache_tree_init(&corrupt_blocks);
3923         root->fs_info->corrupt_blocks = &corrupt_blocks;
3924
3925         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3926                 rec = get_root_rec(root_cache, root->root_key.objectid);
3927                 BUG_ON(IS_ERR(rec));
3928                 if (btrfs_root_refs(root_item) > 0)
3929                         rec->found_root_item = 1;
3930         }
3931
3932         btrfs_init_path(&path);
3933         memset(&root_node, 0, sizeof(root_node));
3934         cache_tree_init(&root_node.root_cache);
3935         cache_tree_init(&root_node.inode_cache);
3936         memset(&nrefs, 0, sizeof(nrefs));
3937
3938         /* Move the orphan extent record to corresponding inode_record */
3939         list_for_each_entry_safe(orphan, tmp,
3940                                  &root->orphan_data_extents, list) {
3941                 struct inode_record *inode;
3942
3943                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3944                                       1);
3945                 BUG_ON(IS_ERR(inode));
3946                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3947                 list_move(&orphan->list, &inode->orphan_extents);
3948         }
3949
3950         level = btrfs_header_level(root->node);
3951         memset(wc->nodes, 0, sizeof(wc->nodes));
3952         wc->nodes[level] = &root_node;
3953         wc->active_node = level;
3954         wc->root_level = level;
3955
3956         /* We may not have checked the root block, lets do that now */
3957         if (btrfs_is_leaf(root->node))
3958                 status = btrfs_check_leaf(root, NULL, root->node);
3959         else
3960                 status = btrfs_check_node(root, NULL, root->node);
3961         if (status != BTRFS_TREE_BLOCK_CLEAN)
3962                 return -EIO;
3963
3964         if (btrfs_root_refs(root_item) > 0 ||
3965             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3966                 path.nodes[level] = root->node;
3967                 extent_buffer_get(root->node);
3968                 path.slots[level] = 0;
3969         } else {
3970                 struct btrfs_key key;
3971                 struct btrfs_disk_key found_key;
3972
3973                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3974                 level = root_item->drop_level;
3975                 path.lowest_level = level;
3976                 if (level > btrfs_header_level(root->node) ||
3977                     level >= BTRFS_MAX_LEVEL) {
3978                         error("ignoring invalid drop level: %u", level);
3979                         goto skip_walking;
3980                 }
3981                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3982                 if (wret < 0)
3983                         goto skip_walking;
3984                 btrfs_node_key(path.nodes[level], &found_key,
3985                                 path.slots[level]);
3986                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3987                                         sizeof(found_key)));
3988         }
3989
3990         while (1) {
3991                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3992                 if (wret < 0)
3993                         ret = wret;
3994                 if (wret != 0)
3995                         break;
3996
3997                 wret = walk_up_tree(root, &path, wc, &level);
3998                 if (wret < 0)
3999                         ret = wret;
4000                 if (wret != 0)
4001                         break;
4002         }
4003 skip_walking:
4004         btrfs_release_path(&path);
4005
4006         if (!cache_tree_empty(&corrupt_blocks)) {
4007                 struct cache_extent *cache;
4008                 struct btrfs_corrupt_block *corrupt;
4009
4010                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4011                        root->root_key.objectid);
4012                 cache = first_cache_extent(&corrupt_blocks);
4013                 while (cache) {
4014                         corrupt = container_of(cache,
4015                                                struct btrfs_corrupt_block,
4016                                                cache);
4017                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4018                                cache->start, corrupt->level,
4019                                corrupt->key.objectid, corrupt->key.type,
4020                                corrupt->key.offset);
4021                         cache = next_cache_extent(cache);
4022                 }
4023                 if (repair) {
4024                         printf("Try to repair the btree for root %llu\n",
4025                                root->root_key.objectid);
4026                         ret = repair_btree(root, &corrupt_blocks);
4027                         if (ret < 0)
4028                                 fprintf(stderr, "Failed to repair btree: %s\n",
4029                                         strerror(-ret));
4030                         if (!ret)
4031                                 printf("Btree for root %llu is fixed\n",
4032                                        root->root_key.objectid);
4033                 }
4034         }
4035
4036         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4037         if (err < 0)
4038                 ret = err;
4039
4040         if (root_node.current) {
4041                 root_node.current->checked = 1;
4042                 maybe_free_inode_rec(&root_node.inode_cache,
4043                                 root_node.current);
4044         }
4045
4046         err = check_inode_recs(root, &root_node.inode_cache);
4047         if (!ret)
4048                 ret = err;
4049
4050         free_corrupt_blocks_tree(&corrupt_blocks);
4051         root->fs_info->corrupt_blocks = NULL;
4052         free_orphan_data_extents(&root->orphan_data_extents);
4053         return ret;
4054 }
4055
4056 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4057                           struct cache_tree *root_cache)
4058 {
4059         struct btrfs_path path;
4060         struct btrfs_key key;
4061         struct walk_control wc;
4062         struct extent_buffer *leaf, *tree_node;
4063         struct btrfs_root *tmp_root;
4064         struct btrfs_root *tree_root = fs_info->tree_root;
4065         int ret;
4066         int err = 0;
4067
4068         if (ctx.progress_enabled) {
4069                 ctx.tp = TASK_FS_ROOTS;
4070                 task_start(ctx.info);
4071         }
4072
4073         /*
4074          * Just in case we made any changes to the extent tree that weren't
4075          * reflected into the free space cache yet.
4076          */
4077         if (repair)
4078                 reset_cached_block_groups(fs_info);
4079         memset(&wc, 0, sizeof(wc));
4080         cache_tree_init(&wc.shared);
4081         btrfs_init_path(&path);
4082
4083 again:
4084         key.offset = 0;
4085         key.objectid = 0;
4086         key.type = BTRFS_ROOT_ITEM_KEY;
4087         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4088         if (ret < 0) {
4089                 err = 1;
4090                 goto out;
4091         }
4092         tree_node = tree_root->node;
4093         while (1) {
4094                 if (tree_node != tree_root->node) {
4095                         free_root_recs_tree(root_cache);
4096                         btrfs_release_path(&path);
4097                         goto again;
4098                 }
4099                 leaf = path.nodes[0];
4100                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4101                         ret = btrfs_next_leaf(tree_root, &path);
4102                         if (ret) {
4103                                 if (ret < 0)
4104                                         err = 1;
4105                                 break;
4106                         }
4107                         leaf = path.nodes[0];
4108                 }
4109                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4110                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4111                     fs_root_objectid(key.objectid)) {
4112                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4113                                 tmp_root = btrfs_read_fs_root_no_cache(
4114                                                 fs_info, &key);
4115                         } else {
4116                                 key.offset = (u64)-1;
4117                                 tmp_root = btrfs_read_fs_root(
4118                                                 fs_info, &key);
4119                         }
4120                         if (IS_ERR(tmp_root)) {
4121                                 err = 1;
4122                                 goto next;
4123                         }
4124                         ret = check_fs_root(tmp_root, root_cache, &wc);
4125                         if (ret == -EAGAIN) {
4126                                 free_root_recs_tree(root_cache);
4127                                 btrfs_release_path(&path);
4128                                 goto again;
4129                         }
4130                         if (ret)
4131                                 err = 1;
4132                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4133                                 btrfs_free_fs_root(tmp_root);
4134                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4135                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4136                         process_root_ref(leaf, path.slots[0], &key,
4137                                          root_cache);
4138                 }
4139 next:
4140                 path.slots[0]++;
4141         }
4142 out:
4143         btrfs_release_path(&path);
4144         if (err)
4145                 free_extent_cache_tree(&wc.shared);
4146         if (!cache_tree_empty(&wc.shared))
4147                 fprintf(stderr, "warning line %d\n", __LINE__);
4148
4149         task_stop(ctx.info);
4150
4151         return err;
4152 }
4153
4154 /*
4155  * Find the @index according by @ino and name.
4156  * Notice:time efficiency is O(N)
4157  *
4158  * @root:       the root of the fs/file tree
4159  * @index_ret:  the index as return value
4160  * @namebuf:    the name to match
4161  * @name_len:   the length of name to match
4162  * @file_type:  the file_type of INODE_ITEM to match
4163  *
4164  * Returns 0 if found and *@index_ret will be modified with right value
4165  * Returns< 0 not found and *@index_ret will be (u64)-1
4166  */
4167 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4168                           u64 *index_ret, char *namebuf, u32 name_len,
4169                           u8 file_type)
4170 {
4171         struct btrfs_path path;
4172         struct extent_buffer *node;
4173         struct btrfs_dir_item *di;
4174         struct btrfs_key key;
4175         struct btrfs_key location;
4176         char name[BTRFS_NAME_LEN] = {0};
4177
4178         u32 total;
4179         u32 cur = 0;
4180         u32 len;
4181         u32 data_len;
4182         u8 filetype;
4183         int slot;
4184         int ret;
4185
4186         ASSERT(index_ret);
4187
4188         /* search from the last index */
4189         key.objectid = dirid;
4190         key.offset = (u64)-1;
4191         key.type = BTRFS_DIR_INDEX_KEY;
4192
4193         btrfs_init_path(&path);
4194         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4195         if (ret < 0)
4196                 return ret;
4197
4198 loop:
4199         ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4200         if (ret) {
4201                 ret = -ENOENT;
4202                 *index_ret = (64)-1;
4203                 goto out;
4204         }
4205         /* Check whether inode_id/filetype/name match */
4206         node = path.nodes[0];
4207         slot = path.slots[0];
4208         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4209         total = btrfs_item_size_nr(node, slot);
4210         while (cur < total) {
4211                 ret = -ENOENT;
4212                 len = btrfs_dir_name_len(node, di);
4213                 data_len = btrfs_dir_data_len(node, di);
4214
4215                 btrfs_dir_item_key_to_cpu(node, di, &location);
4216                 if (location.objectid != location_id ||
4217                     location.type != BTRFS_INODE_ITEM_KEY ||
4218                     location.offset != 0)
4219                         goto next;
4220
4221                 filetype = btrfs_dir_type(node, di);
4222                 if (file_type != filetype)
4223                         goto next;
4224
4225                 if (len > BTRFS_NAME_LEN)
4226                         len = BTRFS_NAME_LEN;
4227
4228                 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4229                 if (len != name_len || strncmp(namebuf, name, len))
4230                         goto next;
4231
4232                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4233                 *index_ret = key.offset;
4234                 ret = 0;
4235                 goto out;
4236 next:
4237                 len += sizeof(*di) + data_len;
4238                 di = (struct btrfs_dir_item *)((char *)di + len);
4239                 cur += len;
4240         }
4241         goto loop;
4242
4243 out:
4244         btrfs_release_path(&path);
4245         return ret;
4246 }
4247
4248 /*
4249  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4250  * INODE_REF/INODE_EXTREF match.
4251  *
4252  * @root:       the root of the fs/file tree
4253  * @key:        the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4254  *              value while find index
4255  * @location_key: location key of the struct btrfs_dir_item to match
4256  * @name:       the name to match
4257  * @namelen:    the length of name
4258  * @file_type:  the type of file to math
4259  *
4260  * Return 0 if no error occurred.
4261  * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4262  * DIR_ITEM/DIR_INDEX
4263  * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4264  * and DIR_ITEM/DIR_INDEX mismatch
4265  */
4266 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4267                          struct btrfs_key *location_key, char *name,
4268                          u32 namelen, u8 file_type)
4269 {
4270         struct btrfs_path path;
4271         struct extent_buffer *node;
4272         struct btrfs_dir_item *di;
4273         struct btrfs_key location;
4274         char namebuf[BTRFS_NAME_LEN] = {0};
4275         u32 total;
4276         u32 cur = 0;
4277         u32 len;
4278         u32 data_len;
4279         u8 filetype;
4280         int slot;
4281         int ret;
4282
4283         /* get the index by traversing all index */
4284         if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4285                 ret = find_dir_index(root, key->objectid,
4286                                      location_key->objectid, &key->offset,
4287                                      name, namelen, file_type);
4288                 if (ret)
4289                         ret = DIR_INDEX_MISSING;
4290                 return ret;
4291         }
4292
4293         btrfs_init_path(&path);
4294         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4295         if (ret) {
4296                 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4297                         DIR_INDEX_MISSING;
4298                 goto out;
4299         }
4300
4301         /* Check whether inode_id/filetype/name match */
4302         node = path.nodes[0];
4303         slot = path.slots[0];
4304         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4305         total = btrfs_item_size_nr(node, slot);
4306         while (cur < total) {
4307                 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4308                         DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4309
4310                 len = btrfs_dir_name_len(node, di);
4311                 data_len = btrfs_dir_data_len(node, di);
4312
4313                 btrfs_dir_item_key_to_cpu(node, di, &location);
4314                 if (location.objectid != location_key->objectid ||
4315                     location.type != location_key->type ||
4316                     location.offset != location_key->offset)
4317                         goto next;
4318
4319                 filetype = btrfs_dir_type(node, di);
4320                 if (file_type != filetype)
4321                         goto next;
4322
4323                 if (len > BTRFS_NAME_LEN) {
4324                         len = BTRFS_NAME_LEN;
4325                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4326                         root->objectid,
4327                         key->type == BTRFS_DIR_ITEM_KEY ?
4328                         "DIR_ITEM" : "DIR_INDEX",
4329                         key->objectid, key->offset, len);
4330                 }
4331                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4332                                    len);
4333                 if (len != namelen || strncmp(namebuf, name, len))
4334                         goto next;
4335
4336                 ret = 0;
4337                 goto out;
4338 next:
4339                 len += sizeof(*di) + data_len;
4340                 di = (struct btrfs_dir_item *)((char *)di + len);
4341                 cur += len;
4342         }
4343
4344 out:
4345         btrfs_release_path(&path);
4346         return ret;
4347 }
4348
4349 /*
4350  * Prints inode ref error message
4351  */
4352 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4353                                 u64 index, const char *namebuf, int name_len,
4354                                 u8 filetype, int err)
4355 {
4356         if (!err)
4357                 return;
4358
4359         /* root dir error */
4360         if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4361                 error(
4362         "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4363                       root->objectid, key->objectid, key->offset, namebuf);
4364                 return;
4365         }
4366
4367         /* normal error */
4368         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4369                 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4370                       root->objectid, key->offset,
4371                       btrfs_name_hash(namebuf, name_len),
4372                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4373                       namebuf, filetype);
4374         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4375                 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4376                       root->objectid, key->offset, index,
4377                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4378                       namebuf, filetype);
4379 }
4380
4381 /*
4382  * Insert the missing inode item.
4383  *
4384  * Returns 0 means success.
4385  * Returns <0 means error.
4386  */
4387 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4388                                      u8 filetype)
4389 {
4390         struct btrfs_key key;
4391         struct btrfs_trans_handle *trans;
4392         struct btrfs_path path;
4393         int ret;
4394
4395         key.objectid = ino;
4396         key.type = BTRFS_INODE_ITEM_KEY;
4397         key.offset = 0;
4398
4399         btrfs_init_path(&path);
4400         trans = btrfs_start_transaction(root, 1);
4401         if (IS_ERR(trans)) {
4402                 ret = -EIO;
4403                 goto out;
4404         }
4405
4406         ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4407         if (ret < 0 || !ret)
4408                 goto fail;
4409
4410         /* insert inode item */
4411         create_inode_item_lowmem(trans, root, ino, filetype);
4412         ret = 0;
4413 fail:
4414         btrfs_commit_transaction(trans, root);
4415 out:
4416         if (ret)
4417                 error("failed to repair root %llu INODE ITEM[%llu] missing",
4418                       root->objectid, ino);
4419         btrfs_release_path(&path);
4420         return ret;
4421 }
4422
4423 /*
4424  * The ternary means dir item, dir index and relative inode ref.
4425  * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4426  * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4427  * strategy:
4428  * If two of three is missing or mismatched, delete the existing one.
4429  * If one of three is missing or mismatched, add the missing one.
4430  *
4431  * returns 0 means success.
4432  * returns not 0 means on error;
4433  */
4434 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4435                           u64 index, char *name, int name_len, u8 filetype,
4436                           int err)
4437 {
4438         struct btrfs_trans_handle *trans;
4439         int stage = 0;
4440         int ret = 0;
4441
4442         /*
4443          * stage shall be one of following valild values:
4444          *      0: Fine, nothing to do.
4445          *      1: One of three is wrong, so add missing one.
4446          *      2: Two of three is wrong, so delete existed one.
4447          */
4448         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4449                 stage++;
4450         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4451                 stage++;
4452         if (err & (INODE_REF_MISSING))
4453                 stage++;
4454
4455         /* stage must be smllarer than 3 */
4456         ASSERT(stage < 3);
4457
4458         trans = btrfs_start_transaction(root, 1);
4459         if (stage == 2) {
4460                 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4461                                    name_len, 0);
4462                 goto out;
4463         }
4464         if (stage == 1) {
4465                 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4466                                filetype, &index, 1, 1);
4467                 goto out;
4468         }
4469 out:
4470         btrfs_commit_transaction(trans, root);
4471
4472         if (ret)
4473                 error("fail to repair inode %llu name %s filetype %u",
4474                       ino, name, filetype);
4475         else
4476                 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4477                        stage == 2 ? "Delete" : "Add",
4478                        ino, name, filetype);
4479
4480         return ret;
4481 }
4482
4483 /*
4484  * Traverse the given INODE_REF and call find_dir_item() to find related
4485  * DIR_ITEM/DIR_INDEX.
4486  *
4487  * @root:       the root of the fs/file tree
4488  * @ref_key:    the key of the INODE_REF
4489  * @path        the path provides node and slot
4490  * @refs:       the count of INODE_REF
4491  * @mode:       the st_mode of INODE_ITEM
4492  * @name_ret:   returns with the first ref's name
4493  * @name_len_ret:    len of the name_ret
4494  *
4495  * Return 0 if no error occurred.
4496  */
4497 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4498                            struct btrfs_path *path, char *name_ret,
4499                            u32 *namelen_ret, u64 *refs_ret, int mode)
4500 {
4501         struct btrfs_key key;
4502         struct btrfs_key location;
4503         struct btrfs_inode_ref *ref;
4504         struct extent_buffer *node;
4505         char namebuf[BTRFS_NAME_LEN] = {0};
4506         u32 total;
4507         u32 cur = 0;
4508         u32 len;
4509         u32 name_len;
4510         u64 index;
4511         int ret;
4512         int err = 0;
4513         int tmp_err;
4514         int slot;
4515         int need_research = 0;
4516         u64 refs;
4517
4518 begin:
4519         err = 0;
4520         cur = 0;
4521         refs = *refs_ret;
4522
4523         /* since after repair, path and the dir item may be changed */
4524         if (need_research) {
4525                 need_research = 0;
4526                 btrfs_release_path(path);
4527                 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4528                 /* the item was deleted, let path point to the last checked item */
4529                 if (ret > 0) {
4530                         if (path->slots[0] == 0)
4531                                 btrfs_prev_leaf(root, path);
4532                         else
4533                                 path->slots[0]--;
4534                 }
4535                 if (ret)
4536                         goto out;
4537         }
4538
4539         location.objectid = ref_key->objectid;
4540         location.type = BTRFS_INODE_ITEM_KEY;
4541         location.offset = 0;
4542         node = path->nodes[0];
4543         slot = path->slots[0];
4544
4545         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4546         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4547         total = btrfs_item_size_nr(node, slot);
4548
4549 next:
4550         /* Update inode ref count */
4551         refs++;
4552         tmp_err = 0;
4553         index = btrfs_inode_ref_index(node, ref);
4554         name_len = btrfs_inode_ref_name_len(node, ref);
4555
4556         if (name_len <= BTRFS_NAME_LEN) {
4557                 len = name_len;
4558         } else {
4559                 len = BTRFS_NAME_LEN;
4560                 warning("root %llu INODE_REF[%llu %llu] name too long",
4561                         root->objectid, ref_key->objectid, ref_key->offset);
4562         }
4563
4564         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4565
4566         /* copy the first name found to name_ret */
4567         if (refs == 1 && name_ret) {
4568                 memcpy(name_ret, namebuf, len);
4569                 *namelen_ret = len;
4570         }
4571
4572         /* Check root dir ref */
4573         if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4574                 if (index != 0 || len != strlen("..") ||
4575                     strncmp("..", namebuf, len) ||
4576                     ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4577                         /* set err bits then repair will delete the ref */
4578                         err |= DIR_INDEX_MISSING;
4579                         err |= DIR_ITEM_MISSING;
4580                 }
4581                 goto end;
4582         }
4583
4584         /* Find related DIR_INDEX */
4585         key.objectid = ref_key->offset;
4586         key.type = BTRFS_DIR_INDEX_KEY;
4587         key.offset = index;
4588         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4589                             imode_to_type(mode));
4590
4591         /* Find related dir_item */
4592         key.objectid = ref_key->offset;
4593         key.type = BTRFS_DIR_ITEM_KEY;
4594         key.offset = btrfs_name_hash(namebuf, len);
4595         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4596                             imode_to_type(mode));
4597 end:
4598         if (tmp_err && repair) {
4599                 ret = repair_ternary_lowmem(root, ref_key->offset,
4600                                             ref_key->objectid, index, namebuf,
4601                                             name_len, imode_to_type(mode),
4602                                             tmp_err);
4603                 if (!ret) {
4604                         need_research = 1;
4605                         goto begin;
4606                 }
4607         }
4608         print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4609                             imode_to_type(mode), tmp_err);
4610         err |= tmp_err;
4611         len = sizeof(*ref) + name_len;
4612         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4613         cur += len;
4614         if (cur < total)
4615                 goto next;
4616
4617 out:
4618         *refs_ret = refs;
4619         return err;
4620 }
4621
4622 /*
4623  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4624  * DIR_ITEM/DIR_INDEX.
4625  *
4626  * @root:       the root of the fs/file tree
4627  * @ref_key:    the key of the INODE_EXTREF
4628  * @refs:       the count of INODE_EXTREF
4629  * @mode:       the st_mode of INODE_ITEM
4630  *
4631  * Return 0 if no error occurred.
4632  */
4633 static int check_inode_extref(struct btrfs_root *root,
4634                               struct btrfs_key *ref_key,
4635                               struct extent_buffer *node, int slot, u64 *refs,
4636                               int mode)
4637 {
4638         struct btrfs_key key;
4639         struct btrfs_key location;
4640         struct btrfs_inode_extref *extref;
4641         char namebuf[BTRFS_NAME_LEN] = {0};
4642         u32 total;
4643         u32 cur = 0;
4644         u32 len;
4645         u32 name_len;
4646         u64 index;
4647         u64 parent;
4648         int ret;
4649         int err = 0;
4650
4651         location.objectid = ref_key->objectid;
4652         location.type = BTRFS_INODE_ITEM_KEY;
4653         location.offset = 0;
4654
4655         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4656         total = btrfs_item_size_nr(node, slot);
4657
4658 next:
4659         /* update inode ref count */
4660         (*refs)++;
4661         name_len = btrfs_inode_extref_name_len(node, extref);
4662         index = btrfs_inode_extref_index(node, extref);
4663         parent = btrfs_inode_extref_parent(node, extref);
4664         if (name_len <= BTRFS_NAME_LEN) {
4665                 len = name_len;
4666         } else {
4667                 len = BTRFS_NAME_LEN;
4668                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4669                         root->objectid, ref_key->objectid, ref_key->offset);
4670         }
4671         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4672
4673         /* Check root dir ref name */
4674         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4675                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4676                       root->objectid, ref_key->objectid, ref_key->offset,
4677                       namebuf);
4678                 err |= ROOT_DIR_ERROR;
4679         }
4680
4681         /* find related dir_index */
4682         key.objectid = parent;
4683         key.type = BTRFS_DIR_INDEX_KEY;
4684         key.offset = index;
4685         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4686         err |= ret;
4687
4688         /* find related dir_item */
4689         key.objectid = parent;
4690         key.type = BTRFS_DIR_ITEM_KEY;
4691         key.offset = btrfs_name_hash(namebuf, len);
4692         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4693         err |= ret;
4694
4695         len = sizeof(*extref) + name_len;
4696         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4697         cur += len;
4698
4699         if (cur < total)
4700                 goto next;
4701
4702         return err;
4703 }
4704
4705 /*
4706  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4707  * DIR_ITEM/DIR_INDEX match.
4708  * Return with @index_ret.
4709  *
4710  * @root:       the root of the fs/file tree
4711  * @key:        the key of the INODE_REF/INODE_EXTREF
4712  * @name:       the name in the INODE_REF/INODE_EXTREF
4713  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4714  * @index_ret:  the index in the INODE_REF/INODE_EXTREF,
4715  *              value (64)-1 means do not check index
4716  * @ext_ref:    the EXTENDED_IREF feature
4717  *
4718  * Return 0 if no error occurred.
4719  * Return >0 for error bitmap
4720  */
4721 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4722                           char *name, int namelen, u64 *index_ret,
4723                           unsigned int ext_ref)
4724 {
4725         struct btrfs_path path;
4726         struct btrfs_inode_ref *ref;
4727         struct btrfs_inode_extref *extref;
4728         struct extent_buffer *node;
4729         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4730         u32 total;
4731         u32 cur = 0;
4732         u32 len;
4733         u32 ref_namelen;
4734         u64 ref_index;
4735         u64 parent;
4736         u64 dir_id;
4737         int slot;
4738         int ret;
4739
4740         ASSERT(index_ret);
4741
4742         btrfs_init_path(&path);
4743         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4744         if (ret) {
4745                 ret = INODE_REF_MISSING;
4746                 goto extref;
4747         }
4748
4749         node = path.nodes[0];
4750         slot = path.slots[0];
4751
4752         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4753         total = btrfs_item_size_nr(node, slot);
4754
4755         /* Iterate all entry of INODE_REF */
4756         while (cur < total) {
4757                 ret = INODE_REF_MISSING;
4758
4759                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4760                 ref_index = btrfs_inode_ref_index(node, ref);
4761                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4762                         goto next_ref;
4763
4764                 if (cur + sizeof(*ref) + ref_namelen > total ||
4765                     ref_namelen > BTRFS_NAME_LEN) {
4766                         warning("root %llu INODE %s[%llu %llu] name too long",
4767                                 root->objectid,
4768                                 key->type == BTRFS_INODE_REF_KEY ?
4769                                         "REF" : "EXTREF",
4770                                 key->objectid, key->offset);
4771
4772                         if (cur + sizeof(*ref) > total)
4773                                 break;
4774                         len = min_t(u32, total - cur - sizeof(*ref),
4775                                     BTRFS_NAME_LEN);
4776                 } else {
4777                         len = ref_namelen;
4778                 }
4779
4780                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4781                                    len);
4782
4783                 if (len != namelen || strncmp(ref_namebuf, name, len))
4784                         goto next_ref;
4785
4786                 *index_ret = ref_index;
4787                 ret = 0;
4788                 goto out;
4789 next_ref:
4790                 len = sizeof(*ref) + ref_namelen;
4791                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4792                 cur += len;
4793         }
4794
4795 extref:
4796         /* Skip if not support EXTENDED_IREF feature */
4797         if (!ext_ref)
4798                 goto out;
4799
4800         btrfs_release_path(&path);
4801         btrfs_init_path(&path);
4802
4803         dir_id = key->offset;
4804         key->type = BTRFS_INODE_EXTREF_KEY;
4805         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4806
4807         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4808         if (ret) {
4809                 ret = INODE_REF_MISSING;
4810                 goto out;
4811         }
4812
4813         node = path.nodes[0];
4814         slot = path.slots[0];
4815
4816         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4817         cur = 0;
4818         total = btrfs_item_size_nr(node, slot);
4819
4820         /* Iterate all entry of INODE_EXTREF */
4821         while (cur < total) {
4822                 ret = INODE_REF_MISSING;
4823
4824                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4825                 ref_index = btrfs_inode_extref_index(node, extref);
4826                 parent = btrfs_inode_extref_parent(node, extref);
4827                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4828                         goto next_extref;
4829
4830                 if (parent != dir_id)
4831                         goto next_extref;
4832
4833                 if (ref_namelen <= BTRFS_NAME_LEN) {
4834                         len = ref_namelen;
4835                 } else {
4836                         len = BTRFS_NAME_LEN;
4837                         warning("root %llu INODE %s[%llu %llu] name too long",
4838                                 root->objectid,
4839                                 key->type == BTRFS_INODE_REF_KEY ?
4840                                         "REF" : "EXTREF",
4841                                 key->objectid, key->offset);
4842                 }
4843                 read_extent_buffer(node, ref_namebuf,
4844                                    (unsigned long)(extref + 1), len);
4845
4846                 if (len != namelen || strncmp(ref_namebuf, name, len))
4847                         goto next_extref;
4848
4849                 *index_ret = ref_index;
4850                 ret = 0;
4851                 goto out;
4852
4853 next_extref:
4854                 len = sizeof(*extref) + ref_namelen;
4855                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4856                 cur += len;
4857
4858         }
4859 out:
4860         btrfs_release_path(&path);
4861         return ret;
4862 }
4863
4864 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
4865                                u64 ino, u64 index, const char *namebuf,
4866                                int name_len, u8 filetype, int err)
4867 {
4868         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
4869                 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
4870                       root->objectid, key->objectid, key->offset, namebuf,
4871                       filetype,
4872                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
4873         }
4874
4875         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
4876                 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
4877                       root->objectid, key->objectid, index, namebuf, filetype,
4878                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
4879         }
4880
4881         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
4882                 error(
4883                 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
4884                       root->objectid, ino, index, namebuf, filetype,
4885                       err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
4886         }
4887
4888         if (err & INODE_REF_MISSING)
4889                 error(
4890                 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
4891                       root->objectid, ino, key->objectid, namebuf, filetype);
4892
4893 }
4894
4895 /*
4896  * Call repair_inode_item_missing and repair_ternary_lowmem to repair
4897  *
4898  * Returns error after repair
4899  */
4900 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
4901                            u64 index, u8 filetype, char *namebuf, u32 name_len,
4902                            int err)
4903 {
4904         int ret;
4905
4906         if (err & INODE_ITEM_MISSING) {
4907                 ret = repair_inode_item_missing(root, ino, filetype);
4908                 if (!ret)
4909                         err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
4910         }
4911
4912         if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
4913                 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
4914                                             name_len, filetype, err);
4915                 if (!ret) {
4916                         err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
4917                         err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
4918                         err &= ~(INODE_REF_MISSING);
4919                 }
4920         }
4921         return err;
4922 }
4923
4924 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
4925                 u64 *size_ret)
4926 {
4927         struct btrfs_key key;
4928         struct btrfs_path path;
4929         u32 len;
4930         struct btrfs_dir_item *di;
4931         int ret;
4932         int cur = 0;
4933         int total = 0;
4934
4935         ASSERT(size_ret);
4936         *size_ret = 0;
4937
4938         key.objectid = ino;
4939         key.type = type;
4940         key.offset = (u64)-1;
4941
4942         btrfs_init_path(&path);
4943         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4944         if (ret < 0) {
4945                 ret = -EIO;
4946                 goto out;
4947         }
4948         /* if found, go to spacial case */
4949         if (ret == 0)
4950                 goto special_case;
4951
4952 loop:
4953         ret = btrfs_previous_item(root, &path, ino, type);
4954
4955         if (ret) {
4956                 ret = 0;
4957                 goto out;
4958         }
4959
4960 special_case:
4961         di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
4962         cur = 0;
4963         total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
4964
4965         while (cur < total) {
4966                 len = btrfs_dir_name_len(path.nodes[0], di);
4967                 if (len > BTRFS_NAME_LEN)
4968                         len = BTRFS_NAME_LEN;
4969                 *size_ret += len;
4970
4971                 len += btrfs_dir_data_len(path.nodes[0], di);
4972                 len += sizeof(*di);
4973                 di = (struct btrfs_dir_item *)((char *)di + len);
4974                 cur += len;
4975         }
4976         goto loop;
4977
4978 out:
4979         btrfs_release_path(&path);
4980         return ret;
4981 }
4982
4983 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
4984 {
4985         u64 item_size;
4986         u64 index_size;
4987         int ret;
4988
4989         ASSERT(size);
4990         ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
4991         if (ret)
4992                 goto out;
4993
4994         ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
4995         if (ret)
4996                 goto out;
4997
4998         *size = item_size + index_size;
4999
5000 out:
5001         if (ret)
5002                 error("failed to count root %llu INODE[%llu] root size",
5003                       root->objectid, ino);
5004         return ret;
5005 }
5006
5007 /*
5008  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5009  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5010  *
5011  * @root:       the root of the fs/file tree
5012  * @key:        the key of the INODE_REF/INODE_EXTREF
5013  * @path:       the path
5014  * @size:       the st_size of the INODE_ITEM
5015  * @ext_ref:    the EXTENDED_IREF feature
5016  *
5017  * Return 0 if no error occurred.
5018  * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
5019  */
5020 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5021                           struct btrfs_path *path, u64 *size,
5022                           unsigned int ext_ref)
5023 {
5024         struct btrfs_dir_item *di;
5025         struct btrfs_inode_item *ii;
5026         struct btrfs_key key;
5027         struct btrfs_key location;
5028         struct extent_buffer *node;
5029         int slot;
5030         char namebuf[BTRFS_NAME_LEN] = {0};
5031         u32 total;
5032         u32 cur = 0;
5033         u32 len;
5034         u32 name_len;
5035         u32 data_len;
5036         u8 filetype;
5037         u32 mode = 0;
5038         u64 index;
5039         int ret;
5040         int err;
5041         int tmp_err;
5042         int need_research = 0;
5043
5044         /*
5045          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5046          * ignore index check.
5047          */
5048         if (di_key->type == BTRFS_DIR_INDEX_KEY)
5049                 index = di_key->offset;
5050         else
5051                 index = (u64)-1;
5052 begin:
5053         err = 0;
5054         cur = 0;
5055
5056         /* since after repair, path and the dir item may be changed */
5057         if (need_research) {
5058                 need_research = 0;
5059                 err |= DIR_COUNT_AGAIN;
5060                 btrfs_release_path(path);
5061                 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5062                 /* the item was deleted, let path point the last checked item */
5063                 if (ret > 0) {
5064                         if (path->slots[0] == 0)
5065                                 btrfs_prev_leaf(root, path);
5066                         else
5067                                 path->slots[0]--;
5068                 }
5069                 if (ret)
5070                         goto out;
5071         }
5072
5073         node = path->nodes[0];
5074         slot = path->slots[0];
5075
5076         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5077         total = btrfs_item_size_nr(node, slot);
5078         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5079
5080         while (cur < total) {
5081                 data_len = btrfs_dir_data_len(node, di);
5082                 tmp_err = 0;
5083                 if (data_len)
5084                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5085                               root->objectid,
5086               di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5087                               di_key->objectid, di_key->offset, data_len);
5088
5089                 name_len = btrfs_dir_name_len(node, di);
5090                 if (name_len <= BTRFS_NAME_LEN) {
5091                         len = name_len;
5092                 } else {
5093                         len = BTRFS_NAME_LEN;
5094                         warning("root %llu %s[%llu %llu] name too long",
5095                                 root->objectid,
5096                 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5097                                 di_key->objectid, di_key->offset);
5098                 }
5099                 (*size) += name_len;
5100                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5101                                    len);
5102                 filetype = btrfs_dir_type(node, di);
5103
5104                 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5105                     di_key->offset != btrfs_name_hash(namebuf, len)) {
5106                         err |= -EIO;
5107                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5108                         root->objectid, di_key->objectid, di_key->offset,
5109                         namebuf, len, filetype, di_key->offset,
5110                         btrfs_name_hash(namebuf, len));
5111                 }
5112
5113                 btrfs_dir_item_key_to_cpu(node, di, &location);
5114                 /* Ignore related ROOT_ITEM check */
5115                 if (location.type == BTRFS_ROOT_ITEM_KEY)
5116                         goto next;
5117
5118                 btrfs_release_path(path);
5119                 /* Check relative INODE_ITEM(existence/filetype) */
5120                 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5121                 if (ret) {
5122                         tmp_err |= INODE_ITEM_MISSING;
5123                         goto next;
5124                 }
5125
5126                 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5127                                     struct btrfs_inode_item);
5128                 mode = btrfs_inode_mode(path->nodes[0], ii);
5129                 if (imode_to_type(mode) != filetype) {
5130                         tmp_err |= INODE_ITEM_MISMATCH;
5131                         goto next;
5132                 }
5133
5134                 /* Check relative INODE_REF/INODE_EXTREF */
5135                 key.objectid = location.objectid;
5136                 key.type = BTRFS_INODE_REF_KEY;
5137                 key.offset = di_key->objectid;
5138                 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5139                                           &index, ext_ref);
5140
5141                 /* check relative INDEX/ITEM */
5142                 key.objectid = di_key->objectid;
5143                 if (key.type == BTRFS_DIR_ITEM_KEY) {
5144                         key.type = BTRFS_DIR_INDEX_KEY;
5145                         key.offset = index;
5146                 } else {
5147                         key.type = BTRFS_DIR_ITEM_KEY;
5148                         key.offset = btrfs_name_hash(namebuf, name_len);
5149                 }
5150
5151                 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5152                                          name_len, filetype);
5153                 /* find_dir_item may find index */
5154                 if (key.type == BTRFS_DIR_INDEX_KEY)
5155                         index = key.offset;
5156 next:
5157
5158                 if (tmp_err && repair) {
5159                         ret = repair_dir_item(root, di_key->objectid,
5160                                               location.objectid, index,
5161                                               imode_to_type(mode), namebuf,
5162                                               name_len, tmp_err);
5163                         if (ret != tmp_err) {
5164                                 need_research = 1;
5165                                 goto begin;
5166                         }
5167                 }
5168                 btrfs_release_path(path);
5169                 print_dir_item_err(root, di_key, location.objectid, index,
5170                                    namebuf, name_len, filetype, tmp_err);
5171                 err |= tmp_err;
5172                 len = sizeof(*di) + name_len + data_len;
5173                 di = (struct btrfs_dir_item *)((char *)di + len);
5174                 cur += len;
5175
5176                 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5177                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5178                               root->objectid, di_key->objectid,
5179                               di_key->offset);
5180                         break;
5181                 }
5182         }
5183 out:
5184         /* research path */
5185         btrfs_release_path(path);
5186         ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5187         if (ret)
5188                 err |= ret > 0 ? -ENOENT : ret;
5189         return err;
5190 }
5191
5192 /*
5193  * Wrapper function of btrfs_punch_hole.
5194  *
5195  * Returns 0 means success.
5196  * Returns not 0 means error.
5197  */
5198 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5199                              u64 len)
5200 {
5201         struct btrfs_trans_handle *trans;
5202         int ret = 0;
5203
5204         trans = btrfs_start_transaction(root, 1);
5205         if (IS_ERR(trans))
5206                 return PTR_ERR(trans);
5207
5208         ret = btrfs_punch_hole(trans, root, ino, start, len);
5209         if (ret)
5210                 error("failed to add hole [%llu, %llu] in inode [%llu]",
5211                       start, len, ino);
5212         else
5213                 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5214                        ino);
5215
5216         btrfs_commit_transaction(trans, root);
5217         return ret;
5218 }
5219
5220 /*
5221  * Check file extent datasum/hole, update the size of the file extents,
5222  * check and update the last offset of the file extent.
5223  *
5224  * @root:       the root of fs/file tree.
5225  * @fkey:       the key of the file extent.
5226  * @nodatasum:  INODE_NODATASUM feature.
5227  * @size:       the sum of all EXTENT_DATA items size for this inode.
5228  * @end:        the offset of the last extent.
5229  *
5230  * Return 0 if no error occurred.
5231  */
5232 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5233                              struct extent_buffer *node, int slot,
5234                              unsigned int nodatasum, u64 *size, u64 *end)
5235 {
5236         struct btrfs_file_extent_item *fi;
5237         u64 disk_bytenr;
5238         u64 disk_num_bytes;
5239         u64 extent_num_bytes;
5240         u64 extent_offset;
5241         u64 csum_found;         /* In byte size, sectorsize aligned */
5242         u64 search_start;       /* Logical range start we search for csum */
5243         u64 search_len;         /* Logical range len we search for csum */
5244         unsigned int extent_type;
5245         unsigned int is_hole;
5246         int compressed = 0;
5247         int ret;
5248         int err = 0;
5249
5250         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5251
5252         /* Check inline extent */
5253         extent_type = btrfs_file_extent_type(node, fi);
5254         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5255                 struct btrfs_item *e = btrfs_item_nr(slot);
5256                 u32 item_inline_len;
5257
5258                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5259                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5260                 compressed = btrfs_file_extent_compression(node, fi);
5261                 if (extent_num_bytes == 0) {
5262                         error(
5263                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5264                                 root->objectid, fkey->objectid, fkey->offset);
5265                         err |= FILE_EXTENT_ERROR;
5266                 }
5267                 if (!compressed && extent_num_bytes != item_inline_len) {
5268                         error(
5269                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5270                                 root->objectid, fkey->objectid, fkey->offset,
5271                                 extent_num_bytes, item_inline_len);
5272                         err |= FILE_EXTENT_ERROR;
5273                 }
5274                 *end += extent_num_bytes;
5275                 *size += extent_num_bytes;
5276                 return err;
5277         }
5278
5279         /* Check extent type */
5280         if (extent_type != BTRFS_FILE_EXTENT_REG &&
5281                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5282                 err |= FILE_EXTENT_ERROR;
5283                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5284                       root->objectid, fkey->objectid, fkey->offset);
5285                 return err;
5286         }
5287
5288         /* Check REG_EXTENT/PREALLOC_EXTENT */
5289         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5290         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5291         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5292         extent_offset = btrfs_file_extent_offset(node, fi);
5293         compressed = btrfs_file_extent_compression(node, fi);
5294         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5295
5296         /*
5297          * Check EXTENT_DATA csum
5298          *
5299          * For plain (uncompressed) extent, we should only check the range
5300          * we're referring to, as it's possible that part of prealloc extent
5301          * has been written, and has csum:
5302          *
5303          * |<--- Original large preallocated extent A ---->|
5304          * |<- Prealloc File Extent ->|<- Regular Extent ->|
5305          *      No csum                         Has csum
5306          *
5307          * For compressed extent, we should check the whole range.
5308          */
5309         if (!compressed) {
5310                 search_start = disk_bytenr + extent_offset;
5311                 search_len = extent_num_bytes;
5312         } else {
5313                 search_start = disk_bytenr;
5314                 search_len = disk_num_bytes;
5315         }
5316         ret = count_csum_range(root->fs_info, search_start, search_len, &csum_found);
5317         if (csum_found > 0 && nodatasum) {
5318                 err |= ODD_CSUM_ITEM;
5319                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5320                       root->objectid, fkey->objectid, fkey->offset);
5321         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5322                    !is_hole && (ret < 0 || csum_found < search_len)) {
5323                 err |= CSUM_ITEM_MISSING;
5324                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5325                       root->objectid, fkey->objectid, fkey->offset,
5326                       csum_found, search_len);
5327         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5328                 err |= ODD_CSUM_ITEM;
5329                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5330                       root->objectid, fkey->objectid, fkey->offset, csum_found);
5331         }
5332
5333         /* Check EXTENT_DATA hole */
5334         if (!no_holes && *end != fkey->offset) {
5335                 if (repair)
5336                         ret = punch_extent_hole(root, fkey->objectid,
5337                                                 *end, fkey->offset - *end);
5338                 if (!repair || ret) {
5339                         err |= FILE_EXTENT_ERROR;
5340                         error(
5341 "root %llu EXTENT_DATA[%llu %llu] gap exists, expected: EXTENT_DATA[%llu %llu]",
5342                                 root->objectid, fkey->objectid, fkey->offset,
5343                                 fkey->objectid, *end);
5344                 }
5345         }
5346
5347         *end += extent_num_bytes;
5348         if (!is_hole)
5349                 *size += extent_num_bytes;
5350
5351         return err;
5352 }
5353
5354 /*
5355  * Set inode item nbytes to @nbytes
5356  *
5357  * Returns  0     on success
5358  * Returns  != 0  on error
5359  */
5360 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5361                                       struct btrfs_path *path,
5362                                       u64 ino, u64 nbytes)
5363 {
5364         struct btrfs_trans_handle *trans;
5365         struct btrfs_inode_item *ii;
5366         struct btrfs_key key;
5367         struct btrfs_key research_key;
5368         int err = 0;
5369         int ret;
5370
5371         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5372
5373         key.objectid = ino;
5374         key.type = BTRFS_INODE_ITEM_KEY;
5375         key.offset = 0;
5376
5377         trans = btrfs_start_transaction(root, 1);
5378         if (IS_ERR(trans)) {
5379                 ret = PTR_ERR(trans);
5380                 err |= ret;
5381                 goto out;
5382         }
5383
5384         btrfs_release_path(path);
5385         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5386         if (ret > 0)
5387                 ret = -ENOENT;
5388         if (ret) {
5389                 err |= ret;
5390                 goto fail;
5391         }
5392
5393         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5394                             struct btrfs_inode_item);
5395         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5396         btrfs_mark_buffer_dirty(path->nodes[0]);
5397 fail:
5398         btrfs_commit_transaction(trans, root);
5399 out:
5400         if (ret)
5401                 error("failed to set nbytes in inode %llu root %llu",
5402                       ino, root->root_key.objectid);
5403         else
5404                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5405                        root->root_key.objectid, nbytes);
5406
5407         /* research path */
5408         btrfs_release_path(path);
5409         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5410         err |= ret;
5411
5412         return err;
5413 }
5414
5415 /*
5416  * Set directory inode isize to @isize.
5417  *
5418  * Returns 0     on success.
5419  * Returns != 0  on error.
5420  */
5421 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5422                                    struct btrfs_path *path,
5423                                    u64 ino, u64 isize)
5424 {
5425         struct btrfs_trans_handle *trans;
5426         struct btrfs_inode_item *ii;
5427         struct btrfs_key key;
5428         struct btrfs_key research_key;
5429         int ret;
5430         int err = 0;
5431
5432         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5433
5434         key.objectid = ino;
5435         key.type = BTRFS_INODE_ITEM_KEY;
5436         key.offset = 0;
5437
5438         trans = btrfs_start_transaction(root, 1);
5439         if (IS_ERR(trans)) {
5440                 ret = PTR_ERR(trans);
5441                 err |= ret;
5442                 goto out;
5443         }
5444
5445         btrfs_release_path(path);
5446         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5447         if (ret > 0)
5448                 ret = -ENOENT;
5449         if (ret) {
5450                 err |= ret;
5451                 goto fail;
5452         }
5453
5454         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5455                             struct btrfs_inode_item);
5456         btrfs_set_inode_size(path->nodes[0], ii, isize);
5457         btrfs_mark_buffer_dirty(path->nodes[0]);
5458 fail:
5459         btrfs_commit_transaction(trans, root);
5460 out:
5461         if (ret)
5462                 error("failed to set isize in inode %llu root %llu",
5463                       ino, root->root_key.objectid);
5464         else
5465                 printf("Set isize in inode %llu root %llu to %llu\n",
5466                        ino, root->root_key.objectid, isize);
5467
5468         btrfs_release_path(path);
5469         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5470         err |= ret;
5471
5472         return err;
5473 }
5474
5475 /*
5476  * Wrapper function for btrfs_add_orphan_item().
5477  *
5478  * Returns 0     on success.
5479  * Returns != 0  on error.
5480  */
5481 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5482                                            struct btrfs_path *path, u64 ino)
5483 {
5484         struct btrfs_trans_handle *trans;
5485         struct btrfs_key research_key;
5486         int ret;
5487         int err = 0;
5488
5489         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5490
5491         trans = btrfs_start_transaction(root, 1);
5492         if (IS_ERR(trans)) {
5493                 ret = PTR_ERR(trans);
5494                 err |= ret;
5495                 goto out;
5496         }
5497
5498         btrfs_release_path(path);
5499         ret = btrfs_add_orphan_item(trans, root, path, ino);
5500         err |= ret;
5501         btrfs_commit_transaction(trans, root);
5502 out:
5503         if (ret)
5504                 error("failed to add inode %llu as orphan item root %llu",
5505                       ino, root->root_key.objectid);
5506         else
5507                 printf("Added inode %llu as orphan item root %llu\n",
5508                        ino, root->root_key.objectid);
5509
5510         btrfs_release_path(path);
5511         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5512         err |= ret;
5513
5514         return err;
5515 }
5516
5517 /* Set inode_item nlink to @ref_count.
5518  * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
5519  *
5520  * Returns 0 on success
5521  */
5522 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
5523                                       struct btrfs_path *path, u64 ino,
5524                                       const char *name, u32 namelen,
5525                                       u64 ref_count, u8 filetype, u64 *nlink)
5526 {
5527         struct btrfs_trans_handle *trans;
5528         struct btrfs_inode_item *ii;
5529         struct btrfs_key key;
5530         struct btrfs_key old_key;
5531         char namebuf[BTRFS_NAME_LEN] = {0};
5532         int name_len;
5533         int ret;
5534         int ret2;
5535
5536         /* save the key */
5537         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
5538
5539         if (name && namelen) {
5540                 ASSERT(namelen <= BTRFS_NAME_LEN);
5541                 memcpy(namebuf, name, namelen);
5542                 name_len = namelen;
5543         } else {
5544                 sprintf(namebuf, "%llu", ino);
5545                 name_len = count_digits(ino);
5546                 printf("Can't find file name for inode %llu, use %s instead\n",
5547                        ino, namebuf);
5548         }
5549
5550         trans = btrfs_start_transaction(root, 1);
5551         if (IS_ERR(trans)) {
5552                 ret = PTR_ERR(trans);
5553                 goto out;
5554         }
5555
5556         btrfs_release_path(path);
5557         /* if refs is 0, put it into lostfound */
5558         if (ref_count == 0) {
5559                 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
5560                                               name_len, filetype, &ref_count);
5561                 if (ret)
5562                         goto fail;
5563         }
5564
5565         /* reset inode_item's nlink to ref_count */
5566         key.objectid = ino;
5567         key.type = BTRFS_INODE_ITEM_KEY;
5568         key.offset = 0;
5569
5570         btrfs_release_path(path);
5571         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5572         if (ret > 0)
5573                 ret = -ENOENT;
5574         if (ret)
5575                 goto fail;
5576
5577         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5578                             struct btrfs_inode_item);
5579         btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
5580         btrfs_mark_buffer_dirty(path->nodes[0]);
5581
5582         if (nlink)
5583                 *nlink = ref_count;
5584 fail:
5585         btrfs_commit_transaction(trans, root);
5586 out:
5587         if (ret)
5588                 error(
5589         "fail to repair nlink of inode %llu root %llu name %s filetype %u",
5590                        root->objectid, ino, namebuf, filetype);
5591         else
5592                 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
5593                        root->objectid, ino, namebuf, filetype);
5594
5595         /* research */
5596         btrfs_release_path(path);
5597         ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
5598         if (ret2 < 0)
5599                 return ret |= ret2;
5600         return ret;
5601 }
5602
5603 /*
5604  * Check INODE_ITEM and related ITEMs (the same inode number)
5605  * 1. check link count
5606  * 2. check inode ref/extref
5607  * 3. check dir item/index
5608  *
5609  * @ext_ref:    the EXTENDED_IREF feature
5610  *
5611  * Return 0 if no error occurred.
5612  * Return >0 for error or hit the traversal is done(by error bitmap)
5613  */
5614 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5615                             unsigned int ext_ref)
5616 {
5617         struct extent_buffer *node;
5618         struct btrfs_inode_item *ii;
5619         struct btrfs_key key;
5620         struct btrfs_key last_key;
5621         u64 inode_id;
5622         u32 mode;
5623         u64 nlink;
5624         u64 nbytes;
5625         u64 isize;
5626         u64 size = 0;
5627         u64 refs = 0;
5628         u64 extent_end = 0;
5629         u64 extent_size = 0;
5630         unsigned int dir;
5631         unsigned int nodatasum;
5632         int slot;
5633         int ret;
5634         int err = 0;
5635         char namebuf[BTRFS_NAME_LEN] = {0};
5636         u32 name_len = 0;
5637
5638         node = path->nodes[0];
5639         slot = path->slots[0];
5640
5641         btrfs_item_key_to_cpu(node, &key, slot);
5642         inode_id = key.objectid;
5643
5644         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5645                 ret = btrfs_next_item(root, path);
5646                 if (ret > 0)
5647                         err |= LAST_ITEM;
5648                 return err;
5649         }
5650
5651         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5652         isize = btrfs_inode_size(node, ii);
5653         nbytes = btrfs_inode_nbytes(node, ii);
5654         mode = btrfs_inode_mode(node, ii);
5655         dir = imode_to_type(mode) == BTRFS_FT_DIR;
5656         nlink = btrfs_inode_nlink(node, ii);
5657         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5658
5659         while (1) {
5660                 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
5661                 ret = btrfs_next_item(root, path);
5662                 if (ret < 0) {
5663                         /* out will fill 'err' rusing current statistics */
5664                         goto out;
5665                 } else if (ret > 0) {
5666                         err |= LAST_ITEM;
5667                         goto out;
5668                 }
5669
5670                 node = path->nodes[0];
5671                 slot = path->slots[0];
5672                 btrfs_item_key_to_cpu(node, &key, slot);
5673                 if (key.objectid != inode_id)
5674                         goto out;
5675
5676                 switch (key.type) {
5677                 case BTRFS_INODE_REF_KEY:
5678                         ret = check_inode_ref(root, &key, path, namebuf,
5679                                               &name_len, &refs, mode);
5680                         err |= ret;
5681                         break;
5682                 case BTRFS_INODE_EXTREF_KEY:
5683                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5684                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
5685                                         root->objectid, key.objectid,
5686                                         key.offset);
5687                         ret = check_inode_extref(root, &key, node, slot, &refs,
5688                                                  mode);
5689                         err |= ret;
5690                         break;
5691                 case BTRFS_DIR_ITEM_KEY:
5692                 case BTRFS_DIR_INDEX_KEY:
5693                         if (!dir) {
5694                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5695                                         root->objectid, inode_id,
5696                                         imode_to_type(mode), key.objectid,
5697                                         key.offset);
5698                         }
5699                         ret = check_dir_item(root, &key, path, &size, ext_ref);
5700                         err |= ret;
5701                         break;
5702                 case BTRFS_EXTENT_DATA_KEY:
5703                         if (dir) {
5704                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5705                                         root->objectid, inode_id, key.objectid,
5706                                         key.offset);
5707                         }
5708                         ret = check_file_extent(root, &key, node, slot,
5709                                                 nodatasum, &extent_size,
5710                                                 &extent_end);
5711                         err |= ret;
5712                         break;
5713                 case BTRFS_XATTR_ITEM_KEY:
5714                         break;
5715                 default:
5716                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5717                               key.objectid, key.type, key.offset);
5718                 }
5719         }
5720
5721 out:
5722         if (err & LAST_ITEM) {
5723                 btrfs_release_path(path);
5724                 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
5725                 if (ret)
5726                         return err;
5727         }
5728
5729         /* verify INODE_ITEM nlink/isize/nbytes */
5730         if (dir) {
5731                 if (repair && (err & DIR_COUNT_AGAIN)) {
5732                         err &= ~DIR_COUNT_AGAIN;
5733                         count_dir_isize(root, inode_id, &size);
5734                 }
5735
5736                 if ((nlink != 1 || refs != 1) && repair) {
5737                         ret = repair_inode_nlinks_lowmem(root, path, inode_id,
5738                                 namebuf, name_len, refs, imode_to_type(mode),
5739                                 &nlink);
5740                 }
5741
5742                 if (nlink != 1) {
5743                         err |= LINK_COUNT_ERROR;
5744                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5745                               root->objectid, inode_id, nlink);
5746                 }
5747
5748                 /*
5749                  * Just a warning, as dir inode nbytes is just an
5750                  * instructive value.
5751                  */
5752                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5753                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5754                                 root->objectid, inode_id,
5755                                 root->fs_info->nodesize);
5756                 }
5757
5758                 if (isize != size) {
5759                         if (repair)
5760                                 ret = repair_dir_isize_lowmem(root, path,
5761                                                               inode_id, size);
5762                         if (!repair || ret) {
5763                                 err |= ISIZE_ERROR;
5764                                 error(
5765                 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5766                                       root->objectid, inode_id, isize, size);
5767                         }
5768                 }
5769         } else {
5770                 if (nlink != refs) {
5771                         if (repair)
5772                                 ret = repair_inode_nlinks_lowmem(root, path,
5773                                          inode_id, namebuf, name_len, refs,
5774                                          imode_to_type(mode), &nlink);
5775                         if (!repair || ret) {
5776                                 err |= LINK_COUNT_ERROR;
5777                                 error(
5778                 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5779                                       root->objectid, inode_id, nlink, refs);
5780                         }
5781                 } else if (!nlink) {
5782                         if (repair)
5783                                 ret = repair_inode_orphan_item_lowmem(root,
5784                                                               path, inode_id);
5785                         if (!repair || ret) {
5786                                 err |= ORPHAN_ITEM;
5787                                 error("root %llu INODE[%llu] is orphan item",
5788                                       root->objectid, inode_id);
5789                         }
5790                 }
5791
5792                 if (!nbytes && !no_holes && extent_end < isize) {
5793                         if (repair)
5794                                 ret = punch_extent_hole(root, inode_id,
5795                                                 extent_end, isize - extent_end);
5796                         if (!repair || ret) {
5797                                 err |= NBYTES_ERROR;
5798                                 error(
5799         "root %llu INODE[%llu] size %llu should have a file extent hole",
5800                                       root->objectid, inode_id, isize);
5801                         }
5802                 }
5803
5804                 if (nbytes != extent_size) {
5805                         if (repair)
5806                                 ret = repair_inode_nbytes_lowmem(root, path,
5807                                                          inode_id, extent_size);
5808                         if (!repair || ret) {
5809                                 err |= NBYTES_ERROR;
5810                                 error(
5811         "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5812                                       root->objectid, inode_id, nbytes,
5813                                       extent_size);
5814                         }
5815                 }
5816         }
5817
5818         if (err & LAST_ITEM)
5819                 btrfs_next_item(root, path);
5820         return err;
5821 }
5822
5823 /*
5824  * Insert the missing inode item and inode ref.
5825  *
5826  * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
5827  * Root dir should be handled specially because root dir is the root of fs.
5828  *
5829  * returns err (>0 or 0) after repair
5830  */
5831 static int repair_fs_first_inode(struct btrfs_root *root, int err)
5832 {
5833         struct btrfs_trans_handle *trans;
5834         struct btrfs_key key;
5835         struct btrfs_path path;
5836         int filetype = BTRFS_FT_DIR;
5837         int ret = 0;
5838
5839         btrfs_init_path(&path);
5840
5841         if (err & INODE_REF_MISSING) {
5842                 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5843                 key.type = BTRFS_INODE_REF_KEY;
5844                 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5845
5846                 trans = btrfs_start_transaction(root, 1);
5847                 if (IS_ERR(trans)) {
5848                         ret = PTR_ERR(trans);
5849                         goto out;
5850                 }
5851
5852                 btrfs_release_path(&path);
5853                 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
5854                 if (ret)
5855                         goto trans_fail;
5856
5857                 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
5858                                              BTRFS_FIRST_FREE_OBJECTID,
5859                                              BTRFS_FIRST_FREE_OBJECTID, 0);
5860                 if (ret)
5861                         goto trans_fail;
5862
5863                 printf("Add INODE_REF[%llu %llu] name %s\n",
5864                        BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
5865                        "..");
5866                 err &= ~INODE_REF_MISSING;
5867 trans_fail:
5868                 if (ret)
5869                         error("fail to insert first inode's ref");
5870                 btrfs_commit_transaction(trans, root);
5871         }
5872
5873         if (err & INODE_ITEM_MISSING) {
5874                 ret = repair_inode_item_missing(root,
5875                                         BTRFS_FIRST_FREE_OBJECTID, filetype);
5876                 if (ret)
5877                         goto out;
5878                 err &= ~INODE_ITEM_MISSING;
5879         }
5880 out:
5881         if (ret)
5882                 error("fail to repair first inode");
5883         btrfs_release_path(&path);
5884         return err;
5885 }
5886
5887 /*
5888  * check first root dir's inode_item and inode_ref
5889  *
5890  * returns 0 means no error
5891  * returns >0 means error
5892  * returns <0 means fatal error
5893  */
5894 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5895 {
5896         struct btrfs_path path;
5897         struct btrfs_key key;
5898         struct btrfs_inode_item *ii;
5899         u64 index;
5900         u32 mode;
5901         int err = 0;
5902         int ret;
5903
5904         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5905         key.type = BTRFS_INODE_ITEM_KEY;
5906         key.offset = 0;
5907
5908         /* For root being dropped, we don't need to check first inode */
5909         if (btrfs_root_refs(&root->root_item) == 0 &&
5910             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5911             BTRFS_FIRST_FREE_OBJECTID)
5912                 return 0;
5913
5914         btrfs_init_path(&path);
5915         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5916         if (ret < 0)
5917                 goto out;
5918         if (ret > 0) {
5919                 ret = 0;
5920                 err |= INODE_ITEM_MISSING;
5921         } else {
5922                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
5923                                     struct btrfs_inode_item);
5924                 mode = btrfs_inode_mode(path.nodes[0], ii);
5925                 if (imode_to_type(mode) != BTRFS_FT_DIR)
5926                         err |= INODE_ITEM_MISMATCH;
5927         }
5928
5929         /* lookup first inode ref */
5930         key.offset = BTRFS_FIRST_FREE_OBJECTID;
5931         key.type = BTRFS_INODE_REF_KEY;
5932         /* special index value */
5933         index = 0;
5934
5935         ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
5936         if (ret < 0)
5937                 goto out;
5938         err |= ret;
5939
5940 out:
5941         btrfs_release_path(&path);
5942
5943         if (err && repair)
5944                 err = repair_fs_first_inode(root, err);
5945
5946         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
5947                 error("root dir INODE_ITEM is %s",
5948                       err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5949         if (err & INODE_REF_MISSING)
5950                 error("root dir INODE_REF is missing");
5951
5952         return ret < 0 ? ret : err;
5953 }
5954
5955 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5956                                                 u64 parent, u64 root)
5957 {
5958         struct rb_node *node;
5959         struct tree_backref *back = NULL;
5960         struct tree_backref match = {
5961                 .node = {
5962                         .is_data = 0,
5963                 },
5964         };
5965
5966         if (parent) {
5967                 match.parent = parent;
5968                 match.node.full_backref = 1;
5969         } else {
5970                 match.root = root;
5971         }
5972
5973         node = rb_search(&rec->backref_tree, &match.node.node,
5974                          (rb_compare_keys)compare_extent_backref, NULL);
5975         if (node)
5976                 back = to_tree_backref(rb_node_to_extent_backref(node));
5977
5978         return back;
5979 }
5980
5981 static struct data_backref *find_data_backref(struct extent_record *rec,
5982                                                 u64 parent, u64 root,
5983                                                 u64 owner, u64 offset,
5984                                                 int found_ref,
5985                                                 u64 disk_bytenr, u64 bytes)
5986 {
5987         struct rb_node *node;
5988         struct data_backref *back = NULL;
5989         struct data_backref match = {
5990                 .node = {
5991                         .is_data = 1,
5992                 },
5993                 .owner = owner,
5994                 .offset = offset,
5995                 .bytes = bytes,
5996                 .found_ref = found_ref,
5997                 .disk_bytenr = disk_bytenr,
5998         };
5999
6000         if (parent) {
6001                 match.parent = parent;
6002                 match.node.full_backref = 1;
6003         } else {
6004                 match.root = root;
6005         }
6006
6007         node = rb_search(&rec->backref_tree, &match.node.node,
6008                          (rb_compare_keys)compare_extent_backref, NULL);
6009         if (node)
6010                 back = to_data_backref(rb_node_to_extent_backref(node));
6011
6012         return back;
6013 }
6014 /*
6015  * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6016  * blocks and integrity of fs tree items.
6017  *
6018  * @root:         the root of the tree to be checked.
6019  * @ext_ref       feature EXTENDED_IREF is enable or not.
6020  * @account       if NOT 0 means check the tree (including tree)'s treeblocks.
6021  *                otherwise means check fs tree(s) items relationship and
6022  *                @root MUST be a fs tree root.
6023  * Returns 0      represents OK.
6024  * Returns not 0  represents error.
6025  */
6026 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6027                             struct btrfs_root *root, unsigned int ext_ref,
6028                             int check_all)
6029
6030 {
6031         struct btrfs_path path;
6032         struct node_refs nrefs;
6033         struct btrfs_root_item *root_item = &root->root_item;
6034         int ret;
6035         int level;
6036         int err = 0;
6037
6038         memset(&nrefs, 0, sizeof(nrefs));
6039         if (!check_all) {
6040                 /*
6041                  * We need to manually check the first inode item (256)
6042                  * As the following traversal function will only start from
6043                  * the first inode item in the leaf, if inode item (256) is
6044                  * missing we will skip it forever.
6045                  */
6046                 ret = check_fs_first_inode(root, ext_ref);
6047                 if (ret < 0)
6048                         return ret;
6049         }
6050
6051
6052         level = btrfs_header_level(root->node);
6053         btrfs_init_path(&path);
6054
6055         if (btrfs_root_refs(root_item) > 0 ||
6056             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6057                 path.nodes[level] = root->node;
6058                 path.slots[level] = 0;
6059                 extent_buffer_get(root->node);
6060         } else {
6061                 struct btrfs_key key;
6062
6063                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6064                 level = root_item->drop_level;
6065                 path.lowest_level = level;
6066                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6067                 if (ret < 0)
6068                         goto out;
6069                 ret = 0;
6070         }
6071
6072         while (1) {
6073                 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6074                                         ext_ref, check_all);
6075
6076                 err |= !!ret;
6077
6078                 /* if ret is negative, walk shall stop */
6079                 if (ret < 0) {
6080                         ret = err;
6081                         break;
6082                 }
6083
6084                 ret = walk_up_tree_v2(root, &path, &level);
6085                 if (ret != 0) {
6086                         /* Normal exit, reset ret to err */
6087                         ret = err;
6088                         break;
6089                 }
6090         }
6091
6092 out:
6093         btrfs_release_path(&path);
6094         return ret;
6095 }
6096
6097 /*
6098  * Iterate all items in the tree and call check_inode_item() to check.
6099  *
6100  * @root:       the root of the tree to be checked.
6101  * @ext_ref:    the EXTENDED_IREF feature
6102  *
6103  * Return 0 if no error found.
6104  * Return <0 for error.
6105  */
6106 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6107 {
6108         reset_cached_block_groups(root->fs_info);
6109         return check_btrfs_root(NULL, root, ext_ref, 0);
6110 }
6111
6112 /*
6113  * Find the relative ref for root_ref and root_backref.
6114  *
6115  * @root:       the root of the root tree.
6116  * @ref_key:    the key of the root ref.
6117  *
6118  * Return 0 if no error occurred.
6119  */
6120 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6121                           struct extent_buffer *node, int slot)
6122 {
6123         struct btrfs_path path;
6124         struct btrfs_key key;
6125         struct btrfs_root_ref *ref;
6126         struct btrfs_root_ref *backref;
6127         char ref_name[BTRFS_NAME_LEN] = {0};
6128         char backref_name[BTRFS_NAME_LEN] = {0};
6129         u64 ref_dirid;
6130         u64 ref_seq;
6131         u32 ref_namelen;
6132         u64 backref_dirid;
6133         u64 backref_seq;
6134         u32 backref_namelen;
6135         u32 len;
6136         int ret;
6137         int err = 0;
6138
6139         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6140         ref_dirid = btrfs_root_ref_dirid(node, ref);
6141         ref_seq = btrfs_root_ref_sequence(node, ref);
6142         ref_namelen = btrfs_root_ref_name_len(node, ref);
6143
6144         if (ref_namelen <= BTRFS_NAME_LEN) {
6145                 len = ref_namelen;
6146         } else {
6147                 len = BTRFS_NAME_LEN;
6148                 warning("%s[%llu %llu] ref_name too long",
6149                         ref_key->type == BTRFS_ROOT_REF_KEY ?
6150                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
6151                         ref_key->offset);
6152         }
6153         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6154
6155         /* Find relative root_ref */
6156         key.objectid = ref_key->offset;
6157         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6158         key.offset = ref_key->objectid;
6159
6160         btrfs_init_path(&path);
6161         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6162         if (ret) {
6163                 err |= ROOT_REF_MISSING;
6164                 error("%s[%llu %llu] couldn't find relative ref",
6165                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6166                       "ROOT_REF" : "ROOT_BACKREF",
6167                       ref_key->objectid, ref_key->offset);
6168                 goto out;
6169         }
6170
6171         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6172                                  struct btrfs_root_ref);
6173         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6174         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6175         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6176
6177         if (backref_namelen <= BTRFS_NAME_LEN) {
6178                 len = backref_namelen;
6179         } else {
6180                 len = BTRFS_NAME_LEN;
6181                 warning("%s[%llu %llu] ref_name too long",
6182                         key.type == BTRFS_ROOT_REF_KEY ?
6183                         "ROOT_REF" : "ROOT_BACKREF",
6184                         key.objectid, key.offset);
6185         }
6186         read_extent_buffer(path.nodes[0], backref_name,
6187                            (unsigned long)(backref + 1), len);
6188
6189         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6190             ref_namelen != backref_namelen ||
6191             strncmp(ref_name, backref_name, len)) {
6192                 err |= ROOT_REF_MISMATCH;
6193                 error("%s[%llu %llu] mismatch relative ref",
6194                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6195                       "ROOT_REF" : "ROOT_BACKREF",
6196                       ref_key->objectid, ref_key->offset);
6197         }
6198 out:
6199         btrfs_release_path(&path);
6200         return err;
6201 }
6202
6203 /*
6204  * Check all fs/file tree in low_memory mode.
6205  *
6206  * 1. for fs tree root item, call check_fs_root_v2()
6207  * 2. for fs tree root ref/backref, call check_root_ref()
6208  *
6209  * Return 0 if no error occurred.
6210  */
6211 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6212 {
6213         struct btrfs_root *tree_root = fs_info->tree_root;
6214         struct btrfs_root *cur_root = NULL;
6215         struct btrfs_path path;
6216         struct btrfs_key key;
6217         struct extent_buffer *node;
6218         unsigned int ext_ref;
6219         int slot;
6220         int ret;
6221         int err = 0;
6222
6223         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6224
6225         btrfs_init_path(&path);
6226         key.objectid = BTRFS_FS_TREE_OBJECTID;
6227         key.offset = 0;
6228         key.type = BTRFS_ROOT_ITEM_KEY;
6229
6230         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6231         if (ret < 0) {
6232                 err = ret;
6233                 goto out;
6234         } else if (ret > 0) {
6235                 err = -ENOENT;
6236                 goto out;
6237         }
6238
6239         while (1) {
6240                 node = path.nodes[0];
6241                 slot = path.slots[0];
6242                 btrfs_item_key_to_cpu(node, &key, slot);
6243                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6244                         goto out;
6245                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6246                     fs_root_objectid(key.objectid)) {
6247                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6248                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6249                                                                        &key);
6250                         } else {
6251                                 key.offset = (u64)-1;
6252                                 cur_root = btrfs_read_fs_root(fs_info, &key);
6253                         }
6254
6255                         if (IS_ERR(cur_root)) {
6256                                 error("Fail to read fs/subvol tree: %lld",
6257                                       key.objectid);
6258                                 err = -EIO;
6259                                 goto next;
6260                         }
6261
6262                         ret = check_fs_root_v2(cur_root, ext_ref);
6263                         err |= ret;
6264
6265                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6266                                 btrfs_free_fs_root(cur_root);
6267                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6268                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
6269                         ret = check_root_ref(tree_root, &key, node, slot);
6270                         err |= ret;
6271                 }
6272 next:
6273                 ret = btrfs_next_item(tree_root, &path);
6274                 if (ret > 0)
6275                         goto out;
6276                 if (ret < 0) {
6277                         err = ret;
6278                         goto out;
6279                 }
6280         }
6281
6282 out:
6283         btrfs_release_path(&path);
6284         return err;
6285 }
6286
6287 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6288                           struct cache_tree *root_cache)
6289 {
6290         int ret;
6291
6292         if (!ctx.progress_enabled)
6293                 fprintf(stderr, "checking fs roots\n");
6294         if (check_mode == CHECK_MODE_LOWMEM)
6295                 ret = check_fs_roots_v2(fs_info);
6296         else
6297                 ret = check_fs_roots(fs_info, root_cache);
6298
6299         return ret;
6300 }
6301
6302 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6303 {
6304         struct extent_backref *back, *tmp;
6305         struct tree_backref *tback;
6306         struct data_backref *dback;
6307         u64 found = 0;
6308         int err = 0;
6309
6310         rbtree_postorder_for_each_entry_safe(back, tmp,
6311                                              &rec->backref_tree, node) {
6312                 if (!back->found_extent_tree) {
6313                         err = 1;
6314                         if (!print_errs)
6315                                 goto out;
6316                         if (back->is_data) {
6317                                 dback = to_data_backref(back);
6318                                 fprintf(stderr, "Data backref %llu %s %llu"
6319                                         " owner %llu offset %llu num_refs %lu"
6320                                         " not found in extent tree\n",
6321                                         (unsigned long long)rec->start,
6322                                         back->full_backref ?
6323                                         "parent" : "root",
6324                                         back->full_backref ?
6325                                         (unsigned long long)dback->parent:
6326                                         (unsigned long long)dback->root,
6327                                         (unsigned long long)dback->owner,
6328                                         (unsigned long long)dback->offset,
6329                                         (unsigned long)dback->num_refs);
6330                         } else {
6331                                 tback = to_tree_backref(back);
6332                                 fprintf(stderr, "Tree backref %llu parent %llu"
6333                                         " root %llu not found in extent tree\n",
6334                                         (unsigned long long)rec->start,
6335                                         (unsigned long long)tback->parent,
6336                                         (unsigned long long)tback->root);
6337                         }
6338                 }
6339                 if (!back->is_data && !back->found_ref) {
6340                         err = 1;
6341                         if (!print_errs)
6342                                 goto out;
6343                         tback = to_tree_backref(back);
6344                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6345                                 (unsigned long long)rec->start,
6346                                 back->full_backref ? "parent" : "root",
6347                                 back->full_backref ?
6348                                 (unsigned long long)tback->parent :
6349                                 (unsigned long long)tback->root, back);
6350                 }
6351                 if (back->is_data) {
6352                         dback = to_data_backref(back);
6353                         if (dback->found_ref != dback->num_refs) {
6354                                 err = 1;
6355                                 if (!print_errs)
6356                                         goto out;
6357                                 fprintf(stderr, "Incorrect local backref count"
6358                                         " on %llu %s %llu owner %llu"
6359                                         " offset %llu found %u wanted %u back %p\n",
6360                                         (unsigned long long)rec->start,
6361                                         back->full_backref ?
6362                                         "parent" : "root",
6363                                         back->full_backref ?
6364                                         (unsigned long long)dback->parent:
6365                                         (unsigned long long)dback->root,
6366                                         (unsigned long long)dback->owner,
6367                                         (unsigned long long)dback->offset,
6368                                         dback->found_ref, dback->num_refs, back);
6369                         }
6370                         if (dback->disk_bytenr != rec->start) {
6371                                 err = 1;
6372                                 if (!print_errs)
6373                                         goto out;
6374                                 fprintf(stderr, "Backref disk bytenr does not"
6375                                         " match extent record, bytenr=%llu, "
6376                                         "ref bytenr=%llu\n",
6377                                         (unsigned long long)rec->start,
6378                                         (unsigned long long)dback->disk_bytenr);
6379                         }
6380
6381                         if (dback->bytes != rec->nr) {
6382                                 err = 1;
6383                                 if (!print_errs)
6384                                         goto out;
6385                                 fprintf(stderr, "Backref bytes do not match "
6386                                         "extent backref, bytenr=%llu, ref "
6387                                         "bytes=%llu, backref bytes=%llu\n",
6388                                         (unsigned long long)rec->start,
6389                                         (unsigned long long)rec->nr,
6390                                         (unsigned long long)dback->bytes);
6391                         }
6392                 }
6393                 if (!back->is_data) {
6394                         found += 1;
6395                 } else {
6396                         dback = to_data_backref(back);
6397                         found += dback->found_ref;
6398                 }
6399         }
6400         if (found != rec->refs) {
6401                 err = 1;
6402                 if (!print_errs)
6403                         goto out;
6404                 fprintf(stderr, "Incorrect global backref count "
6405                         "on %llu found %llu wanted %llu\n",
6406                         (unsigned long long)rec->start,
6407                         (unsigned long long)found,
6408                         (unsigned long long)rec->refs);
6409         }
6410 out:
6411         return err;
6412 }
6413
/*
 * Node destructor handed to rb_free_nodes(): frees the extent_backref that
 * rb_node_to_extent_backref() yields for @node.
 */
static void __free_one_backref(struct rb_node *node)
{
        free(rb_node_to_extent_backref(node));
}
6420
/*
 * Free every backref hanging off @rec->backref_tree by passing
 * __free_one_backref() to rb_free_nodes().  @rec itself is not freed.
 */
static void free_all_extent_backrefs(struct extent_record *rec)
{
        rb_free_nodes(&rec->backref_tree, __free_one_backref);
}
6425
6426 static void free_extent_record_cache(struct cache_tree *extent_cache)
6427 {
6428         struct cache_extent *cache;
6429         struct extent_record *rec;
6430
6431         while (1) {
6432                 cache = first_cache_extent(extent_cache);
6433                 if (!cache)
6434                         break;
6435                 rec = container_of(cache, struct extent_record, cache);
6436                 remove_cache_extent(extent_cache, cache);
6437                 free_all_extent_backrefs(rec);
6438                 free(rec);
6439         }
6440 }
6441
6442 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6443                                  struct extent_record *rec)
6444 {
6445         if (rec->content_checked && rec->owner_ref_checked &&
6446             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6447             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6448             !rec->bad_full_backref && !rec->crossing_stripes &&
6449             !rec->wrong_chunk_type) {
6450                 remove_cache_extent(extent_cache, &rec->cache);
6451                 free_all_extent_backrefs(rec);
6452                 list_del_init(&rec->list);
6453                 free(rec);
6454         }
6455         return 0;
6456 }
6457
6458 static int check_owner_ref(struct btrfs_root *root,
6459                             struct extent_record *rec,
6460                             struct extent_buffer *buf)
6461 {
6462         struct extent_backref *node, *tmp;
6463         struct tree_backref *back;
6464         struct btrfs_root *ref_root;
6465         struct btrfs_key key;
6466         struct btrfs_path path;
6467         struct extent_buffer *parent;
6468         int level;
6469         int found = 0;
6470         int ret;
6471
6472         rbtree_postorder_for_each_entry_safe(node, tmp,
6473                                              &rec->backref_tree, node) {
6474                 if (node->is_data)
6475                         continue;
6476                 if (!node->found_ref)
6477                         continue;
6478                 if (node->full_backref)
6479                         continue;
6480                 back = to_tree_backref(node);
6481                 if (btrfs_header_owner(buf) == back->root)
6482                         return 0;
6483         }
6484         BUG_ON(rec->is_root);
6485
6486         /* try to find the block by search corresponding fs tree */
6487         key.objectid = btrfs_header_owner(buf);
6488         key.type = BTRFS_ROOT_ITEM_KEY;
6489         key.offset = (u64)-1;
6490
6491         ref_root = btrfs_read_fs_root(root->fs_info, &key);
6492         if (IS_ERR(ref_root))
6493                 return 1;
6494
6495         level = btrfs_header_level(buf);
6496         if (level == 0)
6497                 btrfs_item_key_to_cpu(buf, &key, 0);
6498         else
6499                 btrfs_node_key_to_cpu(buf, &key, 0);
6500
6501         btrfs_init_path(&path);
6502         path.lowest_level = level + 1;
6503         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6504         if (ret < 0)
6505                 return 0;
6506
6507         parent = path.nodes[level + 1];
6508         if (parent && buf->start == btrfs_node_blockptr(parent,
6509                                                         path.slots[level + 1]))
6510                 found = 1;
6511
6512         btrfs_release_path(&path);
6513         return found ? 0 : 1;
6514 }
6515
6516 static int is_extent_tree_record(struct extent_record *rec)
6517 {
6518         struct extent_backref *node, *tmp;
6519         struct tree_backref *back;
6520         int is_extent = 0;
6521
6522         rbtree_postorder_for_each_entry_safe(node, tmp,
6523                                              &rec->backref_tree, node) {
6524                 if (node->is_data)
6525                         return 0;
6526                 back = to_tree_backref(node);
6527                 if (node->full_backref)
6528                         return 0;
6529                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
6530                         is_extent = 1;
6531         }
6532         return is_extent;
6533 }
6534
6535
6536 static int record_bad_block_io(struct btrfs_fs_info *info,
6537                                struct cache_tree *extent_cache,
6538                                u64 start, u64 len)
6539 {
6540         struct extent_record *rec;
6541         struct cache_extent *cache;
6542         struct btrfs_key key;
6543
6544         cache = lookup_cache_extent(extent_cache, start, len);
6545         if (!cache)
6546                 return 0;
6547
6548         rec = container_of(cache, struct extent_record, cache);
6549         if (!is_extent_tree_record(rec))
6550                 return 0;
6551
6552         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6553         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
6554 }
6555
/*
 * Swap the entries at @slot and @slot + 1 of tree block @buf.
 *
 * For a node (non-zero header level) the two btrfs_key_ptr entries are
 * exchanged, and if @slot is 0 the new first key is pushed to the parents via
 * btrfs_fixup_low_keys().
 *
 * For a leaf the item data, item offsets, item sizes and keys of the two
 * slots are all exchanged; path->slots[0] is left at @slot + 1 afterwards.
 *
 * Returns 0 on success or -ENOMEM if a temporary buffer cannot be allocated.
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
                       struct extent_buffer *buf, int slot)
{
        if (btrfs_header_level(buf)) {
                struct btrfs_key_ptr ptr1, ptr2;

                /* read both key pointers, then write them back crosswise */
                read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
                                   sizeof(struct btrfs_key_ptr));
                read_extent_buffer(buf, &ptr2,
                                   btrfs_node_key_ptr_offset(slot + 1),
                                   sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr1,
                                    btrfs_node_key_ptr_offset(slot + 1),
                                    sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr2,
                                    btrfs_node_key_ptr_offset(slot),
                                    sizeof(struct btrfs_key_ptr));
                if (slot == 0) {
                        struct btrfs_disk_key key;
                        /* first key of the node changed, update the parents */
                        btrfs_node_key(buf, &key, 0);
                        btrfs_fixup_low_keys(root, path, &key,
                                             btrfs_header_level(buf) + 1);
                }
        } else {
                struct btrfs_item *item1, *item2;
                struct btrfs_key k1, k2;
                char *item1_data, *item2_data;
                u32 item1_offset, item2_offset, item1_size, item2_size;

                item1 = btrfs_item_nr(slot);
                item2 = btrfs_item_nr(slot + 1);
                btrfs_item_key_to_cpu(buf, &k1, slot);
                btrfs_item_key_to_cpu(buf, &k2, slot + 1);
                item1_offset = btrfs_item_offset(buf, item1);
                item2_offset = btrfs_item_offset(buf, item2);
                item1_size = btrfs_item_size(buf, item1);
                item2_size = btrfs_item_size(buf, item2);

                item1_data = malloc(item1_size);
                if (!item1_data)
                        return -ENOMEM;
                item2_data = malloc(item2_size);
                if (!item2_data) {
                        free(item1_data);
                        return -ENOMEM;
                }

                read_extent_buffer(buf, item1_data, item1_offset, item1_size);
                read_extent_buffer(buf, item2_data, item2_offset, item2_size);

                /*
                 * NOTE(review): each write below copies from one item's
                 * buffer using the OTHER item's size as the length.  When
                 * item1_size != item2_size the longer copy reads past the end
                 * of the smaller malloc'd buffer.  Confirm the intended
                 * layout before relying on this for unequal item sizes.
                 */
                write_extent_buffer(buf, item1_data, item2_offset, item2_size);
                write_extent_buffer(buf, item2_data, item1_offset, item1_size);
                free(item1_data);
                free(item2_data);

                /* exchange the offset/size stored in the two item headers */
                btrfs_set_item_offset(buf, item1, item2_offset);
                btrfs_set_item_offset(buf, item2, item1_offset);
                btrfs_set_item_size(buf, item1, item2_size);
                btrfs_set_item_size(buf, item2, item1_size);

                /* exchange the keys; the setter works on path->slots[0] */
                path->slots[0] = slot;
                btrfs_set_item_key_unsafe(root, path, &k2);
                path->slots[0] = slot + 1;
                btrfs_set_item_key_unsafe(root, path, &k1);
        }
        return 0;
}
6623
6624 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6625 {
6626         struct extent_buffer *buf;
6627         struct btrfs_key k1, k2;
6628         int i;
6629         int level = path->lowest_level;
6630         int ret = -EIO;
6631
6632         buf = path->nodes[level];
6633         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6634                 if (level) {
6635                         btrfs_node_key_to_cpu(buf, &k1, i);
6636                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
6637                 } else {
6638                         btrfs_item_key_to_cpu(buf, &k1, i);
6639                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
6640                 }
6641                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6642                         continue;
6643                 ret = swap_values(root, path, buf, i);
6644                 if (ret)
6645                         break;
6646                 btrfs_mark_buffer_dirty(buf);
6647                 i = 0;
6648         }
6649         return ret;
6650 }
6651
6652 static int delete_bogus_item(struct btrfs_root *root,
6653                              struct btrfs_path *path,
6654                              struct extent_buffer *buf, int slot)
6655 {
6656         struct btrfs_key key;
6657         int nritems = btrfs_header_nritems(buf);
6658
6659         btrfs_item_key_to_cpu(buf, &key, slot);
6660
6661         /* These are all the keys we can deal with missing. */
6662         if (key.type != BTRFS_DIR_INDEX_KEY &&
6663             key.type != BTRFS_EXTENT_ITEM_KEY &&
6664             key.type != BTRFS_METADATA_ITEM_KEY &&
6665             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6666             key.type != BTRFS_EXTENT_DATA_REF_KEY)
6667                 return -1;
6668
6669         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6670                (unsigned long long)key.objectid, key.type,
6671                (unsigned long long)key.offset, slot, buf->start);
6672         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6673                               btrfs_item_nr_offset(slot + 1),
6674                               sizeof(struct btrfs_item) *
6675                               (nritems - slot - 1));
6676         btrfs_set_header_nritems(buf, nritems - 1);
6677         if (slot == 0) {
6678                 struct btrfs_disk_key disk_key;
6679
6680                 btrfs_item_key(buf, &disk_key, 0);
6681                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6682         }
6683         btrfs_mark_buffer_dirty(buf);
6684         return 0;
6685 }
6686
6687 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6688 {
6689         struct extent_buffer *buf;
6690         int i;
6691         int ret = 0;
6692
6693         /* We should only get this for leaves */
6694         BUG_ON(path->lowest_level);
6695         buf = path->nodes[0];
6696 again:
6697         for (i = 0; i < btrfs_header_nritems(buf); i++) {
6698                 unsigned int shift = 0, offset;
6699
6700                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6701                     BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
6702                         if (btrfs_item_end_nr(buf, i) >
6703                             BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
6704                                 ret = delete_bogus_item(root, path, buf, i);
6705                                 if (!ret)
6706                                         goto again;
6707                                 fprintf(stderr, "item is off the end of the "
6708                                         "leaf, can't fix\n");
6709                                 ret = -EIO;
6710                                 break;
6711                         }
6712                         shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
6713                                 btrfs_item_end_nr(buf, i);
6714                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6715                            btrfs_item_offset_nr(buf, i - 1)) {
6716                         if (btrfs_item_end_nr(buf, i) >
6717                             btrfs_item_offset_nr(buf, i - 1)) {
6718                                 ret = delete_bogus_item(root, path, buf, i);
6719                                 if (!ret)
6720                                         goto again;
6721                                 fprintf(stderr, "items overlap, can't fix\n");
6722                                 ret = -EIO;
6723                                 break;
6724                         }
6725                         shift = btrfs_item_offset_nr(buf, i - 1) -
6726                                 btrfs_item_end_nr(buf, i);
6727                 }
6728                 if (!shift)
6729                         continue;
6730
6731                 printf("Shifting item nr %d by %u bytes in block %llu\n",
6732                        i, shift, (unsigned long long)buf->start);
6733                 offset = btrfs_item_offset_nr(buf, i);
6734                 memmove_extent_buffer(buf,
6735                                       btrfs_leaf_data(buf) + offset + shift,
6736                                       btrfs_leaf_data(buf) + offset,
6737                                       btrfs_item_size_nr(buf, i));
6738                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6739                                       offset + shift);
6740                 btrfs_mark_buffer_dirty(buf);
6741         }
6742
6743         /*
6744          * We may have moved things, in which case we want to exit so we don't
6745          * write those changes out.  Once we have proper abort functionality in
6746          * progs this can be changed to something nicer.
6747          */
6748         BUG_ON(ret);
6749         return ret;
6750 }
6751
6752 /*
6753  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
6754  * then just return -EIO.
6755  */
6756 static int try_to_fix_bad_block(struct btrfs_root *root,
6757                                 struct extent_buffer *buf,
6758                                 enum btrfs_tree_block_status status)
6759 {
6760         struct btrfs_trans_handle *trans;
6761         struct ulist *roots;
6762         struct ulist_node *node;
6763         struct btrfs_root *search_root;
6764         struct btrfs_path path;
6765         struct ulist_iterator iter;
6766         struct btrfs_key root_key, key;
6767         int ret;
6768
6769         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6770             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6771                 return -EIO;
6772
6773         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6774         if (ret)
6775                 return -EIO;
6776
6777         btrfs_init_path(&path);
6778         ULIST_ITER_INIT(&iter);
6779         while ((node = ulist_next(roots, &iter))) {
6780                 root_key.objectid = node->val;
6781                 root_key.type = BTRFS_ROOT_ITEM_KEY;
6782                 root_key.offset = (u64)-1;
6783
6784                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6785                 if (IS_ERR(root)) {
6786                         ret = -EIO;
6787                         break;
6788                 }
6789
6790
6791                 trans = btrfs_start_transaction(search_root, 0);
6792                 if (IS_ERR(trans)) {
6793                         ret = PTR_ERR(trans);
6794                         break;
6795                 }
6796
6797                 path.lowest_level = btrfs_header_level(buf);
6798                 path.skip_check_block = 1;
6799                 if (path.lowest_level)
6800                         btrfs_node_key_to_cpu(buf, &key, 0);
6801                 else
6802                         btrfs_item_key_to_cpu(buf, &key, 0);
6803                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6804                 if (ret) {
6805                         ret = -EIO;
6806                         btrfs_commit_transaction(trans, search_root);
6807                         break;
6808                 }
6809                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6810                         ret = fix_key_order(search_root, &path);
6811                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6812                         ret = fix_item_offset(search_root, &path);
6813                 if (ret) {
6814                         btrfs_commit_transaction(trans, search_root);
6815                         break;
6816                 }
6817                 btrfs_release_path(&path);
6818                 btrfs_commit_transaction(trans, search_root);
6819         }
6820         ulist_free(roots);
6821         btrfs_release_path(&path);
6822         return ret;
6823 }
6824
6825 static int check_block(struct btrfs_root *root,
6826                        struct cache_tree *extent_cache,
6827                        struct extent_buffer *buf, u64 flags)
6828 {
6829         struct extent_record *rec;
6830         struct cache_extent *cache;
6831         struct btrfs_key key;
6832         enum btrfs_tree_block_status status;
6833         int ret = 0;
6834         int level;
6835
6836         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6837         if (!cache)
6838                 return 1;
6839         rec = container_of(cache, struct extent_record, cache);
6840         rec->generation = btrfs_header_generation(buf);
6841
6842         level = btrfs_header_level(buf);
6843         if (btrfs_header_nritems(buf) > 0) {
6844
6845                 if (level == 0)
6846                         btrfs_item_key_to_cpu(buf, &key, 0);
6847                 else
6848                         btrfs_node_key_to_cpu(buf, &key, 0);
6849
6850                 rec->info_objectid = key.objectid;
6851         }
6852         rec->info_level = level;
6853
6854         if (btrfs_is_leaf(buf))
6855                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6856         else
6857                 status = btrfs_check_node(root, &rec->parent_key, buf);
6858
6859         if (status != BTRFS_TREE_BLOCK_CLEAN) {
6860                 if (repair)
6861                         status = try_to_fix_bad_block(root, buf, status);
6862                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6863                         ret = -EIO;
6864                         fprintf(stderr, "bad block %llu\n",
6865                                 (unsigned long long)buf->start);
6866                 } else {
6867                         /*
6868                          * Signal to callers we need to start the scan over
6869                          * again since we'll have cowed blocks.
6870                          */
6871                         ret = -EAGAIN;
6872                 }
6873         } else {
6874                 rec->content_checked = 1;
6875                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6876                         rec->owner_ref_checked = 1;
6877                 else {
6878                         ret = check_owner_ref(root, rec, buf);
6879                         if (!ret)
6880                                 rec->owner_ref_checked = 1;
6881                 }
6882         }
6883         if (!ret)
6884                 maybe_free_extent_rec(extent_cache, rec);
6885         return ret;
6886 }
6887
/*
 * Disabled code (compiled out by "#if 0"): walks rec->backrefs as a linked
 * list looking for a tree backref.  With @parent > 0 it matches full backrefs
 * on parent, otherwise it matches non-full backrefs on @root.  Returns the
 * matching backref or NULL.
 */
#if 0
static struct tree_backref *find_tree_backref(struct extent_record *rec,
                                                u64 parent, u64 root)
{
        struct list_head *cur = rec->backrefs.next;
        struct extent_backref *node;
        struct tree_backref *back;

        while(cur != &rec->backrefs) {
                node = to_extent_backref(cur);
                cur = cur->next;
                if (node->is_data)
                        continue;
                back = to_tree_backref(node);
                if (parent > 0) {
                        if (!node->full_backref)
                                continue;
                        if (parent == back->parent)
                                return back;
                } else {
                        if (node->full_backref)
                                continue;
                        if (back->root == root)
                                return back;
                }
        }
        return NULL;
}
#endif
6917
6918 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6919                                                 u64 parent, u64 root)
6920 {
6921         struct tree_backref *ref = malloc(sizeof(*ref));
6922
6923         if (!ref)
6924                 return NULL;
6925         memset(&ref->node, 0, sizeof(ref->node));
6926         if (parent > 0) {
6927                 ref->parent = parent;
6928                 ref->node.full_backref = 1;
6929         } else {
6930                 ref->root = root;
6931                 ref->node.full_backref = 0;
6932         }
6933
6934         return ref;
6935 }
6936
/*
 * Disabled code (compiled out by "#if 0"): walks rec->backrefs as a linked
 * list looking for a data backref.  With @parent > 0 it matches full backrefs
 * on parent; otherwise it matches non-full backrefs on root/owner/offset and,
 * when both @found_ref and the node's found_ref are set, skips entries whose
 * bytes or disk_bytenr differ.  Returns the matching backref or NULL.
 */
#if 0
static struct data_backref *find_data_backref(struct extent_record *rec,
                                                u64 parent, u64 root,
                                                u64 owner, u64 offset,
                                                int found_ref,
                                                u64 disk_bytenr, u64 bytes)
{
        struct list_head *cur = rec->backrefs.next;
        struct extent_backref *node;
        struct data_backref *back;

        while(cur != &rec->backrefs) {
                node = to_extent_backref(cur);
                cur = cur->next;
                if (!node->is_data)
                        continue;
                back = to_data_backref(node);
                if (parent > 0) {
                        if (!node->full_backref)
                                continue;
                        if (parent == back->parent)
                                return back;
                } else {
                        if (node->full_backref)
                                continue;
                        if (back->root == root && back->owner == owner &&
                            back->offset == offset) {
                                if (found_ref && node->found_ref &&
                                    (back->bytes != bytes ||
                                    back->disk_bytenr != disk_bytenr))
                                        continue;
                                return back;
                        }
                }
        }
        return NULL;
}
#endif
6975
6976 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6977                                                 u64 parent, u64 root,
6978                                                 u64 owner, u64 offset,
6979                                                 u64 max_size)
6980 {
6981         struct data_backref *ref = malloc(sizeof(*ref));
6982
6983         if (!ref)
6984                 return NULL;
6985         memset(&ref->node, 0, sizeof(ref->node));
6986         ref->node.is_data = 1;
6987
6988         if (parent > 0) {
6989                 ref->parent = parent;
6990                 ref->owner = 0;
6991                 ref->offset = 0;
6992                 ref->node.full_backref = 1;
6993         } else {
6994                 ref->root = root;
6995                 ref->owner = owner;
6996                 ref->offset = offset;
6997                 ref->node.full_backref = 0;
6998         }
6999         ref->bytes = max_size;
7000         ref->found_ref = 0;
7001         ref->num_refs = 0;
7002         if (max_size > rec->max_size)
7003                 rec->max_size = max_size;
7004         return ref;
7005 }
7006
7007 /* Check if the type of extent matches with its chunk */
7008 static void check_extent_type(struct extent_record *rec)
7009 {
7010         struct btrfs_block_group_cache *bg_cache;
7011
7012         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7013         if (!bg_cache)
7014                 return;
7015
7016         /* data extent, check chunk directly*/
7017         if (!rec->metadata) {
7018                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7019                         rec->wrong_chunk_type = 1;
7020                 return;
7021         }
7022
7023         /* metadata extent, check the obvious case first */
7024         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7025                                  BTRFS_BLOCK_GROUP_METADATA))) {
7026                 rec->wrong_chunk_type = 1;
7027                 return;
7028         }
7029
7030         /*
7031          * Check SYSTEM extent, as it's also marked as metadata, we can only
7032          * make sure it's a SYSTEM extent by its backref
7033          */
7034         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7035                 struct extent_backref *node;
7036                 struct tree_backref *tback;
7037                 u64 bg_type;
7038
7039                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7040                 if (node->is_data) {
7041                         /* tree block shouldn't have data backref */
7042                         rec->wrong_chunk_type = 1;
7043                         return;
7044                 }
7045                 tback = container_of(node, struct tree_backref, node);
7046
7047                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7048                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7049                 else
7050                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
7051                 if (!(bg_cache->flags & bg_type))
7052                         rec->wrong_chunk_type = 1;
7053         }
7054 }
7055
7056 /*
7057  * Allocate a new extent record, fill default values from @tmpl and insert int
7058  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7059  * the cache, otherwise it fails.
7060  */
7061 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7062                 struct extent_record *tmpl)
7063 {
7064         struct extent_record *rec;
7065         int ret = 0;
7066
7067         BUG_ON(tmpl->max_size == 0);
7068         rec = malloc(sizeof(*rec));
7069         if (!rec)
7070                 return -ENOMEM;
7071         rec->start = tmpl->start;
7072         rec->max_size = tmpl->max_size;
7073         rec->nr = max(tmpl->nr, tmpl->max_size);
7074         rec->found_rec = tmpl->found_rec;
7075         rec->content_checked = tmpl->content_checked;
7076         rec->owner_ref_checked = tmpl->owner_ref_checked;
7077         rec->num_duplicates = 0;
7078         rec->metadata = tmpl->metadata;
7079         rec->flag_block_full_backref = FLAG_UNSET;
7080         rec->bad_full_backref = 0;
7081         rec->crossing_stripes = 0;
7082         rec->wrong_chunk_type = 0;
7083         rec->is_root = tmpl->is_root;
7084         rec->refs = tmpl->refs;
7085         rec->extent_item_refs = tmpl->extent_item_refs;
7086         rec->parent_generation = tmpl->parent_generation;
7087         INIT_LIST_HEAD(&rec->backrefs);
7088         INIT_LIST_HEAD(&rec->dups);
7089         INIT_LIST_HEAD(&rec->list);
7090         rec->backref_tree = RB_ROOT;
7091         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7092         rec->cache.start = tmpl->start;
7093         rec->cache.size = tmpl->nr;
7094         ret = insert_cache_extent(extent_cache, &rec->cache);
7095         if (ret) {
7096                 free(rec);
7097                 return ret;
7098         }
7099         bytes_used += rec->nr;
7100
7101         if (tmpl->metadata)
7102                 rec->crossing_stripes = check_crossing_stripes(global_info,
7103                                 rec->start, global_info->nodesize);
7104         check_extent_type(rec);
7105         return ret;
7106 }
7107
7108 /*
7109  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7110  * some are hints:
7111  * - refs              - if found, increase refs
7112  * - is_root           - if found, set
7113  * - content_checked   - if found, set
7114  * - owner_ref_checked - if found, set
7115  *
7116  * If not found, create a new one, initialize and insert.
7117  */
7118 static int add_extent_rec(struct cache_tree *extent_cache,
7119                 struct extent_record *tmpl)
7120 {
7121         struct extent_record *rec;
7122         struct cache_extent *cache;
7123         int ret = 0;
7124         int dup = 0;
7125
7126         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7127         if (cache) {
7128                 rec = container_of(cache, struct extent_record, cache);
7129                 if (tmpl->refs)
7130                         rec->refs++;
7131                 if (rec->nr == 1)
7132                         rec->nr = max(tmpl->nr, tmpl->max_size);
7133
7134                 /*
7135                  * We need to make sure to reset nr to whatever the extent
7136                  * record says was the real size, this way we can compare it to
7137                  * the backrefs.
7138                  */
7139                 if (tmpl->found_rec) {
7140                         if (tmpl->start != rec->start || rec->found_rec) {
7141                                 struct extent_record *tmp;
7142
7143                                 dup = 1;
7144                                 if (list_empty(&rec->list))
7145                                         list_add_tail(&rec->list,
7146                                                       &duplicate_extents);
7147
7148                                 /*
7149                                  * We have to do this song and dance in case we
7150                                  * find an extent record that falls inside of
7151                                  * our current extent record but does not have
7152                                  * the same objectid.
7153                                  */
7154                                 tmp = malloc(sizeof(*tmp));
7155                                 if (!tmp)
7156                                         return -ENOMEM;
7157                                 tmp->start = tmpl->start;
7158                                 tmp->max_size = tmpl->max_size;
7159                                 tmp->nr = tmpl->nr;
7160                                 tmp->found_rec = 1;
7161                                 tmp->metadata = tmpl->metadata;
7162                                 tmp->extent_item_refs = tmpl->extent_item_refs;
7163                                 INIT_LIST_HEAD(&tmp->list);
7164                                 list_add_tail(&tmp->list, &rec->dups);
7165                                 rec->num_duplicates++;
7166                         } else {
7167                                 rec->nr = tmpl->nr;
7168                                 rec->found_rec = 1;
7169                         }
7170                 }
7171
7172                 if (tmpl->extent_item_refs && !dup) {
7173                         if (rec->extent_item_refs) {
7174                                 fprintf(stderr, "block %llu rec "
7175                                         "extent_item_refs %llu, passed %llu\n",
7176                                         (unsigned long long)tmpl->start,
7177                                         (unsigned long long)
7178                                                         rec->extent_item_refs,
7179                                         (unsigned long long)tmpl->extent_item_refs);
7180                         }
7181                         rec->extent_item_refs = tmpl->extent_item_refs;
7182                 }
7183                 if (tmpl->is_root)
7184                         rec->is_root = 1;
7185                 if (tmpl->content_checked)
7186                         rec->content_checked = 1;
7187                 if (tmpl->owner_ref_checked)
7188                         rec->owner_ref_checked = 1;
7189                 memcpy(&rec->parent_key, &tmpl->parent_key,
7190                                 sizeof(tmpl->parent_key));
7191                 if (tmpl->parent_generation)
7192                         rec->parent_generation = tmpl->parent_generation;
7193                 if (rec->max_size < tmpl->max_size)
7194                         rec->max_size = tmpl->max_size;
7195
7196                 /*
7197                  * A metadata extent can't cross stripe_len boundary, otherwise
7198                  * kernel scrub won't be able to handle it.
7199                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7200                  * it.
7201                  */
7202                 if (tmpl->metadata)
7203                         rec->crossing_stripes = check_crossing_stripes(
7204                                         global_info, rec->start,
7205                                         global_info->nodesize);
7206                 check_extent_type(rec);
7207                 maybe_free_extent_rec(extent_cache, rec);
7208                 return ret;
7209         }
7210
7211         ret = add_extent_rec_nolookup(extent_cache, tmpl);
7212
7213         return ret;
7214 }
7215
7216 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7217                             u64 parent, u64 root, int found_ref)
7218 {
7219         struct extent_record *rec;
7220         struct tree_backref *back;
7221         struct cache_extent *cache;
7222         int ret;
7223         bool insert = false;
7224
7225         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7226         if (!cache) {
7227                 struct extent_record tmpl;
7228
7229                 memset(&tmpl, 0, sizeof(tmpl));
7230                 tmpl.start = bytenr;
7231                 tmpl.nr = 1;
7232                 tmpl.metadata = 1;
7233                 tmpl.max_size = 1;
7234
7235                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7236                 if (ret)
7237                         return ret;
7238
7239                 /* really a bug in cache_extent implement now */
7240                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7241                 if (!cache)
7242                         return -ENOENT;
7243         }
7244
7245         rec = container_of(cache, struct extent_record, cache);
7246         if (rec->start != bytenr) {
7247                 /*
7248                  * Several cause, from unaligned bytenr to over lapping extents
7249                  */
7250                 return -EEXIST;
7251         }
7252
7253         back = find_tree_backref(rec, parent, root);
7254         if (!back) {
7255                 back = alloc_tree_backref(rec, parent, root);
7256                 if (!back)
7257                         return -ENOMEM;
7258                 insert = true;
7259         }
7260
7261         if (found_ref) {
7262                 if (back->node.found_ref) {
7263                         fprintf(stderr, "Extent back ref already exists "
7264                                 "for %llu parent %llu root %llu \n",
7265                                 (unsigned long long)bytenr,
7266                                 (unsigned long long)parent,
7267                                 (unsigned long long)root);
7268                 }
7269                 back->node.found_ref = 1;
7270         } else {
7271                 if (back->node.found_extent_tree) {
7272                         fprintf(stderr, "Extent back ref already exists "
7273                                 "for %llu parent %llu root %llu \n",
7274                                 (unsigned long long)bytenr,
7275                                 (unsigned long long)parent,
7276                                 (unsigned long long)root);
7277                 }
7278                 back->node.found_extent_tree = 1;
7279         }
7280         if (insert)
7281                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7282                         compare_extent_backref));
7283         check_extent_type(rec);
7284         maybe_free_extent_rec(extent_cache, rec);
7285         return 0;
7286 }
7287
7288 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7289                             u64 parent, u64 root, u64 owner, u64 offset,
7290                             u32 num_refs, int found_ref, u64 max_size)
7291 {
7292         struct extent_record *rec;
7293         struct data_backref *back;
7294         struct cache_extent *cache;
7295         int ret;
7296         bool insert = false;
7297
7298         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7299         if (!cache) {
7300                 struct extent_record tmpl;
7301
7302                 memset(&tmpl, 0, sizeof(tmpl));
7303                 tmpl.start = bytenr;
7304                 tmpl.nr = 1;
7305                 tmpl.max_size = max_size;
7306
7307                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7308                 if (ret)
7309                         return ret;
7310
7311                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7312                 if (!cache)
7313                         abort();
7314         }
7315
7316         rec = container_of(cache, struct extent_record, cache);
7317         if (rec->max_size < max_size)
7318                 rec->max_size = max_size;
7319
7320         /*
7321          * If found_ref is set then max_size is the real size and must match the
7322          * existing refs.  So if we have already found a ref then we need to
7323          * make sure that this ref matches the existing one, otherwise we need
7324          * to add a new backref so we can notice that the backrefs don't match
7325          * and we need to figure out who is telling the truth.  This is to
7326          * account for that awful fsync bug I introduced where we'd end up with
7327          * a btrfs_file_extent_item that would have its length include multiple
7328          * prealloc extents or point inside of a prealloc extent.
7329          */
7330         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7331                                  bytenr, max_size);
7332         if (!back) {
7333                 back = alloc_data_backref(rec, parent, root, owner, offset,
7334                                           max_size);
7335                 BUG_ON(!back);
7336                 insert = true;
7337         }
7338
7339         if (found_ref) {
7340                 BUG_ON(num_refs != 1);
7341                 if (back->node.found_ref)
7342                         BUG_ON(back->bytes != max_size);
7343                 back->node.found_ref = 1;
7344                 back->found_ref += 1;
7345                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7346                         back->bytes = max_size;
7347                         back->disk_bytenr = bytenr;
7348
7349                         /* Need to reinsert if not already in the tree */
7350                         if (!insert) {
7351                                 rb_erase(&back->node.node, &rec->backref_tree);
7352                                 insert = true;
7353                         }
7354                 }
7355                 rec->refs += 1;
7356                 rec->content_checked = 1;
7357                 rec->owner_ref_checked = 1;
7358         } else {
7359                 if (back->node.found_extent_tree) {
7360                         fprintf(stderr, "Extent back ref already exists "
7361                                 "for %llu parent %llu root %llu "
7362                                 "owner %llu offset %llu num_refs %lu\n",
7363                                 (unsigned long long)bytenr,
7364                                 (unsigned long long)parent,
7365                                 (unsigned long long)root,
7366                                 (unsigned long long)owner,
7367                                 (unsigned long long)offset,
7368                                 (unsigned long)num_refs);
7369                 }
7370                 back->num_refs = num_refs;
7371                 back->node.found_extent_tree = 1;
7372         }
7373         if (insert)
7374                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7375                         compare_extent_backref));
7376
7377         maybe_free_extent_rec(extent_cache, rec);
7378         return 0;
7379 }
7380
7381 static int add_pending(struct cache_tree *pending,
7382                        struct cache_tree *seen, u64 bytenr, u32 size)
7383 {
7384         int ret;
7385         ret = add_cache_extent(seen, bytenr, size);
7386         if (ret)
7387                 return ret;
7388         add_cache_extent(pending, bytenr, size);
7389         return 0;
7390 }
7391
7392 static int pick_next_pending(struct cache_tree *pending,
7393                         struct cache_tree *reada,
7394                         struct cache_tree *nodes,
7395                         u64 last, struct block_info *bits, int bits_nr,
7396                         int *reada_bits)
7397 {
7398         unsigned long node_start = last;
7399         struct cache_extent *cache;
7400         int ret;
7401
7402         cache = search_cache_extent(reada, 0);
7403         if (cache) {
7404                 bits[0].start = cache->start;
7405                 bits[0].size = cache->size;
7406                 *reada_bits = 1;
7407                 return 1;
7408         }
7409         *reada_bits = 0;
7410         if (node_start > 32768)
7411                 node_start -= 32768;
7412
7413         cache = search_cache_extent(nodes, node_start);
7414         if (!cache)
7415                 cache = search_cache_extent(nodes, 0);
7416
7417         if (!cache) {
7418                  cache = search_cache_extent(pending, 0);
7419                  if (!cache)
7420                          return 0;
7421                  ret = 0;
7422                  do {
7423                          bits[ret].start = cache->start;
7424                          bits[ret].size = cache->size;
7425                          cache = next_cache_extent(cache);
7426                          ret++;
7427                  } while (cache && ret < bits_nr);
7428                  return ret;
7429         }
7430
7431         ret = 0;
7432         do {
7433                 bits[ret].start = cache->start;
7434                 bits[ret].size = cache->size;
7435                 cache = next_cache_extent(cache);
7436                 ret++;
7437         } while (cache && ret < bits_nr);
7438
7439         if (bits_nr - ret > 8) {
7440                 u64 lookup = bits[0].start + bits[0].size;
7441                 struct cache_extent *next;
7442                 next = search_cache_extent(pending, lookup);
7443                 while(next) {
7444                         if (next->start - lookup > 32768)
7445                                 break;
7446                         bits[ret].start = next->start;
7447                         bits[ret].size = next->size;
7448                         lookup = next->start + next->size;
7449                         ret++;
7450                         if (ret == bits_nr)
7451                                 break;
7452                         next = next_cache_extent(next);
7453                         if (!next)
7454                                 break;
7455                 }
7456         }
7457         return ret;
7458 }
7459
7460 static void free_chunk_record(struct cache_extent *cache)
7461 {
7462         struct chunk_record *rec;
7463
7464         rec = container_of(cache, struct chunk_record, cache);
7465         list_del_init(&rec->list);
7466         list_del_init(&rec->dextents);
7467         free(rec);
7468 }
7469
7470 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7471 {
7472         cache_tree_free_extents(chunk_cache, free_chunk_record);
7473 }
7474
7475 static void free_device_record(struct rb_node *node)
7476 {
7477         struct device_record *rec;
7478
7479         rec = container_of(node, struct device_record, node);
7480         free(rec);
7481 }
7482
7483 FREE_RB_BASED_TREE(device_cache, free_device_record);
7484
7485 int insert_block_group_record(struct block_group_tree *tree,
7486                               struct block_group_record *bg_rec)
7487 {
7488         int ret;
7489
7490         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7491         if (ret)
7492                 return ret;
7493
7494         list_add_tail(&bg_rec->list, &tree->block_groups);
7495         return 0;
7496 }
7497
7498 static void free_block_group_record(struct cache_extent *cache)
7499 {
7500         struct block_group_record *rec;
7501
7502         rec = container_of(cache, struct block_group_record, cache);
7503         list_del_init(&rec->list);
7504         free(rec);
7505 }
7506
7507 void free_block_group_tree(struct block_group_tree *tree)
7508 {
7509         cache_tree_free_extents(&tree->tree, free_block_group_record);
7510 }
7511
7512 int insert_device_extent_record(struct device_extent_tree *tree,
7513                                 struct device_extent_record *de_rec)
7514 {
7515         int ret;
7516
7517         /*
7518          * Device extent is a bit different from the other extents, because
7519          * the extents which belong to the different devices may have the
7520          * same start and size, so we need use the special extent cache
7521          * search/insert functions.
7522          */
7523         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7524         if (ret)
7525                 return ret;
7526
7527         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7528         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
7529         return 0;
7530 }
7531
7532 static void free_device_extent_record(struct cache_extent *cache)
7533 {
7534         struct device_extent_record *rec;
7535
7536         rec = container_of(cache, struct device_extent_record, cache);
7537         if (!list_empty(&rec->chunk_list))
7538                 list_del_init(&rec->chunk_list);
7539         if (!list_empty(&rec->device_list))
7540                 list_del_init(&rec->device_list);
7541         free(rec);
7542 }
7543
7544 void free_device_extent_tree(struct device_extent_tree *tree)
7545 {
7546         cache_tree_free_extents(&tree->tree, free_device_extent_record);
7547 }
7548
7549 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7550 static int process_extent_ref_v0(struct cache_tree *extent_cache,
7551                                  struct extent_buffer *leaf, int slot)
7552 {
7553         struct btrfs_extent_ref_v0 *ref0;
7554         struct btrfs_key key;
7555         int ret;
7556
7557         btrfs_item_key_to_cpu(leaf, &key, slot);
7558         ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
7559         if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
7560                 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
7561                                 0, 0);
7562         } else {
7563                 ret = add_data_backref(extent_cache, key.objectid, key.offset,
7564                                 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
7565         }
7566         return ret;
7567 }
7568 #endif
7569
7570 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7571                                             struct btrfs_key *key,
7572                                             int slot)
7573 {
7574         struct btrfs_chunk *ptr;
7575         struct chunk_record *rec;
7576         int num_stripes, i;
7577
7578         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7579         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7580
7581         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7582         if (!rec) {
7583                 fprintf(stderr, "memory allocation failed\n");
7584                 exit(-1);
7585         }
7586
7587         INIT_LIST_HEAD(&rec->list);
7588         INIT_LIST_HEAD(&rec->dextents);
7589         rec->bg_rec = NULL;
7590
7591         rec->cache.start = key->offset;
7592         rec->cache.size = btrfs_chunk_length(leaf, ptr);
7593
7594         rec->generation = btrfs_header_generation(leaf);
7595
7596         rec->objectid = key->objectid;
7597         rec->type = key->type;
7598         rec->offset = key->offset;
7599
7600         rec->length = rec->cache.size;
7601         rec->owner = btrfs_chunk_owner(leaf, ptr);
7602         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7603         rec->type_flags = btrfs_chunk_type(leaf, ptr);
7604         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7605         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7606         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7607         rec->num_stripes = num_stripes;
7608         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
7609
7610         for (i = 0; i < rec->num_stripes; ++i) {
7611                 rec->stripes[i].devid =
7612                         btrfs_stripe_devid_nr(leaf, ptr, i);
7613                 rec->stripes[i].offset =
7614                         btrfs_stripe_offset_nr(leaf, ptr, i);
7615                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7616                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
7617                                 BTRFS_UUID_SIZE);
7618         }
7619
7620         return rec;
7621 }
7622
7623 static int process_chunk_item(struct cache_tree *chunk_cache,
7624                               struct btrfs_key *key, struct extent_buffer *eb,
7625                               int slot)
7626 {
7627         struct chunk_record *rec;
7628         struct btrfs_chunk *chunk;
7629         int ret = 0;
7630
7631         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7632         /*
7633          * Do extra check for this chunk item,
7634          *
7635          * It's still possible one can craft a leaf with CHUNK_ITEM, with
7636          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7637          * and owner<->key_type check.
7638          */
7639         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7640                                       key->offset);
7641         if (ret < 0) {
7642                 error("chunk(%llu, %llu) is not valid, ignore it",
7643                       key->offset, btrfs_chunk_length(eb, chunk));
7644                 return 0;
7645         }
7646         rec = btrfs_new_chunk_record(eb, key, slot);
7647         ret = insert_cache_extent(chunk_cache, &rec->cache);
7648         if (ret) {
7649                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7650                         rec->offset, rec->length);
7651                 free(rec);
7652         }
7653
7654         return ret;
7655 }
7656
7657 static int process_device_item(struct rb_root *dev_cache,
7658                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7659 {
7660         struct btrfs_dev_item *ptr;
7661         struct device_record *rec;
7662         int ret = 0;
7663
7664         ptr = btrfs_item_ptr(eb,
7665                 slot, struct btrfs_dev_item);
7666
7667         rec = malloc(sizeof(*rec));
7668         if (!rec) {
7669                 fprintf(stderr, "memory allocation failed\n");
7670                 return -ENOMEM;
7671         }
7672
7673         rec->devid = key->offset;
7674         rec->generation = btrfs_header_generation(eb);
7675
7676         rec->objectid = key->objectid;
7677         rec->type = key->type;
7678         rec->offset = key->offset;
7679
7680         rec->devid = btrfs_device_id(eb, ptr);
7681         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7682         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7683
7684         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7685         if (ret) {
7686                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
7687                 free(rec);
7688         }
7689
7690         return ret;
7691 }
7692
7693 struct block_group_record *
7694 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7695                              int slot)
7696 {
7697         struct btrfs_block_group_item *ptr;
7698         struct block_group_record *rec;
7699
7700         rec = calloc(1, sizeof(*rec));
7701         if (!rec) {
7702                 fprintf(stderr, "memory allocation failed\n");
7703                 exit(-1);
7704         }
7705
7706         rec->cache.start = key->objectid;
7707         rec->cache.size = key->offset;
7708
7709         rec->generation = btrfs_header_generation(leaf);
7710
7711         rec->objectid = key->objectid;
7712         rec->type = key->type;
7713         rec->offset = key->offset;
7714
7715         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7716         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7717
7718         INIT_LIST_HEAD(&rec->list);
7719
7720         return rec;
7721 }
7722
7723 static int process_block_group_item(struct block_group_tree *block_group_cache,
7724                                     struct btrfs_key *key,
7725                                     struct extent_buffer *eb, int slot)
7726 {
7727         struct block_group_record *rec;
7728         int ret = 0;
7729
7730         rec = btrfs_new_block_group_record(eb, key, slot);
7731         ret = insert_block_group_record(block_group_cache, rec);
7732         if (ret) {
7733                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7734                         rec->objectid, rec->offset);
7735                 free(rec);
7736         }
7737
7738         return ret;
7739 }
7740
7741 struct device_extent_record *
7742 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7743                                struct btrfs_key *key, int slot)
7744 {
7745         struct device_extent_record *rec;
7746         struct btrfs_dev_extent *ptr;
7747
7748         rec = calloc(1, sizeof(*rec));
7749         if (!rec) {
7750                 fprintf(stderr, "memory allocation failed\n");
7751                 exit(-1);
7752         }
7753
7754         rec->cache.objectid = key->objectid;
7755         rec->cache.start = key->offset;
7756
7757         rec->generation = btrfs_header_generation(leaf);
7758
7759         rec->objectid = key->objectid;
7760         rec->type = key->type;
7761         rec->offset = key->offset;
7762
7763         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7764         rec->chunk_objecteid =
7765                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7766         rec->chunk_offset =
7767                 btrfs_dev_extent_chunk_offset(leaf, ptr);
7768         rec->length = btrfs_dev_extent_length(leaf, ptr);
7769         rec->cache.size = rec->length;
7770
7771         INIT_LIST_HEAD(&rec->chunk_list);
7772         INIT_LIST_HEAD(&rec->device_list);
7773
7774         return rec;
7775 }
7776
7777 static int
7778 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7779                            struct btrfs_key *key, struct extent_buffer *eb,
7780                            int slot)
7781 {
7782         struct device_extent_record *rec;
7783         int ret;
7784
7785         rec = btrfs_new_device_extent_record(eb, key, slot);
7786         ret = insert_device_extent_record(dev_extent_cache, rec);
7787         if (ret) {
7788                 fprintf(stderr,
7789                         "Device extent[%llu, %llu, %llu] existed.\n",
7790                         rec->objectid, rec->offset, rec->length);
7791                 free(rec);
7792         }
7793
7794         return ret;
7795 }
7796
7797 static int process_extent_item(struct btrfs_root *root,
7798                                struct cache_tree *extent_cache,
7799                                struct extent_buffer *eb, int slot)
7800 {
7801         struct btrfs_extent_item *ei;
7802         struct btrfs_extent_inline_ref *iref;
7803         struct btrfs_extent_data_ref *dref;
7804         struct btrfs_shared_data_ref *sref;
7805         struct btrfs_key key;
7806         struct extent_record tmpl;
7807         unsigned long end;
7808         unsigned long ptr;
7809         int ret;
7810         int type;
7811         u32 item_size = btrfs_item_size_nr(eb, slot);
7812         u64 refs = 0;
7813         u64 offset;
7814         u64 num_bytes;
7815         int metadata = 0;
7816
7817         btrfs_item_key_to_cpu(eb, &key, slot);
7818
7819         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7820                 metadata = 1;
7821                 num_bytes = root->fs_info->nodesize;
7822         } else {
7823                 num_bytes = key.offset;
7824         }
7825
7826         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7827                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7828                       key.objectid, root->fs_info->sectorsize);
7829                 return -EIO;
7830         }
7831         if (item_size < sizeof(*ei)) {
7832 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7833                 struct btrfs_extent_item_v0 *ei0;
7834                 if (item_size != sizeof(*ei0)) {
7835                         error(
7836         "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
7837                                 key.objectid, key.type, key.offset,
7838                                 btrfs_header_bytenr(eb), slot);
7839                         BUG();
7840                 }
7841                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7842                 refs = btrfs_extent_refs_v0(eb, ei0);
7843 #else
7844                 BUG();
7845 #endif
7846                 memset(&tmpl, 0, sizeof(tmpl));
7847                 tmpl.start = key.objectid;
7848                 tmpl.nr = num_bytes;
7849                 tmpl.extent_item_refs = refs;
7850                 tmpl.metadata = metadata;
7851                 tmpl.found_rec = 1;
7852                 tmpl.max_size = num_bytes;
7853
7854                 return add_extent_rec(extent_cache, &tmpl);
7855         }
7856
7857         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7858         refs = btrfs_extent_refs(eb, ei);
7859         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7860                 metadata = 1;
7861         else
7862                 metadata = 0;
7863         if (metadata && num_bytes != root->fs_info->nodesize) {
7864                 error("ignore invalid metadata extent, length %llu does not equal to %u",
7865                       num_bytes, root->fs_info->nodesize);
7866                 return -EIO;
7867         }
7868         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7869                 error("ignore invalid data extent, length %llu is not aligned to %u",
7870                       num_bytes, root->fs_info->sectorsize);
7871                 return -EIO;
7872         }
7873
7874         memset(&tmpl, 0, sizeof(tmpl));
7875         tmpl.start = key.objectid;
7876         tmpl.nr = num_bytes;
7877         tmpl.extent_item_refs = refs;
7878         tmpl.metadata = metadata;
7879         tmpl.found_rec = 1;
7880         tmpl.max_size = num_bytes;
7881         add_extent_rec(extent_cache, &tmpl);
7882
7883         ptr = (unsigned long)(ei + 1);
7884         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7885             key.type == BTRFS_EXTENT_ITEM_KEY)
7886                 ptr += sizeof(struct btrfs_tree_block_info);
7887
7888         end = (unsigned long)ei + item_size;
7889         while (ptr < end) {
7890                 iref = (struct btrfs_extent_inline_ref *)ptr;
7891                 type = btrfs_extent_inline_ref_type(eb, iref);
7892                 offset = btrfs_extent_inline_ref_offset(eb, iref);
7893                 switch (type) {
7894                 case BTRFS_TREE_BLOCK_REF_KEY:
7895                         ret = add_tree_backref(extent_cache, key.objectid,
7896                                         0, offset, 0);
7897                         if (ret < 0)
7898                                 error(
7899                         "add_tree_backref failed (extent items tree block): %s",
7900                                       strerror(-ret));
7901                         break;
7902                 case BTRFS_SHARED_BLOCK_REF_KEY:
7903                         ret = add_tree_backref(extent_cache, key.objectid,
7904                                         offset, 0, 0);
7905                         if (ret < 0)
7906                                 error(
7907                         "add_tree_backref failed (extent items shared block): %s",
7908                                       strerror(-ret));
7909                         break;
7910                 case BTRFS_EXTENT_DATA_REF_KEY:
7911                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7912                         add_data_backref(extent_cache, key.objectid, 0,
7913                                         btrfs_extent_data_ref_root(eb, dref),
7914                                         btrfs_extent_data_ref_objectid(eb,
7915                                                                        dref),
7916                                         btrfs_extent_data_ref_offset(eb, dref),
7917                                         btrfs_extent_data_ref_count(eb, dref),
7918                                         0, num_bytes);
7919                         break;
7920                 case BTRFS_SHARED_DATA_REF_KEY:
7921                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
7922                         add_data_backref(extent_cache, key.objectid, offset,
7923                                         0, 0, 0,
7924                                         btrfs_shared_data_ref_count(eb, sref),
7925                                         0, num_bytes);
7926                         break;
7927                 default:
7928                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7929                                 key.objectid, key.type, num_bytes);
7930                         goto out;
7931                 }
7932                 ptr += btrfs_extent_inline_ref_size(type);
7933         }
7934         WARN_ON(ptr > end);
7935 out:
7936         return 0;
7937 }
7938
7939 static int check_cache_range(struct btrfs_root *root,
7940                              struct btrfs_block_group_cache *cache,
7941                              u64 offset, u64 bytes)
7942 {
7943         struct btrfs_free_space *entry;
7944         u64 *logical;
7945         u64 bytenr;
7946         int stripe_len;
7947         int i, nr, ret;
7948
7949         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7950                 bytenr = btrfs_sb_offset(i);
7951                 ret = btrfs_rmap_block(root->fs_info,
7952                                        cache->key.objectid, bytenr, 0,
7953                                        &logical, &nr, &stripe_len);
7954                 if (ret)
7955                         return ret;
7956
7957                 while (nr--) {
7958                         if (logical[nr] + stripe_len <= offset)
7959                                 continue;
7960                         if (offset + bytes <= logical[nr])
7961                                 continue;
7962                         if (logical[nr] == offset) {
7963                                 if (stripe_len >= bytes) {
7964                                         free(logical);
7965                                         return 0;
7966                                 }
7967                                 bytes -= stripe_len;
7968                                 offset += stripe_len;
7969                         } else if (logical[nr] < offset) {
7970                                 if (logical[nr] + stripe_len >=
7971                                     offset + bytes) {
7972                                         free(logical);
7973                                         return 0;
7974                                 }
7975                                 bytes = (offset + bytes) -
7976                                         (logical[nr] + stripe_len);
7977                                 offset = logical[nr] + stripe_len;
7978                         } else {
7979                                 /*
7980                                  * Could be tricky, the super may land in the
7981                                  * middle of the area we're checking.  First
7982                                  * check the easiest case, it's at the end.
7983                                  */
7984                                 if (logical[nr] + stripe_len >=
7985                                     bytes + offset) {
7986                                         bytes = logical[nr] - offset;
7987                                         continue;
7988                                 }
7989
7990                                 /* Check the left side */
7991                                 ret = check_cache_range(root, cache,
7992                                                         offset,
7993                                                         logical[nr] - offset);
7994                                 if (ret) {
7995                                         free(logical);
7996                                         return ret;
7997                                 }
7998
7999                                 /* Now we continue with the right side */
8000                                 bytes = (offset + bytes) -
8001                                         (logical[nr] + stripe_len);
8002                                 offset = logical[nr] + stripe_len;
8003                         }
8004                 }
8005
8006                 free(logical);
8007         }
8008
8009         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8010         if (!entry) {
8011                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8012                         offset, offset+bytes);
8013                 return -EINVAL;
8014         }
8015
8016         if (entry->offset != offset) {
8017                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8018                         entry->offset);
8019                 return -EINVAL;
8020         }
8021
8022         if (entry->bytes != bytes) {
8023                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8024                         bytes, entry->bytes, offset);
8025                 return -EINVAL;
8026         }
8027
8028         unlink_free_space(cache->free_space_ctl, entry);
8029         free(entry);
8030         return 0;
8031 }
8032
8033 static int verify_space_cache(struct btrfs_root *root,
8034                               struct btrfs_block_group_cache *cache)
8035 {
8036         struct btrfs_path path;
8037         struct extent_buffer *leaf;
8038         struct btrfs_key key;
8039         u64 last;
8040         int ret = 0;
8041
8042         root = root->fs_info->extent_root;
8043
8044         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8045
8046         btrfs_init_path(&path);
8047         key.objectid = last;
8048         key.offset = 0;
8049         key.type = BTRFS_EXTENT_ITEM_KEY;
8050         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8051         if (ret < 0)
8052                 goto out;
8053         ret = 0;
8054         while (1) {
8055                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8056                         ret = btrfs_next_leaf(root, &path);
8057                         if (ret < 0)
8058                                 goto out;
8059                         if (ret > 0) {
8060                                 ret = 0;
8061                                 break;
8062                         }
8063                 }
8064                 leaf = path.nodes[0];
8065                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8066                 if (key.objectid >= cache->key.offset + cache->key.objectid)
8067                         break;
8068                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8069                     key.type != BTRFS_METADATA_ITEM_KEY) {
8070                         path.slots[0]++;
8071                         continue;
8072                 }
8073
8074                 if (last == key.objectid) {
8075                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
8076                                 last = key.objectid + key.offset;
8077                         else
8078                                 last = key.objectid + root->fs_info->nodesize;
8079                         path.slots[0]++;
8080                         continue;
8081                 }
8082
8083                 ret = check_cache_range(root, cache, last,
8084                                         key.objectid - last);
8085                 if (ret)
8086                         break;
8087                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8088                         last = key.objectid + key.offset;
8089                 else
8090                         last = key.objectid + root->fs_info->nodesize;
8091                 path.slots[0]++;
8092         }
8093
8094         if (last < cache->key.objectid + cache->key.offset)
8095                 ret = check_cache_range(root, cache, last,
8096                                         cache->key.objectid +
8097                                         cache->key.offset - last);
8098
8099 out:
8100         btrfs_release_path(&path);
8101
8102         if (!ret &&
8103             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8104                 fprintf(stderr, "There are still entries left in the space "
8105                         "cache\n");
8106                 ret = -EINVAL;
8107         }
8108
8109         return ret;
8110 }
8111
8112 static int check_space_cache(struct btrfs_root *root)
8113 {
8114         struct btrfs_block_group_cache *cache;
8115         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8116         int ret;
8117         int error = 0;
8118
8119         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8120             btrfs_super_generation(root->fs_info->super_copy) !=
8121             btrfs_super_cache_generation(root->fs_info->super_copy)) {
8122                 printf("cache and super generation don't match, space cache "
8123                        "will be invalidated\n");
8124                 return 0;
8125         }
8126
8127         if (ctx.progress_enabled) {
8128                 ctx.tp = TASK_FREE_SPACE;
8129                 task_start(ctx.info);
8130         }
8131
8132         while (1) {
8133                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
8134                 if (!cache)
8135                         break;
8136
8137                 start = cache->key.objectid + cache->key.offset;
8138                 if (!cache->free_space_ctl) {
8139                         if (btrfs_init_free_space_ctl(cache,
8140                                                 root->fs_info->sectorsize)) {
8141                                 ret = -ENOMEM;
8142                                 break;
8143                         }
8144                 } else {
8145                         btrfs_remove_free_space_cache(cache);
8146                 }
8147
8148                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8149                         ret = exclude_super_stripes(root, cache);
8150                         if (ret) {
8151                                 fprintf(stderr, "could not exclude super stripes: %s\n",
8152                                         strerror(-ret));
8153                                 error++;
8154                                 continue;
8155                         }
8156                         ret = load_free_space_tree(root->fs_info, cache);
8157                         free_excluded_extents(root, cache);
8158                         if (ret < 0) {
8159                                 fprintf(stderr, "could not load free space tree: %s\n",
8160                                         strerror(-ret));
8161                                 error++;
8162                                 continue;
8163                         }
8164                         error += ret;
8165                 } else {
8166                         ret = load_free_space_cache(root->fs_info, cache);
8167                         if (!ret)
8168                                 continue;
8169                 }
8170
8171                 ret = verify_space_cache(root, cache);
8172                 if (ret) {
8173                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
8174                                 cache->key.objectid);
8175                         error++;
8176                 }
8177         }
8178
8179         task_stop(ctx.info);
8180
8181         return error ? -EINVAL : 0;
8182 }
8183
8184 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8185                         u64 num_bytes, unsigned long leaf_offset,
8186                         struct extent_buffer *eb) {
8187
8188         struct btrfs_fs_info *fs_info = root->fs_info;
8189         u64 offset = 0;
8190         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8191         char *data;
8192         unsigned long csum_offset;
8193         u32 csum;
8194         u32 csum_expected;
8195         u64 read_len;
8196         u64 data_checked = 0;
8197         u64 tmp;
8198         int ret = 0;
8199         int mirror;
8200         int num_copies;
8201
8202         if (num_bytes % fs_info->sectorsize)
8203                 return -EINVAL;
8204
8205         data = malloc(num_bytes);
8206         if (!data)
8207                 return -ENOMEM;
8208
8209         while (offset < num_bytes) {
8210                 mirror = 0;
8211 again:
8212                 read_len = num_bytes - offset;
8213                 /* read as much space once a time */
8214                 ret = read_extent_data(fs_info, data + offset,
8215                                 bytenr + offset, &read_len, mirror);
8216                 if (ret)
8217                         goto out;
8218                 data_checked = 0;
8219                 /* verify every 4k data's checksum */
8220                 while (data_checked < read_len) {
8221                         csum = ~(u32)0;
8222                         tmp = offset + data_checked;
8223
8224                         csum = btrfs_csum_data((char *)data + tmp,
8225                                                csum, fs_info->sectorsize);
8226                         btrfs_csum_final(csum, (u8 *)&csum);
8227
8228                         csum_offset = leaf_offset +
8229                                  tmp / fs_info->sectorsize * csum_size;
8230                         read_extent_buffer(eb, (char *)&csum_expected,
8231                                            csum_offset, csum_size);
8232                         /* try another mirror */
8233                         if (csum != csum_expected) {
8234                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8235                                                 mirror, bytenr + tmp,
8236                                                 csum, csum_expected);
8237                                 num_copies = btrfs_num_copies(root->fs_info,
8238                                                 bytenr, num_bytes);
8239                                 if (mirror < num_copies - 1) {
8240                                         mirror += 1;
8241                                         goto again;
8242                                 }
8243                         }
8244                         data_checked += fs_info->sectorsize;
8245                 }
8246                 offset += read_len;
8247         }
8248 out:
8249         free(data);
8250         return ret;
8251 }
8252
8253 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8254                                u64 num_bytes)
8255 {
8256         struct btrfs_path path;
8257         struct extent_buffer *leaf;
8258         struct btrfs_key key;
8259         int ret;
8260
8261         btrfs_init_path(&path);
8262         key.objectid = bytenr;
8263         key.type = BTRFS_EXTENT_ITEM_KEY;
8264         key.offset = (u64)-1;
8265
8266 again:
8267         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8268                                 0, 0);
8269         if (ret < 0) {
8270                 fprintf(stderr, "Error looking up extent record %d\n", ret);
8271                 btrfs_release_path(&path);
8272                 return ret;
8273         } else if (ret) {
8274                 if (path.slots[0] > 0) {
8275                         path.slots[0]--;
8276                 } else {
8277                         ret = btrfs_prev_leaf(root, &path);
8278                         if (ret < 0) {
8279                                 goto out;
8280                         } else if (ret > 0) {
8281                                 ret = 0;
8282                                 goto out;
8283                         }
8284                 }
8285         }
8286
8287         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8288
8289         /*
8290          * Block group items come before extent items if they have the same
8291          * bytenr, so walk back one more just in case.  Dear future traveller,
8292          * first congrats on mastering time travel.  Now if it's not too much
8293          * trouble could you go back to 2006 and tell Chris to make the
8294          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8295          * EXTENT_ITEM_KEY please?
8296          */
8297         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8298                 if (path.slots[0] > 0) {
8299                         path.slots[0]--;
8300                 } else {
8301                         ret = btrfs_prev_leaf(root, &path);
8302                         if (ret < 0) {
8303                                 goto out;
8304                         } else if (ret > 0) {
8305                                 ret = 0;
8306                                 goto out;
8307                         }
8308                 }
8309                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8310         }
8311
8312         while (num_bytes) {
8313                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8314                         ret = btrfs_next_leaf(root, &path);
8315                         if (ret < 0) {
8316                                 fprintf(stderr, "Error going to next leaf "
8317                                         "%d\n", ret);
8318                                 btrfs_release_path(&path);
8319                                 return ret;
8320                         } else if (ret) {
8321                                 break;
8322                         }
8323                 }
8324                 leaf = path.nodes[0];
8325                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8326                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8327                         path.slots[0]++;
8328                         continue;
8329                 }
8330                 if (key.objectid + key.offset < bytenr) {
8331                         path.slots[0]++;
8332                         continue;
8333                 }
8334                 if (key.objectid > bytenr + num_bytes)
8335                         break;
8336
8337                 if (key.objectid == bytenr) {
8338                         if (key.offset >= num_bytes) {
8339                                 num_bytes = 0;
8340                                 break;
8341                         }
8342                         num_bytes -= key.offset;
8343                         bytenr += key.offset;
8344                 } else if (key.objectid < bytenr) {
8345                         if (key.objectid + key.offset >= bytenr + num_bytes) {
8346                                 num_bytes = 0;
8347                                 break;
8348                         }
8349                         num_bytes = (bytenr + num_bytes) -
8350                                 (key.objectid + key.offset);
8351                         bytenr = key.objectid + key.offset;
8352                 } else {
8353                         if (key.objectid + key.offset < bytenr + num_bytes) {
8354                                 u64 new_start = key.objectid + key.offset;
8355                                 u64 new_bytes = bytenr + num_bytes - new_start;
8356
8357                                 /*
8358                                  * Weird case, the extent is in the middle of
8359                                  * our range, we'll have to search one side
8360                                  * and then the other.  Not sure if this happens
8361                                  * in real life, but no harm in coding it up
8362                                  * anyway just in case.
8363                                  */
8364                                 btrfs_release_path(&path);
8365                                 ret = check_extent_exists(root, new_start,
8366                                                           new_bytes);
8367                                 if (ret) {
8368                                         fprintf(stderr, "Right section didn't "
8369                                                 "have a record\n");
8370                                         break;
8371                                 }
8372                                 num_bytes = key.objectid - bytenr;
8373                                 goto again;
8374                         }
8375                         num_bytes = key.objectid - bytenr;
8376                 }
8377                 path.slots[0]++;
8378         }
8379         ret = 0;
8380
8381 out:
8382         if (num_bytes && !ret) {
8383                 fprintf(stderr, "There are no extents for csum range "
8384                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8385                 ret = 1;
8386         }
8387
8388         btrfs_release_path(&path);
8389         return ret;
8390 }
8391
8392 static int check_csums(struct btrfs_root *root)
8393 {
8394         struct btrfs_path path;
8395         struct extent_buffer *leaf;
8396         struct btrfs_key key;
8397         u64 offset = 0, num_bytes = 0;
8398         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8399         int errors = 0;
8400         int ret;
8401         u64 data_len;
8402         unsigned long leaf_offset;
8403
8404         root = root->fs_info->csum_root;
8405         if (!extent_buffer_uptodate(root->node)) {
8406                 fprintf(stderr, "No valid csum tree found\n");
8407                 return -ENOENT;
8408         }
8409
8410         btrfs_init_path(&path);
8411         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8412         key.type = BTRFS_EXTENT_CSUM_KEY;
8413         key.offset = 0;
8414         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8415         if (ret < 0) {
8416                 fprintf(stderr, "Error searching csum tree %d\n", ret);
8417                 btrfs_release_path(&path);
8418                 return ret;
8419         }
8420
8421         if (ret > 0 && path.slots[0])
8422                 path.slots[0]--;
8423         ret = 0;
8424
8425         while (1) {
8426                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8427                         ret = btrfs_next_leaf(root, &path);
8428                         if (ret < 0) {
8429                                 fprintf(stderr, "Error going to next leaf "
8430                                         "%d\n", ret);
8431                                 break;
8432                         }
8433                         if (ret)
8434                                 break;
8435                 }
8436                 leaf = path.nodes[0];
8437
8438                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8439                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
8440                         path.slots[0]++;
8441                         continue;
8442                 }
8443
8444                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8445                               csum_size) * root->fs_info->sectorsize;
8446                 if (!check_data_csum)
8447                         goto skip_csum_check;
8448                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8449                 ret = check_extent_csums(root, key.offset, data_len,
8450                                          leaf_offset, leaf);
8451                 if (ret)
8452                         break;
8453 skip_csum_check:
8454                 if (!num_bytes) {
8455                         offset = key.offset;
8456                 } else if (key.offset != offset + num_bytes) {
8457                         ret = check_extent_exists(root, offset, num_bytes);
8458                         if (ret) {
8459                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8460                                         "there is no extent record\n",
8461                                         offset, offset+num_bytes);
8462                                 errors++;
8463                         }
8464                         offset = key.offset;
8465                         num_bytes = 0;
8466                 }
8467                 num_bytes += data_len;
8468                 path.slots[0]++;
8469         }
8470
8471         btrfs_release_path(&path);
8472         return errors;
8473 }
8474
8475 static int is_dropped_key(struct btrfs_key *key,
8476                           struct btrfs_key *drop_key) {
8477         if (key->objectid < drop_key->objectid)
8478                 return 1;
8479         else if (key->objectid == drop_key->objectid) {
8480                 if (key->type < drop_key->type)
8481                         return 1;
8482                 else if (key->type == drop_key->type) {
8483                         if (key->offset < drop_key->offset)
8484                                 return 1;
8485                 }
8486         }
8487         return 0;
8488 }
8489
8490 /*
8491  * Here are the rules for FULL_BACKREF.
8492  *
8493  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8494  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8495  *      FULL_BACKREF set.
8496  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
8497  *    if it happened after the relocation occurred since we'll have dropped the
8498  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8499  *    have no real way to know for sure.
8500  *
8501  * We process the blocks one root at a time, and we start from the lowest root
8502  * objectid and go to the highest.  So we can just lookup the owner backref for
8503  * the record and if we don't find it then we know it doesn't exist and we have
8504  * a FULL BACKREF.
8505  *
8506  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
8507  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8508  * be set or not and then we can check later once we've gathered all the refs.
8509  */
8510 static int calc_extent_flag(struct cache_tree *extent_cache,
8511                            struct extent_buffer *buf,
8512                            struct root_item_record *ri,
8513                            u64 *flags)
8514 {
8515         struct extent_record *rec;
8516         struct cache_extent *cache;
8517         struct tree_backref *tback;
8518         u64 owner = 0;
8519
8520         cache = lookup_cache_extent(extent_cache, buf->start, 1);
8521         /* we have added this extent before */
8522         if (!cache)
8523                 return -ENOENT;
8524
8525         rec = container_of(cache, struct extent_record, cache);
8526
8527         /*
8528          * Except file/reloc tree, we can not have
8529          * FULL BACKREF MODE
8530          */
8531         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8532                 goto normal;
8533         /*
8534          * root node
8535          */
8536         if (buf->start == ri->bytenr)
8537                 goto normal;
8538
8539         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8540                 goto full_backref;
8541
8542         owner = btrfs_header_owner(buf);
8543         if (owner == ri->objectid)
8544                 goto normal;
8545
8546         tback = find_tree_backref(rec, 0, owner);
8547         if (!tback)
8548                 goto full_backref;
8549 normal:
8550         *flags = 0;
8551         if (rec->flag_block_full_backref != FLAG_UNSET &&
8552             rec->flag_block_full_backref != 0)
8553                 rec->bad_full_backref = 1;
8554         return 0;
8555 full_backref:
8556         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8557         if (rec->flag_block_full_backref != FLAG_UNSET &&
8558             rec->flag_block_full_backref != 1)
8559                 rec->bad_full_backref = 1;
8560         return 0;
8561 }
8562
8563 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8564 {
8565         fprintf(stderr, "Invalid key type(");
8566         print_key_type(stderr, 0, key_type);
8567         fprintf(stderr, ") found in root(");
8568         print_objectid(stderr, rootid, 0);
8569         fprintf(stderr, ")\n");
8570 }
8571
8572 /*
8573  * Check if the key is valid with its extent buffer.
8574  *
8575  * This is a early check in case invalid key exists in a extent buffer
8576  * This is not comprehensive yet, but should prevent wrong key/item passed
8577  * further
8578  */
8579 static int check_type_with_root(u64 rootid, u8 key_type)
8580 {
8581         switch (key_type) {
8582         /* Only valid in chunk tree */
8583         case BTRFS_DEV_ITEM_KEY:
8584         case BTRFS_CHUNK_ITEM_KEY:
8585                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8586                         goto err;
8587                 break;
8588         /* valid in csum and log tree */
8589         case BTRFS_CSUM_TREE_OBJECTID:
8590                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8591                       is_fstree(rootid)))
8592                         goto err;
8593                 break;
8594         case BTRFS_EXTENT_ITEM_KEY:
8595         case BTRFS_METADATA_ITEM_KEY:
8596         case BTRFS_BLOCK_GROUP_ITEM_KEY:
8597                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8598                         goto err;
8599                 break;
8600         case BTRFS_ROOT_ITEM_KEY:
8601                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8602                         goto err;
8603                 break;
8604         case BTRFS_DEV_EXTENT_KEY:
8605                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8606                         goto err;
8607                 break;
8608         }
8609         return 0;
8610 err:
8611         report_mismatch_key_root(key_type, rootid);
8612         return -EINVAL;
8613 }
8614
8615 static int run_next_block(struct btrfs_root *root,
8616                           struct block_info *bits,
8617                           int bits_nr,
8618                           u64 *last,
8619                           struct cache_tree *pending,
8620                           struct cache_tree *seen,
8621                           struct cache_tree *reada,
8622                           struct cache_tree *nodes,
8623                           struct cache_tree *extent_cache,
8624                           struct cache_tree *chunk_cache,
8625                           struct rb_root *dev_cache,
8626                           struct block_group_tree *block_group_cache,
8627                           struct device_extent_tree *dev_extent_cache,
8628                           struct root_item_record *ri)
8629 {
8630         struct btrfs_fs_info *fs_info = root->fs_info;
8631         struct extent_buffer *buf;
8632         struct extent_record *rec = NULL;
8633         u64 bytenr;
8634         u32 size;
8635         u64 parent;
8636         u64 owner;
8637         u64 flags;
8638         u64 ptr;
8639         u64 gen = 0;
8640         int ret = 0;
8641         int i;
8642         int nritems;
8643         struct btrfs_key key;
8644         struct cache_extent *cache;
8645         int reada_bits;
8646
8647         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8648                                     bits_nr, &reada_bits);
8649         if (nritems == 0)
8650                 return 1;
8651
8652         if (!reada_bits) {
8653                 for(i = 0; i < nritems; i++) {
8654                         ret = add_cache_extent(reada, bits[i].start,
8655                                                bits[i].size);
8656                         if (ret == -EEXIST)
8657                                 continue;
8658
8659                         /* fixme, get the parent transid */
8660                         readahead_tree_block(fs_info, bits[i].start, 0);
8661                 }
8662         }
8663         *last = bits[0].start;
8664         bytenr = bits[0].start;
8665         size = bits[0].size;
8666
8667         cache = lookup_cache_extent(pending, bytenr, size);
8668         if (cache) {
8669                 remove_cache_extent(pending, cache);
8670                 free(cache);
8671         }
8672         cache = lookup_cache_extent(reada, bytenr, size);
8673         if (cache) {
8674                 remove_cache_extent(reada, cache);
8675                 free(cache);
8676         }
8677         cache = lookup_cache_extent(nodes, bytenr, size);
8678         if (cache) {
8679                 remove_cache_extent(nodes, cache);
8680                 free(cache);
8681         }
8682         cache = lookup_cache_extent(extent_cache, bytenr, size);
8683         if (cache) {
8684                 rec = container_of(cache, struct extent_record, cache);
8685                 gen = rec->parent_generation;
8686         }
8687
8688         /* fixme, get the real parent transid */
8689         buf = read_tree_block(root->fs_info, bytenr, gen);
8690         if (!extent_buffer_uptodate(buf)) {
8691                 record_bad_block_io(root->fs_info,
8692                                     extent_cache, bytenr, size);
8693                 goto out;
8694         }
8695
8696         nritems = btrfs_header_nritems(buf);
8697
8698         flags = 0;
8699         if (!init_extent_tree) {
8700                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8701                                        btrfs_header_level(buf), 1, NULL,
8702                                        &flags);
8703                 if (ret < 0) {
8704                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8705                         if (ret < 0) {
8706                                 fprintf(stderr, "Couldn't calc extent flags\n");
8707                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8708                         }
8709                 }
8710         } else {
8711                 flags = 0;
8712                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8713                 if (ret < 0) {
8714                         fprintf(stderr, "Couldn't calc extent flags\n");
8715                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8716                 }
8717         }
8718
8719         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8720                 if (ri != NULL &&
8721                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8722                     ri->objectid == btrfs_header_owner(buf)) {
8723                         /*
8724                          * Ok we got to this block from it's original owner and
8725                          * we have FULL_BACKREF set.  Relocation can leave
8726                          * converted blocks over so this is altogether possible,
8727                          * however it's not possible if the generation > the
8728                          * last snapshot, so check for this case.
8729                          */
8730                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8731                             btrfs_header_generation(buf) > ri->last_snapshot) {
8732                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8733                                 rec->bad_full_backref = 1;
8734                         }
8735                 }
8736         } else {
8737                 if (ri != NULL &&
8738                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8739                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8740                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8741                         rec->bad_full_backref = 1;
8742                 }
8743         }
8744
8745         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8746                 rec->flag_block_full_backref = 1;
8747                 parent = bytenr;
8748                 owner = 0;
8749         } else {
8750                 rec->flag_block_full_backref = 0;
8751                 parent = 0;
8752                 owner = btrfs_header_owner(buf);
8753         }
8754
8755         ret = check_block(root, extent_cache, buf, flags);
8756         if (ret)
8757                 goto out;
8758
8759         if (btrfs_is_leaf(buf)) {
8760                 btree_space_waste += btrfs_leaf_free_space(root, buf);
8761                 for (i = 0; i < nritems; i++) {
8762                         struct btrfs_file_extent_item *fi;
8763                         btrfs_item_key_to_cpu(buf, &key, i);
8764                         /*
8765                          * Check key type against the leaf owner.
8766                          * Could filter quite a lot of early error if
8767                          * owner is correct
8768                          */
8769                         if (check_type_with_root(btrfs_header_owner(buf),
8770                                                  key.type)) {
8771                                 fprintf(stderr, "ignoring invalid key\n");
8772                                 continue;
8773                         }
8774                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8775                                 process_extent_item(root, extent_cache, buf,
8776                                                     i);
8777                                 continue;
8778                         }
8779                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8780                                 process_extent_item(root, extent_cache, buf,
8781                                                     i);
8782                                 continue;
8783                         }
8784                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8785                                 total_csum_bytes +=
8786                                         btrfs_item_size_nr(buf, i);
8787                                 continue;
8788                         }
8789                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8790                                 process_chunk_item(chunk_cache, &key, buf, i);
8791                                 continue;
8792                         }
8793                         if (key.type == BTRFS_DEV_ITEM_KEY) {
8794                                 process_device_item(dev_cache, &key, buf, i);
8795                                 continue;
8796                         }
8797                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8798                                 process_block_group_item(block_group_cache,
8799                                         &key, buf, i);
8800                                 continue;
8801                         }
8802                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
8803                                 process_device_extent_item(dev_extent_cache,
8804                                         &key, buf, i);
8805                                 continue;
8806
8807                         }
8808                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8809 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8810                                 process_extent_ref_v0(extent_cache, buf, i);
8811 #else
8812                                 BUG();
8813 #endif
8814                                 continue;
8815                         }
8816
8817                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8818                                 ret = add_tree_backref(extent_cache,
8819                                                 key.objectid, 0, key.offset, 0);
8820                                 if (ret < 0)
8821                                         error(
8822                                 "add_tree_backref failed (leaf tree block): %s",
8823                                               strerror(-ret));
8824                                 continue;
8825                         }
8826                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8827                                 ret = add_tree_backref(extent_cache,
8828                                                 key.objectid, key.offset, 0, 0);
8829                                 if (ret < 0)
8830                                         error(
8831                                 "add_tree_backref failed (leaf shared block): %s",
8832                                               strerror(-ret));
8833                                 continue;
8834                         }
8835                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8836                                 struct btrfs_extent_data_ref *ref;
8837                                 ref = btrfs_item_ptr(buf, i,
8838                                                 struct btrfs_extent_data_ref);
8839                                 add_data_backref(extent_cache,
8840                                         key.objectid, 0,
8841                                         btrfs_extent_data_ref_root(buf, ref),
8842                                         btrfs_extent_data_ref_objectid(buf,
8843                                                                        ref),
8844                                         btrfs_extent_data_ref_offset(buf, ref),
8845                                         btrfs_extent_data_ref_count(buf, ref),
8846                                         0, root->fs_info->sectorsize);
8847                                 continue;
8848                         }
8849                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8850                                 struct btrfs_shared_data_ref *ref;
8851                                 ref = btrfs_item_ptr(buf, i,
8852                                                 struct btrfs_shared_data_ref);
8853                                 add_data_backref(extent_cache,
8854                                         key.objectid, key.offset, 0, 0, 0,
8855                                         btrfs_shared_data_ref_count(buf, ref),
8856                                         0, root->fs_info->sectorsize);
8857                                 continue;
8858                         }
8859                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8860                                 struct bad_item *bad;
8861
8862                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8863                                         continue;
8864                                 if (!owner)
8865                                         continue;
8866                                 bad = malloc(sizeof(struct bad_item));
8867                                 if (!bad)
8868                                         continue;
8869                                 INIT_LIST_HEAD(&bad->list);
8870                                 memcpy(&bad->key, &key,
8871                                        sizeof(struct btrfs_key));
8872                                 bad->root_id = owner;
8873                                 list_add_tail(&bad->list, &delete_items);
8874                                 continue;
8875                         }
8876                         if (key.type != BTRFS_EXTENT_DATA_KEY)
8877                                 continue;
8878                         fi = btrfs_item_ptr(buf, i,
8879                                             struct btrfs_file_extent_item);
8880                         if (btrfs_file_extent_type(buf, fi) ==
8881                             BTRFS_FILE_EXTENT_INLINE)
8882                                 continue;
8883                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8884                                 continue;
8885
8886                         data_bytes_allocated +=
8887                                 btrfs_file_extent_disk_num_bytes(buf, fi);
8888                         if (data_bytes_allocated < root->fs_info->sectorsize) {
8889                                 abort();
8890                         }
8891                         data_bytes_referenced +=
8892                                 btrfs_file_extent_num_bytes(buf, fi);
8893                         add_data_backref(extent_cache,
8894                                 btrfs_file_extent_disk_bytenr(buf, fi),
8895                                 parent, owner, key.objectid, key.offset -
8896                                 btrfs_file_extent_offset(buf, fi), 1, 1,
8897                                 btrfs_file_extent_disk_num_bytes(buf, fi));
8898                 }
8899         } else {
8900                 int level;
8901                 struct btrfs_key first_key;
8902
8903                 first_key.objectid = 0;
8904
8905                 if (nritems > 0)
8906                         btrfs_item_key_to_cpu(buf, &first_key, 0);
8907                 level = btrfs_header_level(buf);
8908                 for (i = 0; i < nritems; i++) {
8909                         struct extent_record tmpl;
8910
8911                         ptr = btrfs_node_blockptr(buf, i);
8912                         size = root->fs_info->nodesize;
8913                         btrfs_node_key_to_cpu(buf, &key, i);
8914                         if (ri != NULL) {
8915                                 if ((level == ri->drop_level)
8916                                     && is_dropped_key(&key, &ri->drop_key)) {
8917                                         continue;
8918                                 }
8919                         }
8920
8921                         memset(&tmpl, 0, sizeof(tmpl));
8922                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8923                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8924                         tmpl.start = ptr;
8925                         tmpl.nr = size;
8926                         tmpl.refs = 1;
8927                         tmpl.metadata = 1;
8928                         tmpl.max_size = size;
8929                         ret = add_extent_rec(extent_cache, &tmpl);
8930                         if (ret < 0)
8931                                 goto out;
8932
8933                         ret = add_tree_backref(extent_cache, ptr, parent,
8934                                         owner, 1);
8935                         if (ret < 0) {
8936                                 error(
8937                                 "add_tree_backref failed (non-leaf block): %s",
8938                                       strerror(-ret));
8939                                 continue;
8940                         }
8941
8942                         if (level > 1) {
8943                                 add_pending(nodes, seen, ptr, size);
8944                         } else {
8945                                 add_pending(pending, seen, ptr, size);
8946                         }
8947                 }
8948                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
8949                                       nritems) * sizeof(struct btrfs_key_ptr);
8950         }
8951         total_btree_bytes += buf->len;
8952         if (fs_root_objectid(btrfs_header_owner(buf)))
8953                 total_fs_tree_bytes += buf->len;
8954         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8955                 total_extent_tree_bytes += buf->len;
8956 out:
8957         free_extent_buffer(buf);
8958         return ret;
8959 }
8960
8961 static int add_root_to_pending(struct extent_buffer *buf,
8962                                struct cache_tree *extent_cache,
8963                                struct cache_tree *pending,
8964                                struct cache_tree *seen,
8965                                struct cache_tree *nodes,
8966                                u64 objectid)
8967 {
8968         struct extent_record tmpl;
8969         int ret;
8970
8971         if (btrfs_header_level(buf) > 0)
8972                 add_pending(nodes, seen, buf->start, buf->len);
8973         else
8974                 add_pending(pending, seen, buf->start, buf->len);
8975
8976         memset(&tmpl, 0, sizeof(tmpl));
8977         tmpl.start = buf->start;
8978         tmpl.nr = buf->len;
8979         tmpl.is_root = 1;
8980         tmpl.refs = 1;
8981         tmpl.metadata = 1;
8982         tmpl.max_size = buf->len;
8983         add_extent_rec(extent_cache, &tmpl);
8984
8985         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8986             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8987                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8988                                 0, 1);
8989         else
8990                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8991                                 1);
8992         return ret;
8993 }
8994
8995 /* as we fix the tree, we might be deleting blocks that
8996  * we're tracking for repair.  This hook makes sure we
8997  * remove any backrefs for blocks as we are fixing them.
8998  */
8999 static int free_extent_hook(struct btrfs_trans_handle *trans,
9000                             struct btrfs_root *root,
9001                             u64 bytenr, u64 num_bytes, u64 parent,
9002                             u64 root_objectid, u64 owner, u64 offset,
9003                             int refs_to_drop)
9004 {
9005         struct extent_record *rec;
9006         struct cache_extent *cache;
9007         int is_data;
9008         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9009
9010         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9011         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9012         if (!cache)
9013                 return 0;
9014
9015         rec = container_of(cache, struct extent_record, cache);
9016         if (is_data) {
9017                 struct data_backref *back;
9018                 back = find_data_backref(rec, parent, root_objectid, owner,
9019                                          offset, 1, bytenr, num_bytes);
9020                 if (!back)
9021                         goto out;
9022                 if (back->node.found_ref) {
9023                         back->found_ref -= refs_to_drop;
9024                         if (rec->refs)
9025                                 rec->refs -= refs_to_drop;
9026                 }
9027                 if (back->node.found_extent_tree) {
9028                         back->num_refs -= refs_to_drop;
9029                         if (rec->extent_item_refs)
9030                                 rec->extent_item_refs -= refs_to_drop;
9031                 }
9032                 if (back->found_ref == 0)
9033                         back->node.found_ref = 0;
9034                 if (back->num_refs == 0)
9035                         back->node.found_extent_tree = 0;
9036
9037                 if (!back->node.found_extent_tree && back->node.found_ref) {
9038                         rb_erase(&back->node.node, &rec->backref_tree);
9039                         free(back);
9040                 }
9041         } else {
9042                 struct tree_backref *back;
9043                 back = find_tree_backref(rec, parent, root_objectid);
9044                 if (!back)
9045                         goto out;
9046                 if (back->node.found_ref) {
9047                         if (rec->refs)
9048                                 rec->refs--;
9049                         back->node.found_ref = 0;
9050                 }
9051                 if (back->node.found_extent_tree) {
9052                         if (rec->extent_item_refs)
9053                                 rec->extent_item_refs--;
9054                         back->node.found_extent_tree = 0;
9055                 }
9056                 if (!back->node.found_extent_tree && back->node.found_ref) {
9057                         rb_erase(&back->node.node, &rec->backref_tree);
9058                         free(back);
9059                 }
9060         }
9061         maybe_free_extent_rec(extent_cache, rec);
9062 out:
9063         return 0;
9064 }
9065
9066 static int delete_extent_records(struct btrfs_trans_handle *trans,
9067                                  struct btrfs_root *root,
9068                                  struct btrfs_path *path,
9069                                  u64 bytenr)
9070 {
9071         struct btrfs_key key;
9072         struct btrfs_key found_key;
9073         struct extent_buffer *leaf;
9074         int ret;
9075         int slot;
9076
9077
9078         key.objectid = bytenr;
9079         key.type = (u8)-1;
9080         key.offset = (u64)-1;
9081
9082         while(1) {
9083                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9084                                         &key, path, 0, 1);
9085                 if (ret < 0)
9086                         break;
9087
9088                 if (ret > 0) {
9089                         ret = 0;
9090                         if (path->slots[0] == 0)
9091                                 break;
9092                         path->slots[0]--;
9093                 }
9094                 ret = 0;
9095
9096                 leaf = path->nodes[0];
9097                 slot = path->slots[0];
9098
9099                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9100                 if (found_key.objectid != bytenr)
9101                         break;
9102
9103                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9104                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
9105                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9106                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9107                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9108                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9109                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9110                         btrfs_release_path(path);
9111                         if (found_key.type == 0) {
9112                                 if (found_key.offset == 0)
9113                                         break;
9114                                 key.offset = found_key.offset - 1;
9115                                 key.type = found_key.type;
9116                         }
9117                         key.type = found_key.type - 1;
9118                         key.offset = (u64)-1;
9119                         continue;
9120                 }
9121
9122                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9123                         found_key.objectid, found_key.type, found_key.offset);
9124
9125                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9126                 if (ret)
9127                         break;
9128                 btrfs_release_path(path);
9129
9130                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9131                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
9132                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9133                                 found_key.offset : root->fs_info->nodesize;
9134
9135                         ret = btrfs_update_block_group(root, bytenr,
9136                                                        bytes, 0, 0);
9137                         if (ret)
9138                                 break;
9139                 }
9140         }
9141
9142         btrfs_release_path(path);
9143         return ret;
9144 }
9145
9146 /*
9147  * for a single backref, this will allocate a new extent
9148  * and add the backref to it.
9149  */
9150 static int record_extent(struct btrfs_trans_handle *trans,
9151                          struct btrfs_fs_info *info,
9152                          struct btrfs_path *path,
9153                          struct extent_record *rec,
9154                          struct extent_backref *back,
9155                          int allocated, u64 flags)
9156 {
9157         int ret = 0;
9158         struct btrfs_root *extent_root = info->extent_root;
9159         struct extent_buffer *leaf;
9160         struct btrfs_key ins_key;
9161         struct btrfs_extent_item *ei;
9162         struct data_backref *dback;
9163         struct btrfs_tree_block_info *bi;
9164
9165         if (!back->is_data)
9166                 rec->max_size = max_t(u64, rec->max_size,
9167                                     info->nodesize);
9168
9169         if (!allocated) {
9170                 u32 item_size = sizeof(*ei);
9171
9172                 if (!back->is_data)
9173                         item_size += sizeof(*bi);
9174
9175                 ins_key.objectid = rec->start;
9176                 ins_key.offset = rec->max_size;
9177                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9178
9179                 ret = btrfs_insert_empty_item(trans, extent_root, path,
9180                                         &ins_key, item_size);
9181                 if (ret)
9182                         goto fail;
9183
9184                 leaf = path->nodes[0];
9185                 ei = btrfs_item_ptr(leaf, path->slots[0],
9186                                     struct btrfs_extent_item);
9187
9188                 btrfs_set_extent_refs(leaf, ei, 0);
9189                 btrfs_set_extent_generation(leaf, ei, rec->generation);
9190
9191                 if (back->is_data) {
9192                         btrfs_set_extent_flags(leaf, ei,
9193                                                BTRFS_EXTENT_FLAG_DATA);
9194                 } else {
9195                         struct btrfs_disk_key copy_key;;
9196
9197                         bi = (struct btrfs_tree_block_info *)(ei + 1);
9198                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
9199                                              sizeof(*bi));
9200
9201                         btrfs_set_disk_key_objectid(&copy_key,
9202                                                     rec->info_objectid);
9203                         btrfs_set_disk_key_type(&copy_key, 0);
9204                         btrfs_set_disk_key_offset(&copy_key, 0);
9205
9206                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9207                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
9208
9209                         btrfs_set_extent_flags(leaf, ei,
9210                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9211                 }
9212
9213                 btrfs_mark_buffer_dirty(leaf);
9214                 ret = btrfs_update_block_group(extent_root, rec->start,
9215                                                rec->max_size, 1, 0);
9216                 if (ret)
9217                         goto fail;
9218                 btrfs_release_path(path);
9219         }
9220
9221         if (back->is_data) {
9222                 u64 parent;
9223                 int i;
9224
9225                 dback = to_data_backref(back);
9226                 if (back->full_backref)
9227                         parent = dback->parent;
9228                 else
9229                         parent = 0;
9230
9231                 for (i = 0; i < dback->found_ref; i++) {
9232                         /* if parent != 0, we're doing a full backref
9233                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9234                          * just makes the backref allocator create a data
9235                          * backref
9236                          */
9237                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
9238                                                    rec->start, rec->max_size,
9239                                                    parent,
9240                                                    dback->root,
9241                                                    parent ?
9242                                                    BTRFS_FIRST_FREE_OBJECTID :
9243                                                    dback->owner,
9244                                                    dback->offset);
9245                         if (ret)
9246                                 break;
9247                 }
9248                 fprintf(stderr, "adding new data backref"
9249                                 " on %llu %s %llu owner %llu"
9250                                 " offset %llu found %d\n",
9251                                 (unsigned long long)rec->start,
9252                                 back->full_backref ?
9253                                 "parent" : "root",
9254                                 back->full_backref ?
9255                                 (unsigned long long)parent :
9256                                 (unsigned long long)dback->root,
9257                                 (unsigned long long)dback->owner,
9258                                 (unsigned long long)dback->offset,
9259                                 dback->found_ref);
9260         } else {
9261                 u64 parent;
9262                 struct tree_backref *tback;
9263
9264                 tback = to_tree_backref(back);
9265                 if (back->full_backref)
9266                         parent = tback->parent;
9267                 else
9268                         parent = 0;
9269
9270                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9271                                            rec->start, rec->max_size,
9272                                            parent, tback->root, 0, 0);
9273                 fprintf(stderr, "adding new tree backref on "
9274                         "start %llu len %llu parent %llu root %llu\n",
9275                         rec->start, rec->max_size, parent, tback->root);
9276         }
9277 fail:
9278         btrfs_release_path(path);
9279         return ret;
9280 }
9281
9282 static struct extent_entry *find_entry(struct list_head *entries,
9283                                        u64 bytenr, u64 bytes)
9284 {
9285         struct extent_entry *entry = NULL;
9286
9287         list_for_each_entry(entry, entries, list) {
9288                 if (entry->bytenr == bytenr && entry->bytes == bytes)
9289                         return entry;
9290         }
9291
9292         return NULL;
9293 }
9294
9295 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9296 {
9297         struct extent_entry *entry, *best = NULL, *prev = NULL;
9298
9299         list_for_each_entry(entry, entries, list) {
9300                 /*
9301                  * If there are as many broken entries as entries then we know
9302                  * not to trust this particular entry.
9303                  */
9304                 if (entry->broken == entry->count)
9305                         continue;
9306
9307                 /*
9308                  * Special case, when there are only two entries and 'best' is
9309                  * the first one
9310                  */
9311                 if (!prev) {
9312                         best = entry;
9313                         prev = entry;
9314                         continue;
9315                 }
9316
9317                 /*
9318                  * If our current entry == best then we can't be sure our best
9319                  * is really the best, so we need to keep searching.
9320                  */
9321                 if (best && best->count == entry->count) {
9322                         prev = entry;
9323                         best = NULL;
9324                         continue;
9325                 }
9326
9327                 /* Prev == entry, not good enough, have to keep searching */
9328                 if (!prev->broken && prev->count == entry->count)
9329                         continue;
9330
9331                 if (!best)
9332                         best = (prev->count > entry->count) ? prev : entry;
9333                 else if (best->count < entry->count)
9334                         best = entry;
9335                 prev = entry;
9336         }
9337
9338         return best;
9339 }
9340
9341 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9342                       struct data_backref *dback, struct extent_entry *entry)
9343 {
9344         struct btrfs_trans_handle *trans;
9345         struct btrfs_root *root;
9346         struct btrfs_file_extent_item *fi;
9347         struct extent_buffer *leaf;
9348         struct btrfs_key key;
9349         u64 bytenr, bytes;
9350         int ret, err;
9351
9352         key.objectid = dback->root;
9353         key.type = BTRFS_ROOT_ITEM_KEY;
9354         key.offset = (u64)-1;
9355         root = btrfs_read_fs_root(info, &key);
9356         if (IS_ERR(root)) {
9357                 fprintf(stderr, "Couldn't find root for our ref\n");
9358                 return -EINVAL;
9359         }
9360
9361         /*
9362          * The backref points to the original offset of the extent if it was
9363          * split, so we need to search down to the offset we have and then walk
9364          * forward until we find the backref we're looking for.
9365          */
9366         key.objectid = dback->owner;
9367         key.type = BTRFS_EXTENT_DATA_KEY;
9368         key.offset = dback->offset;
9369         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9370         if (ret < 0) {
9371                 fprintf(stderr, "Error looking up ref %d\n", ret);
9372                 return ret;
9373         }
9374
9375         while (1) {
9376                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9377                         ret = btrfs_next_leaf(root, path);
9378                         if (ret) {
9379                                 fprintf(stderr, "Couldn't find our ref, next\n");
9380                                 return -EINVAL;
9381                         }
9382                 }
9383                 leaf = path->nodes[0];
9384                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9385                 if (key.objectid != dback->owner ||
9386                     key.type != BTRFS_EXTENT_DATA_KEY) {
9387                         fprintf(stderr, "Couldn't find our ref, search\n");
9388                         return -EINVAL;
9389                 }
9390                 fi = btrfs_item_ptr(leaf, path->slots[0],
9391                                     struct btrfs_file_extent_item);
9392                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9393                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9394
9395                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9396                         break;
9397                 path->slots[0]++;
9398         }
9399
9400         btrfs_release_path(path);
9401
9402         trans = btrfs_start_transaction(root, 1);
9403         if (IS_ERR(trans))
9404                 return PTR_ERR(trans);
9405
9406         /*
9407          * Ok we have the key of the file extent we want to fix, now we can cow
9408          * down to the thing and fix it.
9409          */
9410         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9411         if (ret < 0) {
9412                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9413                         key.objectid, key.type, key.offset, ret);
9414                 goto out;
9415         }
9416         if (ret > 0) {
9417                 fprintf(stderr, "Well that's odd, we just found this key "
9418                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9419                         key.offset);
9420                 ret = -EINVAL;
9421                 goto out;
9422         }
9423         leaf = path->nodes[0];
9424         fi = btrfs_item_ptr(leaf, path->slots[0],
9425                             struct btrfs_file_extent_item);
9426
9427         if (btrfs_file_extent_compression(leaf, fi) &&
9428             dback->disk_bytenr != entry->bytenr) {
9429                 fprintf(stderr, "Ref doesn't match the record start and is "
9430                         "compressed, please take a btrfs-image of this file "
9431                         "system and send it to a btrfs developer so they can "
9432                         "complete this functionality for bytenr %Lu\n",
9433                         dback->disk_bytenr);
9434                 ret = -EINVAL;
9435                 goto out;
9436         }
9437
9438         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9439                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9440         } else if (dback->disk_bytenr > entry->bytenr) {
9441                 u64 off_diff, offset;
9442
9443                 off_diff = dback->disk_bytenr - entry->bytenr;
9444                 offset = btrfs_file_extent_offset(leaf, fi);
9445                 if (dback->disk_bytenr + offset +
9446                     btrfs_file_extent_num_bytes(leaf, fi) >
9447                     entry->bytenr + entry->bytes) {
9448                         fprintf(stderr, "Ref is past the entry end, please "
9449                                 "take a btrfs-image of this file system and "
9450                                 "send it to a btrfs developer, ref %Lu\n",
9451                                 dback->disk_bytenr);
9452                         ret = -EINVAL;
9453                         goto out;
9454                 }
9455                 offset += off_diff;
9456                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9457                 btrfs_set_file_extent_offset(leaf, fi, offset);
9458         } else if (dback->disk_bytenr < entry->bytenr) {
9459                 u64 offset;
9460
9461                 offset = btrfs_file_extent_offset(leaf, fi);
9462                 if (dback->disk_bytenr + offset < entry->bytenr) {
9463                         fprintf(stderr, "Ref is before the entry start, please"
9464                                 " take a btrfs-image of this file system and "
9465                                 "send it to a btrfs developer, ref %Lu\n",
9466                                 dback->disk_bytenr);
9467                         ret = -EINVAL;
9468                         goto out;
9469                 }
9470
9471                 offset += dback->disk_bytenr;
9472                 offset -= entry->bytenr;
9473                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9474                 btrfs_set_file_extent_offset(leaf, fi, offset);
9475         }
9476
9477         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9478
9479         /*
9480          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9481          * only do this if we aren't using compression, otherwise it's a
9482          * trickier case.
9483          */
9484         if (!btrfs_file_extent_compression(leaf, fi))
9485                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9486         else
9487                 printf("ram bytes may be wrong?\n");
9488         btrfs_mark_buffer_dirty(leaf);
9489 out:
9490         err = btrfs_commit_transaction(trans, root);
9491         btrfs_release_path(path);
9492         return ret ? ret : err;
9493 }
9494
9495 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9496                            struct extent_record *rec)
9497 {
9498         struct extent_backref *back, *tmp;
9499         struct data_backref *dback;
9500         struct extent_entry *entry, *best = NULL;
9501         LIST_HEAD(entries);
9502         int nr_entries = 0;
9503         int broken_entries = 0;
9504         int ret = 0;
9505         short mismatch = 0;
9506
9507         /*
9508          * Metadata is easy and the backrefs should always agree on bytenr and
9509          * size, if not we've got bigger issues.
9510          */
9511         if (rec->metadata)
9512                 return 0;
9513
9514         rbtree_postorder_for_each_entry_safe(back, tmp,
9515                                              &rec->backref_tree, node) {
9516                 if (back->full_backref || !back->is_data)
9517                         continue;
9518
9519                 dback = to_data_backref(back);
9520
9521                 /*
9522                  * We only pay attention to backrefs that we found a real
9523                  * backref for.
9524                  */
9525                 if (dback->found_ref == 0)
9526                         continue;
9527
9528                 /*
9529                  * For now we only catch when the bytes don't match, not the
9530                  * bytenr.  We can easily do this at the same time, but I want
9531                  * to have a fs image to test on before we just add repair
9532                  * functionality willy-nilly so we know we won't screw up the
9533                  * repair.
9534                  */
9535
9536                 entry = find_entry(&entries, dback->disk_bytenr,
9537                                    dback->bytes);
9538                 if (!entry) {
9539                         entry = malloc(sizeof(struct extent_entry));
9540                         if (!entry) {
9541                                 ret = -ENOMEM;
9542                                 goto out;
9543                         }
9544                         memset(entry, 0, sizeof(*entry));
9545                         entry->bytenr = dback->disk_bytenr;
9546                         entry->bytes = dback->bytes;
9547                         list_add_tail(&entry->list, &entries);
9548                         nr_entries++;
9549                 }
9550
9551                 /*
9552                  * If we only have on entry we may think the entries agree when
9553                  * in reality they don't so we have to do some extra checking.
9554                  */
9555                 if (dback->disk_bytenr != rec->start ||
9556                     dback->bytes != rec->nr || back->broken)
9557                         mismatch = 1;
9558
9559                 if (back->broken) {
9560                         entry->broken++;
9561                         broken_entries++;
9562                 }
9563
9564                 entry->count++;
9565         }
9566
9567         /* Yay all the backrefs agree, carry on good sir */
9568         if (nr_entries <= 1 && !mismatch)
9569                 goto out;
9570
9571         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9572                 "%Lu\n", rec->start);
9573
9574         /*
9575          * First we want to see if the backrefs can agree amongst themselves who
9576          * is right, so figure out which one of the entries has the highest
9577          * count.
9578          */
9579         best = find_most_right_entry(&entries);
9580
9581         /*
9582          * Ok so we may have an even split between what the backrefs think, so
9583          * this is where we use the extent ref to see what it thinks.
9584          */
9585         if (!best) {
9586                 entry = find_entry(&entries, rec->start, rec->nr);
9587                 if (!entry && (!broken_entries || !rec->found_rec)) {
9588                         fprintf(stderr, "Backrefs don't agree with each other "
9589                                 "and extent record doesn't agree with anybody,"
9590                                 " so we can't fix bytenr %Lu bytes %Lu\n",
9591                                 rec->start, rec->nr);
9592                         ret = -EINVAL;
9593                         goto out;
9594                 } else if (!entry) {
9595                         /*
9596                          * Ok our backrefs were broken, we'll assume this is the
9597                          * correct value and add an entry for this range.
9598                          */
9599                         entry = malloc(sizeof(struct extent_entry));
9600                         if (!entry) {
9601                                 ret = -ENOMEM;
9602                                 goto out;
9603                         }
9604                         memset(entry, 0, sizeof(*entry));
9605                         entry->bytenr = rec->start;
9606                         entry->bytes = rec->nr;
9607                         list_add_tail(&entry->list, &entries);
9608                         nr_entries++;
9609                 }
9610                 entry->count++;
9611                 best = find_most_right_entry(&entries);
9612                 if (!best) {
9613                         fprintf(stderr, "Backrefs and extent record evenly "
9614                                 "split on who is right, this is going to "
9615                                 "require user input to fix bytenr %Lu bytes "
9616                                 "%Lu\n", rec->start, rec->nr);
9617                         ret = -EINVAL;
9618                         goto out;
9619                 }
9620         }
9621
9622         /*
9623          * I don't think this can happen currently as we'll abort() if we catch
9624          * this case higher up, but in case somebody removes that we still can't
9625          * deal with it properly here yet, so just bail out of that's the case.
9626          */
9627         if (best->bytenr != rec->start) {
9628                 fprintf(stderr, "Extent start and backref starts don't match, "
9629                         "please use btrfs-image on this file system and send "
9630                         "it to a btrfs developer so they can make fsck fix "
9631                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
9632                         rec->start, rec->nr);
9633                 ret = -EINVAL;
9634                 goto out;
9635         }
9636
9637         /*
9638          * Ok great we all agreed on an extent record, let's go find the real
9639          * references and fix up the ones that don't match.
9640          */
9641         rbtree_postorder_for_each_entry_safe(back, tmp,
9642                                              &rec->backref_tree, node) {
9643                 if (back->full_backref || !back->is_data)
9644                         continue;
9645
9646                 dback = to_data_backref(back);
9647
9648                 /*
9649                  * Still ignoring backrefs that don't have a real ref attached
9650                  * to them.
9651                  */
9652                 if (dback->found_ref == 0)
9653                         continue;
9654
9655                 if (dback->bytes == best->bytes &&
9656                     dback->disk_bytenr == best->bytenr)
9657                         continue;
9658
9659                 ret = repair_ref(info, path, dback, best);
9660                 if (ret)
9661                         goto out;
9662         }
9663
9664         /*
9665          * Ok we messed with the actual refs, which means we need to drop our
9666          * entire cache and go back and rescan.  I know this is a huge pain and
9667          * adds a lot of extra work, but it's the only way to be safe.  Once all
9668          * the backrefs agree we may not need to do anything to the extent
9669          * record itself.
9670          */
9671         ret = -EAGAIN;
9672 out:
9673         while (!list_empty(&entries)) {
9674                 entry = list_entry(entries.next, struct extent_entry, list);
9675                 list_del_init(&entry->list);
9676                 free(entry);
9677         }
9678         return ret;
9679 }
9680
9681 static int process_duplicates(struct cache_tree *extent_cache,
9682                               struct extent_record *rec)
9683 {
9684         struct extent_record *good, *tmp;
9685         struct cache_extent *cache;
9686         int ret;
9687
9688         /*
9689          * If we found a extent record for this extent then return, or if we
9690          * have more than one duplicate we are likely going to need to delete
9691          * something.
9692          */
9693         if (rec->found_rec || rec->num_duplicates > 1)
9694                 return 0;
9695
9696         /* Shouldn't happen but just in case */
9697         BUG_ON(!rec->num_duplicates);
9698
9699         /*
9700          * So this happens if we end up with a backref that doesn't match the
9701          * actual extent entry.  So either the backref is bad or the extent
9702          * entry is bad.  Either way we want to have the extent_record actually
9703          * reflect what we found in the extent_tree, so we need to take the
9704          * duplicate out and use that as the extent_record since the only way we
9705          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9706          */
9707         remove_cache_extent(extent_cache, &rec->cache);
9708
9709         good = to_extent_record(rec->dups.next);
9710         list_del_init(&good->list);
9711         INIT_LIST_HEAD(&good->backrefs);
9712         INIT_LIST_HEAD(&good->dups);
9713         good->cache.start = good->start;
9714         good->cache.size = good->nr;
9715         good->content_checked = 0;
9716         good->owner_ref_checked = 0;
9717         good->num_duplicates = 0;
9718         good->refs = rec->refs;
9719         list_splice_init(&rec->backrefs, &good->backrefs);
9720         while (1) {
9721                 cache = lookup_cache_extent(extent_cache, good->start,
9722                                             good->nr);
9723                 if (!cache)
9724                         break;
9725                 tmp = container_of(cache, struct extent_record, cache);
9726
9727                 /*
9728                  * If we find another overlapping extent and it's found_rec is
9729                  * set then it's a duplicate and we need to try and delete
9730                  * something.
9731                  */
9732                 if (tmp->found_rec || tmp->num_duplicates > 0) {
9733                         if (list_empty(&good->list))
9734                                 list_add_tail(&good->list,
9735                                               &duplicate_extents);
9736                         good->num_duplicates += tmp->num_duplicates + 1;
9737                         list_splice_init(&tmp->dups, &good->dups);
9738                         list_del_init(&tmp->list);
9739                         list_add_tail(&tmp->list, &good->dups);
9740                         remove_cache_extent(extent_cache, &tmp->cache);
9741                         continue;
9742                 }
9743
9744                 /*
9745                  * Ok we have another non extent item backed extent rec, so lets
9746                  * just add it to this extent and carry on like we did above.
9747                  */
9748                 good->refs += tmp->refs;
9749                 list_splice_init(&tmp->backrefs, &good->backrefs);
9750                 remove_cache_extent(extent_cache, &tmp->cache);
9751                 free(tmp);
9752         }
9753         ret = insert_cache_extent(extent_cache, &good->cache);
9754         BUG_ON(ret);
9755         free(rec);
9756         return good->num_duplicates ? 0 : 1;
9757 }
9758
9759 static int delete_duplicate_records(struct btrfs_root *root,
9760                                     struct extent_record *rec)
9761 {
9762         struct btrfs_trans_handle *trans;
9763         LIST_HEAD(delete_list);
9764         struct btrfs_path path;
9765         struct extent_record *tmp, *good, *n;
9766         int nr_del = 0;
9767         int ret = 0, err;
9768         struct btrfs_key key;
9769
9770         btrfs_init_path(&path);
9771
9772         good = rec;
9773         /* Find the record that covers all of the duplicates. */
9774         list_for_each_entry(tmp, &rec->dups, list) {
9775                 if (good->start < tmp->start)
9776                         continue;
9777                 if (good->nr > tmp->nr)
9778                         continue;
9779
9780                 if (tmp->start + tmp->nr < good->start + good->nr) {
9781                         fprintf(stderr, "Ok we have overlapping extents that "
9782                                 "aren't completely covered by each other, this "
9783                                 "is going to require more careful thought.  "
9784                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9785                                 tmp->start, tmp->nr, good->start, good->nr);
9786                         abort();
9787                 }
9788                 good = tmp;
9789         }
9790
9791         if (good != rec)
9792                 list_add_tail(&rec->list, &delete_list);
9793
9794         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9795                 if (tmp == good)
9796                         continue;
9797                 list_move_tail(&tmp->list, &delete_list);
9798         }
9799
9800         root = root->fs_info->extent_root;
9801         trans = btrfs_start_transaction(root, 1);
9802         if (IS_ERR(trans)) {
9803                 ret = PTR_ERR(trans);
9804                 goto out;
9805         }
9806
9807         list_for_each_entry(tmp, &delete_list, list) {
9808                 if (tmp->found_rec == 0)
9809                         continue;
9810                 key.objectid = tmp->start;
9811                 key.type = BTRFS_EXTENT_ITEM_KEY;
9812                 key.offset = tmp->nr;
9813
9814                 /* Shouldn't happen but just in case */
9815                 if (tmp->metadata) {
9816                         fprintf(stderr, "Well this shouldn't happen, extent "
9817                                 "record overlaps but is metadata? "
9818                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9819                         abort();
9820                 }
9821
9822                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9823                 if (ret) {
9824                         if (ret > 0)
9825                                 ret = -EINVAL;
9826                         break;
9827                 }
9828                 ret = btrfs_del_item(trans, root, &path);
9829                 if (ret)
9830                         break;
9831                 btrfs_release_path(&path);
9832                 nr_del++;
9833         }
9834         err = btrfs_commit_transaction(trans, root);
9835         if (err && !ret)
9836                 ret = err;
9837 out:
9838         while (!list_empty(&delete_list)) {
9839                 tmp = to_extent_record(delete_list.next);
9840                 list_del_init(&tmp->list);
9841                 if (tmp == rec)
9842                         continue;
9843                 free(tmp);
9844         }
9845
9846         while (!list_empty(&rec->dups)) {
9847                 tmp = to_extent_record(rec->dups.next);
9848                 list_del_init(&tmp->list);
9849                 free(tmp);
9850         }
9851
9852         btrfs_release_path(&path);
9853
9854         if (!ret && !nr_del)
9855                 rec->num_duplicates = 0;
9856
9857         return ret ? ret : nr_del;
9858 }
9859
9860 static int find_possible_backrefs(struct btrfs_fs_info *info,
9861                                   struct btrfs_path *path,
9862                                   struct cache_tree *extent_cache,
9863                                   struct extent_record *rec)
9864 {
9865         struct btrfs_root *root;
9866         struct extent_backref *back, *tmp;
9867         struct data_backref *dback;
9868         struct cache_extent *cache;
9869         struct btrfs_file_extent_item *fi;
9870         struct btrfs_key key;
9871         u64 bytenr, bytes;
9872         int ret;
9873
9874         rbtree_postorder_for_each_entry_safe(back, tmp,
9875                                              &rec->backref_tree, node) {
9876                 /* Don't care about full backrefs (poor unloved backrefs) */
9877                 if (back->full_backref || !back->is_data)
9878                         continue;
9879
9880                 dback = to_data_backref(back);
9881
9882                 /* We found this one, we don't need to do a lookup */
9883                 if (dback->found_ref)
9884                         continue;
9885
9886                 key.objectid = dback->root;
9887                 key.type = BTRFS_ROOT_ITEM_KEY;
9888                 key.offset = (u64)-1;
9889
9890                 root = btrfs_read_fs_root(info, &key);
9891
9892                 /* No root, definitely a bad ref, skip */
9893                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9894                         continue;
9895                 /* Other err, exit */
9896                 if (IS_ERR(root))
9897                         return PTR_ERR(root);
9898
9899                 key.objectid = dback->owner;
9900                 key.type = BTRFS_EXTENT_DATA_KEY;
9901                 key.offset = dback->offset;
9902                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9903                 if (ret) {
9904                         btrfs_release_path(path);
9905                         if (ret < 0)
9906                                 return ret;
9907                         /* Didn't find it, we can carry on */
9908                         ret = 0;
9909                         continue;
9910                 }
9911
9912                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9913                                     struct btrfs_file_extent_item);
9914                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9915                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9916                 btrfs_release_path(path);
9917                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9918                 if (cache) {
9919                         struct extent_record *tmp;
9920                         tmp = container_of(cache, struct extent_record, cache);
9921
9922                         /*
9923                          * If we found an extent record for the bytenr for this
9924                          * particular backref then we can't add it to our
9925                          * current extent record.  We only want to add backrefs
9926                          * that don't have a corresponding extent item in the
9927                          * extent tree since they likely belong to this record
9928                          * and we need to fix it if it doesn't match bytenrs.
9929                          */
9930                         if  (tmp->found_rec)
9931                                 continue;
9932                 }
9933
9934                 dback->found_ref += 1;
9935                 dback->disk_bytenr = bytenr;
9936                 dback->bytes = bytes;
9937
9938                 /*
9939                  * Set this so the verify backref code knows not to trust the
9940                  * values in this backref.
9941                  */
9942                 back->broken = 1;
9943         }
9944
9945         return 0;
9946 }
9947
9948 /*
9949  * Record orphan data ref into corresponding root.
9950  *
9951  * Return 0 if the extent item contains data ref and recorded.
9952  * Return 1 if the extent item contains no useful data ref
9953  *   On that case, it may contains only shared_dataref or metadata backref
9954  *   or the file extent exists(this should be handled by the extent bytenr
9955  *   recovery routine)
9956  * Return <0 if something goes wrong.
9957  */
9958 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9959                                       struct extent_record *rec)
9960 {
9961         struct btrfs_key key;
9962         struct btrfs_root *dest_root;
9963         struct extent_backref *back, *tmp;
9964         struct data_backref *dback;
9965         struct orphan_data_extent *orphan;
9966         struct btrfs_path path;
9967         int recorded_data_ref = 0;
9968         int ret = 0;
9969
9970         if (rec->metadata)
9971                 return 1;
9972         btrfs_init_path(&path);
9973         rbtree_postorder_for_each_entry_safe(back, tmp,
9974                                              &rec->backref_tree, node) {
9975                 if (back->full_backref || !back->is_data ||
9976                     !back->found_extent_tree)
9977                         continue;
9978                 dback = to_data_backref(back);
9979                 if (dback->found_ref)
9980                         continue;
9981                 key.objectid = dback->root;
9982                 key.type = BTRFS_ROOT_ITEM_KEY;
9983                 key.offset = (u64)-1;
9984
9985                 dest_root = btrfs_read_fs_root(fs_info, &key);
9986
9987                 /* For non-exist root we just skip it */
9988                 if (IS_ERR(dest_root) || !dest_root)
9989                         continue;
9990
9991                 key.objectid = dback->owner;
9992                 key.type = BTRFS_EXTENT_DATA_KEY;
9993                 key.offset = dback->offset;
9994
9995                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9996                 btrfs_release_path(&path);
9997                 /*
9998                  * For ret < 0, it's OK since the fs-tree may be corrupted,
9999                  * we need to record it for inode/file extent rebuild.
10000                  * For ret > 0, we record it only for file extent rebuild.
10001                  * For ret == 0, the file extent exists but only bytenr
10002                  * mismatch, let the original bytenr fix routine to handle,
10003                  * don't record it.
10004                  */
10005                 if (ret == 0)
10006                         continue;
10007                 ret = 0;
10008                 orphan = malloc(sizeof(*orphan));
10009                 if (!orphan) {
10010                         ret = -ENOMEM;
10011                         goto out;
10012                 }
10013                 INIT_LIST_HEAD(&orphan->list);
10014                 orphan->root = dback->root;
10015                 orphan->objectid = dback->owner;
10016                 orphan->offset = dback->offset;
10017                 orphan->disk_bytenr = rec->cache.start;
10018                 orphan->disk_len = rec->cache.size;
10019                 list_add(&dest_root->orphan_data_extents, &orphan->list);
10020                 recorded_data_ref = 1;
10021         }
10022 out:
10023         btrfs_release_path(&path);
10024         if (!ret)
10025                 return !recorded_data_ref;
10026         else
10027                 return ret;
10028 }
10029
10030 /*
10031  * when an incorrect extent item is found, this will delete
10032  * all of the existing entries for it and recreate them
10033  * based on what the tree scan found.
10034  */
10035 static int fixup_extent_refs(struct btrfs_fs_info *info,
10036                              struct cache_tree *extent_cache,
10037                              struct extent_record *rec)
10038 {
10039         struct btrfs_trans_handle *trans = NULL;
10040         int ret;
10041         struct btrfs_path path;
10042         struct cache_extent *cache;
10043         struct extent_backref *back, *tmp;
10044         int allocated = 0;
10045         u64 flags = 0;
10046
10047         if (rec->flag_block_full_backref)
10048                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10049
10050         btrfs_init_path(&path);
10051         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10052                 /*
10053                  * Sometimes the backrefs themselves are so broken they don't
10054                  * get attached to any meaningful rec, so first go back and
10055                  * check any of our backrefs that we couldn't find and throw
10056                  * them into the list if we find the backref so that
10057                  * verify_backrefs can figure out what to do.
10058                  */
10059                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10060                 if (ret < 0)
10061                         goto out;
10062         }
10063
10064         /* step one, make sure all of the backrefs agree */
10065         ret = verify_backrefs(info, &path, rec);
10066         if (ret < 0)
10067                 goto out;
10068
10069         trans = btrfs_start_transaction(info->extent_root, 1);
10070         if (IS_ERR(trans)) {
10071                 ret = PTR_ERR(trans);
10072                 goto out;
10073         }
10074
10075         /* step two, delete all the existing records */
10076         ret = delete_extent_records(trans, info->extent_root, &path,
10077                                     rec->start);
10078
10079         if (ret < 0)
10080                 goto out;
10081
10082         /* was this block corrupt?  If so, don't add references to it */
10083         cache = lookup_cache_extent(info->corrupt_blocks,
10084                                     rec->start, rec->max_size);
10085         if (cache) {
10086                 ret = 0;
10087                 goto out;
10088         }
10089
10090         /* step three, recreate all the refs we did find */
10091         rbtree_postorder_for_each_entry_safe(back, tmp,
10092                                              &rec->backref_tree, node) {
10093                 /*
10094                  * if we didn't find any references, don't create a
10095                  * new extent record
10096                  */
10097                 if (!back->found_ref)
10098                         continue;
10099
10100                 rec->bad_full_backref = 0;
10101                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10102                 allocated = 1;
10103
10104                 if (ret)
10105                         goto out;
10106         }
10107 out:
10108         if (trans) {
10109                 int err = btrfs_commit_transaction(trans, info->extent_root);
10110                 if (!ret)
10111                         ret = err;
10112         }
10113
10114         if (!ret)
10115                 fprintf(stderr, "Repaired extent references for %llu\n",
10116                                 (unsigned long long)rec->start);
10117
10118         btrfs_release_path(&path);
10119         return ret;
10120 }
10121
10122 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10123                               struct extent_record *rec)
10124 {
10125         struct btrfs_trans_handle *trans;
10126         struct btrfs_root *root = fs_info->extent_root;
10127         struct btrfs_path path;
10128         struct btrfs_extent_item *ei;
10129         struct btrfs_key key;
10130         u64 flags;
10131         int ret = 0;
10132
10133         key.objectid = rec->start;
10134         if (rec->metadata) {
10135                 key.type = BTRFS_METADATA_ITEM_KEY;
10136                 key.offset = rec->info_level;
10137         } else {
10138                 key.type = BTRFS_EXTENT_ITEM_KEY;
10139                 key.offset = rec->max_size;
10140         }
10141
10142         trans = btrfs_start_transaction(root, 0);
10143         if (IS_ERR(trans))
10144                 return PTR_ERR(trans);
10145
10146         btrfs_init_path(&path);
10147         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10148         if (ret < 0) {
10149                 btrfs_release_path(&path);
10150                 btrfs_commit_transaction(trans, root);
10151                 return ret;
10152         } else if (ret) {
10153                 fprintf(stderr, "Didn't find extent for %llu\n",
10154                         (unsigned long long)rec->start);
10155                 btrfs_release_path(&path);
10156                 btrfs_commit_transaction(trans, root);
10157                 return -ENOENT;
10158         }
10159
10160         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10161                             struct btrfs_extent_item);
10162         flags = btrfs_extent_flags(path.nodes[0], ei);
10163         if (rec->flag_block_full_backref) {
10164                 fprintf(stderr, "setting full backref on %llu\n",
10165                         (unsigned long long)key.objectid);
10166                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10167         } else {
10168                 fprintf(stderr, "clearing full backref on %llu\n",
10169                         (unsigned long long)key.objectid);
10170                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10171         }
10172         btrfs_set_extent_flags(path.nodes[0], ei, flags);
10173         btrfs_mark_buffer_dirty(path.nodes[0]);
10174         btrfs_release_path(&path);
10175         ret = btrfs_commit_transaction(trans, root);
10176         if (!ret)
10177                 fprintf(stderr, "Repaired extent flags for %llu\n",
10178                                 (unsigned long long)rec->start);
10179
10180         return ret;
10181 }
10182
10183 /* right now we only prune from the extent allocation tree */
10184 static int prune_one_block(struct btrfs_trans_handle *trans,
10185                            struct btrfs_fs_info *info,
10186                            struct btrfs_corrupt_block *corrupt)
10187 {
10188         int ret;
10189         struct btrfs_path path;
10190         struct extent_buffer *eb;
10191         u64 found;
10192         int slot;
10193         int nritems;
10194         int level = corrupt->level + 1;
10195
10196         btrfs_init_path(&path);
10197 again:
10198         /* we want to stop at the parent to our busted block */
10199         path.lowest_level = level;
10200
10201         ret = btrfs_search_slot(trans, info->extent_root,
10202                                 &corrupt->key, &path, -1, 1);
10203
10204         if (ret < 0)
10205                 goto out;
10206
10207         eb = path.nodes[level];
10208         if (!eb) {
10209                 ret = -ENOENT;
10210                 goto out;
10211         }
10212
10213         /*
10214          * hopefully the search gave us the block we want to prune,
10215          * lets try that first
10216          */
10217         slot = path.slots[level];
10218         found =  btrfs_node_blockptr(eb, slot);
10219         if (found == corrupt->cache.start)
10220                 goto del_ptr;
10221
10222         nritems = btrfs_header_nritems(eb);
10223
10224         /* the search failed, lets scan this node and hope we find it */
10225         for (slot = 0; slot < nritems; slot++) {
10226                 found =  btrfs_node_blockptr(eb, slot);
10227                 if (found == corrupt->cache.start)
10228                         goto del_ptr;
10229         }
10230         /*
10231          * we couldn't find the bad block.  TODO, search all the nodes for pointers
10232          * to this block
10233          */
10234         if (eb == info->extent_root->node) {
10235                 ret = -ENOENT;
10236                 goto out;
10237         } else {
10238                 level++;
10239                 btrfs_release_path(&path);
10240                 goto again;
10241         }
10242
10243 del_ptr:
10244         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10245         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10246
10247 out:
10248         btrfs_release_path(&path);
10249         return ret;
10250 }
10251
10252 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10253 {
10254         struct btrfs_trans_handle *trans = NULL;
10255         struct cache_extent *cache;
10256         struct btrfs_corrupt_block *corrupt;
10257
10258         while (1) {
10259                 cache = search_cache_extent(info->corrupt_blocks, 0);
10260                 if (!cache)
10261                         break;
10262                 if (!trans) {
10263                         trans = btrfs_start_transaction(info->extent_root, 1);
10264                         if (IS_ERR(trans))
10265                                 return PTR_ERR(trans);
10266                 }
10267                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10268                 prune_one_block(trans, info, corrupt);
10269                 remove_cache_extent(info->corrupt_blocks, cache);
10270         }
10271         if (trans)
10272                 return btrfs_commit_transaction(trans, info->extent_root);
10273         return 0;
10274 }
10275
10276 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10277 {
10278         struct btrfs_block_group_cache *cache;
10279         u64 start, end;
10280         int ret;
10281
10282         while (1) {
10283                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10284                                             &start, &end, EXTENT_DIRTY);
10285                 if (ret)
10286                         break;
10287                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10288         }
10289
10290         start = 0;
10291         while (1) {
10292                 cache = btrfs_lookup_first_block_group(fs_info, start);
10293                 if (!cache)
10294                         break;
10295                 if (cache->cached)
10296                         cache->cached = 0;
10297                 start = cache->key.objectid + cache->key.offset;
10298         }
10299 }
10300
10301 static int check_extent_refs(struct btrfs_root *root,
10302                              struct cache_tree *extent_cache)
10303 {
10304         struct extent_record *rec;
10305         struct cache_extent *cache;
10306         int ret = 0;
10307         int had_dups = 0;
10308         int err = 0;
10309
10310         if (repair) {
10311                 /*
10312                  * if we're doing a repair, we have to make sure
10313                  * we don't allocate from the problem extents.
10314                  * In the worst case, this will be all the
10315                  * extents in the FS
10316                  */
10317                 cache = search_cache_extent(extent_cache, 0);
10318                 while(cache) {
10319                         rec = container_of(cache, struct extent_record, cache);
10320                         set_extent_dirty(root->fs_info->excluded_extents,
10321                                          rec->start,
10322                                          rec->start + rec->max_size - 1);
10323                         cache = next_cache_extent(cache);
10324                 }
10325
10326                 /* pin down all the corrupted blocks too */
10327                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10328                 while(cache) {
10329                         set_extent_dirty(root->fs_info->excluded_extents,
10330                                          cache->start,
10331                                          cache->start + cache->size - 1);
10332                         cache = next_cache_extent(cache);
10333                 }
10334                 prune_corrupt_blocks(root->fs_info);
10335                 reset_cached_block_groups(root->fs_info);
10336         }
10337
10338         reset_cached_block_groups(root->fs_info);
10339
10340         /*
10341          * We need to delete any duplicate entries we find first otherwise we
10342          * could mess up the extent tree when we have backrefs that actually
10343          * belong to a different extent item and not the weird duplicate one.
10344          */
10345         while (repair && !list_empty(&duplicate_extents)) {
10346                 rec = to_extent_record(duplicate_extents.next);
10347                 list_del_init(&rec->list);
10348
10349                 /* Sometimes we can find a backref before we find an actual
10350                  * extent, so we need to process it a little bit to see if there
10351                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10352                  * if this is a backref screwup.  If we need to delete stuff
10353                  * process_duplicates() will return 0, otherwise it will return
10354                  * 1 and we
10355                  */
10356                 if (process_duplicates(extent_cache, rec))
10357                         continue;
10358                 ret = delete_duplicate_records(root, rec);
10359                 if (ret < 0)
10360                         return ret;
10361                 /*
10362                  * delete_duplicate_records will return the number of entries
10363                  * deleted, so if it's greater than 0 then we know we actually
10364                  * did something and we need to remove.
10365                  */
10366                 if (ret)
10367                         had_dups = 1;
10368         }
10369
10370         if (had_dups)
10371                 return -EAGAIN;
10372
10373         while(1) {
10374                 int cur_err = 0;
10375                 int fix = 0;
10376
10377                 cache = search_cache_extent(extent_cache, 0);
10378                 if (!cache)
10379                         break;
10380                 rec = container_of(cache, struct extent_record, cache);
10381                 if (rec->num_duplicates) {
10382                         fprintf(stderr, "extent item %llu has multiple extent "
10383                                 "items\n", (unsigned long long)rec->start);
10384                         cur_err = 1;
10385                 }
10386
10387                 if (rec->refs != rec->extent_item_refs) {
10388                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
10389                                 (unsigned long long)rec->start,
10390                                 (unsigned long long)rec->nr);
10391                         fprintf(stderr, "extent item %llu, found %llu\n",
10392                                 (unsigned long long)rec->extent_item_refs,
10393                                 (unsigned long long)rec->refs);
10394                         ret = record_orphan_data_extents(root->fs_info, rec);
10395                         if (ret < 0)
10396                                 goto repair_abort;
10397                         fix = ret;
10398                         cur_err = 1;
10399                 }
10400                 if (all_backpointers_checked(rec, 1)) {
10401                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10402                                 (unsigned long long)rec->start,
10403                                 (unsigned long long)rec->nr);
10404                         fix = 1;
10405                         cur_err = 1;
10406                 }
10407                 if (!rec->owner_ref_checked) {
10408                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10409                                 (unsigned long long)rec->start,
10410                                 (unsigned long long)rec->nr);
10411                         fix = 1;
10412                         cur_err = 1;
10413                 }
10414
10415                 if (repair && fix) {
10416                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10417                         if (ret)
10418                                 goto repair_abort;
10419                 }
10420
10421
10422                 if (rec->bad_full_backref) {
10423                         fprintf(stderr, "bad full backref, on [%llu]\n",
10424                                 (unsigned long long)rec->start);
10425                         if (repair) {
10426                                 ret = fixup_extent_flags(root->fs_info, rec);
10427                                 if (ret)
10428                                         goto repair_abort;
10429                                 fix = 1;
10430                         }
10431                         cur_err = 1;
10432                 }
10433                 /*
10434                  * Although it's not a extent ref's problem, we reuse this
10435                  * routine for error reporting.
10436                  * No repair function yet.
10437                  */
10438                 if (rec->crossing_stripes) {
10439                         fprintf(stderr,
10440                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10441                                 rec->start, rec->start + rec->max_size);
10442                         cur_err = 1;
10443                 }
10444
10445                 if (rec->wrong_chunk_type) {
10446                         fprintf(stderr,
10447                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
10448                                 rec->start, rec->start + rec->max_size);
10449                         cur_err = 1;
10450                 }
10451
10452                 err = cur_err;
10453                 remove_cache_extent(extent_cache, cache);
10454                 free_all_extent_backrefs(rec);
10455                 if (!init_extent_tree && repair && (!cur_err || fix))
10456                         clear_extent_dirty(root->fs_info->excluded_extents,
10457                                            rec->start,
10458                                            rec->start + rec->max_size - 1);
10459                 free(rec);
10460         }
10461 repair_abort:
10462         if (repair) {
10463                 if (ret && ret != -EAGAIN) {
10464                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10465                         exit(1);
10466                 } else if (!ret) {
10467                         struct btrfs_trans_handle *trans;
10468
10469                         root = root->fs_info->extent_root;
10470                         trans = btrfs_start_transaction(root, 1);
10471                         if (IS_ERR(trans)) {
10472                                 ret = PTR_ERR(trans);
10473                                 goto repair_abort;
10474                         }
10475
10476                         ret = btrfs_fix_block_accounting(trans, root);
10477                         if (ret)
10478                                 goto repair_abort;
10479                         ret = btrfs_commit_transaction(trans, root);
10480                         if (ret)
10481                                 goto repair_abort;
10482                 }
10483                 return ret;
10484         }
10485
10486         if (err)
10487                 err = -EIO;
10488         return err;
10489 }
10490
10491 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10492 {
10493         u64 stripe_size;
10494
10495         if (type & BTRFS_BLOCK_GROUP_RAID0) {
10496                 stripe_size = length;
10497                 stripe_size /= num_stripes;
10498         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10499                 stripe_size = length * 2;
10500                 stripe_size /= num_stripes;
10501         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10502                 stripe_size = length;
10503                 stripe_size /= (num_stripes - 1);
10504         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10505                 stripe_size = length;
10506                 stripe_size /= (num_stripes - 2);
10507         } else {
10508                 stripe_size = length;
10509         }
10510         return stripe_size;
10511 }
10512
10513 /*
10514  * Check the chunk with its block group/dev list ref:
10515  * Return 0 if all refs seems valid.
10516  * Return 1 if part of refs seems valid, need later check for rebuild ref
10517  * like missing block group and needs to search extent tree to rebuild them.
10518  * Return -1 if essential refs are missing and unable to rebuild.
10519  */
10520 static int check_chunk_refs(struct chunk_record *chunk_rec,
10521                             struct block_group_tree *block_group_cache,
10522                             struct device_extent_tree *dev_extent_cache,
10523                             int silent)
10524 {
10525         struct cache_extent *block_group_item;
10526         struct block_group_record *block_group_rec;
10527         struct cache_extent *dev_extent_item;
10528         struct device_extent_record *dev_extent_rec;
10529         u64 devid;
10530         u64 offset;
10531         u64 length;
10532         int metadump_v2 = 0;
10533         int i;
10534         int ret = 0;
10535
10536         block_group_item = lookup_cache_extent(&block_group_cache->tree,
10537                                                chunk_rec->offset,
10538                                                chunk_rec->length);
10539         if (block_group_item) {
10540                 block_group_rec = container_of(block_group_item,
10541                                                struct block_group_record,
10542                                                cache);
10543                 if (chunk_rec->length != block_group_rec->offset ||
10544                     chunk_rec->offset != block_group_rec->objectid ||
10545                     (!metadump_v2 &&
10546                      chunk_rec->type_flags != block_group_rec->flags)) {
10547                         if (!silent)
10548                                 fprintf(stderr,
10549                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10550                                         chunk_rec->objectid,
10551                                         chunk_rec->type,
10552                                         chunk_rec->offset,
10553                                         chunk_rec->length,
10554                                         chunk_rec->offset,
10555                                         chunk_rec->type_flags,
10556                                         block_group_rec->objectid,
10557                                         block_group_rec->type,
10558                                         block_group_rec->offset,
10559                                         block_group_rec->offset,
10560                                         block_group_rec->objectid,
10561                                         block_group_rec->flags);
10562                         ret = -1;
10563                 } else {
10564                         list_del_init(&block_group_rec->list);
10565                         chunk_rec->bg_rec = block_group_rec;
10566                 }
10567         } else {
10568                 if (!silent)
10569                         fprintf(stderr,
10570                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10571                                 chunk_rec->objectid,
10572                                 chunk_rec->type,
10573                                 chunk_rec->offset,
10574                                 chunk_rec->length,
10575                                 chunk_rec->offset,
10576                                 chunk_rec->type_flags);
10577                 ret = 1;
10578         }
10579
10580         if (metadump_v2)
10581                 return ret;
10582
10583         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10584                                     chunk_rec->num_stripes);
10585         for (i = 0; i < chunk_rec->num_stripes; ++i) {
10586                 devid = chunk_rec->stripes[i].devid;
10587                 offset = chunk_rec->stripes[i].offset;
10588                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10589                                                        devid, offset, length);
10590                 if (dev_extent_item) {
10591                         dev_extent_rec = container_of(dev_extent_item,
10592                                                 struct device_extent_record,
10593                                                 cache);
10594                         if (dev_extent_rec->objectid != devid ||
10595                             dev_extent_rec->offset != offset ||
10596                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
10597                             dev_extent_rec->length != length) {
10598                                 if (!silent)
10599                                         fprintf(stderr,
10600                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10601                                                 chunk_rec->objectid,
10602                                                 chunk_rec->type,
10603                                                 chunk_rec->offset,
10604                                                 chunk_rec->stripes[i].devid,
10605                                                 chunk_rec->stripes[i].offset,
10606                                                 dev_extent_rec->objectid,
10607                                                 dev_extent_rec->offset,
10608                                                 dev_extent_rec->length);
10609                                 ret = -1;
10610                         } else {
10611                                 list_move(&dev_extent_rec->chunk_list,
10612                                           &chunk_rec->dextents);
10613                         }
10614                 } else {
10615                         if (!silent)
10616                                 fprintf(stderr,
10617                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10618                                         chunk_rec->objectid,
10619                                         chunk_rec->type,
10620                                         chunk_rec->offset,
10621                                         chunk_rec->stripes[i].devid,
10622                                         chunk_rec->stripes[i].offset);
10623                         ret = -1;
10624                 }
10625         }
10626         return ret;
10627 }
10628
10629 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
10630 int check_chunks(struct cache_tree *chunk_cache,
10631                  struct block_group_tree *block_group_cache,
10632                  struct device_extent_tree *dev_extent_cache,
10633                  struct list_head *good, struct list_head *bad,
10634                  struct list_head *rebuild, int silent)
10635 {
10636         struct cache_extent *chunk_item;
10637         struct chunk_record *chunk_rec;
10638         struct block_group_record *bg_rec;
10639         struct device_extent_record *dext_rec;
10640         int err;
10641         int ret = 0;
10642
10643         chunk_item = first_cache_extent(chunk_cache);
10644         while (chunk_item) {
10645                 chunk_rec = container_of(chunk_item, struct chunk_record,
10646                                          cache);
10647                 err = check_chunk_refs(chunk_rec, block_group_cache,
10648                                        dev_extent_cache, silent);
10649                 if (err < 0)
10650                         ret = err;
10651                 if (err == 0 && good)
10652                         list_add_tail(&chunk_rec->list, good);
10653                 if (err > 0 && rebuild)
10654                         list_add_tail(&chunk_rec->list, rebuild);
10655                 if (err < 0 && bad)
10656                         list_add_tail(&chunk_rec->list, bad);
10657                 chunk_item = next_cache_extent(chunk_item);
10658         }
10659
10660         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10661                 if (!silent)
10662                         fprintf(stderr,
10663                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10664                                 bg_rec->objectid,
10665                                 bg_rec->offset,
10666                                 bg_rec->flags);
10667                 if (!ret)
10668                         ret = 1;
10669         }
10670
10671         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10672                             chunk_list) {
10673                 if (!silent)
10674                         fprintf(stderr,
10675                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10676                                 dext_rec->objectid,
10677                                 dext_rec->offset,
10678                                 dext_rec->length);
10679                 if (!ret)
10680                         ret = 1;
10681         }
10682         return ret;
10683 }
10684
10685
10686 static int check_device_used(struct device_record *dev_rec,
10687                              struct device_extent_tree *dext_cache)
10688 {
10689         struct cache_extent *cache;
10690         struct device_extent_record *dev_extent_rec;
10691         u64 total_byte = 0;
10692
10693         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10694         while (cache) {
10695                 dev_extent_rec = container_of(cache,
10696                                               struct device_extent_record,
10697                                               cache);
10698                 if (dev_extent_rec->objectid != dev_rec->devid)
10699                         break;
10700
10701                 list_del_init(&dev_extent_rec->device_list);
10702                 total_byte += dev_extent_rec->length;
10703                 cache = next_cache_extent(cache);
10704         }
10705
10706         if (total_byte != dev_rec->byte_used) {
10707                 fprintf(stderr,
10708                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10709                         total_byte, dev_rec->byte_used, dev_rec->objectid,
10710                         dev_rec->type, dev_rec->offset);
10711                 return -1;
10712         } else {
10713                 return 0;
10714         }
10715 }
10716
10717 /*
10718  * Unlike device size alignment check above, some super total_bytes check
10719  * failure can lead to mount failure for newer kernel.
10720  *
10721  * So this function will return the error for a fatal super total_bytes problem.
10722  */
10723 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
10724 {
10725         struct btrfs_device *dev;
10726         struct list_head *dev_list = &fs_info->fs_devices->devices;
10727         u64 total_bytes = 0;
10728         u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
10729
10730         list_for_each_entry(dev, dev_list, dev_list)
10731                 total_bytes += dev->total_bytes;
10732
10733         /* Important check, which can cause unmountable fs */
10734         if (super_bytes < total_bytes) {
10735                 error("super total bytes %llu smaller than real device(s) size %llu",
10736                         super_bytes, total_bytes);
10737                 error("mounting this fs may fail for newer kernels");
10738                 error("this can be fixed by 'btrfs rescue fix-device-size'");
10739                 return false;
10740         }
10741
10742         /*
10743          * Optional check, just to make everything aligned and match with each
10744          * other.
10745          *
10746          * For a btrfs-image restored fs, we don't need to check it anyway.
10747          */
10748         if (btrfs_super_flags(fs_info->super_copy) &
10749             (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
10750                 return true;
10751         if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
10752             !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
10753             super_bytes != total_bytes) {
10754                 warning("minor unaligned/mismatch device size detected");
10755                 warning(
10756                 "recommended to use 'btrfs rescue fix-device-size' to fix it");
10757         }
10758         return true;
10759 }
10760
10761 /* check btrfs_dev_item -> btrfs_dev_extent */
10762 static int check_devices(struct rb_root *dev_cache,
10763                          struct device_extent_tree *dev_extent_cache)
10764 {
10765         struct rb_node *dev_node;
10766         struct device_record *dev_rec;
10767         struct device_extent_record *dext_rec;
10768         int err;
10769         int ret = 0;
10770
10771         dev_node = rb_first(dev_cache);
10772         while (dev_node) {
10773                 dev_rec = container_of(dev_node, struct device_record, node);
10774                 err = check_device_used(dev_rec, dev_extent_cache);
10775                 if (err)
10776                         ret = err;
10777
10778                 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
10779                                          global_info->sectorsize);
10780                 dev_node = rb_next(dev_node);
10781         }
10782         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10783                             device_list) {
10784                 fprintf(stderr,
10785                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10786                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
10787                 if (!ret)
10788                         ret = 1;
10789         }
10790         return ret;
10791 }
10792
10793 static int add_root_item_to_list(struct list_head *head,
10794                                   u64 objectid, u64 bytenr, u64 last_snapshot,
10795                                   u8 level, u8 drop_level,
10796                                   struct btrfs_key *drop_key)
10797 {
10798
10799         struct root_item_record *ri_rec;
10800         ri_rec = malloc(sizeof(*ri_rec));
10801         if (!ri_rec)
10802                 return -ENOMEM;
10803         ri_rec->bytenr = bytenr;
10804         ri_rec->objectid = objectid;
10805         ri_rec->level = level;
10806         ri_rec->drop_level = drop_level;
10807         ri_rec->last_snapshot = last_snapshot;
10808         if (drop_key)
10809                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10810         list_add_tail(&ri_rec->list, head);
10811
10812         return 0;
10813 }
10814
10815 static void free_root_item_list(struct list_head *list)
10816 {
10817         struct root_item_record *ri_rec;
10818
10819         while (!list_empty(list)) {
10820                 ri_rec = list_first_entry(list, struct root_item_record,
10821                                           list);
10822                 list_del_init(&ri_rec->list);
10823                 free(ri_rec);
10824         }
10825 }
10826
10827 static int deal_root_from_list(struct list_head *list,
10828                                struct btrfs_root *root,
10829                                struct block_info *bits,
10830                                int bits_nr,
10831                                struct cache_tree *pending,
10832                                struct cache_tree *seen,
10833                                struct cache_tree *reada,
10834                                struct cache_tree *nodes,
10835                                struct cache_tree *extent_cache,
10836                                struct cache_tree *chunk_cache,
10837                                struct rb_root *dev_cache,
10838                                struct block_group_tree *block_group_cache,
10839                                struct device_extent_tree *dev_extent_cache)
10840 {
10841         int ret = 0;
10842         u64 last;
10843
10844         while (!list_empty(list)) {
10845                 struct root_item_record *rec;
10846                 struct extent_buffer *buf;
10847                 rec = list_entry(list->next,
10848                                  struct root_item_record, list);
10849                 last = 0;
10850                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10851                 if (!extent_buffer_uptodate(buf)) {
10852                         free_extent_buffer(buf);
10853                         ret = -EIO;
10854                         break;
10855                 }
10856                 ret = add_root_to_pending(buf, extent_cache, pending,
10857                                     seen, nodes, rec->objectid);
10858                 if (ret < 0)
10859                         break;
10860                 /*
10861                  * To rebuild extent tree, we need deal with snapshot
10862                  * one by one, otherwise we deal with node firstly which
10863                  * can maximize readahead.
10864                  */
10865                 while (1) {
10866                         ret = run_next_block(root, bits, bits_nr, &last,
10867                                              pending, seen, reada, nodes,
10868                                              extent_cache, chunk_cache,
10869                                              dev_cache, block_group_cache,
10870                                              dev_extent_cache, rec);
10871                         if (ret != 0)
10872                                 break;
10873                 }
10874                 free_extent_buffer(buf);
10875                 list_del(&rec->list);
10876                 free(rec);
10877                 if (ret < 0)
10878                         break;
10879         }
10880         while (ret >= 0) {
10881                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10882                                      reada, nodes, extent_cache, chunk_cache,
10883                                      dev_cache, block_group_cache,
10884                                      dev_extent_cache, NULL);
10885                 if (ret != 0) {
10886                         if (ret > 0)
10887                                 ret = 0;
10888                         break;
10889                 }
10890         }
10891         return ret;
10892 }
10893
10894 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10895 {
10896         struct rb_root dev_cache;
10897         struct cache_tree chunk_cache;
10898         struct block_group_tree block_group_cache;
10899         struct device_extent_tree dev_extent_cache;
10900         struct cache_tree extent_cache;
10901         struct cache_tree seen;
10902         struct cache_tree pending;
10903         struct cache_tree reada;
10904         struct cache_tree nodes;
10905         struct extent_io_tree excluded_extents;
10906         struct cache_tree corrupt_blocks;
10907         struct btrfs_path path;
10908         struct btrfs_key key;
10909         struct btrfs_key found_key;
10910         int ret, err = 0;
10911         struct block_info *bits;
10912         int bits_nr;
10913         struct extent_buffer *leaf;
10914         int slot;
10915         struct btrfs_root_item ri;
10916         struct list_head dropping_trees;
10917         struct list_head normal_trees;
10918         struct btrfs_root *root1;
10919         struct btrfs_root *root;
10920         u64 objectid;
10921         u8 level;
10922
10923         root = fs_info->fs_root;
10924         dev_cache = RB_ROOT;
10925         cache_tree_init(&chunk_cache);
10926         block_group_tree_init(&block_group_cache);
10927         device_extent_tree_init(&dev_extent_cache);
10928
10929         cache_tree_init(&extent_cache);
10930         cache_tree_init(&seen);
10931         cache_tree_init(&pending);
10932         cache_tree_init(&nodes);
10933         cache_tree_init(&reada);
10934         cache_tree_init(&corrupt_blocks);
10935         extent_io_tree_init(&excluded_extents);
10936         INIT_LIST_HEAD(&dropping_trees);
10937         INIT_LIST_HEAD(&normal_trees);
10938
10939         if (repair) {
10940                 fs_info->excluded_extents = &excluded_extents;
10941                 fs_info->fsck_extent_cache = &extent_cache;
10942                 fs_info->free_extent_hook = free_extent_hook;
10943                 fs_info->corrupt_blocks = &corrupt_blocks;
10944         }
10945
10946         bits_nr = 1024;
10947         bits = malloc(bits_nr * sizeof(struct block_info));
10948         if (!bits) {
10949                 perror("malloc");
10950                 exit(1);
10951         }
10952
10953         if (ctx.progress_enabled) {
10954                 ctx.tp = TASK_EXTENTS;
10955                 task_start(ctx.info);
10956         }
10957
10958 again:
10959         root1 = fs_info->tree_root;
10960         level = btrfs_header_level(root1->node);
10961         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10962                                     root1->node->start, 0, level, 0, NULL);
10963         if (ret < 0)
10964                 goto out;
10965         root1 = fs_info->chunk_root;
10966         level = btrfs_header_level(root1->node);
10967         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10968                                     root1->node->start, 0, level, 0, NULL);
10969         if (ret < 0)
10970                 goto out;
10971         btrfs_init_path(&path);
10972         key.offset = 0;
10973         key.objectid = 0;
10974         key.type = BTRFS_ROOT_ITEM_KEY;
10975         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10976         if (ret < 0)
10977                 goto out;
10978         while(1) {
10979                 leaf = path.nodes[0];
10980                 slot = path.slots[0];
10981                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10982                         ret = btrfs_next_leaf(root, &path);
10983                         if (ret != 0)
10984                                 break;
10985                         leaf = path.nodes[0];
10986                         slot = path.slots[0];
10987                 }
10988                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10989                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10990                         unsigned long offset;
10991                         u64 last_snapshot;
10992
10993                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10994                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10995                         last_snapshot = btrfs_root_last_snapshot(&ri);
10996                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10997                                 level = btrfs_root_level(&ri);
10998                                 ret = add_root_item_to_list(&normal_trees,
10999                                                 found_key.objectid,
11000                                                 btrfs_root_bytenr(&ri),
11001                                                 last_snapshot, level,
11002                                                 0, NULL);
11003                                 if (ret < 0)
11004                                         goto out;
11005                         } else {
11006                                 level = btrfs_root_level(&ri);
11007                                 objectid = found_key.objectid;
11008                                 btrfs_disk_key_to_cpu(&found_key,
11009                                                       &ri.drop_progress);
11010                                 ret = add_root_item_to_list(&dropping_trees,
11011                                                 objectid,
11012                                                 btrfs_root_bytenr(&ri),
11013                                                 last_snapshot, level,
11014                                                 ri.drop_level, &found_key);
11015                                 if (ret < 0)
11016                                         goto out;
11017                         }
11018                 }
11019                 path.slots[0]++;
11020         }
11021         btrfs_release_path(&path);
11022
11023         /*
11024          * check_block can return -EAGAIN if it fixes something, please keep
11025          * this in mind when dealing with return values from these functions, if
11026          * we get -EAGAIN we want to fall through and restart the loop.
11027          */
11028         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11029                                   &seen, &reada, &nodes, &extent_cache,
11030                                   &chunk_cache, &dev_cache, &block_group_cache,
11031                                   &dev_extent_cache);
11032         if (ret < 0) {
11033                 if (ret == -EAGAIN)
11034                         goto loop;
11035                 goto out;
11036         }
11037         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11038                                   &pending, &seen, &reada, &nodes,
11039                                   &extent_cache, &chunk_cache, &dev_cache,
11040                                   &block_group_cache, &dev_extent_cache);
11041         if (ret < 0) {
11042                 if (ret == -EAGAIN)
11043                         goto loop;
11044                 goto out;
11045         }
11046
11047         ret = check_chunks(&chunk_cache, &block_group_cache,
11048                            &dev_extent_cache, NULL, NULL, NULL, 0);
11049         if (ret) {
11050                 if (ret == -EAGAIN)
11051                         goto loop;
11052                 err = ret;
11053         }
11054
11055         ret = check_extent_refs(root, &extent_cache);
11056         if (ret < 0) {
11057                 if (ret == -EAGAIN)
11058                         goto loop;
11059                 goto out;
11060         }
11061
11062         ret = check_devices(&dev_cache, &dev_extent_cache);
11063         if (ret && err)
11064                 ret = err;
11065
11066 out:
11067         task_stop(ctx.info);
11068         if (repair) {
11069                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11070                 extent_io_tree_cleanup(&excluded_extents);
11071                 fs_info->fsck_extent_cache = NULL;
11072                 fs_info->free_extent_hook = NULL;
11073                 fs_info->corrupt_blocks = NULL;
11074                 fs_info->excluded_extents = NULL;
11075         }
11076         free(bits);
11077         free_chunk_cache_tree(&chunk_cache);
11078         free_device_cache_tree(&dev_cache);
11079         free_block_group_tree(&block_group_cache);
11080         free_device_extent_tree(&dev_extent_cache);
11081         free_extent_cache_tree(&seen);
11082         free_extent_cache_tree(&pending);
11083         free_extent_cache_tree(&reada);
11084         free_extent_cache_tree(&nodes);
11085         free_root_item_list(&normal_trees);
11086         free_root_item_list(&dropping_trees);
11087         return ret;
11088 loop:
11089         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11090         free_extent_cache_tree(&seen);
11091         free_extent_cache_tree(&pending);
11092         free_extent_cache_tree(&reada);
11093         free_extent_cache_tree(&nodes);
11094         free_chunk_cache_tree(&chunk_cache);
11095         free_block_group_tree(&block_group_cache);
11096         free_device_cache_tree(&dev_cache);
11097         free_device_extent_tree(&dev_extent_cache);
11098         free_extent_record_cache(&extent_cache);
11099         free_root_item_list(&normal_trees);
11100         free_root_item_list(&dropping_trees);
11101         extent_io_tree_cleanup(&excluded_extents);
11102         goto again;
11103 }
11104
11105 static int check_extent_inline_ref(struct extent_buffer *eb,
11106                    struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11107 {
11108         int ret;
11109         u8 type = btrfs_extent_inline_ref_type(eb, iref);
11110
11111         switch (type) {
11112         case BTRFS_TREE_BLOCK_REF_KEY:
11113         case BTRFS_EXTENT_DATA_REF_KEY:
11114         case BTRFS_SHARED_BLOCK_REF_KEY:
11115         case BTRFS_SHARED_DATA_REF_KEY:
11116                 ret = 0;
11117                 break;
11118         default:
11119                 error("extent[%llu %u %llu] has unknown ref type: %d",
11120                       key->objectid, key->type, key->offset, type);
11121                 ret = UNKNOWN_TYPE;
11122                 break;
11123         }
11124
11125         return ret;
11126 }
11127
11128 /*
11129  * Check backrefs of a tree block given by @bytenr or @eb.
11130  *
11131  * @root:       the root containing the @bytenr or @eb
11132  * @eb:         tree block extent buffer, can be NULL
11133  * @bytenr:     bytenr of the tree block to search
11134  * @level:      tree level of the tree block
11135  * @owner:      owner of the tree block
11136  *
11137  * Return >0 for any error found and output error message
11138  * Return 0 for no error found
11139  */
11140 static int check_tree_block_ref(struct btrfs_root *root,
11141                                 struct extent_buffer *eb, u64 bytenr,
11142                                 int level, u64 owner, struct node_refs *nrefs)
11143 {
11144         struct btrfs_key key;
11145         struct btrfs_root *extent_root = root->fs_info->extent_root;
11146         struct btrfs_path path;
11147         struct btrfs_extent_item *ei;
11148         struct btrfs_extent_inline_ref *iref;
11149         struct extent_buffer *leaf;
11150         unsigned long end;
11151         unsigned long ptr;
11152         int slot;
11153         int skinny_level;
11154         int root_level = btrfs_header_level(root->node);
11155         int type;
11156         u32 nodesize = root->fs_info->nodesize;
11157         u32 item_size;
11158         u64 offset;
11159         int found_ref = 0;
11160         int err = 0;
11161         int ret;
11162         int strict = 1;
11163         int parent = 0;
11164
11165         btrfs_init_path(&path);
11166         key.objectid = bytenr;
11167         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11168                 key.type = BTRFS_METADATA_ITEM_KEY;
11169         else
11170                 key.type = BTRFS_EXTENT_ITEM_KEY;
11171         key.offset = (u64)-1;
11172
11173         /* Search for the backref in extent tree */
11174         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11175         if (ret < 0) {
11176                 err |= BACKREF_MISSING;
11177                 goto out;
11178         }
11179         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11180         if (ret) {
11181                 err |= BACKREF_MISSING;
11182                 goto out;
11183         }
11184
11185         leaf = path.nodes[0];
11186         slot = path.slots[0];
11187         btrfs_item_key_to_cpu(leaf, &key, slot);
11188
11189         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11190
11191         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11192                 skinny_level = (int)key.offset;
11193                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11194         } else {
11195                 struct btrfs_tree_block_info *info;
11196
11197                 info = (struct btrfs_tree_block_info *)(ei + 1);
11198                 skinny_level = btrfs_tree_block_level(leaf, info);
11199                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11200         }
11201
11202
11203         if (eb) {
11204                 u64 header_gen;
11205                 u64 extent_gen;
11206
11207                 /*
11208                  * Due to the feature of shared tree blocks, if the upper node
11209                  * is a fs root or shared node, the extent of checked node may
11210                  * not be updated until the next CoW.
11211                  */
11212                 if (nrefs)
11213                         strict = should_check_extent_strictly(root, nrefs,
11214                                         level);
11215                 if (!(btrfs_extent_flags(leaf, ei) &
11216                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11217                         error(
11218                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11219                                 key.objectid, nodesize,
11220                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11221                         err = BACKREF_MISMATCH;
11222                 }
11223                 header_gen = btrfs_header_generation(eb);
11224                 extent_gen = btrfs_extent_generation(leaf, ei);
11225                 if (header_gen != extent_gen) {
11226                         error(
11227         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11228                                 key.objectid, nodesize, header_gen,
11229                                 extent_gen);
11230                         err = BACKREF_MISMATCH;
11231                 }
11232                 if (level != skinny_level) {
11233                         error(
11234                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11235                                 key.objectid, nodesize, level, skinny_level);
11236                         err = BACKREF_MISMATCH;
11237                 }
11238                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11239                         error(
11240                         "extent[%llu %u] is referred by other roots than %llu",
11241                                 key.objectid, nodesize, root->objectid);
11242                         err = BACKREF_MISMATCH;
11243                 }
11244         }
11245
11246         /*
11247          * Iterate the extent/metadata item to find the exact backref
11248          */
11249         item_size = btrfs_item_size_nr(leaf, slot);
11250         ptr = (unsigned long)iref;
11251         end = (unsigned long)ei + item_size;
11252
11253         while (ptr < end) {
11254                 iref = (struct btrfs_extent_inline_ref *)ptr;
11255                 type = btrfs_extent_inline_ref_type(leaf, iref);
11256                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11257
11258                 ret = check_extent_inline_ref(leaf, &key, iref);
11259                 if (ret) {
11260                         err |= ret;
11261                         break;
11262                 }
11263                 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11264                         if (offset == root->objectid)
11265                                 found_ref = 1;
11266                         if (!strict && owner == offset)
11267                                 found_ref = 1;
11268                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11269                         /*
11270                          * Backref of tree reloc root points to itself, no need
11271                          * to check backref any more.
11272                          *
11273                          * This may be an error of loop backref, but extent tree
11274                          * checker should have already handled it.
11275                          * Here we only need to avoid infinite iteration.
11276                          */
11277                         if (offset == bytenr) {
11278                                 found_ref = 1;
11279                         } else {
11280                                 /*
11281                                  * Check if the backref points to valid
11282                                  * referencer
11283                                  */
11284                                 found_ref = !check_tree_block_ref( root, NULL,
11285                                                 offset, level + 1, owner,
11286                                                 NULL);
11287                         }
11288                 }
11289
11290                 if (found_ref)
11291                         break;
11292                 ptr += btrfs_extent_inline_ref_size(type);
11293         }
11294
11295         /*
11296          * Inlined extent item doesn't have what we need, check
11297          * TREE_BLOCK_REF_KEY
11298          */
11299         if (!found_ref) {
11300                 btrfs_release_path(&path);
11301                 key.objectid = bytenr;
11302                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11303                 key.offset = root->objectid;
11304
11305                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11306                 if (!ret)
11307                         found_ref = 1;
11308         }
11309         /*
11310          * Finally check SHARED BLOCK REF, any found will be good
11311          * Here we're not doing comprehensive extent backref checking,
11312          * only need to ensure there is some extent referring to this
11313          * tree block.
11314          */
11315         if (!found_ref) {
11316                 btrfs_release_path(&path);
11317                 key.objectid = bytenr;
11318                 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
11319                 key.offset = (u64)-1;
11320
11321                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11322                 if (ret < 0) {
11323                         err |= BACKREF_MISSING;
11324                         goto out;
11325                 }
11326                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11327                 if (ret) {
11328                         err |= BACKREF_MISSING;
11329                         goto out;
11330                 }
11331                 found_ref = 1;
11332         }
11333         if (!found_ref)
11334                 err |= BACKREF_MISSING;
11335 out:
11336         btrfs_release_path(&path);
11337         if (nrefs && strict &&
11338             level < root_level && nrefs->full_backref[level + 1])
11339                 parent = nrefs->bytenr[level + 1];
11340         if (eb && (err & BACKREF_MISSING))
11341                 error(
11342         "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11343                       bytenr, nodesize, owner, level,
11344                       parent ? "parent" : "root",
11345                       parent ? parent : root->objectid);
11346         return err;
11347 }
11348
11349 /*
11350  * If @err contains BACKREF_MISSING then add extent of the
11351  * file_extent_data_item.
11352  *
11353  * Returns error bits after reapir.
11354  */
11355 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11356                                    struct btrfs_root *root,
11357                                    struct btrfs_path *pathp,
11358                                    struct node_refs *nrefs,
11359                                    int err)
11360 {
11361         struct btrfs_file_extent_item *fi;
11362         struct btrfs_key fi_key;
11363         struct btrfs_key key;
11364         struct btrfs_extent_item *ei;
11365         struct btrfs_path path;
11366         struct btrfs_root *extent_root = root->fs_info->extent_root;
11367         struct extent_buffer *eb;
11368         u64 size;
11369         u64 disk_bytenr;
11370         u64 num_bytes;
11371         u64 parent;
11372         u64 offset;
11373         u64 extent_offset;
11374         u64 file_offset;
11375         int generation;
11376         int slot;
11377         int ret = 0;
11378
11379         eb = pathp->nodes[0];
11380         slot = pathp->slots[0];
11381         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11382         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11383
11384         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11385             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11386                 return err;
11387
11388         file_offset = fi_key.offset;
11389         generation = btrfs_file_extent_generation(eb, fi);
11390         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11391         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11392         extent_offset = btrfs_file_extent_offset(eb, fi);
11393         offset = file_offset - extent_offset;
11394
11395         /* now repair only adds backref */
11396         if ((err & BACKREF_MISSING) == 0)
11397                 return err;
11398
11399         /* search extent item */
11400         key.objectid = disk_bytenr;
11401         key.type = BTRFS_EXTENT_ITEM_KEY;
11402         key.offset = num_bytes;
11403
11404         btrfs_init_path(&path);
11405         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11406         if (ret < 0) {
11407                 ret = -EIO;
11408                 goto out;
11409         }
11410
11411         /* insert an extent item */
11412         if (ret > 0) {
11413                 key.objectid = disk_bytenr;
11414                 key.type = BTRFS_EXTENT_ITEM_KEY;
11415                 key.offset = num_bytes;
11416                 size = sizeof(*ei);
11417
11418                 btrfs_release_path(&path);
11419                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11420                                               size);
11421                 if (ret)
11422                         goto out;
11423                 eb = path.nodes[0];
11424                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
11425
11426                 btrfs_set_extent_refs(eb, ei, 0);
11427                 btrfs_set_extent_generation(eb, ei, generation);
11428                 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11429
11430                 btrfs_mark_buffer_dirty(eb);
11431                 ret = btrfs_update_block_group(extent_root, disk_bytenr,
11432                                                num_bytes, 1, 0);
11433                 btrfs_release_path(&path);
11434         }
11435
11436         if (nrefs->full_backref[0])
11437                 parent = btrfs_header_bytenr(eb);
11438         else
11439                 parent = 0;
11440
11441         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11442                                    root->objectid,
11443                    parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11444                                    offset);
11445         if (ret) {
11446                 error(
11447                 "failed to increase extent data backref[%llu %llu] root %llu",
11448                       disk_bytenr, num_bytes, root->objectid);
11449                 goto out;
11450         } else {
11451                 printf("Add one extent data backref [%llu %llu]\n",
11452                        disk_bytenr, num_bytes);
11453         }
11454
11455         err &= ~BACKREF_MISSING;
11456 out:
11457         if (ret)
11458                 error("can't repair root %llu extent data item[%llu %llu]",
11459                       root->objectid, disk_bytenr, num_bytes);
11460         return err;
11461 }
11462
11463 /*
11464  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11465  *
11466  * Return >0 any error found and output error message
11467  * Return 0 for no error found
11468  */
11469 static int check_extent_data_item(struct btrfs_root *root,
11470                                   struct btrfs_path *pathp,
11471                                   struct node_refs *nrefs,  int account_bytes)
11472 {
11473         struct btrfs_file_extent_item *fi;
11474         struct extent_buffer *eb = pathp->nodes[0];
11475         struct btrfs_path path;
11476         struct btrfs_root *extent_root = root->fs_info->extent_root;
11477         struct btrfs_key fi_key;
11478         struct btrfs_key dbref_key;
11479         struct extent_buffer *leaf;
11480         struct btrfs_extent_item *ei;
11481         struct btrfs_extent_inline_ref *iref;
11482         struct btrfs_extent_data_ref *dref;
11483         u64 owner;
11484         u64 disk_bytenr;
11485         u64 disk_num_bytes;
11486         u64 extent_num_bytes;
11487         u64 extent_flags;
11488         u64 offset;
11489         u32 item_size;
11490         unsigned long end;
11491         unsigned long ptr;
11492         int type;
11493         int found_dbackref = 0;
11494         int slot = pathp->slots[0];
11495         int err = 0;
11496         int ret;
11497         int strict;
11498
11499         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11500         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11501
11502         /* Nothing to check for hole and inline data extents */
11503         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11504             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11505                 return 0;
11506
11507         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11508         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11509         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11510         offset = btrfs_file_extent_offset(eb, fi);
11511
11512         /* Check unaligned disk_num_bytes and num_bytes */
11513         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11514                 error(
11515 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11516                         fi_key.objectid, fi_key.offset, disk_num_bytes,
11517                         root->fs_info->sectorsize);
11518                 err |= BYTES_UNALIGNED;
11519         } else if (account_bytes) {
11520                 data_bytes_allocated += disk_num_bytes;
11521         }
11522         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11523                 error(
11524 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11525                         fi_key.objectid, fi_key.offset, extent_num_bytes,
11526                         root->fs_info->sectorsize);
11527                 err |= BYTES_UNALIGNED;
11528         } else if (account_bytes) {
11529                 data_bytes_referenced += extent_num_bytes;
11530         }
11531         owner = btrfs_header_owner(eb);
11532
11533         /* Check the extent item of the file extent in extent tree */
11534         btrfs_init_path(&path);
11535         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11536         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11537         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11538
11539         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11540         if (ret)
11541                 goto out;
11542
11543         leaf = path.nodes[0];
11544         slot = path.slots[0];
11545         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11546
11547         extent_flags = btrfs_extent_flags(leaf, ei);
11548
11549         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11550                 error(
11551                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11552                     disk_bytenr, disk_num_bytes,
11553                     BTRFS_EXTENT_FLAG_DATA);
11554                 err |= BACKREF_MISMATCH;
11555         }
11556
11557         /* Check data backref inside that extent item */
11558         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11559         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11560         ptr = (unsigned long)iref;
11561         end = (unsigned long)ei + item_size;
11562         strict = should_check_extent_strictly(root, nrefs, -1);
11563
11564         while (ptr < end) {
11565                 u64 ref_root;
11566                 u64 ref_objectid;
11567                 u64 ref_offset;
11568                 bool match = false;
11569
11570                 iref = (struct btrfs_extent_inline_ref *)ptr;
11571                 type = btrfs_extent_inline_ref_type(leaf, iref);
11572                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11573
11574                 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
11575                 if (ret) {
11576                         err |= ret;
11577                         break;
11578                 }
11579                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11580                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
11581                         ref_objectid = btrfs_extent_data_ref_objectid(leaf, dref);
11582                         ref_offset = btrfs_extent_data_ref_offset(leaf, dref);
11583
11584                         if (ref_objectid == fi_key.objectid &&
11585                             ref_offset == fi_key.offset - offset)
11586                                 match = true;
11587                         if (ref_root == root->objectid && match)
11588                                 found_dbackref = 1;
11589                         else if (!strict && owner == ref_root && match)
11590                                 found_dbackref = 1;
11591                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
11592                         found_dbackref = !check_tree_block_ref(root, NULL,
11593                                 btrfs_extent_inline_ref_offset(leaf, iref),
11594                                 0, owner, NULL);
11595                 }
11596
11597                 if (found_dbackref)
11598                         break;
11599                 ptr += btrfs_extent_inline_ref_size(type);
11600         }
11601
11602         if (!found_dbackref) {
11603                 btrfs_release_path(&path);
11604
11605                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11606                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11607                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
11608                 dbref_key.offset = hash_extent_data_ref(root->objectid,
11609                                 fi_key.objectid, fi_key.offset - offset);
11610
11611                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11612                                         &dbref_key, &path, 0, 0);
11613                 if (!ret) {
11614                         found_dbackref = 1;
11615                         goto out;
11616                 }
11617
11618                 btrfs_release_path(&path);
11619
11620                 /*
11621                  * Neither inlined nor EXTENT_DATA_REF found, try
11622                  * SHARED_DATA_REF as last chance.
11623                  */
11624                 dbref_key.objectid = disk_bytenr;
11625                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11626                 dbref_key.offset = eb->start;
11627
11628                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11629                                         &dbref_key, &path, 0, 0);
11630                 if (!ret) {
11631                         found_dbackref = 1;
11632                         goto out;
11633                 }
11634         }
11635
11636 out:
11637         if (!found_dbackref)
11638                 err |= BACKREF_MISSING;
11639         btrfs_release_path(&path);
11640         if (err & BACKREF_MISSING) {
11641                 error("data extent[%llu %llu] backref lost",
11642                       disk_bytenr, disk_num_bytes);
11643         }
11644         return err;
11645 }
11646
11647 /*
11648  * Get real tree block level for the case like shared block
11649  * Return >= 0 as tree level
11650  * Return <0 for error
11651  */
11652 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11653 {
11654         struct extent_buffer *eb;
11655         struct btrfs_path path;
11656         struct btrfs_key key;
11657         struct btrfs_extent_item *ei;
11658         u64 flags;
11659         u64 transid;
11660         u8 backref_level;
11661         u8 header_level;
11662         int ret;
11663
11664         /* Search extent tree for extent generation and level */
11665         key.objectid = bytenr;
11666         key.type = BTRFS_METADATA_ITEM_KEY;
11667         key.offset = (u64)-1;
11668
11669         btrfs_init_path(&path);
11670         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11671         if (ret < 0)
11672                 goto release_out;
11673         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11674         if (ret < 0)
11675                 goto release_out;
11676         if (ret > 0) {
11677                 ret = -ENOENT;
11678                 goto release_out;
11679         }
11680
11681         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11682         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11683                             struct btrfs_extent_item);
11684         flags = btrfs_extent_flags(path.nodes[0], ei);
11685         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11686                 ret = -ENOENT;
11687                 goto release_out;
11688         }
11689
11690         /* Get transid for later read_tree_block() check */
11691         transid = btrfs_extent_generation(path.nodes[0], ei);
11692
11693         /* Get backref level as one source */
11694         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11695                 backref_level = key.offset;
11696         } else {
11697                 struct btrfs_tree_block_info *info;
11698
11699                 info = (struct btrfs_tree_block_info *)(ei + 1);
11700                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11701         }
11702         btrfs_release_path(&path);
11703
11704         /* Get level from tree block as an alternative source */
11705         eb = read_tree_block(fs_info, bytenr, transid);
11706         if (!extent_buffer_uptodate(eb)) {
11707                 free_extent_buffer(eb);
11708                 return -EIO;
11709         }
11710         header_level = btrfs_header_level(eb);
11711         free_extent_buffer(eb);
11712
11713         if (header_level != backref_level)
11714                 return -EIO;
11715         return header_level;
11716
11717 release_out:
11718         btrfs_release_path(&path);
11719         return ret;
11720 }
11721
11722 /*
11723  * Check if a tree block backref is valid (points to a valid tree block)
11724  * if level == -1, level will be resolved
11725  * Return >0 for any error found and print error message
11726  */
11727 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11728                                     u64 bytenr, int level)
11729 {
11730         struct btrfs_root *root;
11731         struct btrfs_key key;
11732         struct btrfs_path path;
11733         struct extent_buffer *eb;
11734         struct extent_buffer *node;
11735         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11736         int err = 0;
11737         int ret;
11738
11739         /* Query level for level == -1 special case */
11740         if (level == -1)
11741                 level = query_tree_block_level(fs_info, bytenr);
11742         if (level < 0) {
11743                 err |= REFERENCER_MISSING;
11744                 goto out;
11745         }
11746
11747         key.objectid = root_id;
11748         key.type = BTRFS_ROOT_ITEM_KEY;
11749         key.offset = (u64)-1;
11750
11751         root = btrfs_read_fs_root(fs_info, &key);
11752         if (IS_ERR(root)) {
11753                 err |= REFERENCER_MISSING;
11754                 goto out;
11755         }
11756
11757         /* Read out the tree block to get item/node key */
11758         eb = read_tree_block(fs_info, bytenr, 0);
11759         if (!extent_buffer_uptodate(eb)) {
11760                 err |= REFERENCER_MISSING;
11761                 free_extent_buffer(eb);
11762                 goto out;
11763         }
11764
11765         /* Empty tree, no need to check key */
11766         if (!btrfs_header_nritems(eb) && !level) {
11767                 free_extent_buffer(eb);
11768                 goto out;
11769         }
11770
11771         if (level)
11772                 btrfs_node_key_to_cpu(eb, &key, 0);
11773         else
11774                 btrfs_item_key_to_cpu(eb, &key, 0);
11775
11776         free_extent_buffer(eb);
11777
11778         btrfs_init_path(&path);
11779         path.lowest_level = level;
11780         /* Search with the first key, to ensure we can reach it */
11781         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11782         if (ret < 0) {
11783                 err |= REFERENCER_MISSING;
11784                 goto release_out;
11785         }
11786
11787         node = path.nodes[level];
11788         if (btrfs_header_bytenr(node) != bytenr) {
11789                 error(
11790         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11791                         bytenr, nodesize, bytenr,
11792                         btrfs_header_bytenr(node));
11793                 err |= REFERENCER_MISMATCH;
11794         }
11795         if (btrfs_header_level(node) != level) {
11796                 error(
11797         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11798                         bytenr, nodesize, level,
11799                         btrfs_header_level(node));
11800                 err |= REFERENCER_MISMATCH;
11801         }
11802
11803 release_out:
11804         btrfs_release_path(&path);
11805 out:
11806         if (err & REFERENCER_MISSING) {
11807                 if (level < 0)
11808                         error("extent [%llu %d] lost referencer (owner: %llu)",
11809                                 bytenr, nodesize, root_id);
11810                 else
11811                         error(
11812                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11813                                 bytenr, nodesize, root_id, level);
11814         }
11815
11816         return err;
11817 }
11818
11819 /*
11820  * Check if tree block @eb is tree reloc root.
11821  * Return 0 if it's not or any problem happens
11822  * Return 1 if it's a tree reloc root
11823  */
11824 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11825                                  struct extent_buffer *eb)
11826 {
11827         struct btrfs_root *tree_reloc_root;
11828         struct btrfs_key key;
11829         u64 bytenr = btrfs_header_bytenr(eb);
11830         u64 owner = btrfs_header_owner(eb);
11831         int ret = 0;
11832
11833         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11834         key.offset = owner;
11835         key.type = BTRFS_ROOT_ITEM_KEY;
11836
11837         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11838         if (IS_ERR(tree_reloc_root))
11839                 return 0;
11840
11841         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
11842                 ret = 1;
11843         btrfs_free_fs_root(tree_reloc_root);
11844         return ret;
11845 }
11846
11847 /*
11848  * Check referencer for shared block backref
11849  * If level == -1, this function will resolve the level.
11850  */
11851 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11852                                      u64 parent, u64 bytenr, int level)
11853 {
11854         struct extent_buffer *eb;
11855         u32 nr;
11856         int found_parent = 0;
11857         int i;
11858
11859         eb = read_tree_block(fs_info, parent, 0);
11860         if (!extent_buffer_uptodate(eb))
11861                 goto out;
11862
11863         if (level == -1)
11864                 level = query_tree_block_level(fs_info, bytenr);
11865         if (level < 0)
11866                 goto out;
11867
11868         /* It's possible it's a tree reloc root */
11869         if (parent == bytenr) {
11870                 if (is_tree_reloc_root(fs_info, eb))
11871                         found_parent = 1;
11872                 goto out;
11873         }
11874
11875         if (level + 1 != btrfs_header_level(eb))
11876                 goto out;
11877
11878         nr = btrfs_header_nritems(eb);
11879         for (i = 0; i < nr; i++) {
11880                 if (bytenr == btrfs_node_blockptr(eb, i)) {
11881                         found_parent = 1;
11882                         break;
11883                 }
11884         }
11885 out:
11886         free_extent_buffer(eb);
11887         if (!found_parent) {
11888                 error(
11889         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
11890                         bytenr, fs_info->nodesize, parent, level);
11891                 return REFERENCER_MISSING;
11892         }
11893         return 0;
11894 }
11895
11896 /*
11897  * Check referencer for normal (inlined) data ref
11898  * If len == 0, it will be resolved by searching in extent tree
11899  */
11900 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
11901                                      u64 root_id, u64 objectid, u64 offset,
11902                                      u64 bytenr, u64 len, u32 count)
11903 {
11904         struct btrfs_root *root;
11905         struct btrfs_root *extent_root = fs_info->extent_root;
11906         struct btrfs_key key;
11907         struct btrfs_path path;
11908         struct extent_buffer *leaf;
11909         struct btrfs_file_extent_item *fi;
11910         u32 found_count = 0;
11911         int slot;
11912         int ret = 0;
11913
11914         if (!len) {
11915                 key.objectid = bytenr;
11916                 key.type = BTRFS_EXTENT_ITEM_KEY;
11917                 key.offset = (u64)-1;
11918
11919                 btrfs_init_path(&path);
11920                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11921                 if (ret < 0)
11922                         goto out;
11923                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11924                 if (ret)
11925                         goto out;
11926                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11927                 if (key.objectid != bytenr ||
11928                     key.type != BTRFS_EXTENT_ITEM_KEY)
11929                         goto out;
11930                 len = key.offset;
11931                 btrfs_release_path(&path);
11932         }
11933         key.objectid = root_id;
11934         key.type = BTRFS_ROOT_ITEM_KEY;
11935         key.offset = (u64)-1;
11936         btrfs_init_path(&path);
11937
11938         root = btrfs_read_fs_root(fs_info, &key);
11939         if (IS_ERR(root))
11940                 goto out;
11941
11942         key.objectid = objectid;
11943         key.type = BTRFS_EXTENT_DATA_KEY;
11944         /*
11945          * It can be nasty as data backref offset is
11946          * file offset - file extent offset, which is smaller or
11947          * equal to original backref offset.  The only special case is
11948          * overflow.  So we need to special check and do further search.
11949          */
11950         key.offset = offset & (1ULL << 63) ? 0 : offset;
11951
11952         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11953         if (ret < 0)
11954                 goto out;
11955
11956         /*
11957          * Search afterwards to get correct one
11958          * NOTE: As we must do a comprehensive check on the data backref to
11959          * make sure the dref count also matches, we must iterate all file
11960          * extents for that inode.
11961          */
11962         while (1) {
11963                 leaf = path.nodes[0];
11964                 slot = path.slots[0];
11965
11966                 if (slot >= btrfs_header_nritems(leaf) ||
11967                     btrfs_header_owner(leaf) != root_id)
11968                         goto next;
11969                 btrfs_item_key_to_cpu(leaf, &key, slot);
11970                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11971                         break;
11972                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11973                 /*
11974                  * Except normal disk bytenr and disk num bytes, we still
11975                  * need to do extra check on dbackref offset as
11976                  * dbackref offset = file_offset - file_extent_offset
11977                  *
11978                  * Also, we must check the leaf owner.
11979                  * In case of shared tree blocks (snapshots) we can inherit
11980                  * leaves from source snapshot.
11981                  * In that case, reference from source snapshot should not
11982                  * count.
11983                  */
11984                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11985                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11986                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11987                     offset && btrfs_header_owner(leaf) == root_id)
11988                         found_count++;
11989
11990 next:
11991                 ret = btrfs_next_item(root, &path);
11992                 if (ret)
11993                         break;
11994         }
11995 out:
11996         btrfs_release_path(&path);
11997         if (found_count != count) {
11998                 error(
11999 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
12000                         bytenr, len, root_id, objectid, offset, count, found_count);
12001                 return REFERENCER_MISSING;
12002         }
12003         return 0;
12004 }
12005
12006 /*
12007  * Check if the referencer of a shared data backref exists
12008  */
12009 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12010                                      u64 parent, u64 bytenr)
12011 {
12012         struct extent_buffer *eb;
12013         struct btrfs_key key;
12014         struct btrfs_file_extent_item *fi;
12015         u32 nr;
12016         int found_parent = 0;
12017         int i;
12018
12019         eb = read_tree_block(fs_info, parent, 0);
12020         if (!extent_buffer_uptodate(eb))
12021                 goto out;
12022
12023         nr = btrfs_header_nritems(eb);
12024         for (i = 0; i < nr; i++) {
12025                 btrfs_item_key_to_cpu(eb, &key, i);
12026                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12027                         continue;
12028
12029                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
12030                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12031                         continue;
12032
12033                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12034                         found_parent = 1;
12035                         break;
12036                 }
12037         }
12038
12039 out:
12040         free_extent_buffer(eb);
12041         if (!found_parent) {
12042                 error("shared extent %llu referencer lost (parent: %llu)",
12043                         bytenr, parent);
12044                 return REFERENCER_MISSING;
12045         }
12046         return 0;
12047 }
12048
12049 /*
12050  * Only delete backref if REFERENCER_MISSING now
12051  *
12052  * Returns <0   the extent was deleted
12053  * Returns >0   the backref was deleted but extent still exists, returned value
12054  *               means error after repair
12055  * Returns  0   nothing happened
12056  */
12057 static int repair_extent_item(struct btrfs_trans_handle *trans,
12058                       struct btrfs_root *root, struct btrfs_path *path,
12059                       u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12060                       u64 owner, u64 offset, int err)
12061 {
12062         struct btrfs_key old_key;
12063         int freed = 0;
12064         int ret;
12065
12066         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
12067
12068         if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12069                 /* delete the backref */
12070                 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12071                           num_bytes, parent, root_objectid, owner, offset);
12072                 if (!ret) {
12073                         freed = 1;
12074                         err &= ~REFERENCER_MISSING;
12075                         printf("Delete backref in extent [%llu %llu]\n",
12076                                bytenr, num_bytes);
12077                 } else {
12078                         error("fail to delete backref in extent [%llu %llu]",
12079                                bytenr, num_bytes);
12080                 }
12081         }
12082
12083         /* btrfs_free_extent may delete the extent */
12084         btrfs_release_path(path);
12085         ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12086
12087         if (ret)
12088                 ret = -ENOENT;
12089         else if (freed)
12090                 ret = err;
12091         return ret;
12092 }
12093
12094 /*
12095  * This function will check a given extent item, including its backref and
12096  * itself (like crossing stripe boundary and type)
12097  *
12098  * Since we don't use extent_record anymore, introduce new error bit
12099  */
12100 static int check_extent_item(struct btrfs_trans_handle *trans,
12101                              struct btrfs_fs_info *fs_info,
12102                              struct btrfs_path *path)
12103 {
12104         struct btrfs_extent_item *ei;
12105         struct btrfs_extent_inline_ref *iref;
12106         struct btrfs_extent_data_ref *dref;
12107         struct extent_buffer *eb = path->nodes[0];
12108         unsigned long end;
12109         unsigned long ptr;
12110         int slot = path->slots[0];
12111         int type;
12112         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12113         u32 item_size = btrfs_item_size_nr(eb, slot);
12114         u64 flags;
12115         u64 offset;
12116         u64 parent;
12117         u64 num_bytes;
12118         u64 root_objectid;
12119         u64 owner;
12120         u64 owner_offset;
12121         int metadata = 0;
12122         int level;
12123         struct btrfs_key key;
12124         int ret;
12125         int err = 0;
12126
12127         btrfs_item_key_to_cpu(eb, &key, slot);
12128         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12129                 bytes_used += key.offset;
12130                 num_bytes = key.offset;
12131         } else {
12132                 bytes_used += nodesize;
12133                 num_bytes = nodesize;
12134         }
12135
12136         if (item_size < sizeof(*ei)) {
12137                 /*
12138                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12139                  * old thing when on disk format is still un-determined.
12140                  * No need to care about it anymore
12141                  */
12142                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12143                 return -ENOTTY;
12144         }
12145
12146         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12147         flags = btrfs_extent_flags(eb, ei);
12148
12149         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12150                 metadata = 1;
12151         if (metadata && check_crossing_stripes(global_info, key.objectid,
12152                                                eb->len)) {
12153                 error("bad metadata [%llu, %llu) crossing stripe boundary",
12154                       key.objectid, key.objectid + nodesize);
12155                 err |= CROSSING_STRIPE_BOUNDARY;
12156         }
12157
12158         ptr = (unsigned long)(ei + 1);
12159
12160         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12161                 /* Old EXTENT_ITEM metadata */
12162                 struct btrfs_tree_block_info *info;
12163
12164                 info = (struct btrfs_tree_block_info *)ptr;
12165                 level = btrfs_tree_block_level(eb, info);
12166                 ptr += sizeof(struct btrfs_tree_block_info);
12167         } else {
12168                 /* New METADATA_ITEM */
12169                 level = key.offset;
12170         }
12171         end = (unsigned long)ei + item_size;
12172
12173 next:
12174         /* Reached extent item end normally */
12175         if (ptr == end)
12176                 goto out;
12177
12178         /* Beyond extent item end, wrong item size */
12179         if (ptr > end) {
12180                 err |= ITEM_SIZE_MISMATCH;
12181                 error("extent item at bytenr %llu slot %d has wrong size",
12182                         eb->start, slot);
12183                 goto out;
12184         }
12185
12186         parent = 0;
12187         root_objectid = 0;
12188         owner = 0;
12189         owner_offset = 0;
12190         /* Now check every backref in this extent item */
12191         iref = (struct btrfs_extent_inline_ref *)ptr;
12192         type = btrfs_extent_inline_ref_type(eb, iref);
12193         offset = btrfs_extent_inline_ref_offset(eb, iref);
12194         switch (type) {
12195         case BTRFS_TREE_BLOCK_REF_KEY:
12196                 root_objectid = offset;
12197                 owner = level;
12198                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12199                                                level);
12200                 err |= ret;
12201                 break;
12202         case BTRFS_SHARED_BLOCK_REF_KEY:
12203                 parent = offset;
12204                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12205                                                  level);
12206                 err |= ret;
12207                 break;
12208         case BTRFS_EXTENT_DATA_REF_KEY:
12209                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12210                 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12211                 owner = btrfs_extent_data_ref_objectid(eb, dref);
12212                 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12213                 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12214                                         owner_offset, key.objectid, key.offset,
12215                                         btrfs_extent_data_ref_count(eb, dref));
12216                 err |= ret;
12217                 break;
12218         case BTRFS_SHARED_DATA_REF_KEY:
12219                 parent = offset;
12220                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12221                 err |= ret;
12222                 break;
12223         default:
12224                 error("extent[%llu %d %llu] has unknown ref type: %d",
12225                         key.objectid, key.type, key.offset, type);
12226                 ret = UNKNOWN_TYPE;
12227                 err |= ret;
12228                 goto out;
12229         }
12230
12231         if (err && repair) {
12232                 ret = repair_extent_item(trans, fs_info->extent_root, path,
12233                          key.objectid, num_bytes, parent, root_objectid,
12234                          owner, owner_offset, ret);
12235                 if (ret < 0)
12236                         goto out;
12237                 if (ret) {
12238                         goto next;
12239                         err = ret;
12240                 }
12241         }
12242
12243         ptr += btrfs_extent_inline_ref_size(type);
12244         goto next;
12245
12246 out:
12247         return err;
12248 }
12249
12250 /*
12251  * Check if a dev extent item is referred correctly by its chunk
12252  */
12253 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12254                                  struct extent_buffer *eb, int slot)
12255 {
12256         struct btrfs_root *chunk_root = fs_info->chunk_root;
12257         struct btrfs_dev_extent *ptr;
12258         struct btrfs_path path;
12259         struct btrfs_key chunk_key;
12260         struct btrfs_key devext_key;
12261         struct btrfs_chunk *chunk;
12262         struct extent_buffer *l;
12263         int num_stripes;
12264         u64 length;
12265         int i;
12266         int found_chunk = 0;
12267         int ret;
12268
12269         btrfs_item_key_to_cpu(eb, &devext_key, slot);
12270         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12271         length = btrfs_dev_extent_length(eb, ptr);
12272
12273         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12274         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12275         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12276
12277         btrfs_init_path(&path);
12278         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12279         if (ret)
12280                 goto out;
12281
12282         l = path.nodes[0];
12283         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12284         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12285                                       chunk_key.offset);
12286         if (ret < 0)
12287                 goto out;
12288
12289         if (btrfs_stripe_length(fs_info, l, chunk) != length)
12290                 goto out;
12291
12292         num_stripes = btrfs_chunk_num_stripes(l, chunk);
12293         for (i = 0; i < num_stripes; i++) {
12294                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12295                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12296
12297                 if (devid == devext_key.objectid &&
12298                     offset == devext_key.offset) {
12299                         found_chunk = 1;
12300                         break;
12301                 }
12302         }
12303 out:
12304         btrfs_release_path(&path);
12305         if (!found_chunk) {
12306                 error(
12307                 "device extent[%llu, %llu, %llu] did not find the related chunk",
12308                         devext_key.objectid, devext_key.offset, length);
12309                 return REFERENCER_MISSING;
12310         }
12311         return 0;
12312 }
12313
12314 /*
12315  * Check if the used space is correct with the dev item
12316  */
12317 static int check_dev_item(struct btrfs_fs_info *fs_info,
12318                           struct extent_buffer *eb, int slot)
12319 {
12320         struct btrfs_root *dev_root = fs_info->dev_root;
12321         struct btrfs_dev_item *dev_item;
12322         struct btrfs_path path;
12323         struct btrfs_key key;
12324         struct btrfs_dev_extent *ptr;
12325         u64 total_bytes;
12326         u64 dev_id;
12327         u64 used;
12328         u64 total = 0;
12329         int ret;
12330
12331         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12332         dev_id = btrfs_device_id(eb, dev_item);
12333         used = btrfs_device_bytes_used(eb, dev_item);
12334         total_bytes = btrfs_device_total_bytes(eb, dev_item);
12335
12336         key.objectid = dev_id;
12337         key.type = BTRFS_DEV_EXTENT_KEY;
12338         key.offset = 0;
12339
12340         btrfs_init_path(&path);
12341         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
12342         if (ret < 0) {
12343                 btrfs_item_key_to_cpu(eb, &key, slot);
12344                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12345                         key.objectid, key.type, key.offset);
12346                 btrfs_release_path(&path);
12347                 return REFERENCER_MISSING;
12348         }
12349
12350         /* Iterate dev_extents to calculate the used space of a device */
12351         while (1) {
12352                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12353                         goto next;
12354
12355                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12356                 if (key.objectid > dev_id)
12357                         break;
12358                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12359                         goto next;
12360
12361                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12362                                      struct btrfs_dev_extent);
12363                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12364 next:
12365                 ret = btrfs_next_item(dev_root, &path);
12366                 if (ret)
12367                         break;
12368         }
12369         btrfs_release_path(&path);
12370
12371         if (used != total) {
12372                 btrfs_item_key_to_cpu(eb, &key, slot);
12373                 error(
12374 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12375                         total, used, BTRFS_ROOT_TREE_OBJECTID,
12376                         BTRFS_DEV_EXTENT_KEY, dev_id);
12377                 return ACCOUNTING_MISMATCH;
12378         }
12379         check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize);
12380
12381         return 0;
12382 }
12383
12384 /*
12385  * Check a block group item with its referener (chunk) and its used space
12386  * with extent/metadata item
12387  */
12388 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12389                                   struct extent_buffer *eb, int slot)
12390 {
12391         struct btrfs_root *extent_root = fs_info->extent_root;
12392         struct btrfs_root *chunk_root = fs_info->chunk_root;
12393         struct btrfs_block_group_item *bi;
12394         struct btrfs_block_group_item bg_item;
12395         struct btrfs_path path;
12396         struct btrfs_key bg_key;
12397         struct btrfs_key chunk_key;
12398         struct btrfs_key extent_key;
12399         struct btrfs_chunk *chunk;
12400         struct extent_buffer *leaf;
12401         struct btrfs_extent_item *ei;
12402         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12403         u64 flags;
12404         u64 bg_flags;
12405         u64 used;
12406         u64 total = 0;
12407         int ret;
12408         int err = 0;
12409
12410         btrfs_item_key_to_cpu(eb, &bg_key, slot);
12411         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
12412         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12413         used = btrfs_block_group_used(&bg_item);
12414         bg_flags = btrfs_block_group_flags(&bg_item);
12415
12416         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12417         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12418         chunk_key.offset = bg_key.objectid;
12419
12420         btrfs_init_path(&path);
12421         /* Search for the referencer chunk */
12422         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12423         if (ret) {
12424                 error(
12425                 "block group[%llu %llu] did not find the related chunk item",
12426                         bg_key.objectid, bg_key.offset);
12427                 err |= REFERENCER_MISSING;
12428         } else {
12429                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12430                                         struct btrfs_chunk);
12431                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12432                                                 bg_key.offset) {
12433                         error(
12434         "block group[%llu %llu] related chunk item length does not match",
12435                                 bg_key.objectid, bg_key.offset);
12436                         err |= REFERENCER_MISMATCH;
12437                 }
12438         }
12439         btrfs_release_path(&path);
12440
12441         /* Search from the block group bytenr */
12442         extent_key.objectid = bg_key.objectid;
12443         extent_key.type = 0;
12444         extent_key.offset = 0;
12445
12446         btrfs_init_path(&path);
12447         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12448         if (ret < 0)
12449                 goto out;
12450
12451         /* Iterate extent tree to account used space */
12452         while (1) {
12453                 leaf = path.nodes[0];
12454
12455                 /* Search slot can point to the last item beyond leaf nritems */
12456                 if (path.slots[0] >= btrfs_header_nritems(leaf))
12457                         goto next;
12458
12459                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
12460                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12461                         break;
12462
12463                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12464                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12465                         goto next;
12466                 if (extent_key.objectid < bg_key.objectid)
12467                         goto next;
12468
12469                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12470                         total += nodesize;
12471                 else
12472                         total += extent_key.offset;
12473
12474                 ei = btrfs_item_ptr(leaf, path.slots[0],
12475                                     struct btrfs_extent_item);
12476                 flags = btrfs_extent_flags(leaf, ei);
12477                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12478                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12479                                 error(
12480                         "bad extent[%llu, %llu) type mismatch with chunk",
12481                                         extent_key.objectid,
12482                                         extent_key.objectid + extent_key.offset);
12483                                 err |= CHUNK_TYPE_MISMATCH;
12484                         }
12485                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12486                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12487                                     BTRFS_BLOCK_GROUP_METADATA))) {
12488                                 error(
12489                         "bad extent[%llu, %llu) type mismatch with chunk",
12490                                         extent_key.objectid,
12491                                         extent_key.objectid + nodesize);
12492                                 err |= CHUNK_TYPE_MISMATCH;
12493                         }
12494                 }
12495 next:
12496                 ret = btrfs_next_item(extent_root, &path);
12497                 if (ret)
12498                         break;
12499         }
12500
12501 out:
12502         btrfs_release_path(&path);
12503
12504         if (total != used) {
12505                 error(
12506                 "block group[%llu %llu] used %llu but extent items used %llu",
12507                         bg_key.objectid, bg_key.offset, used, total);
12508                 err |= BG_ACCOUNTING_ERROR;
12509         }
12510         return err;
12511 }
12512
12513 /*
12514  * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
12515  * FIXME: We still need to repair error of dev_item.
12516  *
12517  * Returns error after repair.
12518  */
12519 static int repair_chunk_item(struct btrfs_trans_handle *trans,
12520                              struct btrfs_root *chunk_root,
12521                              struct btrfs_path *path, int err)
12522 {
12523         struct btrfs_chunk *chunk;
12524         struct btrfs_key chunk_key;
12525         struct extent_buffer *eb = path->nodes[0];
12526         u64 length;
12527         int slot = path->slots[0];
12528         u64 type;
12529         int ret = 0;
12530
12531         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12532         if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
12533                 return err;
12534         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12535         type = btrfs_chunk_type(path->nodes[0], chunk);
12536         length = btrfs_chunk_length(eb, chunk);
12537
12538         if (err & REFERENCER_MISSING) {
12539                 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
12540                                              type, chunk_key.offset, length);
12541                 if (ret) {
12542                         error("fail to add block group item[%llu %llu]",
12543                               chunk_key.offset, length);
12544                         goto out;
12545                 } else {
12546                         err &= ~REFERENCER_MISSING;
12547                         printf("Added block group item[%llu %llu]\n",
12548                                chunk_key.offset, length);
12549                 }
12550         }
12551
12552 out:
12553         return err;
12554 }
12555
12556 /*
12557  * Check a chunk item.
12558  * Including checking all referred dev_extents and block group
12559  */
12560 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12561                             struct extent_buffer *eb, int slot)
12562 {
12563         struct btrfs_root *extent_root = fs_info->extent_root;
12564         struct btrfs_root *dev_root = fs_info->dev_root;
12565         struct btrfs_path path;
12566         struct btrfs_key chunk_key;
12567         struct btrfs_key bg_key;
12568         struct btrfs_key devext_key;
12569         struct btrfs_chunk *chunk;
12570         struct extent_buffer *leaf;
12571         struct btrfs_block_group_item *bi;
12572         struct btrfs_block_group_item bg_item;
12573         struct btrfs_dev_extent *ptr;
12574         u64 length;
12575         u64 chunk_end;
12576         u64 stripe_len;
12577         u64 type;
12578         int num_stripes;
12579         u64 offset;
12580         u64 objectid;
12581         int i;
12582         int ret;
12583         int err = 0;
12584
12585         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12586         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12587         length = btrfs_chunk_length(eb, chunk);
12588         chunk_end = chunk_key.offset + length;
12589         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12590                                       chunk_key.offset);
12591         if (ret < 0) {
12592                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12593                         chunk_end);
12594                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12595                 goto out;
12596         }
12597         type = btrfs_chunk_type(eb, chunk);
12598
12599         bg_key.objectid = chunk_key.offset;
12600         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12601         bg_key.offset = length;
12602
12603         btrfs_init_path(&path);
12604         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12605         if (ret) {
12606                 error(
12607                 "chunk[%llu %llu) did not find the related block group item",
12608                         chunk_key.offset, chunk_end);
12609                 err |= REFERENCER_MISSING;
12610         } else{
12611                 leaf = path.nodes[0];
12612                 bi = btrfs_item_ptr(leaf, path.slots[0],
12613                                     struct btrfs_block_group_item);
12614                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
12615                                    sizeof(bg_item));
12616                 if (btrfs_block_group_flags(&bg_item) != type) {
12617                         error(
12618 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12619                                 chunk_key.offset, chunk_end, type,
12620                                 btrfs_block_group_flags(&bg_item));
12621                         err |= REFERENCER_MISSING;
12622                 }
12623         }
12624
12625         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12626         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
12627         for (i = 0; i < num_stripes; i++) {
12628                 btrfs_release_path(&path);
12629                 btrfs_init_path(&path);
12630                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12631                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12632                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12633
12634                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12635                                         0, 0);
12636                 if (ret)
12637                         goto not_match_dev;
12638
12639                 leaf = path.nodes[0];
12640                 ptr = btrfs_item_ptr(leaf, path.slots[0],
12641                                      struct btrfs_dev_extent);
12642                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12643                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
12644                 if (objectid != chunk_key.objectid ||
12645                     offset != chunk_key.offset ||
12646                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12647                         goto not_match_dev;
12648                 continue;
12649 not_match_dev:
12650                 err |= BACKREF_MISSING;
12651                 error(
12652                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12653                         chunk_key.objectid, chunk_end, i);
12654                 continue;
12655         }
12656         btrfs_release_path(&path);
12657 out:
12658         return err;
12659 }
12660
12661 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
12662                                    struct btrfs_root *root,
12663                                    struct btrfs_path *path)
12664 {
12665         struct btrfs_key key;
12666         int ret = 0;
12667
12668         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
12669         btrfs_release_path(path);
12670         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
12671         if (ret) {
12672                 ret = -ENOENT;
12673                 goto out;
12674         }
12675
12676         ret = btrfs_del_item(trans, root, path);
12677         if (ret)
12678                 goto out;
12679
12680         if (path->slots[0] == 0)
12681                 btrfs_prev_leaf(root, path);
12682         else
12683                 path->slots[0]--;
12684 out:
12685         if (ret)
12686                 error("failed to delete root %llu item[%llu, %u, %llu]",
12687                       root->objectid, key.objectid, key.type, key.offset);
12688         else
12689                 printf("Deleted root %llu item[%llu, %u, %llu]\n",
12690                        root->objectid, key.objectid, key.type, key.offset);
12691         return ret;
12692 }
12693
12694 /*
12695  * Main entry function to check known items and update related accounting info
12696  */
12697 static int check_leaf_items(struct btrfs_trans_handle *trans,
12698                             struct btrfs_root *root, struct btrfs_path *path,
12699                             struct node_refs *nrefs, int account_bytes)
12700 {
12701         struct btrfs_fs_info *fs_info = root->fs_info;
12702         struct btrfs_key key;
12703         struct extent_buffer *eb;
12704         int slot;
12705         int type;
12706         struct btrfs_extent_data_ref *dref;
12707         int ret = 0;
12708         int err = 0;
12709
12710 again:
12711         eb = path->nodes[0];
12712         slot = path->slots[0];
12713         if (slot >= btrfs_header_nritems(eb)) {
12714                 if (slot == 0) {
12715                         error("empty leaf [%llu %u] root %llu", eb->start,
12716                                 root->fs_info->nodesize, root->objectid);
12717                         err |= EIO;
12718                 }
12719                 goto out;
12720         }
12721
12722         btrfs_item_key_to_cpu(eb, &key, slot);
12723         type = key.type;
12724
12725         switch (type) {
12726         case BTRFS_EXTENT_DATA_KEY:
12727                 ret = check_extent_data_item(root, path, nrefs, account_bytes);
12728                 if (repair && ret)
12729                         ret = repair_extent_data_item(trans, root, path, nrefs,
12730                                                       ret);
12731                 err |= ret;
12732                 break;
12733         case BTRFS_BLOCK_GROUP_ITEM_KEY:
12734                 ret = check_block_group_item(fs_info, eb, slot);
12735                 if (repair &&
12736                     ret & REFERENCER_MISSING)
12737                         ret = delete_extent_tree_item(trans, root, path);
12738                 err |= ret;
12739                 break;
12740         case BTRFS_DEV_ITEM_KEY:
12741                 ret = check_dev_item(fs_info, eb, slot);
12742                 err |= ret;
12743                 break;
12744         case BTRFS_CHUNK_ITEM_KEY:
12745                 ret = check_chunk_item(fs_info, eb, slot);
12746                 if (repair && ret)
12747                         ret = repair_chunk_item(trans, root, path, ret);
12748                 err |= ret;
12749                 break;
12750         case BTRFS_DEV_EXTENT_KEY:
12751                 ret = check_dev_extent_item(fs_info, eb, slot);
12752                 err |= ret;
12753                 break;
12754         case BTRFS_EXTENT_ITEM_KEY:
12755         case BTRFS_METADATA_ITEM_KEY:
12756                 ret = check_extent_item(trans, fs_info, path);
12757                 err |= ret;
12758                 break;
12759         case BTRFS_EXTENT_CSUM_KEY:
12760                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
12761                 err |= ret;
12762                 break;
12763         case BTRFS_TREE_BLOCK_REF_KEY:
12764                 ret = check_tree_block_backref(fs_info, key.offset,
12765                                                key.objectid, -1);
12766                 if (repair &&
12767                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12768                         ret = delete_extent_tree_item(trans, root, path);
12769                 err |= ret;
12770                 break;
12771         case BTRFS_EXTENT_DATA_REF_KEY:
12772                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
12773                 ret = check_extent_data_backref(fs_info,
12774                                 btrfs_extent_data_ref_root(eb, dref),
12775                                 btrfs_extent_data_ref_objectid(eb, dref),
12776                                 btrfs_extent_data_ref_offset(eb, dref),
12777                                 key.objectid, 0,
12778                                 btrfs_extent_data_ref_count(eb, dref));
12779                 if (repair &&
12780                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12781                         ret = delete_extent_tree_item(trans, root, path);
12782                 err |= ret;
12783                 break;
12784         case BTRFS_SHARED_BLOCK_REF_KEY:
12785                 ret = check_shared_block_backref(fs_info, key.offset,
12786                                                  key.objectid, -1);
12787                 if (repair &&
12788                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12789                         ret = delete_extent_tree_item(trans, root, path);
12790                 err |= ret;
12791                 break;
12792         case BTRFS_SHARED_DATA_REF_KEY:
12793                 ret = check_shared_data_backref(fs_info, key.offset,
12794                                                 key.objectid);
12795                 if (repair &&
12796                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12797                         ret = delete_extent_tree_item(trans, root, path);
12798                 err |= ret;
12799                 break;
12800         default:
12801                 break;
12802         }
12803
12804         ++path->slots[0];
12805         goto again;
12806 out:
12807         return err;
12808 }
12809
12810 /*
12811  * Low memory usage version check_chunks_and_extents.
12812  */
12813 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12814 {
12815         struct btrfs_trans_handle *trans = NULL;
12816         struct btrfs_path path;
12817         struct btrfs_key old_key;
12818         struct btrfs_key key;
12819         struct btrfs_root *root1;
12820         struct btrfs_root *root;
12821         struct btrfs_root *cur_root;
12822         int err = 0;
12823         int ret;
12824
12825         root = fs_info->fs_root;
12826
12827         if (repair) {
12828                 trans = btrfs_start_transaction(fs_info->extent_root, 1);
12829                 if (IS_ERR(trans)) {
12830                         error("failed to start transaction before check");
12831                         return PTR_ERR(trans);
12832                 }
12833         }
12834
12835         root1 = root->fs_info->chunk_root;
12836         ret = check_btrfs_root(trans, root1, 0, 1);
12837         err |= ret;
12838
12839         root1 = root->fs_info->tree_root;
12840         ret = check_btrfs_root(trans, root1, 0, 1);
12841         err |= ret;
12842
12843         btrfs_init_path(&path);
12844         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12845         key.offset = 0;
12846         key.type = BTRFS_ROOT_ITEM_KEY;
12847
12848         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
12849         if (ret) {
12850                 error("cannot find extent tree in tree_root");
12851                 goto out;
12852         }
12853
12854         while (1) {
12855                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12856                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12857                         goto next;
12858                 old_key = key;
12859                 key.offset = (u64)-1;
12860
12861                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12862                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
12863                                         &key);
12864                 else
12865                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
12866                 if (IS_ERR(cur_root) || !cur_root) {
12867                         error("failed to read tree: %lld", key.objectid);
12868                         goto next;
12869                 }
12870
12871                 ret = check_btrfs_root(trans, cur_root, 0, 1);
12872                 err |= ret;
12873
12874                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12875                         btrfs_free_fs_root(cur_root);
12876
12877                 btrfs_release_path(&path);
12878                 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
12879                                         &old_key, &path, 0, 0);
12880                 if (ret)
12881                         goto out;
12882 next:
12883                 ret = btrfs_next_item(root1, &path);
12884                 if (ret)
12885                         goto out;
12886         }
12887 out:
12888
12889         /* if repair, update block accounting */
12890         if (repair) {
12891                 ret = btrfs_fix_block_accounting(trans, root);
12892                 if (ret)
12893                         err |= ret;
12894                 else
12895                         err &= ~BG_ACCOUNTING_ERROR;
12896         }
12897
12898         if (trans)
12899                 btrfs_commit_transaction(trans, root->fs_info->extent_root);
12900
12901         btrfs_release_path(&path);
12902
12903         return err;
12904 }
12905
12906 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
12907 {
12908         int ret;
12909
12910         if (!ctx.progress_enabled)
12911                 fprintf(stderr, "checking extents\n");
12912         if (check_mode == CHECK_MODE_LOWMEM)
12913                 ret = check_chunks_and_extents_v2(fs_info);
12914         else
12915                 ret = check_chunks_and_extents(fs_info);
12916
12917         /* Also repair device size related problems */
12918         if (repair && !ret) {
12919                 ret = btrfs_fix_device_and_super_size(fs_info);
12920                 if (ret > 0)
12921                         ret = 0;
12922         }
12923         return ret;
12924 }
12925
12926 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
12927                            struct btrfs_root *root, int overwrite)
12928 {
12929         struct extent_buffer *c;
12930         struct extent_buffer *old = root->node;
12931         int level;
12932         int ret;
12933         struct btrfs_disk_key disk_key = {0,0,0};
12934
12935         level = 0;
12936
12937         if (overwrite) {
12938                 c = old;
12939                 extent_buffer_get(c);
12940                 goto init;
12941         }
12942         c = btrfs_alloc_free_block(trans, root,
12943                                    root->fs_info->nodesize,
12944                                    root->root_key.objectid,
12945                                    &disk_key, level, 0, 0);
12946         if (IS_ERR(c)) {
12947                 c = old;
12948                 extent_buffer_get(c);
12949                 overwrite = 1;
12950         }
12951 init:
12952         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
12953         btrfs_set_header_level(c, level);
12954         btrfs_set_header_bytenr(c, c->start);
12955         btrfs_set_header_generation(c, trans->transid);
12956         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
12957         btrfs_set_header_owner(c, root->root_key.objectid);
12958
12959         write_extent_buffer(c, root->fs_info->fsid,
12960                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
12961
12962         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
12963                             btrfs_header_chunk_tree_uuid(c),
12964                             BTRFS_UUID_SIZE);
12965
12966         btrfs_mark_buffer_dirty(c);
12967         /*
12968          * this case can happen in the following case:
12969          *
12970          * 1.overwrite previous root.
12971          *
12972          * 2.reinit reloc data root, this is because we skip pin
12973          * down reloc data tree before which means we can allocate
12974          * same block bytenr here.
12975          */
12976         if (old->start == c->start) {
12977                 btrfs_set_root_generation(&root->root_item,
12978                                           trans->transid);
12979                 root->root_item.level = btrfs_header_level(root->node);
12980                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12981                                         &root->root_key, &root->root_item);
12982                 if (ret) {
12983                         free_extent_buffer(c);
12984                         return ret;
12985                 }
12986         }
12987         free_extent_buffer(old);
12988         root->node = c;
12989         add_root_to_dirty_list(root);
12990         return 0;
12991 }
12992
12993 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12994                                 struct extent_buffer *eb, int tree_root)
12995 {
12996         struct extent_buffer *tmp;
12997         struct btrfs_root_item *ri;
12998         struct btrfs_key key;
12999         u64 bytenr;
13000         int level = btrfs_header_level(eb);
13001         int nritems;
13002         int ret;
13003         int i;
13004
13005         /*
13006          * If we have pinned this block before, don't pin it again.
13007          * This can not only avoid forever loop with broken filesystem
13008          * but also give us some speedups.
13009          */
13010         if (test_range_bit(&fs_info->pinned_extents, eb->start,
13011                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
13012                 return 0;
13013
13014         btrfs_pin_extent(fs_info, eb->start, eb->len);
13015
13016         nritems = btrfs_header_nritems(eb);
13017         for (i = 0; i < nritems; i++) {
13018                 if (level == 0) {
13019                         btrfs_item_key_to_cpu(eb, &key, i);
13020                         if (key.type != BTRFS_ROOT_ITEM_KEY)
13021                                 continue;
13022                         /* Skip the extent root and reloc roots */
13023                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13024                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13025                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
13026                                 continue;
13027                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13028                         bytenr = btrfs_disk_root_bytenr(eb, ri);
13029
13030                         /*
13031                          * If at any point we start needing the real root we
13032                          * will have to build a stump root for the root we are
13033                          * in, but for now this doesn't actually use the root so
13034                          * just pass in extent_root.
13035                          */
13036                         tmp = read_tree_block(fs_info, bytenr, 0);
13037                         if (!extent_buffer_uptodate(tmp)) {
13038                                 fprintf(stderr, "Error reading root block\n");
13039                                 return -EIO;
13040                         }
13041                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
13042                         free_extent_buffer(tmp);
13043                         if (ret)
13044                                 return ret;
13045                 } else {
13046                         bytenr = btrfs_node_blockptr(eb, i);
13047
13048                         /* If we aren't the tree root don't read the block */
13049                         if (level == 1 && !tree_root) {
13050                                 btrfs_pin_extent(fs_info, bytenr,
13051                                                 fs_info->nodesize);
13052                                 continue;
13053                         }
13054
13055                         tmp = read_tree_block(fs_info, bytenr, 0);
13056                         if (!extent_buffer_uptodate(tmp)) {
13057                                 fprintf(stderr, "Error reading tree block\n");
13058                                 return -EIO;
13059                         }
13060                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13061                         free_extent_buffer(tmp);
13062                         if (ret)
13063                                 return ret;
13064                 }
13065         }
13066
13067         return 0;
13068 }
13069
13070 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13071 {
13072         int ret;
13073
13074         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13075         if (ret)
13076                 return ret;
13077
13078         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
13079 }
13080
13081 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13082 {
13083         struct btrfs_block_group_cache *cache;
13084         struct btrfs_path path;
13085         struct extent_buffer *leaf;
13086         struct btrfs_chunk *chunk;
13087         struct btrfs_key key;
13088         int ret;
13089         u64 start;
13090
13091         btrfs_init_path(&path);
13092         key.objectid = 0;
13093         key.type = BTRFS_CHUNK_ITEM_KEY;
13094         key.offset = 0;
13095         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13096         if (ret < 0) {
13097                 btrfs_release_path(&path);
13098                 return ret;
13099         }
13100
13101         /*
13102          * We do this in case the block groups were screwed up and had alloc
13103          * bits that aren't actually set on the chunks.  This happens with
13104          * restored images every time and could happen in real life I guess.
13105          */
13106         fs_info->avail_data_alloc_bits = 0;
13107         fs_info->avail_metadata_alloc_bits = 0;
13108         fs_info->avail_system_alloc_bits = 0;
13109
13110         /* First we need to create the in-memory block groups */
13111         while (1) {
13112                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13113                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13114                         if (ret < 0) {
13115                                 btrfs_release_path(&path);
13116                                 return ret;
13117                         }
13118                         if (ret) {
13119                                 ret = 0;
13120                                 break;
13121                         }
13122                 }
13123                 leaf = path.nodes[0];
13124                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13125                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13126                         path.slots[0]++;
13127                         continue;
13128                 }
13129
13130                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13131                 btrfs_add_block_group(fs_info, 0,
13132                                       btrfs_chunk_type(leaf, chunk), key.offset,
13133                                       btrfs_chunk_length(leaf, chunk));
13134                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13135                                  key.offset + btrfs_chunk_length(leaf, chunk));
13136                 path.slots[0]++;
13137         }
13138         start = 0;
13139         while (1) {
13140                 cache = btrfs_lookup_first_block_group(fs_info, start);
13141                 if (!cache)
13142                         break;
13143                 cache->cached = 1;
13144                 start = cache->key.objectid + cache->key.offset;
13145         }
13146
13147         btrfs_release_path(&path);
13148         return 0;
13149 }
13150
13151 static int reset_balance(struct btrfs_trans_handle *trans,
13152                          struct btrfs_fs_info *fs_info)
13153 {
13154         struct btrfs_root *root = fs_info->tree_root;
13155         struct btrfs_path path;
13156         struct extent_buffer *leaf;
13157         struct btrfs_key key;
13158         int del_slot, del_nr = 0;
13159         int ret;
13160         int found = 0;
13161
13162         btrfs_init_path(&path);
13163         key.objectid = BTRFS_BALANCE_OBJECTID;
13164         key.type = BTRFS_BALANCE_ITEM_KEY;
13165         key.offset = 0;
13166         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13167         if (ret) {
13168                 if (ret > 0)
13169                         ret = 0;
13170                 if (!ret)
13171                         goto reinit_data_reloc;
13172                 else
13173                         goto out;
13174         }
13175
13176         ret = btrfs_del_item(trans, root, &path);
13177         if (ret)
13178                 goto out;
13179         btrfs_release_path(&path);
13180
13181         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13182         key.type = BTRFS_ROOT_ITEM_KEY;
13183         key.offset = 0;
13184         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13185         if (ret < 0)
13186                 goto out;
13187         while (1) {
13188                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13189                         if (!found)
13190                                 break;
13191
13192                         if (del_nr) {
13193                                 ret = btrfs_del_items(trans, root, &path,
13194                                                       del_slot, del_nr);
13195                                 del_nr = 0;
13196                                 if (ret)
13197                                         goto out;
13198                         }
13199                         key.offset++;
13200                         btrfs_release_path(&path);
13201
13202                         found = 0;
13203                         ret = btrfs_search_slot(trans, root, &key, &path,
13204                                                 -1, 1);
13205                         if (ret < 0)
13206                                 goto out;
13207                         continue;
13208                 }
13209                 found = 1;
13210                 leaf = path.nodes[0];
13211                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13212                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13213                         break;
13214                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13215                         path.slots[0]++;
13216                         continue;
13217                 }
13218                 if (!del_nr) {
13219                         del_slot = path.slots[0];
13220                         del_nr = 1;
13221                 } else {
13222                         del_nr++;
13223                 }
13224                 path.slots[0]++;
13225         }
13226
13227         if (del_nr) {
13228                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13229                 if (ret)
13230                         goto out;
13231         }
13232         btrfs_release_path(&path);
13233
13234 reinit_data_reloc:
13235         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13236         key.type = BTRFS_ROOT_ITEM_KEY;
13237         key.offset = (u64)-1;
13238         root = btrfs_read_fs_root(fs_info, &key);
13239         if (IS_ERR(root)) {
13240                 fprintf(stderr, "Error reading data reloc tree\n");
13241                 ret = PTR_ERR(root);
13242                 goto out;
13243         }
13244         record_root_in_trans(trans, root);
13245         ret = btrfs_fsck_reinit_root(trans, root, 0);
13246         if (ret)
13247                 goto out;
13248         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13249 out:
13250         btrfs_release_path(&path);
13251         return ret;
13252 }
13253
13254 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13255                               struct btrfs_fs_info *fs_info)
13256 {
13257         u64 start = 0;
13258         int ret;
13259
13260         /*
13261          * The only reason we don't do this is because right now we're just
13262          * walking the trees we find and pinning down their bytes, we don't look
13263          * at any of the leaves.  In order to do mixed groups we'd have to check
13264          * the leaves of any fs roots and pin down the bytes for any file
13265          * extents we find.  Not hard but why do it if we don't have to?
13266          */
13267         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13268                 fprintf(stderr, "We don't support re-initing the extent tree "
13269                         "for mixed block groups yet, please notify a btrfs "
13270                         "developer you want to do this so they can add this "
13271                         "functionality.\n");
13272                 return -EINVAL;
13273         }
13274
13275         /*
13276          * first we need to walk all of the trees except the extent tree and pin
13277          * down the bytes that are in use so we don't overwrite any existing
13278          * metadata.
13279          */
13280         ret = pin_metadata_blocks(fs_info);
13281         if (ret) {
13282                 fprintf(stderr, "error pinning down used bytes\n");
13283                 return ret;
13284         }
13285
13286         /*
13287          * Need to drop all the block groups since we're going to recreate all
13288          * of them again.
13289          */
13290         btrfs_free_block_groups(fs_info);
13291         ret = reset_block_groups(fs_info);
13292         if (ret) {
13293                 fprintf(stderr, "error resetting the block groups\n");
13294                 return ret;
13295         }
13296
13297         /* Ok we can allocate now, reinit the extent root */
13298         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13299         if (ret) {
13300                 fprintf(stderr, "extent root initialization failed\n");
13301                 /*
13302                  * When the transaction code is updated we should end the
13303                  * transaction, but for now progs only knows about commit so
13304                  * just return an error.
13305                  */
13306                 return ret;
13307         }
13308
13309         /*
13310          * Now we have all the in-memory block groups setup so we can make
13311          * allocations properly, and the metadata we care about is safe since we
13312          * pinned all of it above.
13313          */
13314         while (1) {
13315                 struct btrfs_block_group_cache *cache;
13316
13317                 cache = btrfs_lookup_first_block_group(fs_info, start);
13318                 if (!cache)
13319                         break;
13320                 start = cache->key.objectid + cache->key.offset;
13321                 ret = btrfs_insert_item(trans, fs_info->extent_root,
13322                                         &cache->key, &cache->item,
13323                                         sizeof(cache->item));
13324                 if (ret) {
13325                         fprintf(stderr, "Error adding block group\n");
13326                         return ret;
13327                 }
13328                 btrfs_extent_post_op(trans, fs_info->extent_root);
13329         }
13330
13331         ret = reset_balance(trans, fs_info);
13332         if (ret)
13333                 fprintf(stderr, "error resetting the pending balance\n");
13334
13335         return ret;
13336 }
13337
13338 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13339 {
13340         struct btrfs_path path;
13341         struct btrfs_trans_handle *trans;
13342         struct btrfs_key key;
13343         int ret;
13344
13345         printf("Recowing metadata block %llu\n", eb->start);
13346         key.objectid = btrfs_header_owner(eb);
13347         key.type = BTRFS_ROOT_ITEM_KEY;
13348         key.offset = (u64)-1;
13349
13350         root = btrfs_read_fs_root(root->fs_info, &key);
13351         if (IS_ERR(root)) {
13352                 fprintf(stderr, "Couldn't find owner root %llu\n",
13353                         key.objectid);
13354                 return PTR_ERR(root);
13355         }
13356
13357         trans = btrfs_start_transaction(root, 1);
13358         if (IS_ERR(trans))
13359                 return PTR_ERR(trans);
13360
13361         btrfs_init_path(&path);
13362         path.lowest_level = btrfs_header_level(eb);
13363         if (path.lowest_level)
13364                 btrfs_node_key_to_cpu(eb, &key, 0);
13365         else
13366                 btrfs_item_key_to_cpu(eb, &key, 0);
13367
13368         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13369         btrfs_commit_transaction(trans, root);
13370         btrfs_release_path(&path);
13371         return ret;
13372 }
13373
13374 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13375 {
13376         struct btrfs_path path;
13377         struct btrfs_trans_handle *trans;
13378         struct btrfs_key key;
13379         int ret;
13380
13381         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13382                bad->key.type, bad->key.offset);
13383         key.objectid = bad->root_id;
13384         key.type = BTRFS_ROOT_ITEM_KEY;
13385         key.offset = (u64)-1;
13386
13387         root = btrfs_read_fs_root(root->fs_info, &key);
13388         if (IS_ERR(root)) {
13389                 fprintf(stderr, "Couldn't find owner root %llu\n",
13390                         key.objectid);
13391                 return PTR_ERR(root);
13392         }
13393
13394         trans = btrfs_start_transaction(root, 1);
13395         if (IS_ERR(trans))
13396                 return PTR_ERR(trans);
13397
13398         btrfs_init_path(&path);
13399         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13400         if (ret) {
13401                 if (ret > 0)
13402                         ret = 0;
13403                 goto out;
13404         }
13405         ret = btrfs_del_item(trans, root, &path);
13406 out:
13407         btrfs_commit_transaction(trans, root);
13408         btrfs_release_path(&path);
13409         return ret;
13410 }
13411
13412 static int zero_log_tree(struct btrfs_root *root)
13413 {
13414         struct btrfs_trans_handle *trans;
13415         int ret;
13416
13417         trans = btrfs_start_transaction(root, 1);
13418         if (IS_ERR(trans)) {
13419                 ret = PTR_ERR(trans);
13420                 return ret;
13421         }
13422         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13423         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13424         ret = btrfs_commit_transaction(trans, root);
13425         return ret;
13426 }
13427
13428 static int populate_csum(struct btrfs_trans_handle *trans,
13429                          struct btrfs_root *csum_root, char *buf, u64 start,
13430                          u64 len)
13431 {
13432         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13433         u64 offset = 0;
13434         u64 sectorsize;
13435         int ret = 0;
13436
13437         while (offset < len) {
13438                 sectorsize = fs_info->sectorsize;
13439                 ret = read_extent_data(fs_info, buf, start + offset,
13440                                        &sectorsize, 0);
13441                 if (ret)
13442                         break;
13443                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13444                                             start + offset, buf, sectorsize);
13445                 if (ret)
13446                         break;
13447                 offset += sectorsize;
13448         }
13449         return ret;
13450 }
13451
13452 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13453                                       struct btrfs_root *csum_root,
13454                                       struct btrfs_root *cur_root)
13455 {
13456         struct btrfs_path path;
13457         struct btrfs_key key;
13458         struct extent_buffer *node;
13459         struct btrfs_file_extent_item *fi;
13460         char *buf = NULL;
13461         u64 start = 0;
13462         u64 len = 0;
13463         int slot = 0;
13464         int ret = 0;
13465
13466         buf = malloc(cur_root->fs_info->sectorsize);
13467         if (!buf)
13468                 return -ENOMEM;
13469
13470         btrfs_init_path(&path);
13471         key.objectid = 0;
13472         key.offset = 0;
13473         key.type = 0;
13474         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13475         if (ret < 0)
13476                 goto out;
13477         /* Iterate all regular file extents and fill its csum */
13478         while (1) {
13479                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13480
13481                 if (key.type != BTRFS_EXTENT_DATA_KEY)
13482                         goto next;
13483                 node = path.nodes[0];
13484                 slot = path.slots[0];
13485                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13486                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
13487                         goto next;
13488                 start = btrfs_file_extent_disk_bytenr(node, fi);
13489                 len = btrfs_file_extent_disk_num_bytes(node, fi);
13490
13491                 ret = populate_csum(trans, csum_root, buf, start, len);
13492                 if (ret == -EEXIST)
13493                         ret = 0;
13494                 if (ret < 0)
13495                         goto out;
13496 next:
13497                 /*
13498                  * TODO: if next leaf is corrupted, jump to nearest next valid
13499                  * leaf.
13500                  */
13501                 ret = btrfs_next_item(cur_root, &path);
13502                 if (ret < 0)
13503                         goto out;
13504                 if (ret > 0) {
13505                         ret = 0;
13506                         goto out;
13507                 }
13508         }
13509
13510 out:
13511         btrfs_release_path(&path);
13512         free(buf);
13513         return ret;
13514 }
13515
13516 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13517                                   struct btrfs_root *csum_root)
13518 {
13519         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13520         struct btrfs_path path;
13521         struct btrfs_root *tree_root = fs_info->tree_root;
13522         struct btrfs_root *cur_root;
13523         struct extent_buffer *node;
13524         struct btrfs_key key;
13525         int slot = 0;
13526         int ret = 0;
13527
13528         btrfs_init_path(&path);
13529         key.objectid = BTRFS_FS_TREE_OBJECTID;
13530         key.offset = 0;
13531         key.type = BTRFS_ROOT_ITEM_KEY;
13532         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13533         if (ret < 0)
13534                 goto out;
13535         if (ret > 0) {
13536                 ret = -ENOENT;
13537                 goto out;
13538         }
13539
13540         while (1) {
13541                 node = path.nodes[0];
13542                 slot = path.slots[0];
13543                 btrfs_item_key_to_cpu(node, &key, slot);
13544                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13545                         goto out;
13546                 if (key.type != BTRFS_ROOT_ITEM_KEY)
13547                         goto next;
13548                 if (!is_fstree(key.objectid))
13549                         goto next;
13550                 key.offset = (u64)-1;
13551
13552                 cur_root = btrfs_read_fs_root(fs_info, &key);
13553                 if (IS_ERR(cur_root) || !cur_root) {
13554                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13555                                 key.objectid);
13556                         goto out;
13557                 }
13558                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13559                                 cur_root);
13560                 if (ret < 0)
13561                         goto out;
13562 next:
13563                 ret = btrfs_next_item(tree_root, &path);
13564                 if (ret > 0) {
13565                         ret = 0;
13566                         goto out;
13567                 }
13568                 if (ret < 0)
13569                         goto out;
13570         }
13571
13572 out:
13573         btrfs_release_path(&path);
13574         return ret;
13575 }
13576
13577 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13578                                       struct btrfs_root *csum_root)
13579 {
13580         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13581         struct btrfs_path path;
13582         struct btrfs_extent_item *ei;
13583         struct extent_buffer *leaf;
13584         char *buf;
13585         struct btrfs_key key;
13586         int ret;
13587
13588         btrfs_init_path(&path);
13589         key.objectid = 0;
13590         key.type = BTRFS_EXTENT_ITEM_KEY;
13591         key.offset = 0;
13592         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13593         if (ret < 0) {
13594                 btrfs_release_path(&path);
13595                 return ret;
13596         }
13597
13598         buf = malloc(csum_root->fs_info->sectorsize);
13599         if (!buf) {
13600                 btrfs_release_path(&path);
13601                 return -ENOMEM;
13602         }
13603
13604         while (1) {
13605                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13606                         ret = btrfs_next_leaf(extent_root, &path);
13607                         if (ret < 0)
13608                                 break;
13609                         if (ret) {
13610                                 ret = 0;
13611                                 break;
13612                         }
13613                 }
13614                 leaf = path.nodes[0];
13615
13616                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13617                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13618                         path.slots[0]++;
13619                         continue;
13620                 }
13621
13622                 ei = btrfs_item_ptr(leaf, path.slots[0],
13623                                     struct btrfs_extent_item);
13624                 if (!(btrfs_extent_flags(leaf, ei) &
13625                       BTRFS_EXTENT_FLAG_DATA)) {
13626                         path.slots[0]++;
13627                         continue;
13628                 }
13629
13630                 ret = populate_csum(trans, csum_root, buf, key.objectid,
13631                                     key.offset);
13632                 if (ret)
13633                         break;
13634                 path.slots[0]++;
13635         }
13636
13637         btrfs_release_path(&path);
13638         free(buf);
13639         return ret;
13640 }
13641
/*
 * Recompute the data checksums and insert them into the csum tree.
 *
 * Initializing the extent tree wipes out all the extent info, so in that case
 * the extent tree cannot be used as the source and the fs/subvolume trees are
 * walked instead.  A non-zero search_fs_tree selects the fs/subvolume tree
 * walk; zero selects the extent tree walk.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ?
		fill_csum_tree_from_fs(trans, csum_root) :
		fill_csum_tree_from_extent(trans, csum_root);
}
13658
13659 static void free_roots_info_cache(void)
13660 {
13661         if (!roots_info_cache)
13662                 return;
13663
13664         while (!cache_tree_empty(roots_info_cache)) {
13665                 struct cache_extent *entry;
13666                 struct root_item_info *rii;
13667
13668                 entry = first_cache_extent(roots_info_cache);
13669                 if (!entry)
13670                         break;
13671                 remove_cache_extent(roots_info_cache, entry);
13672                 rii = container_of(entry, struct root_item_info, cache_extent);
13673                 free(rii);
13674         }
13675
13676         free(roots_info_cache);
13677         roots_info_cache = NULL;
13678 }
13679
13680 static int build_roots_info_cache(struct btrfs_fs_info *info)
13681 {
13682         int ret = 0;
13683         struct btrfs_key key;
13684         struct extent_buffer *leaf;
13685         struct btrfs_path path;
13686
13687         if (!roots_info_cache) {
13688                 roots_info_cache = malloc(sizeof(*roots_info_cache));
13689                 if (!roots_info_cache)
13690                         return -ENOMEM;
13691                 cache_tree_init(roots_info_cache);
13692         }
13693
13694         btrfs_init_path(&path);
13695         key.objectid = 0;
13696         key.type = BTRFS_EXTENT_ITEM_KEY;
13697         key.offset = 0;
13698         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13699         if (ret < 0)
13700                 goto out;
13701         leaf = path.nodes[0];
13702
13703         while (1) {
13704                 struct btrfs_key found_key;
13705                 struct btrfs_extent_item *ei;
13706                 struct btrfs_extent_inline_ref *iref;
13707                 int slot = path.slots[0];
13708                 int type;
13709                 u64 flags;
13710                 u64 root_id;
13711                 u8 level;
13712                 struct cache_extent *entry;
13713                 struct root_item_info *rii;
13714
13715                 if (slot >= btrfs_header_nritems(leaf)) {
13716                         ret = btrfs_next_leaf(info->extent_root, &path);
13717                         if (ret < 0) {
13718                                 break;
13719                         } else if (ret) {
13720                                 ret = 0;
13721                                 break;
13722                         }
13723                         leaf = path.nodes[0];
13724                         slot = path.slots[0];
13725                 }
13726
13727                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13728
13729                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13730                     found_key.type != BTRFS_METADATA_ITEM_KEY)
13731                         goto next;
13732
13733                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13734                 flags = btrfs_extent_flags(leaf, ei);
13735
13736                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13737                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
13738                         goto next;
13739
13740                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13741                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13742                         level = found_key.offset;
13743                 } else {
13744                         struct btrfs_tree_block_info *binfo;
13745
13746                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
13747                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13748                         level = btrfs_tree_block_level(leaf, binfo);
13749                 }
13750
13751                 /*
13752                  * For a root extent, it must be of the following type and the
13753                  * first (and only one) iref in the item.
13754                  */
13755                 type = btrfs_extent_inline_ref_type(leaf, iref);
13756                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
13757                         goto next;
13758
13759                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13760                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13761                 if (!entry) {
13762                         rii = malloc(sizeof(struct root_item_info));
13763                         if (!rii) {
13764                                 ret = -ENOMEM;
13765                                 goto out;
13766                         }
13767                         rii->cache_extent.start = root_id;
13768                         rii->cache_extent.size = 1;
13769                         rii->level = (u8)-1;
13770                         entry = &rii->cache_extent;
13771                         ret = insert_cache_extent(roots_info_cache, entry);
13772                         ASSERT(ret == 0);
13773                 } else {
13774                         rii = container_of(entry, struct root_item_info,
13775                                            cache_extent);
13776                 }
13777
13778                 ASSERT(rii->cache_extent.start == root_id);
13779                 ASSERT(rii->cache_extent.size == 1);
13780
13781                 if (level > rii->level || rii->level == (u8)-1) {
13782                         rii->level = level;
13783                         rii->bytenr = found_key.objectid;
13784                         rii->gen = btrfs_extent_generation(leaf, ei);
13785                         rii->node_count = 1;
13786                 } else if (level == rii->level) {
13787                         rii->node_count++;
13788                 }
13789 next:
13790                 path.slots[0]++;
13791         }
13792
13793 out:
13794         btrfs_release_path(&path);
13795
13796         return ret;
13797 }
13798
13799 static int maybe_repair_root_item(struct btrfs_path *path,
13800                                   const struct btrfs_key *root_key,
13801                                   const int read_only_mode)
13802 {
13803         const u64 root_id = root_key->objectid;
13804         struct cache_extent *entry;
13805         struct root_item_info *rii;
13806         struct btrfs_root_item ri;
13807         unsigned long offset;
13808
13809         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13810         if (!entry) {
13811                 fprintf(stderr,
13812                         "Error: could not find extent items for root %llu\n",
13813                         root_key->objectid);
13814                 return -ENOENT;
13815         }
13816
13817         rii = container_of(entry, struct root_item_info, cache_extent);
13818         ASSERT(rii->cache_extent.start == root_id);
13819         ASSERT(rii->cache_extent.size == 1);
13820
13821         if (rii->node_count != 1) {
13822                 fprintf(stderr,
13823                         "Error: could not find btree root extent for root %llu\n",
13824                         root_id);
13825                 return -ENOENT;
13826         }
13827
13828         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13829         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
13830
13831         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13832             btrfs_root_level(&ri) != rii->level ||
13833             btrfs_root_generation(&ri) != rii->gen) {
13834
13835                 /*
13836                  * If we're in repair mode but our caller told us to not update
13837                  * the root item, i.e. just check if it needs to be updated, don't
13838                  * print this message, since the caller will call us again shortly
13839                  * for the same root item without read only mode (the caller will
13840                  * open a transaction first).
13841                  */
13842                 if (!(read_only_mode && repair))
13843                         fprintf(stderr,
13844                                 "%sroot item for root %llu,"
13845                                 " current bytenr %llu, current gen %llu, current level %u,"
13846                                 " new bytenr %llu, new gen %llu, new level %u\n",
13847                                 (read_only_mode ? "" : "fixing "),
13848                                 root_id,
13849                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13850                                 btrfs_root_level(&ri),
13851                                 rii->bytenr, rii->gen, rii->level);
13852
13853                 if (btrfs_root_generation(&ri) > rii->gen) {
13854                         fprintf(stderr,
13855                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13856                                 root_id, btrfs_root_generation(&ri), rii->gen);
13857                         return -EINVAL;
13858                 }
13859
13860                 if (!read_only_mode) {
13861                         btrfs_set_root_bytenr(&ri, rii->bytenr);
13862                         btrfs_set_root_level(&ri, rii->level);
13863                         btrfs_set_root_generation(&ri, rii->gen);
13864                         write_extent_buffer(path->nodes[0], &ri,
13865                                             offset, sizeof(ri));
13866                 }
13867
13868                 return 1;
13869         }
13870
13871         return 0;
13872 }
13873
13874 /*
13875  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
13876  * caused read-only snapshots to be corrupted if they were created at a moment
13877  * when the source subvolume/snapshot had orphan items. The issue was that the
13878  * on-disk root items became incorrect, referring to the pre orphan cleanup root
13879  * node instead of the post orphan cleanup root node.
13880  * So this function, and its callees, just detects and fixes those cases. Even
13881  * though the regression was for read-only snapshots, this function applies to
13882  * any snapshot/subvolume root.
13883  * This must be run before any other repair code - not doing it so, makes other
13884  * repair code delete or modify backrefs in the extent tree for example, which
13885  * will result in an inconsistent fs after repairing the root items.
13886  */
13887 static int repair_root_items(struct btrfs_fs_info *info)
13888 {
13889         struct btrfs_path path;
13890         struct btrfs_key key;
13891         struct extent_buffer *leaf;
13892         struct btrfs_trans_handle *trans = NULL;
13893         int ret = 0;
13894         int bad_roots = 0;
13895         int need_trans = 0;
13896
13897         btrfs_init_path(&path);
13898
13899         ret = build_roots_info_cache(info);
13900         if (ret)
13901                 goto out;
13902
13903         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
13904         key.type = BTRFS_ROOT_ITEM_KEY;
13905         key.offset = 0;
13906
13907 again:
13908         /*
13909          * Avoid opening and committing transactions if a leaf doesn't have
13910          * any root items that need to be fixed, so that we avoid rotating
13911          * backup roots unnecessarily.
13912          */
13913         if (need_trans) {
13914                 trans = btrfs_start_transaction(info->tree_root, 1);
13915                 if (IS_ERR(trans)) {
13916                         ret = PTR_ERR(trans);
13917                         goto out;
13918                 }
13919         }
13920
13921         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
13922                                 0, trans ? 1 : 0);
13923         if (ret < 0)
13924                 goto out;
13925         leaf = path.nodes[0];
13926
13927         while (1) {
13928                 struct btrfs_key found_key;
13929
13930                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
13931                         int no_more_keys = find_next_key(&path, &key);
13932
13933                         btrfs_release_path(&path);
13934                         if (trans) {
13935                                 ret = btrfs_commit_transaction(trans,
13936                                                                info->tree_root);
13937                                 trans = NULL;
13938                                 if (ret < 0)
13939                                         goto out;
13940                         }
13941                         need_trans = 0;
13942                         if (no_more_keys)
13943                                 break;
13944                         goto again;
13945                 }
13946
13947                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13948
13949                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
13950                         goto next;
13951                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13952                         goto next;
13953
13954                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
13955                 if (ret < 0)
13956                         goto out;
13957                 if (ret) {
13958                         if (!trans && repair) {
13959                                 need_trans = 1;
13960                                 key = found_key;
13961                                 btrfs_release_path(&path);
13962                                 goto again;
13963                         }
13964                         bad_roots++;
13965                 }
13966 next:
13967                 path.slots[0]++;
13968         }
13969         ret = 0;
13970 out:
13971         free_roots_info_cache();
13972         btrfs_release_path(&path);
13973         if (trans)
13974                 btrfs_commit_transaction(trans, info->tree_root);
13975         if (ret < 0)
13976                 return ret;
13977
13978         return bad_roots;
13979 }
13980
13981 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13982 {
13983         struct btrfs_trans_handle *trans;
13984         struct btrfs_block_group_cache *bg_cache;
13985         u64 current = 0;
13986         int ret = 0;
13987
13988         /* Clear all free space cache inodes and its extent data */
13989         while (1) {
13990                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13991                 if (!bg_cache)
13992                         break;
13993                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
13994                 if (ret < 0)
13995                         return ret;
13996                 current = bg_cache->key.objectid + bg_cache->key.offset;
13997         }
13998
13999         /* Don't forget to set cache_generation to -1 */
14000         trans = btrfs_start_transaction(fs_info->tree_root, 0);
14001         if (IS_ERR(trans)) {
14002                 error("failed to update super block cache generation");
14003                 return PTR_ERR(trans);
14004         }
14005         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
14006         btrfs_commit_transaction(trans, fs_info->tree_root);
14007
14008         return ret;
14009 }
14010
14011 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
14012                 int clear_version)
14013 {
14014         int ret = 0;
14015
14016         if (clear_version == 1) {
14017                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14018                         error(
14019                 "free space cache v2 detected, use --clear-space-cache v2");
14020                         ret = 1;
14021                         goto close_out;
14022                 }
14023                 printf("Clearing free space cache\n");
14024                 ret = clear_free_space_cache(fs_info);
14025                 if (ret) {
14026                         error("failed to clear free space cache");
14027                         ret = 1;
14028                 } else {
14029                         printf("Free space cache cleared\n");
14030                 }
14031         } else if (clear_version == 2) {
14032                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14033                         printf("no free space cache v2 to clear\n");
14034                         ret = 0;
14035                         goto close_out;
14036                 }
14037                 printf("Clear free space cache v2\n");
14038                 ret = btrfs_clear_free_space_tree(fs_info);
14039                 if (ret) {
14040                         error("failed to clear free space cache v2: %d", ret);
14041                         ret = 1;
14042                 } else {
14043                         printf("free space cache v2 cleared\n");
14044                 }
14045         }
14046 close_out:
14047         return ret;
14048 }
14049
/*
 * Help text for "btrfs check": a NULL-terminated array of lines that
 * cmd_check() hands to usage().
 */
const char *const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--force                     skip mount checks, repair is not possible",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",
	NULL
};
14081
14082 int cmd_check(int argc, char **argv)
14083 {
14084         struct cache_tree root_cache;
14085         struct btrfs_root *root;
14086         struct btrfs_fs_info *info;
14087         u64 bytenr = 0;
14088         u64 subvolid = 0;
14089         u64 tree_root_bytenr = 0;
14090         u64 chunk_root_bytenr = 0;
14091         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14092         int ret = 0;
14093         int err = 0;
14094         u64 num;
14095         int init_csum_tree = 0;
14096         int readonly = 0;
14097         int clear_space_cache = 0;
14098         int qgroup_report = 0;
14099         int qgroups_repaired = 0;
14100         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
14101         int force = 0;
14102
14103         while(1) {
14104                 int c;
14105                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14106                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14107                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14108                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14109                         GETOPT_VAL_FORCE };
14110                 static const struct option long_options[] = {
14111                         { "super", required_argument, NULL, 's' },
14112                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14113                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14114                         { "init-csum-tree", no_argument, NULL,
14115                                 GETOPT_VAL_INIT_CSUM },
14116                         { "init-extent-tree", no_argument, NULL,
14117                                 GETOPT_VAL_INIT_EXTENT },
14118                         { "check-data-csum", no_argument, NULL,
14119                                 GETOPT_VAL_CHECK_CSUM },
14120                         { "backup", no_argument, NULL, 'b' },
14121                         { "subvol-extents", required_argument, NULL, 'E' },
14122                         { "qgroup-report", no_argument, NULL, 'Q' },
14123                         { "tree-root", required_argument, NULL, 'r' },
14124                         { "chunk-root", required_argument, NULL,
14125                                 GETOPT_VAL_CHUNK_TREE },
14126                         { "progress", no_argument, NULL, 'p' },
14127                         { "mode", required_argument, NULL,
14128                                 GETOPT_VAL_MODE },
14129                         { "clear-space-cache", required_argument, NULL,
14130                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
14131                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14132                         { NULL, 0, NULL, 0}
14133                 };
14134
14135                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14136                 if (c < 0)
14137                         break;
14138                 switch(c) {
14139                         case 'a': /* ignored */ break;
14140                         case 'b':
14141                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14142                                 break;
14143                         case 's':
14144                                 num = arg_strtou64(optarg);
14145                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14146                                         error(
14147                                         "super mirror should be less than %d",
14148                                                 BTRFS_SUPER_MIRROR_MAX);
14149                                         exit(1);
14150                                 }
14151                                 bytenr = btrfs_sb_offset(((int)num));
14152                                 printf("using SB copy %llu, bytenr %llu\n", num,
14153                                        (unsigned long long)bytenr);
14154                                 break;
14155                         case 'Q':
14156                                 qgroup_report = 1;
14157                                 break;
14158                         case 'E':
14159                                 subvolid = arg_strtou64(optarg);
14160                                 break;
14161                         case 'r':
14162                                 tree_root_bytenr = arg_strtou64(optarg);
14163                                 break;
14164                         case GETOPT_VAL_CHUNK_TREE:
14165                                 chunk_root_bytenr = arg_strtou64(optarg);
14166                                 break;
14167                         case 'p':
14168                                 ctx.progress_enabled = true;
14169                                 break;
14170                         case '?':
14171                         case 'h':
14172                                 usage(cmd_check_usage);
14173                         case GETOPT_VAL_REPAIR:
14174                                 printf("enabling repair mode\n");
14175                                 repair = 1;
14176                                 ctree_flags |= OPEN_CTREE_WRITES;
14177                                 break;
14178                         case GETOPT_VAL_READONLY:
14179                                 readonly = 1;
14180                                 break;
14181                         case GETOPT_VAL_INIT_CSUM:
14182                                 printf("Creating a new CRC tree\n");
14183                                 init_csum_tree = 1;
14184                                 repair = 1;
14185                                 ctree_flags |= OPEN_CTREE_WRITES;
14186                                 break;
14187                         case GETOPT_VAL_INIT_EXTENT:
14188                                 init_extent_tree = 1;
14189                                 ctree_flags |= (OPEN_CTREE_WRITES |
14190                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
14191                                 repair = 1;
14192                                 break;
14193                         case GETOPT_VAL_CHECK_CSUM:
14194                                 check_data_csum = 1;
14195                                 break;
14196                         case GETOPT_VAL_MODE:
14197                                 check_mode = parse_check_mode(optarg);
14198                                 if (check_mode == CHECK_MODE_UNKNOWN) {
14199                                         error("unknown mode: %s", optarg);
14200                                         exit(1);
14201                                 }
14202                                 break;
14203                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
14204                                 if (strcmp(optarg, "v1") == 0) {
14205                                         clear_space_cache = 1;
14206                                 } else if (strcmp(optarg, "v2") == 0) {
14207                                         clear_space_cache = 2;
14208                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14209                                 } else {
14210                                         error(
14211                 "invalid argument to --clear-space-cache, must be v1 or v2");
14212                                         exit(1);
14213                                 }
14214                                 ctree_flags |= OPEN_CTREE_WRITES;
14215                                 break;
14216                         case GETOPT_VAL_FORCE:
14217                                 force = 1;
14218                                 break;
14219                 }
14220         }
14221
14222         if (check_argc_exact(argc - optind, 1))
14223                 usage(cmd_check_usage);
14224
14225         if (ctx.progress_enabled) {
14226                 ctx.tp = TASK_NOTHING;
14227                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14228         }
14229
14230         /* This check is the only reason for --readonly to exist */
14231         if (readonly && repair) {
14232                 error("repair options are not compatible with --readonly");
14233                 exit(1);
14234         }
14235
14236         /*
14237          * experimental and dangerous
14238          */
14239         if (repair && check_mode == CHECK_MODE_LOWMEM)
14240                 warning("low-memory mode repair support is only partial");
14241
14242         radix_tree_init();
14243         cache_tree_init(&root_cache);
14244
14245         ret = check_mounted(argv[optind]);
14246         if (!force) {
14247                 if (ret < 0) {
14248                         error("could not check mount status: %s",
14249                                         strerror(-ret));
14250                         err |= !!ret;
14251                         goto err_out;
14252                 } else if (ret) {
14253                         error(
14254 "%s is currently mounted, use --force if you really intend to check the filesystem",
14255                                 argv[optind]);
14256                         ret = -EBUSY;
14257                         err |= !!ret;
14258                         goto err_out;
14259                 }
14260         } else {
14261                 if (repair) {
14262                         error("repair and --force is not yet supported");
14263                         ret = 1;
14264                         err |= !!ret;
14265                         goto err_out;
14266                 }
14267                 if (ret < 0) {
14268                         warning(
14269 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14270                                 argv[optind]);
14271                 } else if (ret) {
14272                         warning(
14273                         "filesystem mounted, continuing because of --force");
14274                 }
14275                 /* A block device is mounted in exclusive mode by kernel */
14276                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14277         }
14278
14279         /* only allow partial opening under repair mode */
14280         if (repair)
14281                 ctree_flags |= OPEN_CTREE_PARTIAL;
14282
14283         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14284                                   chunk_root_bytenr, ctree_flags);
14285         if (!info) {
14286                 error("cannot open file system");
14287                 ret = -EIO;
14288                 err |= !!ret;
14289                 goto err_out;
14290         }
14291
14292         global_info = info;
14293         root = info->fs_root;
14294         uuid_unparse(info->super_copy->fsid, uuidbuf);
14295
14296         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14297
14298         /*
14299          * Check the bare minimum before starting anything else that could rely
14300          * on it, namely the tree roots, any local consistency checks
14301          */
14302         if (!extent_buffer_uptodate(info->tree_root->node) ||
14303             !extent_buffer_uptodate(info->dev_root->node) ||
14304             !extent_buffer_uptodate(info->chunk_root->node)) {
14305                 error("critical roots corrupted, unable to check the filesystem");
14306                 err |= !!ret;
14307                 ret = -EIO;
14308                 goto close_out;
14309         }
14310
14311         if (clear_space_cache) {
14312                 ret = do_clear_free_space_cache(info, clear_space_cache);
14313                 err |= !!ret;
14314                 goto close_out;
14315         }
14316
14317         /*
14318          * repair mode will force us to commit transaction which
14319          * will make us fail to load log tree when mounting.
14320          */
14321         if (repair && btrfs_super_log_root(info->super_copy)) {
14322                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14323                 if (!ret) {
14324                         ret = 1;
14325                         err |= !!ret;
14326                         goto close_out;
14327                 }
14328                 ret = zero_log_tree(root);
14329                 err |= !!ret;
14330                 if (ret) {
14331                         error("failed to zero log tree: %d", ret);
14332                         goto close_out;
14333                 }
14334         }
14335
14336         if (qgroup_report) {
14337                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14338                        uuidbuf);
14339                 ret = qgroup_verify_all(info);
14340                 err |= !!ret;
14341                 if (ret == 0)
14342                         report_qgroups(1);
14343                 goto close_out;
14344         }
14345         if (subvolid) {
14346                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14347                        subvolid, argv[optind], uuidbuf);
14348                 ret = print_extent_state(info, subvolid);
14349                 err |= !!ret;
14350                 goto close_out;
14351         }
14352
14353         if (init_extent_tree || init_csum_tree) {
14354                 struct btrfs_trans_handle *trans;
14355
14356                 trans = btrfs_start_transaction(info->extent_root, 0);
14357                 if (IS_ERR(trans)) {
14358                         error("error starting transaction");
14359                         ret = PTR_ERR(trans);
14360                         err |= !!ret;
14361                         goto close_out;
14362                 }
14363
14364                 if (init_extent_tree) {
14365                         printf("Creating a new extent tree\n");
14366                         ret = reinit_extent_tree(trans, info);
14367                         err |= !!ret;
14368                         if (ret)
14369                                 goto close_out;
14370                 }
14371
14372                 if (init_csum_tree) {
14373                         printf("Reinitialize checksum tree\n");
14374                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14375                         if (ret) {
14376                                 error("checksum tree initialization failed: %d",
14377                                                 ret);
14378                                 ret = -EIO;
14379                                 err |= !!ret;
14380                                 goto close_out;
14381                         }
14382
14383                         ret = fill_csum_tree(trans, info->csum_root,
14384                                              init_extent_tree);
14385                         err |= !!ret;
14386                         if (ret) {
14387                                 error("checksum tree refilling failed: %d", ret);
14388                                 return -EIO;
14389                         }
14390                 }
14391                 /*
14392                  * Ok now we commit and run the normal fsck, which will add
14393                  * extent entries for all of the items it finds.
14394                  */
14395                 ret = btrfs_commit_transaction(trans, info->extent_root);
14396                 err |= !!ret;
14397                 if (ret)
14398                         goto close_out;
14399         }
14400         if (!extent_buffer_uptodate(info->extent_root->node)) {
14401                 error("critical: extent_root, unable to check the filesystem");
14402                 ret = -EIO;
14403                 err |= !!ret;
14404                 goto close_out;
14405         }
14406         if (!extent_buffer_uptodate(info->csum_root->node)) {
14407                 error("critical: csum_root, unable to check the filesystem");
14408                 ret = -EIO;
14409                 err |= !!ret;
14410                 goto close_out;
14411         }
14412
14413         if (!init_extent_tree) {
14414                 ret = repair_root_items(info);
14415                 if (ret < 0) {
14416                         err = !!ret;
14417                         error("failed to repair root items: %s", strerror(-ret));
14418                         goto close_out;
14419                 }
14420                 if (repair) {
14421                         fprintf(stderr, "Fixed %d roots.\n", ret);
14422                         ret = 0;
14423                 } else if (ret > 0) {
14424                         fprintf(stderr,
14425                                 "Found %d roots with an outdated root item.\n",
14426                                 ret);
14427                         fprintf(stderr,
14428         "Please run a filesystem check with the option --repair to fix them.\n");
14429                         ret = 1;
14430                         err |= ret;
14431                         goto close_out;
14432                 }
14433         }
14434
14435         ret = do_check_chunks_and_extents(info);
14436         err |= !!ret;
14437         if (ret)
14438                 error(
14439                 "errors found in extent allocation tree or chunk allocation");
14440
14441         /* Only re-check super size after we checked and repaired the fs */
14442         err |= !is_super_size_valid(info);
14443
14444         if (!ctx.progress_enabled) {
14445                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14446                         fprintf(stderr, "checking free space tree\n");
14447                 else
14448                         fprintf(stderr, "checking free space cache\n");
14449         }
14450         ret = check_space_cache(root);
14451         err |= !!ret;
14452         if (ret) {
14453                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14454                         error("errors found in free space tree");
14455                 else
14456                         error("errors found in free space cache");
14457                 goto out;
14458         }
14459
14460         /*
14461          * We used to have to have these hole extents in between our real
14462          * extents so if we don't have this flag set we need to make sure there
14463          * are no gaps in the file extents for inodes, otherwise we can just
14464          * ignore it when this happens.
14465          */
14466         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14467         ret = do_check_fs_roots(info, &root_cache);
14468         err |= !!ret;
14469         if (ret) {
14470                 error("errors found in fs roots");
14471                 goto out;
14472         }
14473
14474         fprintf(stderr, "checking csums\n");
14475         ret = check_csums(root);
14476         err |= !!ret;
14477         if (ret) {
14478                 error("errors found in csum tree");
14479                 goto out;
14480         }
14481
14482         fprintf(stderr, "checking root refs\n");
14483         /* For low memory mode, check_fs_roots_v2 handles root refs */
14484         if (check_mode != CHECK_MODE_LOWMEM) {
14485                 ret = check_root_refs(root, &root_cache);
14486                 err |= !!ret;
14487                 if (ret) {
14488                         error("errors found in root refs");
14489                         goto out;
14490                 }
14491         }
14492
14493         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14494                 struct extent_buffer *eb;
14495
14496                 eb = list_first_entry(&root->fs_info->recow_ebs,
14497                                       struct extent_buffer, recow);
14498                 list_del_init(&eb->recow);
14499                 ret = recow_extent_buffer(root, eb);
14500                 err |= !!ret;
14501                 if (ret) {
14502                         error("fails to fix transid errors");
14503                         break;
14504                 }
14505         }
14506
14507         while (!list_empty(&delete_items)) {
14508                 struct bad_item *bad;
14509
14510                 bad = list_first_entry(&delete_items, struct bad_item, list);
14511                 list_del_init(&bad->list);
14512                 if (repair) {
14513                         ret = delete_bad_item(root, bad);
14514                         err |= !!ret;
14515                 }
14516                 free(bad);
14517         }
14518
14519         if (info->quota_enabled) {
14520                 fprintf(stderr, "checking quota groups\n");
14521                 ret = qgroup_verify_all(info);
14522                 err |= !!ret;
14523                 if (ret) {
14524                         error("failed to check quota groups");
14525                         goto out;
14526                 }
14527                 report_qgroups(0);
14528                 ret = repair_qgroups(info, &qgroups_repaired);
14529                 err |= !!ret;
14530                 if (err) {
14531                         error("failed to repair quota groups");
14532                         goto out;
14533                 }
14534                 ret = 0;
14535         }
14536
14537         if (!list_empty(&root->fs_info->recow_ebs)) {
14538                 error("transid errors in file system");
14539                 ret = 1;
14540                 err |= !!ret;
14541         }
14542 out:
14543         printf("found %llu bytes used, ",
14544                (unsigned long long)bytes_used);
14545         if (err)
14546                 printf("error(s) found\n");
14547         else
14548                 printf("no error found\n");
14549         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14550         printf("total tree bytes: %llu\n",
14551                (unsigned long long)total_btree_bytes);
14552         printf("total fs tree bytes: %llu\n",
14553                (unsigned long long)total_fs_tree_bytes);
14554         printf("total extent tree bytes: %llu\n",
14555                (unsigned long long)total_extent_tree_bytes);
14556         printf("btree space waste bytes: %llu\n",
14557                (unsigned long long)btree_space_waste);
14558         printf("file data blocks allocated: %llu\n referenced %llu\n",
14559                 (unsigned long long)data_bytes_allocated,
14560                 (unsigned long long)data_bytes_referenced);
14561
14562         free_qgroup_counts();
14563         free_root_recs_tree(&root_cache);
14564 close_out:
14565         close_ctree(root);
14566 err_out:
14567         if (ctx.progress_enabled)
14568                 task_deinit(ctx.info);
14569
14570         return err;
14571 }