btrfs-progs: check: Move definitions of lowmem mode to check/lowmem.h
[platform/upstream/btrfs-progs.git] / check / main.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46 #include "check/original.h"
47 #include "check/lowmem.h"
48
49 enum task_position {
50         TASK_EXTENTS,
51         TASK_FREE_SPACE,
52         TASK_FS_ROOTS,
53         TASK_NOTHING, /* have to be the last element */
54 };
55
56 struct task_ctx {
57         int progress_enabled;
58         enum task_position tp;
59
60         struct task_info *info;
61 };
62
63 static u64 bytes_used = 0;
64 static u64 total_csum_bytes = 0;
65 static u64 total_btree_bytes = 0;
66 static u64 total_fs_tree_bytes = 0;
67 static u64 total_extent_tree_bytes = 0;
68 static u64 btree_space_waste = 0;
69 static u64 data_bytes_allocated = 0;
70 static u64 data_bytes_referenced = 0;
71 static LIST_HEAD(duplicate_extents);
72 static LIST_HEAD(delete_items);
73 static int no_holes = 0;
74 static int init_extent_tree = 0;
75 static int check_data_csum = 0;
76 static struct btrfs_fs_info *global_info;
77 static struct task_ctx ctx = { 0 };
78 static struct cache_tree *roots_info_cache = NULL;
79
80 enum btrfs_check_mode {
81         CHECK_MODE_ORIGINAL,
82         CHECK_MODE_LOWMEM,
83         CHECK_MODE_UNKNOWN,
84         CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
85 };
86
87 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
88
89 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
90 {
91         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
92         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
93         struct data_backref *back1 = to_data_backref(ext1);
94         struct data_backref *back2 = to_data_backref(ext2);
95
96         WARN_ON(!ext1->is_data);
97         WARN_ON(!ext2->is_data);
98
99         /* parent and root are a union, so this covers both */
100         if (back1->parent > back2->parent)
101                 return 1;
102         if (back1->parent < back2->parent)
103                 return -1;
104
105         /* This is a full backref and the parents match. */
106         if (back1->node.full_backref)
107                 return 0;
108
109         if (back1->owner > back2->owner)
110                 return 1;
111         if (back1->owner < back2->owner)
112                 return -1;
113
114         if (back1->offset > back2->offset)
115                 return 1;
116         if (back1->offset < back2->offset)
117                 return -1;
118
119         if (back1->found_ref && back2->found_ref) {
120                 if (back1->disk_bytenr > back2->disk_bytenr)
121                         return 1;
122                 if (back1->disk_bytenr < back2->disk_bytenr)
123                         return -1;
124
125                 if (back1->bytes > back2->bytes)
126                         return 1;
127                 if (back1->bytes < back2->bytes)
128                         return -1;
129         }
130
131         return 0;
132 }
133
134 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
135 {
136         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
137         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
138         struct tree_backref *back1 = to_tree_backref(ext1);
139         struct tree_backref *back2 = to_tree_backref(ext2);
140
141         WARN_ON(ext1->is_data);
142         WARN_ON(ext2->is_data);
143
144         /* parent and root are a union, so this covers both */
145         if (back1->parent > back2->parent)
146                 return 1;
147         if (back1->parent < back2->parent)
148                 return -1;
149
150         return 0;
151 }
152
153 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
154 {
155         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
156         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
157
158         if (ext1->is_data > ext2->is_data)
159                 return 1;
160
161         if (ext1->is_data < ext2->is_data)
162                 return -1;
163
164         if (ext1->full_backref > ext2->full_backref)
165                 return 1;
166         if (ext1->full_backref < ext2->full_backref)
167                 return -1;
168
169         if (ext1->is_data)
170                 return compare_data_backref(node1, node2);
171         else
172                 return compare_tree_backref(node1, node2);
173 }
174
175
176 static void *print_status_check(void *p)
177 {
178         struct task_ctx *priv = p;
179         const char work_indicator[] = { '.', 'o', 'O', 'o' };
180         uint32_t count = 0;
181         static char *task_position_string[] = {
182                 "checking extents",
183                 "checking free space cache",
184                 "checking fs roots",
185         };
186
187         task_period_start(priv->info, 1000 /* 1s */);
188
189         if (priv->tp == TASK_NOTHING)
190                 return NULL;
191
192         while (1) {
193                 printf("%s [%c]\r", task_position_string[priv->tp],
194                                 work_indicator[count % 4]);
195                 count++;
196                 fflush(stdout);
197                 task_period_wait(priv->info);
198         }
199         return NULL;
200 }
201
/*
 * Finish the progress line: emit a newline on stdout and flush it.
 * @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
        fputs("\n", stdout);
        fflush(stdout);
        return 0;
}
209
210 static enum btrfs_check_mode parse_check_mode(const char *str)
211 {
212         if (strcmp(str, "lowmem") == 0)
213                 return CHECK_MODE_LOWMEM;
214         if (strcmp(str, "orig") == 0)
215                 return CHECK_MODE_ORIGINAL;
216         if (strcmp(str, "original") == 0)
217                 return CHECK_MODE_ORIGINAL;
218
219         return CHECK_MODE_UNKNOWN;
220 }
221
222 /* Compatible function to allow reuse of old codes */
223 static u64 first_extent_gap(struct rb_root *holes)
224 {
225         struct file_extent_hole *hole;
226
227         if (RB_EMPTY_ROOT(holes))
228                 return (u64)-1;
229
230         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
231         return hole->start;
232 }
233
234 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
235 {
236         struct file_extent_hole *hole1;
237         struct file_extent_hole *hole2;
238
239         hole1 = rb_entry(node1, struct file_extent_hole, node);
240         hole2 = rb_entry(node2, struct file_extent_hole, node);
241
242         if (hole1->start > hole2->start)
243                 return -1;
244         if (hole1->start < hole2->start)
245                 return 1;
246         /* Now hole1->start == hole2->start */
247         if (hole1->len >= hole2->len)
248                 /*
249                  * Hole 1 will be merge center
250                  * Same hole will be merged later
251                  */
252                 return -1;
253         /* Hole 2 will be merge center */
254         return 1;
255 }
256
257 /*
258  * Add a hole to the record
259  *
260  * This will do hole merge for copy_file_extent_holes(),
261  * which will ensure there won't be continuous holes.
262  */
263 static int add_file_extent_hole(struct rb_root *holes,
264                                 u64 start, u64 len)
265 {
266         struct file_extent_hole *hole;
267         struct file_extent_hole *prev = NULL;
268         struct file_extent_hole *next = NULL;
269
270         hole = malloc(sizeof(*hole));
271         if (!hole)
272                 return -ENOMEM;
273         hole->start = start;
274         hole->len = len;
275         /* Since compare will not return 0, no -EEXIST will happen */
276         rb_insert(holes, &hole->node, compare_hole);
277
278         /* simple merge with previous hole */
279         if (rb_prev(&hole->node))
280                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
281                                 node);
282         if (prev && prev->start + prev->len >= hole->start) {
283                 hole->len = hole->start + hole->len - prev->start;
284                 hole->start = prev->start;
285                 rb_erase(&prev->node, holes);
286                 free(prev);
287                 prev = NULL;
288         }
289
290         /* iterate merge with next holes */
291         while (1) {
292                 if (!rb_next(&hole->node))
293                         break;
294                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
295                                         node);
296                 if (hole->start + hole->len >= next->start) {
297                         if (hole->start + hole->len <= next->start + next->len)
298                                 hole->len = next->start + next->len -
299                                             hole->start;
300                         rb_erase(&next->node, holes);
301                         free(next);
302                         next = NULL;
303                 } else
304                         break;
305         }
306         return 0;
307 }
308
309 static int compare_hole_range(struct rb_node *node, void *data)
310 {
311         struct file_extent_hole *hole;
312         u64 start;
313
314         hole = (struct file_extent_hole *)data;
315         start = hole->start;
316
317         hole = rb_entry(node, struct file_extent_hole, node);
318         if (start < hole->start)
319                 return -1;
320         if (start >= hole->start && start < hole->start + hole->len)
321                 return 0;
322         return 1;
323 }
324
325 /*
326  * Delete a hole in the record
327  *
328  * This will do the hole split and is much restrict than add.
329  */
330 static int del_file_extent_hole(struct rb_root *holes,
331                                 u64 start, u64 len)
332 {
333         struct file_extent_hole *hole;
334         struct file_extent_hole tmp;
335         u64 prev_start = 0;
336         u64 prev_len = 0;
337         u64 next_start = 0;
338         u64 next_len = 0;
339         struct rb_node *node;
340         int have_prev = 0;
341         int have_next = 0;
342         int ret = 0;
343
344         tmp.start = start;
345         tmp.len = len;
346         node = rb_search(holes, &tmp, compare_hole_range, NULL);
347         if (!node)
348                 return -EEXIST;
349         hole = rb_entry(node, struct file_extent_hole, node);
350         if (start + len > hole->start + hole->len)
351                 return -EEXIST;
352
353         /*
354          * Now there will be no overlap, delete the hole and re-add the
355          * split(s) if they exists.
356          */
357         if (start > hole->start) {
358                 prev_start = hole->start;
359                 prev_len = start - hole->start;
360                 have_prev = 1;
361         }
362         if (hole->start + hole->len > start + len) {
363                 next_start = start + len;
364                 next_len = hole->start + hole->len - start - len;
365                 have_next = 1;
366         }
367         rb_erase(node, holes);
368         free(hole);
369         if (have_prev) {
370                 ret = add_file_extent_hole(holes, prev_start, prev_len);
371                 if (ret < 0)
372                         return ret;
373         }
374         if (have_next) {
375                 ret = add_file_extent_hole(holes, next_start, next_len);
376                 if (ret < 0)
377                         return ret;
378         }
379         return 0;
380 }
381
382 static int copy_file_extent_holes(struct rb_root *dst,
383                                   struct rb_root *src)
384 {
385         struct file_extent_hole *hole;
386         struct rb_node *node;
387         int ret = 0;
388
389         node = rb_first(src);
390         while (node) {
391                 hole = rb_entry(node, struct file_extent_hole, node);
392                 ret = add_file_extent_hole(dst, hole->start, hole->len);
393                 if (ret)
394                         break;
395                 node = rb_next(node);
396         }
397         return ret;
398 }
399
400 static void free_file_extent_holes(struct rb_root *holes)
401 {
402         struct rb_node *node;
403         struct file_extent_hole *hole;
404
405         node = rb_first(holes);
406         while (node) {
407                 hole = rb_entry(node, struct file_extent_hole, node);
408                 rb_erase(node, holes);
409                 free(hole);
410                 node = rb_first(holes);
411         }
412 }
413
414 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
415
416 static void record_root_in_trans(struct btrfs_trans_handle *trans,
417                                  struct btrfs_root *root)
418 {
419         if (root->last_trans != trans->transid) {
420                 root->track_dirty = 1;
421                 root->last_trans = trans->transid;
422                 root->commit_root = root->node;
423                 extent_buffer_get(root->node);
424         }
425 }
426
427 static u8 imode_to_type(u32 imode)
428 {
429 #define S_SHIFT 12
430         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
431                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
432                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
433                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
434                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
435                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
436                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
437                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
438         };
439
440         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
441 #undef S_SHIFT
442 }
443
444 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
445 {
446         struct device_record *rec1;
447         struct device_record *rec2;
448
449         rec1 = rb_entry(node1, struct device_record, node);
450         rec2 = rb_entry(node2, struct device_record, node);
451         if (rec1->devid > rec2->devid)
452                 return -1;
453         else if (rec1->devid < rec2->devid)
454                 return 1;
455         else
456                 return 0;
457 }
458
459 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
460 {
461         struct inode_record *rec;
462         struct inode_backref *backref;
463         struct inode_backref *orig;
464         struct inode_backref *tmp;
465         struct orphan_data_extent *src_orphan;
466         struct orphan_data_extent *dst_orphan;
467         struct rb_node *rb;
468         size_t size;
469         int ret;
470
471         rec = malloc(sizeof(*rec));
472         if (!rec)
473                 return ERR_PTR(-ENOMEM);
474         memcpy(rec, orig_rec, sizeof(*rec));
475         rec->refs = 1;
476         INIT_LIST_HEAD(&rec->backrefs);
477         INIT_LIST_HEAD(&rec->orphan_extents);
478         rec->holes = RB_ROOT;
479
480         list_for_each_entry(orig, &orig_rec->backrefs, list) {
481                 size = sizeof(*orig) + orig->namelen + 1;
482                 backref = malloc(size);
483                 if (!backref) {
484                         ret = -ENOMEM;
485                         goto cleanup;
486                 }
487                 memcpy(backref, orig, size);
488                 list_add_tail(&backref->list, &rec->backrefs);
489         }
490         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
491                 dst_orphan = malloc(sizeof(*dst_orphan));
492                 if (!dst_orphan) {
493                         ret = -ENOMEM;
494                         goto cleanup;
495                 }
496                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
497                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
498         }
499         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
500         if (ret < 0)
501                 goto cleanup_rb;
502
503         return rec;
504
505 cleanup_rb:
506         rb = rb_first(&rec->holes);
507         while (rb) {
508                 struct file_extent_hole *hole;
509
510                 hole = rb_entry(rb, struct file_extent_hole, node);
511                 rb = rb_next(rb);
512                 free(hole);
513         }
514
515 cleanup:
516         if (!list_empty(&rec->backrefs))
517                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
518                         list_del(&orig->list);
519                         free(orig);
520                 }
521
522         if (!list_empty(&rec->orphan_extents))
523                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
524                         list_del(&orig->list);
525                         free(orig);
526                 }
527
528         free(rec);
529
530         return ERR_PTR(ret);
531 }
532
533 static void print_orphan_data_extents(struct list_head *orphan_extents,
534                                       u64 objectid)
535 {
536         struct orphan_data_extent *orphan;
537
538         if (list_empty(orphan_extents))
539                 return;
540         printf("The following data extent is lost in tree %llu:\n",
541                objectid);
542         list_for_each_entry(orphan, orphan_extents, list) {
543                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
544                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
545                        orphan->disk_len);
546         }
547 }
548
549 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
550 {
551         u64 root_objectid = root->root_key.objectid;
552         int errors = rec->errors;
553
554         if (!errors)
555                 return;
556         /* reloc root errors, we print its corresponding fs root objectid*/
557         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
558                 root_objectid = root->root_key.offset;
559                 fprintf(stderr, "reloc");
560         }
561         fprintf(stderr, "root %llu inode %llu errors %x",
562                 (unsigned long long) root_objectid,
563                 (unsigned long long) rec->ino, rec->errors);
564
565         if (errors & I_ERR_NO_INODE_ITEM)
566                 fprintf(stderr, ", no inode item");
567         if (errors & I_ERR_NO_ORPHAN_ITEM)
568                 fprintf(stderr, ", no orphan item");
569         if (errors & I_ERR_DUP_INODE_ITEM)
570                 fprintf(stderr, ", dup inode item");
571         if (errors & I_ERR_DUP_DIR_INDEX)
572                 fprintf(stderr, ", dup dir index");
573         if (errors & I_ERR_ODD_DIR_ITEM)
574                 fprintf(stderr, ", odd dir item");
575         if (errors & I_ERR_ODD_FILE_EXTENT)
576                 fprintf(stderr, ", odd file extent");
577         if (errors & I_ERR_BAD_FILE_EXTENT)
578                 fprintf(stderr, ", bad file extent");
579         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
580                 fprintf(stderr, ", file extent overlap");
581         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
582                 fprintf(stderr, ", file extent discount");
583         if (errors & I_ERR_DIR_ISIZE_WRONG)
584                 fprintf(stderr, ", dir isize wrong");
585         if (errors & I_ERR_FILE_NBYTES_WRONG)
586                 fprintf(stderr, ", nbytes wrong");
587         if (errors & I_ERR_ODD_CSUM_ITEM)
588                 fprintf(stderr, ", odd csum item");
589         if (errors & I_ERR_SOME_CSUM_MISSING)
590                 fprintf(stderr, ", some csum missing");
591         if (errors & I_ERR_LINK_COUNT_WRONG)
592                 fprintf(stderr, ", link count wrong");
593         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
594                 fprintf(stderr, ", orphan file extent");
595         fprintf(stderr, "\n");
596         /* Print the orphan extents if needed */
597         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
598                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
599
600         /* Print the holes if needed */
601         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
602                 struct file_extent_hole *hole;
603                 struct rb_node *node;
604                 int found = 0;
605
606                 node = rb_first(&rec->holes);
607                 fprintf(stderr, "Found file extent holes:\n");
608                 while (node) {
609                         found = 1;
610                         hole = rb_entry(node, struct file_extent_hole, node);
611                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
612                                 hole->start, hole->len);
613                         node = rb_next(node);
614                 }
615                 if (!found)
616                         fprintf(stderr, "\tstart: 0, len: %llu\n",
617                                 round_up(rec->isize,
618                                          root->fs_info->sectorsize));
619         }
620 }
621
622 static void print_ref_error(int errors)
623 {
624         if (errors & REF_ERR_NO_DIR_ITEM)
625                 fprintf(stderr, ", no dir item");
626         if (errors & REF_ERR_NO_DIR_INDEX)
627                 fprintf(stderr, ", no dir index");
628         if (errors & REF_ERR_NO_INODE_REF)
629                 fprintf(stderr, ", no inode ref");
630         if (errors & REF_ERR_DUP_DIR_ITEM)
631                 fprintf(stderr, ", dup dir item");
632         if (errors & REF_ERR_DUP_DIR_INDEX)
633                 fprintf(stderr, ", dup dir index");
634         if (errors & REF_ERR_DUP_INODE_REF)
635                 fprintf(stderr, ", dup inode ref");
636         if (errors & REF_ERR_INDEX_UNMATCH)
637                 fprintf(stderr, ", index mismatch");
638         if (errors & REF_ERR_FILETYPE_UNMATCH)
639                 fprintf(stderr, ", filetype mismatch");
640         if (errors & REF_ERR_NAME_TOO_LONG)
641                 fprintf(stderr, ", name too long");
642         if (errors & REF_ERR_NO_ROOT_REF)
643                 fprintf(stderr, ", no root ref");
644         if (errors & REF_ERR_NO_ROOT_BACKREF)
645                 fprintf(stderr, ", no root backref");
646         if (errors & REF_ERR_DUP_ROOT_REF)
647                 fprintf(stderr, ", dup root ref");
648         if (errors & REF_ERR_DUP_ROOT_BACKREF)
649                 fprintf(stderr, ", dup root backref");
650         fprintf(stderr, "\n");
651 }
652
653 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
654                                           u64 ino, int mod)
655 {
656         struct ptr_node *node;
657         struct cache_extent *cache;
658         struct inode_record *rec = NULL;
659         int ret;
660
661         cache = lookup_cache_extent(inode_cache, ino, 1);
662         if (cache) {
663                 node = container_of(cache, struct ptr_node, cache);
664                 rec = node->data;
665                 if (mod && rec->refs > 1) {
666                         node->data = clone_inode_rec(rec);
667                         if (IS_ERR(node->data))
668                                 return node->data;
669                         rec->refs--;
670                         rec = node->data;
671                 }
672         } else if (mod) {
673                 rec = calloc(1, sizeof(*rec));
674                 if (!rec)
675                         return ERR_PTR(-ENOMEM);
676                 rec->ino = ino;
677                 rec->extent_start = (u64)-1;
678                 rec->refs = 1;
679                 INIT_LIST_HEAD(&rec->backrefs);
680                 INIT_LIST_HEAD(&rec->orphan_extents);
681                 rec->holes = RB_ROOT;
682
683                 node = malloc(sizeof(*node));
684                 if (!node) {
685                         free(rec);
686                         return ERR_PTR(-ENOMEM);
687                 }
688                 node->cache.start = ino;
689                 node->cache.size = 1;
690                 node->data = rec;
691
692                 if (ino == BTRFS_FREE_INO_OBJECTID)
693                         rec->found_link = 1;
694
695                 ret = insert_cache_extent(inode_cache, &node->cache);
696                 if (ret)
697                         return ERR_PTR(-EEXIST);
698         }
699         return rec;
700 }
701
702 static void free_orphan_data_extents(struct list_head *orphan_extents)
703 {
704         struct orphan_data_extent *orphan;
705
706         while (!list_empty(orphan_extents)) {
707                 orphan = list_entry(orphan_extents->next,
708                                     struct orphan_data_extent, list);
709                 list_del(&orphan->list);
710                 free(orphan);
711         }
712 }
713
714 static void free_inode_rec(struct inode_record *rec)
715 {
716         struct inode_backref *backref;
717
718         if (--rec->refs > 0)
719                 return;
720
721         while (!list_empty(&rec->backrefs)) {
722                 backref = to_inode_backref(rec->backrefs.next);
723                 list_del(&backref->list);
724                 free(backref);
725         }
726         free_orphan_data_extents(&rec->orphan_extents);
727         free_file_extent_holes(&rec->holes);
728         free(rec);
729 }
730
731 static int can_free_inode_rec(struct inode_record *rec)
732 {
733         if (!rec->errors && rec->checked && rec->found_inode_item &&
734             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
735                 return 1;
736         return 0;
737 }
738
739 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
740                                  struct inode_record *rec)
741 {
742         struct cache_extent *cache;
743         struct inode_backref *tmp, *backref;
744         struct ptr_node *node;
745         u8 filetype;
746
747         if (!rec->found_inode_item)
748                 return;
749
750         filetype = imode_to_type(rec->imode);
751         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
752                 if (backref->found_dir_item && backref->found_dir_index) {
753                         if (backref->filetype != filetype)
754                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
755                         if (!backref->errors && backref->found_inode_ref &&
756                             rec->nlink == rec->found_link) {
757                                 list_del(&backref->list);
758                                 free(backref);
759                         }
760                 }
761         }
762
763         if (!rec->checked || rec->merging)
764                 return;
765
766         if (S_ISDIR(rec->imode)) {
767                 if (rec->found_size != rec->isize)
768                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
769                 if (rec->found_file_extent)
770                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
771         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
772                 if (rec->found_dir_item)
773                         rec->errors |= I_ERR_ODD_DIR_ITEM;
774                 if (rec->found_size != rec->nbytes)
775                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
776                 if (rec->nlink > 0 && !no_holes &&
777                     (rec->extent_end < rec->isize ||
778                      first_extent_gap(&rec->holes) < rec->isize))
779                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
780         }
781
782         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
783                 if (rec->found_csum_item && rec->nodatasum)
784                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
785                 if (rec->some_csum_missing && !rec->nodatasum)
786                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
787         }
788
789         BUG_ON(rec->refs != 1);
790         if (can_free_inode_rec(rec)) {
791                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
792                 node = container_of(cache, struct ptr_node, cache);
793                 BUG_ON(node->data != rec);
794                 remove_cache_extent(inode_cache, &node->cache);
795                 free(node);
796                 free_inode_rec(rec);
797         }
798 }
799
800 static int check_orphan_item(struct btrfs_root *root, u64 ino)
801 {
802         struct btrfs_path path;
803         struct btrfs_key key;
804         int ret;
805
806         key.objectid = BTRFS_ORPHAN_OBJECTID;
807         key.type = BTRFS_ORPHAN_ITEM_KEY;
808         key.offset = ino;
809
810         btrfs_init_path(&path);
811         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
812         btrfs_release_path(&path);
813         if (ret > 0)
814                 ret = -ENOENT;
815         return ret;
816 }
817
818 static int process_inode_item(struct extent_buffer *eb,
819                               int slot, struct btrfs_key *key,
820                               struct shared_node *active_node)
821 {
822         struct inode_record *rec;
823         struct btrfs_inode_item *item;
824
825         rec = active_node->current;
826         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
827         if (rec->found_inode_item) {
828                 rec->errors |= I_ERR_DUP_INODE_ITEM;
829                 return 1;
830         }
831         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
832         rec->nlink = btrfs_inode_nlink(eb, item);
833         rec->isize = btrfs_inode_size(eb, item);
834         rec->nbytes = btrfs_inode_nbytes(eb, item);
835         rec->imode = btrfs_inode_mode(eb, item);
836         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
837                 rec->nodatasum = 1;
838         rec->found_inode_item = 1;
839         if (rec->nlink == 0)
840                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
841         maybe_free_inode_rec(&active_node->inode_cache, rec);
842         return 0;
843 }
844
845 static struct inode_backref *get_inode_backref(struct inode_record *rec,
846                                                 const char *name,
847                                                 int namelen, u64 dir)
848 {
849         struct inode_backref *backref;
850
851         list_for_each_entry(backref, &rec->backrefs, list) {
852                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
853                         break;
854                 if (backref->dir != dir || backref->namelen != namelen)
855                         continue;
856                 if (memcmp(name, backref->name, namelen))
857                         continue;
858                 return backref;
859         }
860
861         backref = malloc(sizeof(*backref) + namelen + 1);
862         if (!backref)
863                 return NULL;
864         memset(backref, 0, sizeof(*backref));
865         backref->dir = dir;
866         backref->namelen = namelen;
867         memcpy(backref->name, name, namelen);
868         backref->name[namelen] = '\0';
869         list_add_tail(&backref->list, &rec->backrefs);
870         return backref;
871 }
872
873 static int add_inode_backref(struct cache_tree *inode_cache,
874                              u64 ino, u64 dir, u64 index,
875                              const char *name, int namelen,
876                              u8 filetype, u8 itemtype, int errors)
877 {
878         struct inode_record *rec;
879         struct inode_backref *backref;
880
881         rec = get_inode_rec(inode_cache, ino, 1);
882         BUG_ON(IS_ERR(rec));
883         backref = get_inode_backref(rec, name, namelen, dir);
884         BUG_ON(!backref);
885         if (errors)
886                 backref->errors |= errors;
887         if (itemtype == BTRFS_DIR_INDEX_KEY) {
888                 if (backref->found_dir_index)
889                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
890                 if (backref->found_inode_ref && backref->index != index)
891                         backref->errors |= REF_ERR_INDEX_UNMATCH;
892                 if (backref->found_dir_item && backref->filetype != filetype)
893                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
894
895                 backref->index = index;
896                 backref->filetype = filetype;
897                 backref->found_dir_index = 1;
898         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
899                 rec->found_link++;
900                 if (backref->found_dir_item)
901                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
902                 if (backref->found_dir_index && backref->filetype != filetype)
903                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
904
905                 backref->filetype = filetype;
906                 backref->found_dir_item = 1;
907         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
908                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
909                 if (backref->found_inode_ref)
910                         backref->errors |= REF_ERR_DUP_INODE_REF;
911                 if (backref->found_dir_index && backref->index != index)
912                         backref->errors |= REF_ERR_INDEX_UNMATCH;
913                 else
914                         backref->index = index;
915
916                 backref->ref_type = itemtype;
917                 backref->found_inode_ref = 1;
918         } else {
919                 BUG_ON(1);
920         }
921
922         maybe_free_inode_rec(inode_cache, rec);
923         return 0;
924 }
925
926 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
927                             struct cache_tree *dst_cache)
928 {
929         struct inode_backref *backref;
930         u32 dir_count = 0;
931         int ret = 0;
932
933         dst->merging = 1;
934         list_for_each_entry(backref, &src->backrefs, list) {
935                 if (backref->found_dir_index) {
936                         add_inode_backref(dst_cache, dst->ino, backref->dir,
937                                         backref->index, backref->name,
938                                         backref->namelen, backref->filetype,
939                                         BTRFS_DIR_INDEX_KEY, backref->errors);
940                 }
941                 if (backref->found_dir_item) {
942                         dir_count++;
943                         add_inode_backref(dst_cache, dst->ino,
944                                         backref->dir, 0, backref->name,
945                                         backref->namelen, backref->filetype,
946                                         BTRFS_DIR_ITEM_KEY, backref->errors);
947                 }
948                 if (backref->found_inode_ref) {
949                         add_inode_backref(dst_cache, dst->ino,
950                                         backref->dir, backref->index,
951                                         backref->name, backref->namelen, 0,
952                                         backref->ref_type, backref->errors);
953                 }
954         }
955
956         if (src->found_dir_item)
957                 dst->found_dir_item = 1;
958         if (src->found_file_extent)
959                 dst->found_file_extent = 1;
960         if (src->found_csum_item)
961                 dst->found_csum_item = 1;
962         if (src->some_csum_missing)
963                 dst->some_csum_missing = 1;
964         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
965                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
966                 if (ret < 0)
967                         return ret;
968         }
969
970         BUG_ON(src->found_link < dir_count);
971         dst->found_link += src->found_link - dir_count;
972         dst->found_size += src->found_size;
973         if (src->extent_start != (u64)-1) {
974                 if (dst->extent_start == (u64)-1) {
975                         dst->extent_start = src->extent_start;
976                         dst->extent_end = src->extent_end;
977                 } else {
978                         if (dst->extent_end > src->extent_start)
979                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
980                         else if (dst->extent_end < src->extent_start) {
981                                 ret = add_file_extent_hole(&dst->holes,
982                                         dst->extent_end,
983                                         src->extent_start - dst->extent_end);
984                         }
985                         if (dst->extent_end < src->extent_end)
986                                 dst->extent_end = src->extent_end;
987                 }
988         }
989
990         dst->errors |= src->errors;
991         if (src->found_inode_item) {
992                 if (!dst->found_inode_item) {
993                         dst->nlink = src->nlink;
994                         dst->isize = src->isize;
995                         dst->nbytes = src->nbytes;
996                         dst->imode = src->imode;
997                         dst->nodatasum = src->nodatasum;
998                         dst->found_inode_item = 1;
999                 } else {
1000                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1001                 }
1002         }
1003         dst->merging = 0;
1004
1005         return 0;
1006 }
1007
1008 static int splice_shared_node(struct shared_node *src_node,
1009                               struct shared_node *dst_node)
1010 {
1011         struct cache_extent *cache;
1012         struct ptr_node *node, *ins;
1013         struct cache_tree *src, *dst;
1014         struct inode_record *rec, *conflict;
1015         u64 current_ino = 0;
1016         int splice = 0;
1017         int ret;
1018
1019         if (--src_node->refs == 0)
1020                 splice = 1;
1021         if (src_node->current)
1022                 current_ino = src_node->current->ino;
1023
1024         src = &src_node->root_cache;
1025         dst = &dst_node->root_cache;
1026 again:
1027         cache = search_cache_extent(src, 0);
1028         while (cache) {
1029                 node = container_of(cache, struct ptr_node, cache);
1030                 rec = node->data;
1031                 cache = next_cache_extent(cache);
1032
1033                 if (splice) {
1034                         remove_cache_extent(src, &node->cache);
1035                         ins = node;
1036                 } else {
1037                         ins = malloc(sizeof(*ins));
1038                         BUG_ON(!ins);
1039                         ins->cache.start = node->cache.start;
1040                         ins->cache.size = node->cache.size;
1041                         ins->data = rec;
1042                         rec->refs++;
1043                 }
1044                 ret = insert_cache_extent(dst, &ins->cache);
1045                 if (ret == -EEXIST) {
1046                         conflict = get_inode_rec(dst, rec->ino, 1);
1047                         BUG_ON(IS_ERR(conflict));
1048                         merge_inode_recs(rec, conflict, dst);
1049                         if (rec->checked) {
1050                                 conflict->checked = 1;
1051                                 if (dst_node->current == conflict)
1052                                         dst_node->current = NULL;
1053                         }
1054                         maybe_free_inode_rec(dst, conflict);
1055                         free_inode_rec(rec);
1056                         free(ins);
1057                 } else {
1058                         BUG_ON(ret);
1059                 }
1060         }
1061
1062         if (src == &src_node->root_cache) {
1063                 src = &src_node->inode_cache;
1064                 dst = &dst_node->inode_cache;
1065                 goto again;
1066         }
1067
1068         if (current_ino > 0 && (!dst_node->current ||
1069             current_ino > dst_node->current->ino)) {
1070                 if (dst_node->current) {
1071                         dst_node->current->checked = 1;
1072                         maybe_free_inode_rec(dst, dst_node->current);
1073                 }
1074                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1075                 BUG_ON(IS_ERR(dst_node->current));
1076         }
1077         return 0;
1078 }
1079
1080 static void free_inode_ptr(struct cache_extent *cache)
1081 {
1082         struct ptr_node *node;
1083         struct inode_record *rec;
1084
1085         node = container_of(cache, struct ptr_node, cache);
1086         rec = node->data;
1087         free_inode_rec(rec);
1088         free(node);
1089 }
1090
1091 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1092
1093 static struct shared_node *find_shared_node(struct cache_tree *shared,
1094                                             u64 bytenr)
1095 {
1096         struct cache_extent *cache;
1097         struct shared_node *node;
1098
1099         cache = lookup_cache_extent(shared, bytenr, 1);
1100         if (cache) {
1101                 node = container_of(cache, struct shared_node, cache);
1102                 return node;
1103         }
1104         return NULL;
1105 }
1106
1107 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1108 {
1109         int ret;
1110         struct shared_node *node;
1111
1112         node = calloc(1, sizeof(*node));
1113         if (!node)
1114                 return -ENOMEM;
1115         node->cache.start = bytenr;
1116         node->cache.size = 1;
1117         cache_tree_init(&node->root_cache);
1118         cache_tree_init(&node->inode_cache);
1119         node->refs = refs;
1120
1121         ret = insert_cache_extent(shared, &node->cache);
1122
1123         return ret;
1124 }
1125
1126 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1127                              struct walk_control *wc, int level)
1128 {
1129         struct shared_node *node;
1130         struct shared_node *dest;
1131         int ret;
1132
1133         if (level == wc->active_node)
1134                 return 0;
1135
1136         BUG_ON(wc->active_node <= level);
1137         node = find_shared_node(&wc->shared, bytenr);
1138         if (!node) {
1139                 ret = add_shared_node(&wc->shared, bytenr, refs);
1140                 BUG_ON(ret);
1141                 node = find_shared_node(&wc->shared, bytenr);
1142                 wc->nodes[level] = node;
1143                 wc->active_node = level;
1144                 return 0;
1145         }
1146
1147         if (wc->root_level == wc->active_node &&
1148             btrfs_root_refs(&root->root_item) == 0) {
1149                 if (--node->refs == 0) {
1150                         free_inode_recs_tree(&node->root_cache);
1151                         free_inode_recs_tree(&node->inode_cache);
1152                         remove_cache_extent(&wc->shared, &node->cache);
1153                         free(node);
1154                 }
1155                 return 1;
1156         }
1157
1158         dest = wc->nodes[wc->active_node];
1159         splice_shared_node(node, dest);
1160         if (node->refs == 0) {
1161                 remove_cache_extent(&wc->shared, &node->cache);
1162                 free(node);
1163         }
1164         return 1;
1165 }
1166
1167 static int leave_shared_node(struct btrfs_root *root,
1168                              struct walk_control *wc, int level)
1169 {
1170         struct shared_node *node;
1171         struct shared_node *dest;
1172         int i;
1173
1174         if (level == wc->root_level)
1175                 return 0;
1176
1177         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1178                 if (wc->nodes[i])
1179                         break;
1180         }
1181         BUG_ON(i >= BTRFS_MAX_LEVEL);
1182
1183         node = wc->nodes[wc->active_node];
1184         wc->nodes[wc->active_node] = NULL;
1185         wc->active_node = i;
1186
1187         dest = wc->nodes[wc->active_node];
1188         if (wc->active_node < wc->root_level ||
1189             btrfs_root_refs(&root->root_item) > 0) {
1190                 BUG_ON(node->refs <= 1);
1191                 splice_shared_node(node, dest);
1192         } else {
1193                 BUG_ON(node->refs < 2);
1194                 node->refs--;
1195         }
1196         return 0;
1197 }
1198
1199 /*
1200  * Returns:
1201  * < 0 - on error
1202  * 1   - if the root with id child_root_id is a child of root parent_root_id
1203  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1204  *       has other root(s) as parent(s)
1205  * 2   - if the root child_root_id doesn't have any parent roots
1206  */
1207 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1208                          u64 child_root_id)
1209 {
1210         struct btrfs_path path;
1211         struct btrfs_key key;
1212         struct extent_buffer *leaf;
1213         int has_parent = 0;
1214         int ret;
1215
1216         btrfs_init_path(&path);
1217
1218         key.objectid = parent_root_id;
1219         key.type = BTRFS_ROOT_REF_KEY;
1220         key.offset = child_root_id;
1221         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1222                                 0, 0);
1223         if (ret < 0)
1224                 return ret;
1225         btrfs_release_path(&path);
1226         if (!ret)
1227                 return 1;
1228
1229         key.objectid = child_root_id;
1230         key.type = BTRFS_ROOT_BACKREF_KEY;
1231         key.offset = 0;
1232         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1233                                 0, 0);
1234         if (ret < 0)
1235                 goto out;
1236
1237         while (1) {
1238                 leaf = path.nodes[0];
1239                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1240                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1241                         if (ret)
1242                                 break;
1243                         leaf = path.nodes[0];
1244                 }
1245
1246                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1247                 if (key.objectid != child_root_id ||
1248                     key.type != BTRFS_ROOT_BACKREF_KEY)
1249                         break;
1250
1251                 has_parent = 1;
1252
1253                 if (key.offset == parent_root_id) {
1254                         btrfs_release_path(&path);
1255                         return 1;
1256                 }
1257
1258                 path.slots[0]++;
1259         }
1260 out:
1261         btrfs_release_path(&path);
1262         if (ret < 0)
1263                 return ret;
1264         return has_parent ? 0 : 2;
1265 }
1266
1267 static int process_dir_item(struct extent_buffer *eb,
1268                             int slot, struct btrfs_key *key,
1269                             struct shared_node *active_node)
1270 {
1271         u32 total;
1272         u32 cur = 0;
1273         u32 len;
1274         u32 name_len;
1275         u32 data_len;
1276         int error;
1277         int nritems = 0;
1278         u8 filetype;
1279         struct btrfs_dir_item *di;
1280         struct inode_record *rec;
1281         struct cache_tree *root_cache;
1282         struct cache_tree *inode_cache;
1283         struct btrfs_key location;
1284         char namebuf[BTRFS_NAME_LEN];
1285
1286         root_cache = &active_node->root_cache;
1287         inode_cache = &active_node->inode_cache;
1288         rec = active_node->current;
1289         rec->found_dir_item = 1;
1290
1291         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1292         total = btrfs_item_size_nr(eb, slot);
1293         while (cur < total) {
1294                 nritems++;
1295                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1296                 name_len = btrfs_dir_name_len(eb, di);
1297                 data_len = btrfs_dir_data_len(eb, di);
1298                 filetype = btrfs_dir_type(eb, di);
1299
1300                 rec->found_size += name_len;
1301                 if (cur + sizeof(*di) + name_len > total ||
1302                     name_len > BTRFS_NAME_LEN) {
1303                         error = REF_ERR_NAME_TOO_LONG;
1304
1305                         if (cur + sizeof(*di) > total)
1306                                 break;
1307                         len = min_t(u32, total - cur - sizeof(*di),
1308                                     BTRFS_NAME_LEN);
1309                 } else {
1310                         len = name_len;
1311                         error = 0;
1312                 }
1313
1314                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1315
1316                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1317                     key->offset != btrfs_name_hash(namebuf, len)) {
1318                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1319                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1320                         key->objectid, key->offset, namebuf, len, filetype,
1321                         key->offset, btrfs_name_hash(namebuf, len));
1322                 }
1323
1324                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1325                         add_inode_backref(inode_cache, location.objectid,
1326                                           key->objectid, key->offset, namebuf,
1327                                           len, filetype, key->type, error);
1328                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1329                         add_inode_backref(root_cache, location.objectid,
1330                                           key->objectid, key->offset,
1331                                           namebuf, len, filetype,
1332                                           key->type, error);
1333                 } else {
1334                         fprintf(stderr,
1335                                 "unknown location type %d in DIR_ITEM[%llu %llu]\n",
1336                                 location.type, key->objectid, key->offset);
1337                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1338                                           key->objectid, key->offset, namebuf,
1339                                           len, filetype, key->type, error);
1340                 }
1341
1342                 len = sizeof(*di) + name_len + data_len;
1343                 di = (struct btrfs_dir_item *)((char *)di + len);
1344                 cur += len;
1345         }
1346         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1347                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1348
1349         return 0;
1350 }
1351
1352 static int process_inode_ref(struct extent_buffer *eb,
1353                              int slot, struct btrfs_key *key,
1354                              struct shared_node *active_node)
1355 {
1356         u32 total;
1357         u32 cur = 0;
1358         u32 len;
1359         u32 name_len;
1360         u64 index;
1361         int error;
1362         struct cache_tree *inode_cache;
1363         struct btrfs_inode_ref *ref;
1364         char namebuf[BTRFS_NAME_LEN];
1365
1366         inode_cache = &active_node->inode_cache;
1367
1368         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1369         total = btrfs_item_size_nr(eb, slot);
1370         while (cur < total) {
1371                 name_len = btrfs_inode_ref_name_len(eb, ref);
1372                 index = btrfs_inode_ref_index(eb, ref);
1373
1374                 /* inode_ref + namelen should not cross item boundary */
1375                 if (cur + sizeof(*ref) + name_len > total ||
1376                     name_len > BTRFS_NAME_LEN) {
1377                         if (total < cur + sizeof(*ref))
1378                                 break;
1379
1380                         /* Still try to read out the remaining part */
1381                         len = min_t(u32, total - cur - sizeof(*ref),
1382                                     BTRFS_NAME_LEN);
1383                         error = REF_ERR_NAME_TOO_LONG;
1384                 } else {
1385                         len = name_len;
1386                         error = 0;
1387                 }
1388
1389                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1390                 add_inode_backref(inode_cache, key->objectid, key->offset,
1391                                   index, namebuf, len, 0, key->type, error);
1392
1393                 len = sizeof(*ref) + name_len;
1394                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1395                 cur += len;
1396         }
1397         return 0;
1398 }
1399
1400 static int process_inode_extref(struct extent_buffer *eb,
1401                                 int slot, struct btrfs_key *key,
1402                                 struct shared_node *active_node)
1403 {
1404         u32 total;
1405         u32 cur = 0;
1406         u32 len;
1407         u32 name_len;
1408         u64 index;
1409         u64 parent;
1410         int error;
1411         struct cache_tree *inode_cache;
1412         struct btrfs_inode_extref *extref;
1413         char namebuf[BTRFS_NAME_LEN];
1414
1415         inode_cache = &active_node->inode_cache;
1416
1417         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1418         total = btrfs_item_size_nr(eb, slot);
1419         while (cur < total) {
1420                 name_len = btrfs_inode_extref_name_len(eb, extref);
1421                 index = btrfs_inode_extref_index(eb, extref);
1422                 parent = btrfs_inode_extref_parent(eb, extref);
1423                 if (name_len <= BTRFS_NAME_LEN) {
1424                         len = name_len;
1425                         error = 0;
1426                 } else {
1427                         len = BTRFS_NAME_LEN;
1428                         error = REF_ERR_NAME_TOO_LONG;
1429                 }
1430                 read_extent_buffer(eb, namebuf,
1431                                    (unsigned long)(extref + 1), len);
1432                 add_inode_backref(inode_cache, key->objectid, parent,
1433                                   index, namebuf, len, 0, key->type, error);
1434
1435                 len = sizeof(*extref) + name_len;
1436                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1437                 cur += len;
1438         }
1439         return 0;
1440
1441 }
1442
1443 static int count_csum_range(struct btrfs_root *root, u64 start,
1444                             u64 len, u64 *found)
1445 {
1446         struct btrfs_key key;
1447         struct btrfs_path path;
1448         struct extent_buffer *leaf;
1449         int ret;
1450         size_t size;
1451         *found = 0;
1452         u64 csum_end;
1453         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1454
1455         btrfs_init_path(&path);
1456
1457         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1458         key.offset = start;
1459         key.type = BTRFS_EXTENT_CSUM_KEY;
1460
1461         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1462                                 &key, &path, 0, 0);
1463         if (ret < 0)
1464                 goto out;
1465         if (ret > 0 && path.slots[0] > 0) {
1466                 leaf = path.nodes[0];
1467                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1468                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1469                     key.type == BTRFS_EXTENT_CSUM_KEY)
1470                         path.slots[0]--;
1471         }
1472
1473         while (len > 0) {
1474                 leaf = path.nodes[0];
1475                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1476                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1477                         if (ret > 0)
1478                                 break;
1479                         else if (ret < 0)
1480                                 goto out;
1481                         leaf = path.nodes[0];
1482                 }
1483
1484                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1485                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1486                     key.type != BTRFS_EXTENT_CSUM_KEY)
1487                         break;
1488
1489                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1490                 if (key.offset >= start + len)
1491                         break;
1492
1493                 if (key.offset > start)
1494                         start = key.offset;
1495
1496                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1497                 csum_end = key.offset + (size / csum_size) *
1498                            root->fs_info->sectorsize;
1499                 if (csum_end > start) {
1500                         size = min(csum_end - start, len);
1501                         len -= size;
1502                         start += size;
1503                         *found += size;
1504                 }
1505
1506                 path.slots[0]++;
1507         }
1508 out:
1509         btrfs_release_path(&path);
1510         if (ret < 0)
1511                 return ret;
1512         return 0;
1513 }
1514
1515 static int process_file_extent(struct btrfs_root *root,
1516                                 struct extent_buffer *eb,
1517                                 int slot, struct btrfs_key *key,
1518                                 struct shared_node *active_node)
1519 {
1520         struct inode_record *rec;
1521         struct btrfs_file_extent_item *fi;
1522         u64 num_bytes = 0;
1523         u64 disk_bytenr = 0;
1524         u64 extent_offset = 0;
1525         u64 mask = root->fs_info->sectorsize - 1;
1526         int extent_type;
1527         int ret;
1528
1529         rec = active_node->current;
1530         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1531         rec->found_file_extent = 1;
1532
1533         if (rec->extent_start == (u64)-1) {
1534                 rec->extent_start = key->offset;
1535                 rec->extent_end = key->offset;
1536         }
1537
1538         if (rec->extent_end > key->offset)
1539                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1540         else if (rec->extent_end < key->offset) {
1541                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1542                                            key->offset - rec->extent_end);
1543                 if (ret < 0)
1544                         return ret;
1545         }
1546
1547         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1548         extent_type = btrfs_file_extent_type(eb, fi);
1549
1550         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1551                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1552                 if (num_bytes == 0)
1553                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1554                 rec->found_size += num_bytes;
1555                 num_bytes = (num_bytes + mask) & ~mask;
1556         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1557                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1558                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1559                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1560                 extent_offset = btrfs_file_extent_offset(eb, fi);
1561                 if (num_bytes == 0 || (num_bytes & mask))
1562                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1563                 if (num_bytes + extent_offset >
1564                     btrfs_file_extent_ram_bytes(eb, fi))
1565                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1566                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1567                     (btrfs_file_extent_compression(eb, fi) ||
1568                      btrfs_file_extent_encryption(eb, fi) ||
1569                      btrfs_file_extent_other_encoding(eb, fi)))
1570                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1571                 if (disk_bytenr > 0)
1572                         rec->found_size += num_bytes;
1573         } else {
1574                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1575         }
1576         rec->extent_end = key->offset + num_bytes;
1577
1578         /*
1579          * The data reloc tree will copy full extents into its inode and then
1580          * copy the corresponding csums.  Because the extent it copied could be
1581          * a preallocated extent that hasn't been written to yet there may be no
1582          * csums to copy, ergo we won't have csums for our file extent.  This is
1583          * ok so just don't bother checking csums if the inode belongs to the
1584          * data reloc tree.
1585          */
1586         if (disk_bytenr > 0 &&
1587             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1588                 u64 found;
1589                 if (btrfs_file_extent_compression(eb, fi))
1590                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1591                 else
1592                         disk_bytenr += extent_offset;
1593
1594                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1595                 if (ret < 0)
1596                         return ret;
1597                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1598                         if (found > 0)
1599                                 rec->found_csum_item = 1;
1600                         if (found < num_bytes)
1601                                 rec->some_csum_missing = 1;
1602                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1603                         if (found > 0)
1604                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1605                 }
1606         }
1607         return 0;
1608 }
1609
1610 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1611                             struct walk_control *wc)
1612 {
1613         struct btrfs_key key;
1614         u32 nritems;
1615         int i;
1616         int ret = 0;
1617         struct cache_tree *inode_cache;
1618         struct shared_node *active_node;
1619
1620         if (wc->root_level == wc->active_node &&
1621             btrfs_root_refs(&root->root_item) == 0)
1622                 return 0;
1623
1624         active_node = wc->nodes[wc->active_node];
1625         inode_cache = &active_node->inode_cache;
1626         nritems = btrfs_header_nritems(eb);
1627         for (i = 0; i < nritems; i++) {
1628                 btrfs_item_key_to_cpu(eb, &key, i);
1629
1630                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1631                         continue;
1632                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1633                         continue;
1634
1635                 if (active_node->current == NULL ||
1636                     active_node->current->ino < key.objectid) {
1637                         if (active_node->current) {
1638                                 active_node->current->checked = 1;
1639                                 maybe_free_inode_rec(inode_cache,
1640                                                      active_node->current);
1641                         }
1642                         active_node->current = get_inode_rec(inode_cache,
1643                                                              key.objectid, 1);
1644                         BUG_ON(IS_ERR(active_node->current));
1645                 }
1646                 switch (key.type) {
1647                 case BTRFS_DIR_ITEM_KEY:
1648                 case BTRFS_DIR_INDEX_KEY:
1649                         ret = process_dir_item(eb, i, &key, active_node);
1650                         break;
1651                 case BTRFS_INODE_REF_KEY:
1652                         ret = process_inode_ref(eb, i, &key, active_node);
1653                         break;
1654                 case BTRFS_INODE_EXTREF_KEY:
1655                         ret = process_inode_extref(eb, i, &key, active_node);
1656                         break;
1657                 case BTRFS_INODE_ITEM_KEY:
1658                         ret = process_inode_item(eb, i, &key, active_node);
1659                         break;
1660                 case BTRFS_EXTENT_DATA_KEY:
1661                         ret = process_file_extent(root, eb, i, &key,
1662                                                   active_node);
1663                         break;
1664                 default:
1665                         break;
1666                 };
1667         }
1668         return ret;
1669 }
1670
1671 struct node_refs {
1672         u64 bytenr[BTRFS_MAX_LEVEL];
1673         u64 refs[BTRFS_MAX_LEVEL];
1674         int need_check[BTRFS_MAX_LEVEL];
1675         /* field for checking all trees */
1676         int checked[BTRFS_MAX_LEVEL];
1677         /* the corresponding extent should be marked as full backref or not */
1678         int full_backref[BTRFS_MAX_LEVEL];
1679 };
1680
1681 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1682                              struct extent_buffer *eb, struct node_refs *nrefs,
1683                              u64 level, int check_all);
1684 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1685                             unsigned int ext_ref);
1686
1687 /*
1688  * Returns >0  Found error, not fatal, should continue
1689  * Returns <0  Fatal error, must exit the whole check
1690  * Returns 0   No errors found
1691  */
1692 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1693                                struct node_refs *nrefs, int *level, int ext_ref)
1694 {
1695         struct extent_buffer *cur = path->nodes[0];
1696         struct btrfs_key key;
1697         u64 cur_bytenr;
1698         u32 nritems;
1699         u64 first_ino = 0;
1700         int root_level = btrfs_header_level(root->node);
1701         int i;
1702         int ret = 0; /* Final return value */
1703         int err = 0; /* Positive error bitmap */
1704
1705         cur_bytenr = cur->start;
1706
1707         /* skip to first inode item or the first inode number change */
1708         nritems = btrfs_header_nritems(cur);
1709         for (i = 0; i < nritems; i++) {
1710                 btrfs_item_key_to_cpu(cur, &key, i);
1711                 if (i == 0)
1712                         first_ino = key.objectid;
1713                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1714                     (first_ino && first_ino != key.objectid))
1715                         break;
1716         }
1717         if (i == nritems) {
1718                 path->slots[0] = nritems;
1719                 return 0;
1720         }
1721         path->slots[0] = i;
1722
1723 again:
1724         err |= check_inode_item(root, path, ext_ref);
1725
1726         /* modify cur since check_inode_item may change path */
1727         cur = path->nodes[0];
1728
1729         if (err & LAST_ITEM)
1730                 goto out;
1731
1732         /* still have inode items in thie leaf */
1733         if (cur->start == cur_bytenr)
1734                 goto again;
1735
1736         /*
1737          * we have switched to another leaf, above nodes may
1738          * have changed, here walk down the path, if a node
1739          * or leaf is shared, check whether we can skip this
1740          * node or leaf.
1741          */
1742         for (i = root_level; i >= 0; i--) {
1743                 if (path->nodes[i]->start == nrefs->bytenr[i])
1744                         continue;
1745
1746                 ret = update_nodes_refs(root, path->nodes[i]->start,
1747                                 path->nodes[i], nrefs, i, 0);
1748                 if (ret)
1749                         goto out;
1750
1751                 if (!nrefs->need_check[i]) {
1752                         *level += 1;
1753                         break;
1754                 }
1755         }
1756
1757         for (i = 0; i < *level; i++) {
1758                 free_extent_buffer(path->nodes[i]);
1759                 path->nodes[i] = NULL;
1760         }
1761 out:
1762         err &= ~LAST_ITEM;
1763         if (err && !ret)
1764                 ret = err;
1765         return ret;
1766 }
1767
1768 static void reada_walk_down(struct btrfs_root *root,
1769                             struct extent_buffer *node, int slot)
1770 {
1771         struct btrfs_fs_info *fs_info = root->fs_info;
1772         u64 bytenr;
1773         u64 ptr_gen;
1774         u32 nritems;
1775         int i;
1776         int level;
1777
1778         level = btrfs_header_level(node);
1779         if (level != 1)
1780                 return;
1781
1782         nritems = btrfs_header_nritems(node);
1783         for (i = slot; i < nritems; i++) {
1784                 bytenr = btrfs_node_blockptr(node, i);
1785                 ptr_gen = btrfs_node_ptr_generation(node, i);
1786                 readahead_tree_block(fs_info, bytenr, ptr_gen);
1787         }
1788 }
1789
1790 /*
1791  * Check the child node/leaf by the following condition:
1792  * 1. the first item key of the node/leaf should be the same with the one
1793  *    in parent.
1794  * 2. block in parent node should match the child node/leaf.
1795  * 3. generation of parent node and child's header should be consistent.
1796  *
1797  * Or the child node/leaf pointed by the key in parent is not valid.
1798  *
1799  * We hope to check leaf owner too, but since subvol may share leaves,
1800  * which makes leaf owner check not so strong, key check should be
1801  * sufficient enough for that case.
1802  */
1803 static int check_child_node(struct extent_buffer *parent, int slot,
1804                             struct extent_buffer *child)
1805 {
1806         struct btrfs_key parent_key;
1807         struct btrfs_key child_key;
1808         int ret = 0;
1809
1810         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1811         if (btrfs_header_level(child) == 0)
1812                 btrfs_item_key_to_cpu(child, &child_key, 0);
1813         else
1814                 btrfs_node_key_to_cpu(child, &child_key, 0);
1815
1816         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1817                 ret = -EINVAL;
1818                 fprintf(stderr,
1819                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1820                         parent_key.objectid, parent_key.type, parent_key.offset,
1821                         child_key.objectid, child_key.type, child_key.offset);
1822         }
1823         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1824                 ret = -EINVAL;
1825                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1826                         btrfs_node_blockptr(parent, slot),
1827                         btrfs_header_bytenr(child));
1828         }
1829         if (btrfs_node_ptr_generation(parent, slot) !=
1830             btrfs_header_generation(child)) {
1831                 ret = -EINVAL;
1832                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1833                         btrfs_header_generation(child),
1834                         btrfs_node_ptr_generation(parent, slot));
1835         }
1836         return ret;
1837 }
1838
1839 /*
1840  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
1841  * in every fs or file tree check. Here we find its all root ids, and only check
1842  * it in the fs or file tree which has the smallest root id.
1843  */
1844 static int need_check(struct btrfs_root *root, struct ulist *roots)
1845 {
1846         struct rb_node *node;
1847         struct ulist_node *u;
1848
1849         /*
1850          * @roots can be empty if it belongs to tree reloc tree
1851          * In that case, we should always check the leaf, as we can't use
1852          * the tree owner to ensure some other root will check it.
1853          */
1854         if (roots->nnodes == 1 || roots->nnodes == 0)
1855                 return 1;
1856
1857         node = rb_first(&roots->root);
1858         u = rb_entry(node, struct ulist_node, rb_node);
1859         /*
1860          * current root id is not smallest, we skip it and let it be checked
1861          * in the fs or file tree who hash the smallest root id.
1862          */
1863         if (root->objectid != u->val)
1864                 return 0;
1865
1866         return 1;
1867 }
1868
1869 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
1870                                u64 *flags_ret)
1871 {
1872         struct btrfs_root *extent_root = root->fs_info->extent_root;
1873         struct btrfs_root_item *ri = &root->root_item;
1874         struct btrfs_extent_inline_ref *iref;
1875         struct btrfs_extent_item *ei;
1876         struct btrfs_key key;
1877         struct btrfs_path *path = NULL;
1878         unsigned long ptr;
1879         unsigned long end;
1880         u64 flags;
1881         u64 owner = 0;
1882         u64 offset;
1883         int slot;
1884         int type;
1885         int ret = 0;
1886
1887         /*
1888          * Except file/reloc tree, we can not have FULL BACKREF MODE
1889          */
1890         if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
1891                 goto normal;
1892
1893         /* root node */
1894         if (eb->start == btrfs_root_bytenr(ri))
1895                 goto normal;
1896
1897         if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
1898                 goto full_backref;
1899
1900         owner = btrfs_header_owner(eb);
1901         if (owner == root->objectid)
1902                 goto normal;
1903
1904         path = btrfs_alloc_path();
1905         if (!path)
1906                 return -ENOMEM;
1907
1908         key.objectid = btrfs_header_bytenr(eb);
1909         key.type = (u8)-1;
1910         key.offset = (u64)-1;
1911
1912         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
1913         if (ret <= 0) {
1914                 ret = -EIO;
1915                 goto out;
1916         }
1917
1918         if (ret > 0) {
1919                 ret = btrfs_previous_extent_item(extent_root, path,
1920                                                  key.objectid);
1921                 if (ret)
1922                         goto full_backref;
1923
1924         }
1925         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1926
1927         eb = path->nodes[0];
1928         slot = path->slots[0];
1929         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
1930
1931         flags = btrfs_extent_flags(eb, ei);
1932         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
1933                 goto full_backref;
1934
1935         ptr = (unsigned long)(ei + 1);
1936         end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
1937
1938         if (key.type == BTRFS_EXTENT_ITEM_KEY)
1939                 ptr += sizeof(struct btrfs_tree_block_info);
1940
1941 next:
1942         /* Reached extent item ends normally */
1943         if (ptr == end)
1944                 goto full_backref;
1945
1946         /* Beyond extent item end, wrong item size */
1947         if (ptr > end) {
1948                 error("extent item at bytenr %llu slot %d has wrong size",
1949                         eb->start, slot);
1950                 goto full_backref;
1951         }
1952
1953         iref = (struct btrfs_extent_inline_ref *)ptr;
1954         offset = btrfs_extent_inline_ref_offset(eb, iref);
1955         type = btrfs_extent_inline_ref_type(eb, iref);
1956
1957         if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
1958                 goto normal;
1959         ptr += btrfs_extent_inline_ref_size(type);
1960         goto next;
1961
1962 normal:
1963         *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
1964         goto out;
1965
1966 full_backref:
1967         *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
1968 out:
1969         btrfs_free_path(path);
1970         return ret;
1971 }
1972
1973 /*
1974  * for a tree node or leaf, we record its reference count, so later if we still
1975  * process this node or leaf, don't need to compute its reference count again.
1976  *
1977  * @bytenr  if @bytenr == (u64)-1, only update nrefs->full_backref[level]
1978  */
1979 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1980                              struct extent_buffer *eb, struct node_refs *nrefs,
1981                              u64 level, int check_all)
1982 {
1983         struct ulist *roots;
1984         u64 refs = 0;
1985         u64 flags = 0;
1986         int root_level = btrfs_header_level(root->node);
1987         int check;
1988         int ret;
1989
1990         if (nrefs->bytenr[level] == bytenr)
1991                 return 0;
1992
1993         if (bytenr != (u64)-1) {
1994                 /* the return value of this function seems a mistake */
1995                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1996                                        level, 1, &refs, &flags);
1997                 /* temporary fix */
1998                 if (ret < 0 && !check_all)
1999                         return ret;
2000
2001                 nrefs->bytenr[level] = bytenr;
2002                 nrefs->refs[level] = refs;
2003                 nrefs->full_backref[level] = 0;
2004                 nrefs->checked[level] = 0;
2005
2006                 if (refs > 1) {
2007                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2008                                                    0, &roots);
2009                         if (ret)
2010                                 return -EIO;
2011
2012                         check = need_check(root, roots);
2013                         ulist_free(roots);
2014                         nrefs->need_check[level] = check;
2015                 } else {
2016                         if (!check_all) {
2017                                 nrefs->need_check[level] = 1;
2018                         } else {
2019                                 if (level == root_level) {
2020                                         nrefs->need_check[level] = 1;
2021                                 } else {
2022                                         /*
2023                                          * The node refs may have not been
2024                                          * updated if upper needs checking (the
2025                                          * lowest root_objectid) the node can
2026                                          * be checked.
2027                                          */
2028                                         nrefs->need_check[level] =
2029                                                 nrefs->need_check[level + 1];
2030                                 }
2031                         }
2032                 }
2033         }
2034
2035         if (check_all && eb) {
2036                 calc_extent_flag_v2(root, eb, &flags);
2037                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2038                         nrefs->full_backref[level] = 1;
2039         }
2040
2041         return 0;
2042 }
2043
2044 /*
2045  * @level           if @level == -1 means extent data item
2046  *                  else normal treeblocl.
2047  */
2048 static int should_check_extent_strictly(struct btrfs_root *root,
2049                                         struct node_refs *nrefs, int level)
2050 {
2051         int root_level = btrfs_header_level(root->node);
2052
2053         if (level > root_level || level < -1)
2054                 return 1;
2055         if (level == root_level)
2056                 return 1;
2057         /*
2058          * if the upper node is marked full backref, it should contain shared
2059          * backref of the parent (except owner == root->objectid).
2060          */
2061         while (++level <= root_level)
2062                 if (nrefs->refs[level] > 1)
2063                         return 0;
2064
2065         return 1;
2066 }
2067
2068 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2069                           struct walk_control *wc, int *level,
2070                           struct node_refs *nrefs)
2071 {
2072         enum btrfs_tree_block_status status;
2073         u64 bytenr;
2074         u64 ptr_gen;
2075         struct btrfs_fs_info *fs_info = root->fs_info;
2076         struct extent_buffer *next;
2077         struct extent_buffer *cur;
2078         int ret, err = 0;
2079         u64 refs;
2080
2081         WARN_ON(*level < 0);
2082         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2083
2084         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2085                 refs = nrefs->refs[*level];
2086                 ret = 0;
2087         } else {
2088                 ret = btrfs_lookup_extent_info(NULL, root,
2089                                        path->nodes[*level]->start,
2090                                        *level, 1, &refs, NULL);
2091                 if (ret < 0) {
2092                         err = ret;
2093                         goto out;
2094                 }
2095                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2096                 nrefs->refs[*level] = refs;
2097         }
2098
2099         if (refs > 1) {
2100                 ret = enter_shared_node(root, path->nodes[*level]->start,
2101                                         refs, wc, *level);
2102                 if (ret > 0) {
2103                         err = ret;
2104                         goto out;
2105                 }
2106         }
2107
2108         while (*level >= 0) {
2109                 WARN_ON(*level < 0);
2110                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2111                 cur = path->nodes[*level];
2112
2113                 if (btrfs_header_level(cur) != *level)
2114                         WARN_ON(1);
2115
2116                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2117                         break;
2118                 if (*level == 0) {
2119                         ret = process_one_leaf(root, cur, wc);
2120                         if (ret < 0)
2121                                 err = ret;
2122                         break;
2123                 }
2124                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2125                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2126
2127                 if (bytenr == nrefs->bytenr[*level - 1]) {
2128                         refs = nrefs->refs[*level - 1];
2129                 } else {
2130                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2131                                         *level - 1, 1, &refs, NULL);
2132                         if (ret < 0) {
2133                                 refs = 0;
2134                         } else {
2135                                 nrefs->bytenr[*level - 1] = bytenr;
2136                                 nrefs->refs[*level - 1] = refs;
2137                         }
2138                 }
2139
2140                 if (refs > 1) {
2141                         ret = enter_shared_node(root, bytenr, refs,
2142                                                 wc, *level - 1);
2143                         if (ret > 0) {
2144                                 path->slots[*level]++;
2145                                 continue;
2146                         }
2147                 }
2148
2149                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2150                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2151                         free_extent_buffer(next);
2152                         reada_walk_down(root, cur, path->slots[*level]);
2153                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2154                         if (!extent_buffer_uptodate(next)) {
2155                                 struct btrfs_key node_key;
2156
2157                                 btrfs_node_key_to_cpu(path->nodes[*level],
2158                                                       &node_key,
2159                                                       path->slots[*level]);
2160                                 btrfs_add_corrupt_extent_record(root->fs_info,
2161                                                 &node_key,
2162                                                 path->nodes[*level]->start,
2163                                                 root->fs_info->nodesize,
2164                                                 *level);
2165                                 err = -EIO;
2166                                 goto out;
2167                         }
2168                 }
2169
2170                 ret = check_child_node(cur, path->slots[*level], next);
2171                 if (ret) {
2172                         free_extent_buffer(next);
2173                         err = ret;
2174                         goto out;
2175                 }
2176
2177                 if (btrfs_is_leaf(next))
2178                         status = btrfs_check_leaf(root, NULL, next);
2179                 else
2180                         status = btrfs_check_node(root, NULL, next);
2181                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2182                         free_extent_buffer(next);
2183                         err = -EIO;
2184                         goto out;
2185                 }
2186
2187                 *level = *level - 1;
2188                 free_extent_buffer(path->nodes[*level]);
2189                 path->nodes[*level] = next;
2190                 path->slots[*level] = 0;
2191         }
2192 out:
2193         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2194         return err;
2195 }
2196
2197 static int fs_root_objectid(u64 objectid);
2198
2199 /*
2200  * Update global fs information.
2201  */
2202 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2203                          int level)
2204 {
2205         u32 free_nrs;
2206         struct extent_buffer *eb = path->nodes[level];
2207
2208         total_btree_bytes += eb->len;
2209         if (fs_root_objectid(root->objectid))
2210                 total_fs_tree_bytes += eb->len;
2211         if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2212                 total_extent_tree_bytes += eb->len;
2213
2214         if (level == 0) {
2215                 btree_space_waste += btrfs_leaf_free_space(root, eb);
2216         } else {
2217                 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root->fs_info) -
2218                             btrfs_header_nritems(eb));
2219                 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2220         }
2221 }
2222
2223 /*
2224  * This function only handles BACKREF_MISSING,
2225  * If corresponding extent item exists, increase the ref, else insert an extent
2226  * item and backref.
2227  *
2228  * Returns error bits after repair.
2229  */
2230 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2231                                  struct btrfs_root *root,
2232                                  struct extent_buffer *node,
2233                                  struct node_refs *nrefs, int level, int err)
2234 {
2235         struct btrfs_fs_info *fs_info = root->fs_info;
2236         struct btrfs_root *extent_root = fs_info->extent_root;
2237         struct btrfs_path path;
2238         struct btrfs_extent_item *ei;
2239         struct btrfs_tree_block_info *bi;
2240         struct btrfs_key key;
2241         struct extent_buffer *eb;
2242         u32 size = sizeof(*ei);
2243         u32 node_size = root->fs_info->nodesize;
2244         int insert_extent = 0;
2245         int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2246         int root_level = btrfs_header_level(root->node);
2247         int generation;
2248         int ret;
2249         u64 owner;
2250         u64 bytenr;
2251         u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
2252         u64 parent = 0;
2253
2254         if ((err & BACKREF_MISSING) == 0)
2255                 return err;
2256
2257         WARN_ON(level > BTRFS_MAX_LEVEL);
2258         WARN_ON(level < 0);
2259
2260         btrfs_init_path(&path);
2261         bytenr = btrfs_header_bytenr(node);
2262         owner = btrfs_header_owner(node);
2263         generation = btrfs_header_generation(node);
2264
2265         key.objectid = bytenr;
2266         key.type = (u8)-1;
2267         key.offset = (u64)-1;
2268
2269         /* Search for the extent item */
2270         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2271         if (ret <= 0) {
2272                 ret = -EIO;
2273                 goto out;
2274         }
2275
2276         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2277         if (ret)
2278                 insert_extent = 1;
2279
2280         /* calculate if the extent item flag is full backref or not */
2281         if (nrefs->full_backref[level] != 0)
2282                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2283
2284         /* insert an extent item */
2285         if (insert_extent) {
2286                 struct btrfs_disk_key copy_key;
2287
2288                 generation = btrfs_header_generation(node);
2289
2290                 if (level < root_level && nrefs->full_backref[level + 1] &&
2291                     owner != root->objectid) {
2292                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2293                 }
2294
2295                 key.objectid = bytenr;
2296                 if (!skinny_metadata) {
2297                         key.type = BTRFS_EXTENT_ITEM_KEY;
2298                         key.offset = node_size;
2299                         size += sizeof(*bi);
2300                 } else {
2301                         key.type = BTRFS_METADATA_ITEM_KEY;
2302                         key.offset = level;
2303                 }
2304
2305                 btrfs_release_path(&path);
2306                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2307                                               size);
2308                 if (ret)
2309                         goto out;
2310
2311                 eb = path.nodes[0];
2312                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
2313
2314                 btrfs_set_extent_refs(eb, ei, 0);
2315                 btrfs_set_extent_generation(eb, ei, generation);
2316                 btrfs_set_extent_flags(eb, ei, flags);
2317
2318                 if (!skinny_metadata) {
2319                         bi = (struct btrfs_tree_block_info *)(ei + 1);
2320                         memset_extent_buffer(eb, 0, (unsigned long)bi,
2321                                              sizeof(*bi));
2322                         btrfs_set_disk_key_objectid(&copy_key, root->objectid);
2323                         btrfs_set_disk_key_type(&copy_key, 0);
2324                         btrfs_set_disk_key_offset(&copy_key, 0);
2325
2326                         btrfs_set_tree_block_level(eb, bi, level);
2327                         btrfs_set_tree_block_key(eb, bi, &copy_key);
2328                 }
2329                 btrfs_mark_buffer_dirty(eb);
2330                 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2331                 btrfs_update_block_group(extent_root, bytenr, node_size, 1, 0);
2332
2333                 nrefs->refs[level] = 0;
2334                 nrefs->full_backref[level] =
2335                         flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2336                 btrfs_release_path(&path);
2337         }
2338
2339         if (level < root_level && nrefs->full_backref[level + 1] &&
2340             owner != root->objectid)
2341                 parent = nrefs->bytenr[level + 1];
2342
2343         /* increase the ref */
2344         ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2345                         parent, root->objectid, level, 0);
2346
2347         nrefs->refs[level]++;
2348 out:
2349         btrfs_release_path(&path);
2350         if (ret) {
2351                 error(
2352         "failed to repair tree block ref start %llu root %llu due to %s",
2353                       bytenr, root->objectid, strerror(-ret));
2354         } else {
2355                 printf("Added one tree block ref start %llu %s %llu\n",
2356                        bytenr, parent ? "parent" : "root",
2357                        parent ? parent : root->objectid);
2358                 err &= ~BACKREF_MISSING;
2359         }
2360
2361         return err;
2362 }
2363
2364 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2365                             unsigned int ext_ref);
2366 static int check_tree_block_ref(struct btrfs_root *root,
2367                                 struct extent_buffer *eb, u64 bytenr,
2368                                 int level, u64 owner, struct node_refs *nrefs);
2369 static int check_leaf_items(struct btrfs_trans_handle *trans,
2370                             struct btrfs_root *root, struct btrfs_path *path,
2371                             struct node_refs *nrefs, int account_bytes);
2372
2373 /*
2374  * @trans      just for lowmem repair mode
 * @check_all  if not 0 then check all tree block backrefs and items
2376  *             0 then just check relationship of items in fs tree(s)
2377  *
2378  * Returns >0  Found error, should continue
2379  * Returns <0  Fatal error, must exit the whole check
2380  * Returns 0   No errors found
2381  */
2382 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2383                              struct btrfs_root *root, struct btrfs_path *path,
2384                              int *level, struct node_refs *nrefs, int ext_ref,
2385                              int check_all)
2386
2387 {
2388         enum btrfs_tree_block_status status;
2389         u64 bytenr;
2390         u64 ptr_gen;
2391         struct btrfs_fs_info *fs_info = root->fs_info;
2392         struct extent_buffer *next;
2393         struct extent_buffer *cur;
2394         int ret;
2395         int err = 0;
2396         int check;
2397         int account_file_data = 0;
2398
2399         WARN_ON(*level < 0);
2400         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2401
2402         ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2403                                 path->nodes[*level], nrefs, *level, check_all);
2404         if (ret < 0)
2405                 return ret;
2406
2407         while (*level >= 0) {
2408                 WARN_ON(*level < 0);
2409                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2410                 cur = path->nodes[*level];
2411                 bytenr = btrfs_header_bytenr(cur);
2412                 check = nrefs->need_check[*level];
2413
2414                 if (btrfs_header_level(cur) != *level)
2415                         WARN_ON(1);
2416                /*
2417                 * Update bytes accounting and check tree block ref
2418                 * NOTE: Doing accounting and check before checking nritems
2419                 * is necessary because of empty node/leaf.
2420                 */
2421                 if ((check_all && !nrefs->checked[*level]) ||
2422                     (!check_all && nrefs->need_check[*level])) {
2423                         ret = check_tree_block_ref(root, cur,
2424                            btrfs_header_bytenr(cur), btrfs_header_level(cur),
2425                            btrfs_header_owner(cur), nrefs);
2426
2427                         if (repair && ret)
2428                                 ret = repair_tree_block_ref(trans, root,
2429                                     path->nodes[*level], nrefs, *level, ret);
2430                         err |= ret;
2431
2432                         if (check_all && nrefs->need_check[*level] &&
2433                                 nrefs->refs[*level]) {
2434                                 account_bytes(root, path, *level);
2435                                 account_file_data = 1;
2436                         }
2437                         nrefs->checked[*level] = 1;
2438                 }
2439
2440                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2441                         break;
2442
2443                 /* Don't forgot to check leaf/node validation */
2444                 if (*level == 0) {
2445                         /* skip duplicate check */
2446                         if (check || !check_all) {
2447                                 ret = btrfs_check_leaf(root, NULL, cur);
2448                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2449                                         err |= -EIO;
2450                                         break;
2451                                 }
2452                         }
2453
2454                         ret = 0;
2455                         if (!check_all)
2456                                 ret = process_one_leaf_v2(root, path, nrefs,
2457                                                           level, ext_ref);
2458                         else
2459                                 ret = check_leaf_items(trans, root, path,
2460                                                nrefs, account_file_data);
2461                         err |= ret;
2462                         break;
2463                 } else {
2464                         if (check || !check_all) {
2465                                 ret = btrfs_check_node(root, NULL, cur);
2466                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2467                                         err |= -EIO;
2468                                         break;
2469                                 }
2470                         }
2471                 }
2472
2473                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2474                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2475
2476                 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2477                                         check_all);
2478                 if (ret < 0)
2479                         break;
2480                 /*
2481                  * check all trees in check_chunks_and_extent_v2
2482                  * check shared node once in check_fs_roots
2483                  */
2484                 if (!check_all && !nrefs->need_check[*level - 1]) {
2485                         path->slots[*level]++;
2486                         continue;
2487                 }
2488
2489                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2490                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2491                         free_extent_buffer(next);
2492                         reada_walk_down(root, cur, path->slots[*level]);
2493                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2494                         if (!extent_buffer_uptodate(next)) {
2495                                 struct btrfs_key node_key;
2496
2497                                 btrfs_node_key_to_cpu(path->nodes[*level],
2498                                                       &node_key,
2499                                                       path->slots[*level]);
2500                                 btrfs_add_corrupt_extent_record(fs_info,
2501                                         &node_key, path->nodes[*level]->start,
2502                                         fs_info->nodesize, *level);
2503                                 err |= -EIO;
2504                                 break;
2505                         }
2506                 }
2507
2508                 ret = check_child_node(cur, path->slots[*level], next);
2509                 err |= ret;
2510                 if (ret < 0) 
2511                         break;
2512
2513                 if (btrfs_is_leaf(next))
2514                         status = btrfs_check_leaf(root, NULL, next);
2515                 else
2516                         status = btrfs_check_node(root, NULL, next);
2517                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2518                         free_extent_buffer(next);
2519                         err |= -EIO;
2520                         break;
2521                 }
2522
2523                 *level = *level - 1;
2524                 free_extent_buffer(path->nodes[*level]);
2525                 path->nodes[*level] = next;
2526                 path->slots[*level] = 0;
2527                 account_file_data = 0;
2528
2529                 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
2530         }
2531         return err;
2532 }
2533
2534 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2535                         struct walk_control *wc, int *level)
2536 {
2537         int i;
2538         struct extent_buffer *leaf;
2539
2540         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2541                 leaf = path->nodes[i];
2542                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2543                         path->slots[i]++;
2544                         *level = i;
2545                         return 0;
2546                 } else {
2547                         free_extent_buffer(path->nodes[*level]);
2548                         path->nodes[*level] = NULL;
2549                         BUG_ON(*level > wc->active_node);
2550                         if (*level == wc->active_node)
2551                                 leave_shared_node(root, wc, *level);
2552                         *level = i + 1;
2553                 }
2554         }
2555         return 1;
2556 }
2557
2558 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2559                            int *level)
2560 {
2561         int i;
2562         struct extent_buffer *leaf;
2563
2564         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2565                 leaf = path->nodes[i];
2566                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2567                         path->slots[i]++;
2568                         *level = i;
2569                         return 0;
2570                 } else {
2571                         free_extent_buffer(path->nodes[*level]);
2572                         path->nodes[*level] = NULL;
2573                         *level = i + 1;
2574                 }
2575         }
2576         return 1;
2577 }
2578
2579 static int check_root_dir(struct inode_record *rec)
2580 {
2581         struct inode_backref *backref;
2582         int ret = -1;
2583
2584         if (!rec->found_inode_item || rec->errors)
2585                 goto out;
2586         if (rec->nlink != 1 || rec->found_link != 0)
2587                 goto out;
2588         if (list_empty(&rec->backrefs))
2589                 goto out;
2590         backref = to_inode_backref(rec->backrefs.next);
2591         if (!backref->found_inode_ref)
2592                 goto out;
2593         if (backref->index != 0 || backref->namelen != 2 ||
2594             memcmp(backref->name, "..", 2))
2595                 goto out;
2596         if (backref->found_dir_index || backref->found_dir_item)
2597                 goto out;
2598         ret = 0;
2599 out:
2600         return ret;
2601 }
2602
2603 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2604                               struct btrfs_root *root, struct btrfs_path *path,
2605                               struct inode_record *rec)
2606 {
2607         struct btrfs_inode_item *ei;
2608         struct btrfs_key key;
2609         int ret;
2610
2611         key.objectid = rec->ino;
2612         key.type = BTRFS_INODE_ITEM_KEY;
2613         key.offset = (u64)-1;
2614
2615         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2616         if (ret < 0)
2617                 goto out;
2618         if (ret) {
2619                 if (!path->slots[0]) {
2620                         ret = -ENOENT;
2621                         goto out;
2622                 }
2623                 path->slots[0]--;
2624                 ret = 0;
2625         }
2626         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2627         if (key.objectid != rec->ino) {
2628                 ret = -ENOENT;
2629                 goto out;
2630         }
2631
2632         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2633                             struct btrfs_inode_item);
2634         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2635         btrfs_mark_buffer_dirty(path->nodes[0]);
2636         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2637         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2638                root->root_key.objectid);
2639 out:
2640         btrfs_release_path(path);
2641         return ret;
2642 }
2643
2644 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2645                                     struct btrfs_root *root,
2646                                     struct btrfs_path *path,
2647                                     struct inode_record *rec)
2648 {
2649         int ret;
2650
2651         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2652         btrfs_release_path(path);
2653         if (!ret)
2654                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2655         return ret;
2656 }
2657
2658 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2659                                struct btrfs_root *root,
2660                                struct btrfs_path *path,
2661                                struct inode_record *rec)
2662 {
2663         struct btrfs_inode_item *ei;
2664         struct btrfs_key key;
2665         int ret = 0;
2666
2667         key.objectid = rec->ino;
2668         key.type = BTRFS_INODE_ITEM_KEY;
2669         key.offset = 0;
2670
2671         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2672         if (ret) {
2673                 if (ret > 0)
2674                         ret = -ENOENT;
2675                 goto out;
2676         }
2677
2678         /* Since ret == 0, no need to check anything */
2679         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2680                             struct btrfs_inode_item);
2681         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2682         btrfs_mark_buffer_dirty(path->nodes[0]);
2683         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2684         printf("reset nbytes for ino %llu root %llu\n",
2685                rec->ino, root->root_key.objectid);
2686 out:
2687         btrfs_release_path(path);
2688         return ret;
2689 }
2690
2691 static int add_missing_dir_index(struct btrfs_root *root,
2692                                  struct cache_tree *inode_cache,
2693                                  struct inode_record *rec,
2694                                  struct inode_backref *backref)
2695 {
2696         struct btrfs_path path;
2697         struct btrfs_trans_handle *trans;
2698         struct btrfs_dir_item *dir_item;
2699         struct extent_buffer *leaf;
2700         struct btrfs_key key;
2701         struct btrfs_disk_key disk_key;
2702         struct inode_record *dir_rec;
2703         unsigned long name_ptr;
2704         u32 data_size = sizeof(*dir_item) + backref->namelen;
2705         int ret;
2706
2707         trans = btrfs_start_transaction(root, 1);
2708         if (IS_ERR(trans))
2709                 return PTR_ERR(trans);
2710
2711         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2712                 (unsigned long long)rec->ino);
2713
2714         btrfs_init_path(&path);
2715         key.objectid = backref->dir;
2716         key.type = BTRFS_DIR_INDEX_KEY;
2717         key.offset = backref->index;
2718         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2719         BUG_ON(ret);
2720
2721         leaf = path.nodes[0];
2722         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2723
2724         disk_key.objectid = cpu_to_le64(rec->ino);
2725         disk_key.type = BTRFS_INODE_ITEM_KEY;
2726         disk_key.offset = 0;
2727
2728         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2729         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2730         btrfs_set_dir_data_len(leaf, dir_item, 0);
2731         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2732         name_ptr = (unsigned long)(dir_item + 1);
2733         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2734         btrfs_mark_buffer_dirty(leaf);
2735         btrfs_release_path(&path);
2736         btrfs_commit_transaction(trans, root);
2737
2738         backref->found_dir_index = 1;
2739         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2740         BUG_ON(IS_ERR(dir_rec));
2741         if (!dir_rec)
2742                 return 0;
2743         dir_rec->found_size += backref->namelen;
2744         if (dir_rec->found_size == dir_rec->isize &&
2745             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2746                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2747         if (dir_rec->found_size != dir_rec->isize)
2748                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2749
2750         return 0;
2751 }
2752
2753 static int delete_dir_index(struct btrfs_root *root,
2754                             struct inode_backref *backref)
2755 {
2756         struct btrfs_trans_handle *trans;
2757         struct btrfs_dir_item *di;
2758         struct btrfs_path path;
2759         int ret = 0;
2760
2761         trans = btrfs_start_transaction(root, 1);
2762         if (IS_ERR(trans))
2763                 return PTR_ERR(trans);
2764
2765         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2766                 (unsigned long long)backref->dir,
2767                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2768                 (unsigned long long)root->objectid);
2769
2770         btrfs_init_path(&path);
2771         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2772                                     backref->name, backref->namelen,
2773                                     backref->index, -1);
2774         if (IS_ERR(di)) {
2775                 ret = PTR_ERR(di);
2776                 btrfs_release_path(&path);
2777                 btrfs_commit_transaction(trans, root);
2778                 if (ret == -ENOENT)
2779                         return 0;
2780                 return ret;
2781         }
2782
2783         if (!di)
2784                 ret = btrfs_del_item(trans, root, &path);
2785         else
2786                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2787         BUG_ON(ret);
2788         btrfs_release_path(&path);
2789         btrfs_commit_transaction(trans, root);
2790         return ret;
2791 }
2792
2793 static int __create_inode_item(struct btrfs_trans_handle *trans,
2794                                struct btrfs_root *root, u64 ino, u64 size,
2795                                u64 nbytes, u64 nlink, u32 mode)
2796 {
2797         struct btrfs_inode_item ii;
2798         time_t now = time(NULL);
2799         int ret;
2800
2801         btrfs_set_stack_inode_size(&ii, size);
2802         btrfs_set_stack_inode_nbytes(&ii, nbytes);
2803         btrfs_set_stack_inode_nlink(&ii, nlink);
2804         btrfs_set_stack_inode_mode(&ii, mode);
2805         btrfs_set_stack_inode_generation(&ii, trans->transid);
2806         btrfs_set_stack_timespec_nsec(&ii.atime, 0);
2807         btrfs_set_stack_timespec_sec(&ii.ctime, now);
2808         btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
2809         btrfs_set_stack_timespec_sec(&ii.mtime, now);
2810         btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
2811         btrfs_set_stack_timespec_sec(&ii.otime, 0);
2812         btrfs_set_stack_timespec_nsec(&ii.otime, 0);
2813
2814         ret = btrfs_insert_inode(trans, root, ino, &ii);
2815         ASSERT(!ret);
2816
2817         warning("root %llu inode %llu recreating inode item, this may "
2818                 "be incomplete, please check permissions and content after "
2819                 "the fsck completes.\n", (unsigned long long)root->objectid,
2820                 (unsigned long long)ino);
2821
2822         return 0;
2823 }
2824
2825 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2826                                     struct btrfs_root *root, u64 ino,
2827                                     u8 filetype)
2828 {
2829         u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2830
2831         return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
2832 }
2833
2834 static int create_inode_item(struct btrfs_root *root,
2835                              struct inode_record *rec, int root_dir)
2836 {
2837         struct btrfs_trans_handle *trans;
2838         u64 nlink = 0;
2839         u32 mode = 0;
2840         u64 size = 0;
2841         int ret;
2842
2843         trans = btrfs_start_transaction(root, 1);
2844         if (IS_ERR(trans)) {
2845                 ret = PTR_ERR(trans);
2846                 return ret;
2847         }
2848
2849         nlink = root_dir ? 1 : rec->found_link;
2850         if (rec->found_dir_item) {
2851                 if (rec->found_file_extent)
2852                         fprintf(stderr, "root %llu inode %llu has both a dir "
2853                                 "item and extents, unsure if it is a dir or a "
2854                                 "regular file so setting it as a directory\n",
2855                                 (unsigned long long)root->objectid,
2856                                 (unsigned long long)rec->ino);
2857                 mode = S_IFDIR | 0755;
2858                 size = rec->found_size;
2859         } else if (!rec->found_dir_item) {
2860                 size = rec->extent_end;
2861                 mode =  S_IFREG | 0755;
2862         }
2863
2864         ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
2865                                   nlink, mode);
2866         btrfs_commit_transaction(trans, root);
2867         return 0;
2868 }
2869
2870 static int repair_inode_backrefs(struct btrfs_root *root,
2871                                  struct inode_record *rec,
2872                                  struct cache_tree *inode_cache,
2873                                  int delete)
2874 {
2875         struct inode_backref *tmp, *backref;
2876         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2877         int ret = 0;
2878         int repaired = 0;
2879
2880         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2881                 if (!delete && rec->ino == root_dirid) {
2882                         if (!rec->found_inode_item) {
2883                                 ret = create_inode_item(root, rec, 1);
2884                                 if (ret)
2885                                         break;
2886                                 repaired++;
2887                         }
2888                 }
2889
2890                 /* Index 0 for root dir's are special, don't mess with it */
2891                 if (rec->ino == root_dirid && backref->index == 0)
2892                         continue;
2893
2894                 if (delete &&
2895                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2896                      (backref->found_dir_index && backref->found_inode_ref &&
2897                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2898                         ret = delete_dir_index(root, backref);
2899                         if (ret)
2900                                 break;
2901                         repaired++;
2902                         list_del(&backref->list);
2903                         free(backref);
2904                         continue;
2905                 }
2906
2907                 if (!delete && !backref->found_dir_index &&
2908                     backref->found_dir_item && backref->found_inode_ref) {
2909                         ret = add_missing_dir_index(root, inode_cache, rec,
2910                                                     backref);
2911                         if (ret)
2912                                 break;
2913                         repaired++;
2914                         if (backref->found_dir_item &&
2915                             backref->found_dir_index) {
2916                                 if (!backref->errors &&
2917                                     backref->found_inode_ref) {
2918                                         list_del(&backref->list);
2919                                         free(backref);
2920                                         continue;
2921                                 }
2922                         }
2923                 }
2924
2925                 if (!delete && (!backref->found_dir_index &&
2926                                 !backref->found_dir_item &&
2927                                 backref->found_inode_ref)) {
2928                         struct btrfs_trans_handle *trans;
2929                         struct btrfs_key location;
2930
2931                         ret = check_dir_conflict(root, backref->name,
2932                                                  backref->namelen,
2933                                                  backref->dir,
2934                                                  backref->index);
2935                         if (ret) {
2936                                 /*
2937                                  * let nlink fixing routine to handle it,
2938                                  * which can do it better.
2939                                  */
2940                                 ret = 0;
2941                                 break;
2942                         }
2943                         location.objectid = rec->ino;
2944                         location.type = BTRFS_INODE_ITEM_KEY;
2945                         location.offset = 0;
2946
2947                         trans = btrfs_start_transaction(root, 1);
2948                         if (IS_ERR(trans)) {
2949                                 ret = PTR_ERR(trans);
2950                                 break;
2951                         }
2952                         fprintf(stderr, "adding missing dir index/item pair "
2953                                 "for inode %llu\n",
2954                                 (unsigned long long)rec->ino);
2955                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2956                                                     backref->namelen,
2957                                                     backref->dir, &location,
2958                                                     imode_to_type(rec->imode),
2959                                                     backref->index);
2960                         BUG_ON(ret);
2961                         btrfs_commit_transaction(trans, root);
2962                         repaired++;
2963                 }
2964
2965                 if (!delete && (backref->found_inode_ref &&
2966                                 backref->found_dir_index &&
2967                                 backref->found_dir_item &&
2968                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2969                                 !rec->found_inode_item)) {
2970                         ret = create_inode_item(root, rec, 0);
2971                         if (ret)
2972                                 break;
2973                         repaired++;
2974                 }
2975
2976         }
2977         return ret ? ret : repaired;
2978 }
2979
2980 /*
2981  * To determine the file type for nlink/inode_item repair
2982  *
2983  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2984  * Return -ENOENT if file type is not found.
2985  */
2986 static int find_file_type(struct inode_record *rec, u8 *type)
2987 {
2988         struct inode_backref *backref;
2989
2990         /* For inode item recovered case */
2991         if (rec->found_inode_item) {
2992                 *type = imode_to_type(rec->imode);
2993                 return 0;
2994         }
2995
2996         list_for_each_entry(backref, &rec->backrefs, list) {
2997                 if (backref->found_dir_index || backref->found_dir_item) {
2998                         *type = backref->filetype;
2999                         return 0;
3000                 }
3001         }
3002         return -ENOENT;
3003 }
3004
3005 /*
3006  * To determine the file name for nlink repair
3007  *
3008  * Return 0 if file name is found, set name and namelen.
3009  * Return -ENOENT if file name is not found.
3010  */
3011 static int find_file_name(struct inode_record *rec,
3012                           char *name, int *namelen)
3013 {
3014         struct inode_backref *backref;
3015
3016         list_for_each_entry(backref, &rec->backrefs, list) {
3017                 if (backref->found_dir_index || backref->found_dir_item ||
3018                     backref->found_inode_ref) {
3019                         memcpy(name, backref->name, backref->namelen);
3020                         *namelen = backref->namelen;
3021                         return 0;
3022                 }
3023         }
3024         return -ENOENT;
3025 }
3026
3027 /* Reset the nlink of the inode to the correct one */
3028 static int reset_nlink(struct btrfs_trans_handle *trans,
3029                        struct btrfs_root *root,
3030                        struct btrfs_path *path,
3031                        struct inode_record *rec)
3032 {
3033         struct inode_backref *backref;
3034         struct inode_backref *tmp;
3035         struct btrfs_key key;
3036         struct btrfs_inode_item *inode_item;
3037         int ret = 0;
3038
3039         /* We don't believe this either, reset it and iterate backref */
3040         rec->found_link = 0;
3041
3042         /* Remove all backref including the valid ones */
3043         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3044                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
3045                                    backref->index, backref->name,
3046                                    backref->namelen, 0);
3047                 if (ret < 0)
3048                         goto out;
3049
3050                 /* remove invalid backref, so it won't be added back */
3051                 if (!(backref->found_dir_index &&
3052                       backref->found_dir_item &&
3053                       backref->found_inode_ref)) {
3054                         list_del(&backref->list);
3055                         free(backref);
3056                 } else {
3057                         rec->found_link++;
3058                 }
3059         }
3060
3061         /* Set nlink to 0 */
3062         key.objectid = rec->ino;
3063         key.type = BTRFS_INODE_ITEM_KEY;
3064         key.offset = 0;
3065         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3066         if (ret < 0)
3067                 goto out;
3068         if (ret > 0) {
3069                 ret = -ENOENT;
3070                 goto out;
3071         }
3072         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3073                                     struct btrfs_inode_item);
3074         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
3075         btrfs_mark_buffer_dirty(path->nodes[0]);
3076         btrfs_release_path(path);
3077
3078         /*
3079          * Add back valid inode_ref/dir_item/dir_index,
3080          * add_link() will handle the nlink inc, so new nlink must be correct
3081          */
3082         list_for_each_entry(backref, &rec->backrefs, list) {
3083                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
3084                                      backref->name, backref->namelen,
3085                                      backref->filetype, &backref->index, 1, 0);
3086                 if (ret < 0)
3087                         goto out;
3088         }
3089 out:
3090         btrfs_release_path(path);
3091         return ret;
3092 }
3093
3094 static int get_highest_inode(struct btrfs_trans_handle *trans,
3095                                 struct btrfs_root *root,
3096                                 struct btrfs_path *path,
3097                                 u64 *highest_ino)
3098 {
3099         struct btrfs_key key, found_key;
3100         int ret;
3101
3102         btrfs_init_path(path);
3103         key.objectid = BTRFS_LAST_FREE_OBJECTID;
3104         key.offset = -1;
3105         key.type = BTRFS_INODE_ITEM_KEY;
3106         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3107         if (ret == 1) {
3108                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3109                                 path->slots[0] - 1);
3110                 *highest_ino = found_key.objectid;
3111                 ret = 0;
3112         }
3113         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3114                 ret = -EOVERFLOW;
3115         btrfs_release_path(path);
3116         return ret;
3117 }
3118
3119 /*
3120  * Link inode to dir 'lost+found'. Increase @ref_count.
3121  *
3122  * Returns 0 means success.
3123  * Returns <0 means failure.
3124  */
3125 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3126                                    struct btrfs_root *root,
3127                                    struct btrfs_path *path,
3128                                    u64 ino, char *namebuf, u32 name_len,
3129                                    u8 filetype, u64 *ref_count)
3130 {
3131         char *dir_name = "lost+found";
3132         u64 lost_found_ino;
3133         int ret;
3134         u32 mode = 0700;
3135
3136         btrfs_release_path(path);
3137         ret = get_highest_inode(trans, root, path, &lost_found_ino);
3138         if (ret < 0)
3139                 goto out;
3140         lost_found_ino++;
3141
3142         ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3143                           BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3144                           mode);
3145         if (ret < 0) {
3146                 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3147                 goto out;
3148         }
3149         ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3150                              namebuf, name_len, filetype, NULL, 1, 0);
3151         /*
3152          * Add ".INO" suffix several times to handle case where
3153          * "FILENAME.INO" is already taken by another file.
3154          */
3155         while (ret == -EEXIST) {
3156                 /*
3157                  * Conflicting file name, add ".INO" as suffix * +1 for '.'
3158                  */
3159                 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3160                         ret = -EFBIG;
3161                         goto out;
3162                 }
3163                 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3164                          ".%llu", ino);
3165                 name_len += count_digits(ino) + 1;
3166                 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3167                                      name_len, filetype, NULL, 1, 0);
3168         }
3169         if (ret < 0) {
3170                 error("failed to link the inode %llu to %s dir: %s",
3171                       ino, dir_name, strerror(-ret));
3172                 goto out;
3173         }
3174
3175         ++*ref_count;
3176         printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3177                name_len, namebuf, dir_name);
3178 out:
3179         btrfs_release_path(path);
3180         if (ret)
3181                 error("failed to move file '%.*s' to '%s' dir", name_len,
3182                                 namebuf, dir_name);
3183         return ret;
3184 }
3185
3186 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3187                                struct btrfs_root *root,
3188                                struct btrfs_path *path,
3189                                struct inode_record *rec)
3190 {
3191         char namebuf[BTRFS_NAME_LEN] = {0};
3192         u8 type = 0;
3193         int namelen = 0;
3194         int name_recovered = 0;
3195         int type_recovered = 0;
3196         int ret = 0;
3197
3198         /*
3199          * Get file name and type first before these invalid inode ref
3200          * are deleted by remove_all_invalid_backref()
3201          */
3202         name_recovered = !find_file_name(rec, namebuf, &namelen);
3203         type_recovered = !find_file_type(rec, &type);
3204
3205         if (!name_recovered) {
3206                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3207                        rec->ino, rec->ino);
3208                 namelen = count_digits(rec->ino);
3209                 sprintf(namebuf, "%llu", rec->ino);
3210                 name_recovered = 1;
3211         }
3212         if (!type_recovered) {
3213                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3214                        rec->ino);
3215                 type = BTRFS_FT_REG_FILE;
3216                 type_recovered = 1;
3217         }
3218
3219         ret = reset_nlink(trans, root, path, rec);
3220         if (ret < 0) {
3221                 fprintf(stderr,
3222                         "Failed to reset nlink for inode %llu: %s\n",
3223                         rec->ino, strerror(-ret));
3224                 goto out;
3225         }
3226
3227         if (rec->found_link == 0) {
3228                 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3229                                               namebuf, namelen, type,
3230                                               (u64 *)&rec->found_link);
3231                 if (ret)
3232                         goto out;
3233         }
3234         printf("Fixed the nlink of inode %llu\n", rec->ino);
3235 out:
3236         /*
3237          * Clear the flag anyway, or we will loop forever for the same inode
3238          * as it will not be removed from the bad inode list and the dead loop
3239          * happens.
3240          */
3241         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3242         btrfs_release_path(path);
3243         return ret;
3244 }
3245
3246 /*
3247  * Check if there is any normal(reg or prealloc) file extent for given
3248  * ino.
3249  * This is used to determine the file type when neither its dir_index/item or
3250  * inode_item exists.
3251  *
3252  * This will *NOT* report error, if any error happens, just consider it does
3253  * not have any normal file extent.
3254  */
3255 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3256 {
3257         struct btrfs_path path;
3258         struct btrfs_key key;
3259         struct btrfs_key found_key;
3260         struct btrfs_file_extent_item *fi;
3261         u8 type;
3262         int ret = 0;
3263
3264         btrfs_init_path(&path);
3265         key.objectid = ino;
3266         key.type = BTRFS_EXTENT_DATA_KEY;
3267         key.offset = 0;
3268
3269         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3270         if (ret < 0) {
3271                 ret = 0;
3272                 goto out;
3273         }
3274         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3275                 ret = btrfs_next_leaf(root, &path);
3276                 if (ret) {
3277                         ret = 0;
3278                         goto out;
3279                 }
3280         }
3281         while (1) {
3282                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3283                                       path.slots[0]);
3284                 if (found_key.objectid != ino ||
3285                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3286                         break;
3287                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3288                                     struct btrfs_file_extent_item);
3289                 type = btrfs_file_extent_type(path.nodes[0], fi);
3290                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3291                         ret = 1;
3292                         goto out;
3293                 }
3294         }
3295 out:
3296         btrfs_release_path(&path);
3297         return ret;
3298 }
3299
3300 static u32 btrfs_type_to_imode(u8 type)
3301 {
3302         static u32 imode_by_btrfs_type[] = {
3303                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3304                 [BTRFS_FT_DIR]          = S_IFDIR,
3305                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3306                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3307                 [BTRFS_FT_FIFO]         = S_IFIFO,
3308                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3309                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3310         };
3311
3312         return imode_by_btrfs_type[(type)];
3313 }
3314
3315 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3316                                 struct btrfs_root *root,
3317                                 struct btrfs_path *path,
3318                                 struct inode_record *rec)
3319 {
3320         u8 filetype;
3321         u32 mode = 0700;
3322         int type_recovered = 0;
3323         int ret = 0;
3324
3325         printf("Trying to rebuild inode:%llu\n", rec->ino);
3326
3327         type_recovered = !find_file_type(rec, &filetype);
3328
3329         /*
3330          * Try to determine inode type if type not found.
3331          *
3332          * For found regular file extent, it must be FILE.
3333          * For found dir_item/index, it must be DIR.
3334          *
3335          * For undetermined one, use FILE as fallback.
3336          *
3337          * TODO:
3338          * 1. If found backref(inode_index/item is already handled) to it,
3339          *    it must be DIR.
3340          *    Need new inode-inode ref structure to allow search for that.
3341          */
3342         if (!type_recovered) {
3343                 if (rec->found_file_extent &&
3344                     find_normal_file_extent(root, rec->ino)) {
3345                         type_recovered = 1;
3346                         filetype = BTRFS_FT_REG_FILE;
3347                 } else if (rec->found_dir_item) {
3348                         type_recovered = 1;
3349                         filetype = BTRFS_FT_DIR;
3350                 } else if (!list_empty(&rec->orphan_extents)) {
3351                         type_recovered = 1;
3352                         filetype = BTRFS_FT_REG_FILE;
3353                 } else{
3354                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3355                                rec->ino);
3356                         type_recovered = 1;
3357                         filetype = BTRFS_FT_REG_FILE;
3358                 }
3359         }
3360
3361         ret = btrfs_new_inode(trans, root, rec->ino,
3362                               mode | btrfs_type_to_imode(filetype));
3363         if (ret < 0)
3364                 goto out;
3365
3366         /*
3367          * Here inode rebuild is done, we only rebuild the inode item,
3368          * don't repair the nlink(like move to lost+found).
3369          * That is the job of nlink repair.
3370          *
3371          * We just fill the record and return
3372          */
3373         rec->found_dir_item = 1;
3374         rec->imode = mode | btrfs_type_to_imode(filetype);
3375         rec->nlink = 0;
3376         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3377         /* Ensure the inode_nlinks repair function will be called */
3378         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3379 out:
3380         return ret;
3381 }
3382
3383 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3384                                       struct btrfs_root *root,
3385                                       struct btrfs_path *path,
3386                                       struct inode_record *rec)
3387 {
3388         struct orphan_data_extent *orphan;
3389         struct orphan_data_extent *tmp;
3390         int ret = 0;
3391
3392         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3393                 /*
3394                  * Check for conflicting file extents
3395                  *
3396                  * Here we don't know whether the extents is compressed or not,
3397                  * so we can only assume it not compressed nor data offset,
3398                  * and use its disk_len as extent length.
3399                  */
3400                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3401                                        orphan->offset, orphan->disk_len, 0);
3402                 btrfs_release_path(path);
3403                 if (ret < 0)
3404                         goto out;
3405                 if (!ret) {
3406                         fprintf(stderr,
3407                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3408                                 orphan->disk_bytenr, orphan->disk_len);
3409                         ret = btrfs_free_extent(trans,
3410                                         root->fs_info->extent_root,
3411                                         orphan->disk_bytenr, orphan->disk_len,
3412                                         0, root->objectid, orphan->objectid,
3413                                         orphan->offset);
3414                         if (ret < 0)
3415                                 goto out;
3416                 }
3417                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3418                                 orphan->offset, orphan->disk_bytenr,
3419                                 orphan->disk_len, orphan->disk_len);
3420                 if (ret < 0)
3421                         goto out;
3422
3423                 /* Update file size info */
3424                 rec->found_size += orphan->disk_len;
3425                 if (rec->found_size == rec->nbytes)
3426                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3427
3428                 /* Update the file extent hole info too */
3429                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3430                                            orphan->disk_len);
3431                 if (ret < 0)
3432                         goto out;
3433                 if (RB_EMPTY_ROOT(&rec->holes))
3434                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3435
3436                 list_del(&orphan->list);
3437                 free(orphan);
3438         }
3439         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3440 out:
3441         return ret;
3442 }
3443
3444 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3445                                         struct btrfs_root *root,
3446                                         struct btrfs_path *path,
3447                                         struct inode_record *rec)
3448 {
3449         struct rb_node *node;
3450         struct file_extent_hole *hole;
3451         int found = 0;
3452         int ret = 0;
3453
3454         node = rb_first(&rec->holes);
3455
3456         while (node) {
3457                 found = 1;
3458                 hole = rb_entry(node, struct file_extent_hole, node);
3459                 ret = btrfs_punch_hole(trans, root, rec->ino,
3460                                        hole->start, hole->len);
3461                 if (ret < 0)
3462                         goto out;
3463                 ret = del_file_extent_hole(&rec->holes, hole->start,
3464                                            hole->len);
3465                 if (ret < 0)
3466                         goto out;
3467                 if (RB_EMPTY_ROOT(&rec->holes))
3468                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3469                 node = rb_first(&rec->holes);
3470         }
3471         /* special case for a file losing all its file extent */
3472         if (!found) {
3473                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3474                                        round_up(rec->isize,
3475                                                 root->fs_info->sectorsize));
3476                 if (ret < 0)
3477                         goto out;
3478         }
3479         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3480                rec->ino, root->objectid);
3481 out:
3482         return ret;
3483 }
3484
3485 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3486 {
3487         struct btrfs_trans_handle *trans;
3488         struct btrfs_path path;
3489         int ret = 0;
3490
3491         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3492                              I_ERR_NO_ORPHAN_ITEM |
3493                              I_ERR_LINK_COUNT_WRONG |
3494                              I_ERR_NO_INODE_ITEM |
3495                              I_ERR_FILE_EXTENT_ORPHAN |
3496                              I_ERR_FILE_EXTENT_DISCOUNT|
3497                              I_ERR_FILE_NBYTES_WRONG)))
3498                 return rec->errors;
3499
3500         /*
3501          * For nlink repair, it may create a dir and add link, so
3502          * 2 for parent(256)'s dir_index and dir_item
3503          * 2 for lost+found dir's inode_item and inode_ref
3504          * 1 for the new inode_ref of the file
3505          * 2 for lost+found dir's dir_index and dir_item for the file
3506          */
3507         trans = btrfs_start_transaction(root, 7);
3508         if (IS_ERR(trans))
3509                 return PTR_ERR(trans);
3510
3511         btrfs_init_path(&path);
3512         if (rec->errors & I_ERR_NO_INODE_ITEM)
3513                 ret = repair_inode_no_item(trans, root, &path, rec);
3514         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3515                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3516         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3517                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3518         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3519                 ret = repair_inode_isize(trans, root, &path, rec);
3520         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3521                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3522         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3523                 ret = repair_inode_nlinks(trans, root, &path, rec);
3524         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3525                 ret = repair_inode_nbytes(trans, root, &path, rec);
3526         btrfs_commit_transaction(trans, root);
3527         btrfs_release_path(&path);
3528         return ret;
3529 }
3530
3531 static int check_inode_recs(struct btrfs_root *root,
3532                             struct cache_tree *inode_cache)
3533 {
3534         struct cache_extent *cache;
3535         struct ptr_node *node;
3536         struct inode_record *rec;
3537         struct inode_backref *backref;
3538         int stage = 0;
3539         int ret = 0;
3540         int err = 0;
3541         u64 error = 0;
3542         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3543
3544         if (btrfs_root_refs(&root->root_item) == 0) {
3545                 if (!cache_tree_empty(inode_cache))
3546                         fprintf(stderr, "warning line %d\n", __LINE__);
3547                 return 0;
3548         }
3549
3550         /*
3551          * We need to repair backrefs first because we could change some of the
3552          * errors in the inode recs.
3553          *
3554          * We also need to go through and delete invalid backrefs first and then
3555          * add the correct ones second.  We do this because we may get EEXIST
3556          * when adding back the correct index because we hadn't yet deleted the
3557          * invalid index.
3558          *
3559          * For example, if we were missing a dir index then the directories
3560          * isize would be wrong, so if we fixed the isize to what we thought it
3561          * would be and then fixed the backref we'd still have a invalid fs, so
3562          * we need to add back the dir index and then check to see if the isize
3563          * is still wrong.
3564          */
3565         while (stage < 3) {
3566                 stage++;
3567                 if (stage == 3 && !err)
3568                         break;
3569
3570                 cache = search_cache_extent(inode_cache, 0);
3571                 while (repair && cache) {
3572                         node = container_of(cache, struct ptr_node, cache);
3573                         rec = node->data;
3574                         cache = next_cache_extent(cache);
3575
3576                         /* Need to free everything up and rescan */
3577                         if (stage == 3) {
3578                                 remove_cache_extent(inode_cache, &node->cache);
3579                                 free(node);
3580                                 free_inode_rec(rec);
3581                                 continue;
3582                         }
3583
3584                         if (list_empty(&rec->backrefs))
3585                                 continue;
3586
3587                         ret = repair_inode_backrefs(root, rec, inode_cache,
3588                                                     stage == 1);
3589                         if (ret < 0) {
3590                                 err = ret;
3591                                 stage = 2;
3592                                 break;
3593                         } if (ret > 0) {
3594                                 err = -EAGAIN;
3595                         }
3596                 }
3597         }
3598         if (err)
3599                 return err;
3600
3601         rec = get_inode_rec(inode_cache, root_dirid, 0);
3602         BUG_ON(IS_ERR(rec));
3603         if (rec) {
3604                 ret = check_root_dir(rec);
3605                 if (ret) {
3606                         fprintf(stderr, "root %llu root dir %llu error\n",
3607                                 (unsigned long long)root->root_key.objectid,
3608                                 (unsigned long long)root_dirid);
3609                         print_inode_error(root, rec);
3610                         error++;
3611                 }
3612         } else {
3613                 if (repair) {
3614                         struct btrfs_trans_handle *trans;
3615
3616                         trans = btrfs_start_transaction(root, 1);
3617                         if (IS_ERR(trans)) {
3618                                 err = PTR_ERR(trans);
3619                                 return err;
3620                         }
3621
3622                         fprintf(stderr,
3623                                 "root %llu missing its root dir, recreating\n",
3624                                 (unsigned long long)root->objectid);
3625
3626                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3627                         BUG_ON(ret);
3628
3629                         btrfs_commit_transaction(trans, root);
3630                         return -EAGAIN;
3631                 }
3632
3633                 fprintf(stderr, "root %llu root dir %llu not found\n",
3634                         (unsigned long long)root->root_key.objectid,
3635                         (unsigned long long)root_dirid);
3636         }
3637
3638         while (1) {
3639                 cache = search_cache_extent(inode_cache, 0);
3640                 if (!cache)
3641                         break;
3642                 node = container_of(cache, struct ptr_node, cache);
3643                 rec = node->data;
3644                 remove_cache_extent(inode_cache, &node->cache);
3645                 free(node);
3646                 if (rec->ino == root_dirid ||
3647                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3648                         free_inode_rec(rec);
3649                         continue;
3650                 }
3651
3652                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3653                         ret = check_orphan_item(root, rec->ino);
3654                         if (ret == 0)
3655                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3656                         if (can_free_inode_rec(rec)) {
3657                                 free_inode_rec(rec);
3658                                 continue;
3659                         }
3660                 }
3661
3662                 if (!rec->found_inode_item)
3663                         rec->errors |= I_ERR_NO_INODE_ITEM;
3664                 if (rec->found_link != rec->nlink)
3665                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3666                 if (repair) {
3667                         ret = try_repair_inode(root, rec);
3668                         if (ret == 0 && can_free_inode_rec(rec)) {
3669                                 free_inode_rec(rec);
3670                                 continue;
3671                         }
3672                         ret = 0;
3673                 }
3674
3675                 if (!(repair && ret == 0))
3676                         error++;
3677                 print_inode_error(root, rec);
3678                 list_for_each_entry(backref, &rec->backrefs, list) {
3679                         if (!backref->found_dir_item)
3680                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3681                         if (!backref->found_dir_index)
3682                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3683                         if (!backref->found_inode_ref)
3684                                 backref->errors |= REF_ERR_NO_INODE_REF;
3685                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3686                                 " namelen %u name %s filetype %d errors %x",
3687                                 (unsigned long long)backref->dir,
3688                                 (unsigned long long)backref->index,
3689                                 backref->namelen, backref->name,
3690                                 backref->filetype, backref->errors);
3691                         print_ref_error(backref->errors);
3692                 }
3693                 free_inode_rec(rec);
3694         }
3695         return (error > 0) ? -1 : 0;
3696 }
3697
3698 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3699                                         u64 objectid)
3700 {
3701         struct cache_extent *cache;
3702         struct root_record *rec = NULL;
3703         int ret;
3704
3705         cache = lookup_cache_extent(root_cache, objectid, 1);
3706         if (cache) {
3707                 rec = container_of(cache, struct root_record, cache);
3708         } else {
3709                 rec = calloc(1, sizeof(*rec));
3710                 if (!rec)
3711                         return ERR_PTR(-ENOMEM);
3712                 rec->objectid = objectid;
3713                 INIT_LIST_HEAD(&rec->backrefs);
3714                 rec->cache.start = objectid;
3715                 rec->cache.size = 1;
3716
3717                 ret = insert_cache_extent(root_cache, &rec->cache);
3718                 if (ret)
3719                         return ERR_PTR(-EEXIST);
3720         }
3721         return rec;
3722 }
3723
3724 static struct root_backref *get_root_backref(struct root_record *rec,
3725                                              u64 ref_root, u64 dir, u64 index,
3726                                              const char *name, int namelen)
3727 {
3728         struct root_backref *backref;
3729
3730         list_for_each_entry(backref, &rec->backrefs, list) {
3731                 if (backref->ref_root != ref_root || backref->dir != dir ||
3732                     backref->namelen != namelen)
3733                         continue;
3734                 if (memcmp(name, backref->name, namelen))
3735                         continue;
3736                 return backref;
3737         }
3738
3739         backref = calloc(1, sizeof(*backref) + namelen + 1);
3740         if (!backref)
3741                 return NULL;
3742         backref->ref_root = ref_root;
3743         backref->dir = dir;
3744         backref->index = index;
3745         backref->namelen = namelen;
3746         memcpy(backref->name, name, namelen);
3747         backref->name[namelen] = '\0';
3748         list_add_tail(&backref->list, &rec->backrefs);
3749         return backref;
3750 }
3751
3752 static void free_root_record(struct cache_extent *cache)
3753 {
3754         struct root_record *rec;
3755         struct root_backref *backref;
3756
3757         rec = container_of(cache, struct root_record, cache);
3758         while (!list_empty(&rec->backrefs)) {
3759                 backref = to_root_backref(rec->backrefs.next);
3760                 list_del(&backref->list);
3761                 free(backref);
3762         }
3763
3764         free(rec);
3765 }
3766
3767 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3768
3769 static int add_root_backref(struct cache_tree *root_cache,
3770                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3771                             const char *name, int namelen,
3772                             int item_type, int errors)
3773 {
3774         struct root_record *rec;
3775         struct root_backref *backref;
3776
3777         rec = get_root_rec(root_cache, root_id);
3778         BUG_ON(IS_ERR(rec));
3779         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3780         BUG_ON(!backref);
3781
3782         backref->errors |= errors;
3783
3784         if (item_type != BTRFS_DIR_ITEM_KEY) {
3785                 if (backref->found_dir_index || backref->found_back_ref ||
3786                     backref->found_forward_ref) {
3787                         if (backref->index != index)
3788                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3789                 } else {
3790                         backref->index = index;
3791                 }
3792         }
3793
3794         if (item_type == BTRFS_DIR_ITEM_KEY) {
3795                 if (backref->found_forward_ref)
3796                         rec->found_ref++;
3797                 backref->found_dir_item = 1;
3798         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3799                 backref->found_dir_index = 1;
3800         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3801                 if (backref->found_forward_ref)
3802                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3803                 else if (backref->found_dir_item)
3804                         rec->found_ref++;
3805                 backref->found_forward_ref = 1;
3806         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3807                 if (backref->found_back_ref)
3808                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3809                 backref->found_back_ref = 1;
3810         } else {
3811                 BUG_ON(1);
3812         }
3813
3814         if (backref->found_forward_ref && backref->found_dir_item)
3815                 backref->reachable = 1;
3816         return 0;
3817 }
3818
3819 static int merge_root_recs(struct btrfs_root *root,
3820                            struct cache_tree *src_cache,
3821                            struct cache_tree *dst_cache)
3822 {
3823         struct cache_extent *cache;
3824         struct ptr_node *node;
3825         struct inode_record *rec;
3826         struct inode_backref *backref;
3827         int ret = 0;
3828
3829         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3830                 free_inode_recs_tree(src_cache);
3831                 return 0;
3832         }
3833
3834         while (1) {
3835                 cache = search_cache_extent(src_cache, 0);
3836                 if (!cache)
3837                         break;
3838                 node = container_of(cache, struct ptr_node, cache);
3839                 rec = node->data;
3840                 remove_cache_extent(src_cache, &node->cache);
3841                 free(node);
3842
3843                 ret = is_child_root(root, root->objectid, rec->ino);
3844                 if (ret < 0)
3845                         break;
3846                 else if (ret == 0)
3847                         goto skip;
3848
3849                 list_for_each_entry(backref, &rec->backrefs, list) {
3850                         BUG_ON(backref->found_inode_ref);
3851                         if (backref->found_dir_item)
3852                                 add_root_backref(dst_cache, rec->ino,
3853                                         root->root_key.objectid, backref->dir,
3854                                         backref->index, backref->name,
3855                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3856                                         backref->errors);
3857                         if (backref->found_dir_index)
3858                                 add_root_backref(dst_cache, rec->ino,
3859                                         root->root_key.objectid, backref->dir,
3860                                         backref->index, backref->name,
3861                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3862                                         backref->errors);
3863                 }
3864 skip:
3865                 free_inode_rec(rec);
3866         }
3867         if (ret < 0)
3868                 return ret;
3869         return 0;
3870 }
3871
3872 static int check_root_refs(struct btrfs_root *root,
3873                            struct cache_tree *root_cache)
3874 {
3875         struct root_record *rec;
3876         struct root_record *ref_root;
3877         struct root_backref *backref;
3878         struct cache_extent *cache;
3879         int loop = 1;
3880         int ret;
3881         int error;
3882         int errors = 0;
3883
3884         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3885         BUG_ON(IS_ERR(rec));
3886         rec->found_ref = 1;
3887
3888         /* fixme: this can not detect circular references */
3889         while (loop) {
3890                 loop = 0;
3891                 cache = search_cache_extent(root_cache, 0);
3892                 while (1) {
3893                         if (!cache)
3894                                 break;
3895                         rec = container_of(cache, struct root_record, cache);
3896                         cache = next_cache_extent(cache);
3897
3898                         if (rec->found_ref == 0)
3899                                 continue;
3900
3901                         list_for_each_entry(backref, &rec->backrefs, list) {
3902                                 if (!backref->reachable)
3903                                         continue;
3904
3905                                 ref_root = get_root_rec(root_cache,
3906                                                         backref->ref_root);
3907                                 BUG_ON(IS_ERR(ref_root));
3908                                 if (ref_root->found_ref > 0)
3909                                         continue;
3910
3911                                 backref->reachable = 0;
3912                                 rec->found_ref--;
3913                                 if (rec->found_ref == 0)
3914                                         loop = 1;
3915                         }
3916                 }
3917         }
3918
3919         cache = search_cache_extent(root_cache, 0);
3920         while (1) {
3921                 if (!cache)
3922                         break;
3923                 rec = container_of(cache, struct root_record, cache);
3924                 cache = next_cache_extent(cache);
3925
3926                 if (rec->found_ref == 0 &&
3927                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3928                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3929                         ret = check_orphan_item(root->fs_info->tree_root,
3930                                                 rec->objectid);
3931                         if (ret == 0)
3932                                 continue;
3933
3934                         /*
3935                          * If we don't have a root item then we likely just have
3936                          * a dir item in a snapshot for this root but no actual
3937                          * ref key or anything so it's meaningless.
3938                          */
3939                         if (!rec->found_root_item)
3940                                 continue;
3941                         errors++;
3942                         fprintf(stderr, "fs tree %llu not referenced\n",
3943                                 (unsigned long long)rec->objectid);
3944                 }
3945
3946                 error = 0;
3947                 if (rec->found_ref > 0 && !rec->found_root_item)
3948                         error = 1;
3949                 list_for_each_entry(backref, &rec->backrefs, list) {
3950                         if (!backref->found_dir_item)
3951                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3952                         if (!backref->found_dir_index)
3953                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3954                         if (!backref->found_back_ref)
3955                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3956                         if (!backref->found_forward_ref)
3957                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3958                         if (backref->reachable && backref->errors)
3959                                 error = 1;
3960                 }
3961                 if (!error)
3962                         continue;
3963
3964                 errors++;
3965                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3966                         (unsigned long long)rec->objectid, rec->found_ref,
3967                          rec->found_root_item ? "" : "not found");
3968
3969                 list_for_each_entry(backref, &rec->backrefs, list) {
3970                         if (!backref->reachable)
3971                                 continue;
3972                         if (!backref->errors && rec->found_root_item)
3973                                 continue;
3974                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3975                                 " index %llu namelen %u name %s errors %x\n",
3976                                 (unsigned long long)backref->ref_root,
3977                                 (unsigned long long)backref->dir,
3978                                 (unsigned long long)backref->index,
3979                                 backref->namelen, backref->name,
3980                                 backref->errors);
3981                         print_ref_error(backref->errors);
3982                 }
3983         }
3984         return errors > 0 ? 1 : 0;
3985 }
3986
3987 static int process_root_ref(struct extent_buffer *eb, int slot,
3988                             struct btrfs_key *key,
3989                             struct cache_tree *root_cache)
3990 {
3991         u64 dirid;
3992         u64 index;
3993         u32 len;
3994         u32 name_len;
3995         struct btrfs_root_ref *ref;
3996         char namebuf[BTRFS_NAME_LEN];
3997         int error;
3998
3999         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
4000
4001         dirid = btrfs_root_ref_dirid(eb, ref);
4002         index = btrfs_root_ref_sequence(eb, ref);
4003         name_len = btrfs_root_ref_name_len(eb, ref);
4004
4005         if (name_len <= BTRFS_NAME_LEN) {
4006                 len = name_len;
4007                 error = 0;
4008         } else {
4009                 len = BTRFS_NAME_LEN;
4010                 error = REF_ERR_NAME_TOO_LONG;
4011         }
4012         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
4013
4014         if (key->type == BTRFS_ROOT_REF_KEY) {
4015                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
4016                                  index, namebuf, len, key->type, error);
4017         } else {
4018                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
4019                                  index, namebuf, len, key->type, error);
4020         }
4021         return 0;
4022 }
4023
4024 static void free_corrupt_block(struct cache_extent *cache)
4025 {
4026         struct btrfs_corrupt_block *corrupt;
4027
4028         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
4029         free(corrupt);
4030 }
4031
4032 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
4033
4034 /*
4035  * Repair the btree of the given root.
4036  *
4037  * The fix is to remove the node key in corrupt_blocks cache_tree.
4038  * and rebalance the tree.
4039  * After the fix, the btree should be writeable.
4040  */
4041 static int repair_btree(struct btrfs_root *root,
4042                         struct cache_tree *corrupt_blocks)
4043 {
4044         struct btrfs_trans_handle *trans;
4045         struct btrfs_path path;
4046         struct btrfs_corrupt_block *corrupt;
4047         struct cache_extent *cache;
4048         struct btrfs_key key;
4049         u64 offset;
4050         int level;
4051         int ret = 0;
4052
4053         if (cache_tree_empty(corrupt_blocks))
4054                 return 0;
4055
4056         trans = btrfs_start_transaction(root, 1);
4057         if (IS_ERR(trans)) {
4058                 ret = PTR_ERR(trans);
4059                 fprintf(stderr, "Error starting transaction: %s\n",
4060                         strerror(-ret));
4061                 return ret;
4062         }
4063         btrfs_init_path(&path);
4064         cache = first_cache_extent(corrupt_blocks);
4065         while (cache) {
4066                 corrupt = container_of(cache, struct btrfs_corrupt_block,
4067                                        cache);
4068                 level = corrupt->level;
4069                 path.lowest_level = level;
4070                 key.objectid = corrupt->key.objectid;
4071                 key.type = corrupt->key.type;
4072                 key.offset = corrupt->key.offset;
4073
4074                 /*
4075                  * Here we don't want to do any tree balance, since it may
4076                  * cause a balance with corrupted brother leaf/node,
4077                  * so ins_len set to 0 here.
4078                  * Balance will be done after all corrupt node/leaf is deleted.
4079                  */
4080                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
4081                 if (ret < 0)
4082                         goto out;
4083                 offset = btrfs_node_blockptr(path.nodes[level],
4084                                              path.slots[level]);
4085
4086                 /* Remove the ptr */
4087                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
4088                 if (ret < 0)
4089                         goto out;
4090                 /*
4091                  * Remove the corresponding extent
4092                  * return value is not concerned.
4093                  */
4094                 btrfs_release_path(&path);
4095                 ret = btrfs_free_extent(trans, root, offset,
4096                                 root->fs_info->nodesize, 0,
4097                                 root->root_key.objectid, level - 1, 0);
4098                 cache = next_cache_extent(cache);
4099         }
4100
4101         /* Balance the btree using btrfs_search_slot() */
4102         cache = first_cache_extent(corrupt_blocks);
4103         while (cache) {
4104                 corrupt = container_of(cache, struct btrfs_corrupt_block,
4105                                        cache);
4106                 memcpy(&key, &corrupt->key, sizeof(key));
4107                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4108                 if (ret < 0)
4109                         goto out;
4110                 /* return will always >0 since it won't find the item */
4111                 ret = 0;
4112                 btrfs_release_path(&path);
4113                 cache = next_cache_extent(cache);
4114         }
4115 out:
4116         btrfs_commit_transaction(trans, root);
4117         btrfs_release_path(&path);
4118         return ret;
4119 }
4120
4121 static int check_fs_root(struct btrfs_root *root,
4122                          struct cache_tree *root_cache,
4123                          struct walk_control *wc)
4124 {
4125         int ret = 0;
4126         int err = 0;
4127         int wret;
4128         int level;
4129         struct btrfs_path path;
4130         struct shared_node root_node;
4131         struct root_record *rec;
4132         struct btrfs_root_item *root_item = &root->root_item;
4133         struct cache_tree corrupt_blocks;
4134         struct orphan_data_extent *orphan;
4135         struct orphan_data_extent *tmp;
4136         enum btrfs_tree_block_status status;
4137         struct node_refs nrefs;
4138
4139         /*
4140          * Reuse the corrupt_block cache tree to record corrupted tree block
4141          *
4142          * Unlike the usage in extent tree check, here we do it in a per
4143          * fs/subvol tree base.
4144          */
4145         cache_tree_init(&corrupt_blocks);
4146         root->fs_info->corrupt_blocks = &corrupt_blocks;
4147
4148         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4149                 rec = get_root_rec(root_cache, root->root_key.objectid);
4150                 BUG_ON(IS_ERR(rec));
4151                 if (btrfs_root_refs(root_item) > 0)
4152                         rec->found_root_item = 1;
4153         }
4154
4155         btrfs_init_path(&path);
4156         memset(&root_node, 0, sizeof(root_node));
4157         cache_tree_init(&root_node.root_cache);
4158         cache_tree_init(&root_node.inode_cache);
4159         memset(&nrefs, 0, sizeof(nrefs));
4160
4161         /* Move the orphan extent record to corresponding inode_record */
4162         list_for_each_entry_safe(orphan, tmp,
4163                                  &root->orphan_data_extents, list) {
4164                 struct inode_record *inode;
4165
4166                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4167                                       1);
4168                 BUG_ON(IS_ERR(inode));
4169                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4170                 list_move(&orphan->list, &inode->orphan_extents);
4171         }
4172
4173         level = btrfs_header_level(root->node);
4174         memset(wc->nodes, 0, sizeof(wc->nodes));
4175         wc->nodes[level] = &root_node;
4176         wc->active_node = level;
4177         wc->root_level = level;
4178
4179         /* We may not have checked the root block, lets do that now */
4180         if (btrfs_is_leaf(root->node))
4181                 status = btrfs_check_leaf(root, NULL, root->node);
4182         else
4183                 status = btrfs_check_node(root, NULL, root->node);
4184         if (status != BTRFS_TREE_BLOCK_CLEAN)
4185                 return -EIO;
4186
4187         if (btrfs_root_refs(root_item) > 0 ||
4188             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4189                 path.nodes[level] = root->node;
4190                 extent_buffer_get(root->node);
4191                 path.slots[level] = 0;
4192         } else {
4193                 struct btrfs_key key;
4194                 struct btrfs_disk_key found_key;
4195
4196                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4197                 level = root_item->drop_level;
4198                 path.lowest_level = level;
4199                 if (level > btrfs_header_level(root->node) ||
4200                     level >= BTRFS_MAX_LEVEL) {
4201                         error("ignoring invalid drop level: %u", level);
4202                         goto skip_walking;
4203                 }
4204                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4205                 if (wret < 0)
4206                         goto skip_walking;
4207                 btrfs_node_key(path.nodes[level], &found_key,
4208                                 path.slots[level]);
4209                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4210                                         sizeof(found_key)));
4211         }
4212
4213         while (1) {
4214                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4215                 if (wret < 0)
4216                         ret = wret;
4217                 if (wret != 0)
4218                         break;
4219
4220                 wret = walk_up_tree(root, &path, wc, &level);
4221                 if (wret < 0)
4222                         ret = wret;
4223                 if (wret != 0)
4224                         break;
4225         }
4226 skip_walking:
4227         btrfs_release_path(&path);
4228
4229         if (!cache_tree_empty(&corrupt_blocks)) {
4230                 struct cache_extent *cache;
4231                 struct btrfs_corrupt_block *corrupt;
4232
4233                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4234                        root->root_key.objectid);
4235                 cache = first_cache_extent(&corrupt_blocks);
4236                 while (cache) {
4237                         corrupt = container_of(cache,
4238                                                struct btrfs_corrupt_block,
4239                                                cache);
4240                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4241                                cache->start, corrupt->level,
4242                                corrupt->key.objectid, corrupt->key.type,
4243                                corrupt->key.offset);
4244                         cache = next_cache_extent(cache);
4245                 }
4246                 if (repair) {
4247                         printf("Try to repair the btree for root %llu\n",
4248                                root->root_key.objectid);
4249                         ret = repair_btree(root, &corrupt_blocks);
4250                         if (ret < 0)
4251                                 fprintf(stderr, "Failed to repair btree: %s\n",
4252                                         strerror(-ret));
4253                         if (!ret)
4254                                 printf("Btree for root %llu is fixed\n",
4255                                        root->root_key.objectid);
4256                 }
4257         }
4258
4259         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4260         if (err < 0)
4261                 ret = err;
4262
4263         if (root_node.current) {
4264                 root_node.current->checked = 1;
4265                 maybe_free_inode_rec(&root_node.inode_cache,
4266                                 root_node.current);
4267         }
4268
4269         err = check_inode_recs(root, &root_node.inode_cache);
4270         if (!ret)
4271                 ret = err;
4272
4273         free_corrupt_blocks_tree(&corrupt_blocks);
4274         root->fs_info->corrupt_blocks = NULL;
4275         free_orphan_data_extents(&root->orphan_data_extents);
4276         return ret;
4277 }
4278
4279 static int fs_root_objectid(u64 objectid)
4280 {
4281         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4282             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4283                 return 1;
4284         return is_fstree(objectid);
4285 }
4286
4287 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4288                           struct cache_tree *root_cache)
4289 {
4290         struct btrfs_path path;
4291         struct btrfs_key key;
4292         struct walk_control wc;
4293         struct extent_buffer *leaf, *tree_node;
4294         struct btrfs_root *tmp_root;
4295         struct btrfs_root *tree_root = fs_info->tree_root;
4296         int ret;
4297         int err = 0;
4298
4299         if (ctx.progress_enabled) {
4300                 ctx.tp = TASK_FS_ROOTS;
4301                 task_start(ctx.info);
4302         }
4303
4304         /*
4305          * Just in case we made any changes to the extent tree that weren't
4306          * reflected into the free space cache yet.
4307          */
4308         if (repair)
4309                 reset_cached_block_groups(fs_info);
4310         memset(&wc, 0, sizeof(wc));
4311         cache_tree_init(&wc.shared);
4312         btrfs_init_path(&path);
4313
4314 again:
4315         key.offset = 0;
4316         key.objectid = 0;
4317         key.type = BTRFS_ROOT_ITEM_KEY;
4318         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4319         if (ret < 0) {
4320                 err = 1;
4321                 goto out;
4322         }
4323         tree_node = tree_root->node;
4324         while (1) {
4325                 if (tree_node != tree_root->node) {
4326                         free_root_recs_tree(root_cache);
4327                         btrfs_release_path(&path);
4328                         goto again;
4329                 }
4330                 leaf = path.nodes[0];
4331                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4332                         ret = btrfs_next_leaf(tree_root, &path);
4333                         if (ret) {
4334                                 if (ret < 0)
4335                                         err = 1;
4336                                 break;
4337                         }
4338                         leaf = path.nodes[0];
4339                 }
4340                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4341                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4342                     fs_root_objectid(key.objectid)) {
4343                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4344                                 tmp_root = btrfs_read_fs_root_no_cache(
4345                                                 fs_info, &key);
4346                         } else {
4347                                 key.offset = (u64)-1;
4348                                 tmp_root = btrfs_read_fs_root(
4349                                                 fs_info, &key);
4350                         }
4351                         if (IS_ERR(tmp_root)) {
4352                                 err = 1;
4353                                 goto next;
4354                         }
4355                         ret = check_fs_root(tmp_root, root_cache, &wc);
4356                         if (ret == -EAGAIN) {
4357                                 free_root_recs_tree(root_cache);
4358                                 btrfs_release_path(&path);
4359                                 goto again;
4360                         }
4361                         if (ret)
4362                                 err = 1;
4363                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4364                                 btrfs_free_fs_root(tmp_root);
4365                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4366                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4367                         process_root_ref(leaf, path.slots[0], &key,
4368                                          root_cache);
4369                 }
4370 next:
4371                 path.slots[0]++;
4372         }
4373 out:
4374         btrfs_release_path(&path);
4375         if (err)
4376                 free_extent_cache_tree(&wc.shared);
4377         if (!cache_tree_empty(&wc.shared))
4378                 fprintf(stderr, "warning line %d\n", __LINE__);
4379
4380         task_stop(ctx.info);
4381
4382         return err;
4383 }
4384
4385 /*
4386  * Find the @index according by @ino and name.
4387  * Notice:time efficiency is O(N)
4388  *
4389  * @root:       the root of the fs/file tree
4390  * @index_ret:  the index as return value
4391  * @namebuf:    the name to match
4392  * @name_len:   the length of name to match
4393  * @file_type:  the file_type of INODE_ITEM to match
4394  *
4395  * Returns 0 if found and *@index_ret will be modified with right value
4396  * Returns< 0 not found and *@index_ret will be (u64)-1
4397  */
4398 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4399                           u64 *index_ret, char *namebuf, u32 name_len,
4400                           u8 file_type)
4401 {
4402         struct btrfs_path path;
4403         struct extent_buffer *node;
4404         struct btrfs_dir_item *di;
4405         struct btrfs_key key;
4406         struct btrfs_key location;
4407         char name[BTRFS_NAME_LEN] = {0};
4408
4409         u32 total;
4410         u32 cur = 0;
4411         u32 len;
4412         u32 data_len;
4413         u8 filetype;
4414         int slot;
4415         int ret;
4416
4417         ASSERT(index_ret);
4418
4419         /* search from the last index */
4420         key.objectid = dirid;
4421         key.offset = (u64)-1;
4422         key.type = BTRFS_DIR_INDEX_KEY;
4423
4424         btrfs_init_path(&path);
4425         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4426         if (ret < 0)
4427                 return ret;
4428
4429 loop:
4430         ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4431         if (ret) {
4432                 ret = -ENOENT;
4433                 *index_ret = (64)-1;
4434                 goto out;
4435         }
4436         /* Check whether inode_id/filetype/name match */
4437         node = path.nodes[0];
4438         slot = path.slots[0];
4439         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4440         total = btrfs_item_size_nr(node, slot);
4441         while (cur < total) {
4442                 ret = -ENOENT;
4443                 len = btrfs_dir_name_len(node, di);
4444                 data_len = btrfs_dir_data_len(node, di);
4445
4446                 btrfs_dir_item_key_to_cpu(node, di, &location);
4447                 if (location.objectid != location_id ||
4448                     location.type != BTRFS_INODE_ITEM_KEY ||
4449                     location.offset != 0)
4450                         goto next;
4451
4452                 filetype = btrfs_dir_type(node, di);
4453                 if (file_type != filetype)
4454                         goto next;
4455
4456                 if (len > BTRFS_NAME_LEN)
4457                         len = BTRFS_NAME_LEN;
4458
4459                 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4460                 if (len != name_len || strncmp(namebuf, name, len))
4461                         goto next;
4462
4463                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4464                 *index_ret = key.offset;
4465                 ret = 0;
4466                 goto out;
4467 next:
4468                 len += sizeof(*di) + data_len;
4469                 di = (struct btrfs_dir_item *)((char *)di + len);
4470                 cur += len;
4471         }
4472         goto loop;
4473
4474 out:
4475         btrfs_release_path(&path);
4476         return ret;
4477 }
4478
4479 /*
4480  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4481  * INODE_REF/INODE_EXTREF match.
4482  *
4483  * @root:       the root of the fs/file tree
4484  * @key:        the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4485  *              value while find index
4486  * @location_key: location key of the struct btrfs_dir_item to match
4487  * @name:       the name to match
4488  * @namelen:    the length of name
4489  * @file_type:  the type of file to math
4490  *
4491  * Return 0 if no error occurred.
4492  * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4493  * DIR_ITEM/DIR_INDEX
4494  * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4495  * and DIR_ITEM/DIR_INDEX mismatch
4496  */
4497 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4498                          struct btrfs_key *location_key, char *name,
4499                          u32 namelen, u8 file_type)
4500 {
4501         struct btrfs_path path;
4502         struct extent_buffer *node;
4503         struct btrfs_dir_item *di;
4504         struct btrfs_key location;
4505         char namebuf[BTRFS_NAME_LEN] = {0};
4506         u32 total;
4507         u32 cur = 0;
4508         u32 len;
4509         u32 data_len;
4510         u8 filetype;
4511         int slot;
4512         int ret;
4513
4514         /* get the index by traversing all index */
4515         if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4516                 ret = find_dir_index(root, key->objectid,
4517                                      location_key->objectid, &key->offset,
4518                                      name, namelen, file_type);
4519                 if (ret)
4520                         ret = DIR_INDEX_MISSING;
4521                 return ret;
4522         }
4523
4524         btrfs_init_path(&path);
4525         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4526         if (ret) {
4527                 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4528                         DIR_INDEX_MISSING;
4529                 goto out;
4530         }
4531
4532         /* Check whether inode_id/filetype/name match */
4533         node = path.nodes[0];
4534         slot = path.slots[0];
4535         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4536         total = btrfs_item_size_nr(node, slot);
4537         while (cur < total) {
4538                 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4539                         DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4540
4541                 len = btrfs_dir_name_len(node, di);
4542                 data_len = btrfs_dir_data_len(node, di);
4543
4544                 btrfs_dir_item_key_to_cpu(node, di, &location);
4545                 if (location.objectid != location_key->objectid ||
4546                     location.type != location_key->type ||
4547                     location.offset != location_key->offset)
4548                         goto next;
4549
4550                 filetype = btrfs_dir_type(node, di);
4551                 if (file_type != filetype)
4552                         goto next;
4553
4554                 if (len > BTRFS_NAME_LEN) {
4555                         len = BTRFS_NAME_LEN;
4556                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4557                         root->objectid,
4558                         key->type == BTRFS_DIR_ITEM_KEY ?
4559                         "DIR_ITEM" : "DIR_INDEX",
4560                         key->objectid, key->offset, len);
4561                 }
4562                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4563                                    len);
4564                 if (len != namelen || strncmp(namebuf, name, len))
4565                         goto next;
4566
4567                 ret = 0;
4568                 goto out;
4569 next:
4570                 len += sizeof(*di) + data_len;
4571                 di = (struct btrfs_dir_item *)((char *)di + len);
4572                 cur += len;
4573         }
4574
4575 out:
4576         btrfs_release_path(&path);
4577         return ret;
4578 }
4579
4580 /*
4581  * Prints inode ref error message
4582  */
4583 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4584                                 u64 index, const char *namebuf, int name_len,
4585                                 u8 filetype, int err)
4586 {
4587         if (!err)
4588                 return;
4589
4590         /* root dir error */
4591         if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4592                 error(
4593         "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4594                       root->objectid, key->objectid, key->offset, namebuf);
4595                 return;
4596         }
4597
4598         /* normal error */
4599         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4600                 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4601                       root->objectid, key->offset,
4602                       btrfs_name_hash(namebuf, name_len),
4603                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4604                       namebuf, filetype);
4605         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4606                 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4607                       root->objectid, key->offset, index,
4608                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4609                       namebuf, filetype);
4610 }
4611
4612 /*
4613  * Insert the missing inode item.
4614  *
4615  * Returns 0 means success.
4616  * Returns <0 means error.
4617  */
4618 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4619                                      u8 filetype)
4620 {
4621         struct btrfs_key key;
4622         struct btrfs_trans_handle *trans;
4623         struct btrfs_path path;
4624         int ret;
4625
4626         key.objectid = ino;
4627         key.type = BTRFS_INODE_ITEM_KEY;
4628         key.offset = 0;
4629
4630         btrfs_init_path(&path);
4631         trans = btrfs_start_transaction(root, 1);
4632         if (IS_ERR(trans)) {
4633                 ret = -EIO;
4634                 goto out;
4635         }
4636
4637         ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4638         if (ret < 0 || !ret)
4639                 goto fail;
4640
4641         /* insert inode item */
4642         create_inode_item_lowmem(trans, root, ino, filetype);
4643         ret = 0;
4644 fail:
4645         btrfs_commit_transaction(trans, root);
4646 out:
4647         if (ret)
4648                 error("failed to repair root %llu INODE ITEM[%llu] missing",
4649                       root->objectid, ino);
4650         btrfs_release_path(&path);
4651         return ret;
4652 }
4653
4654 /*
4655  * The ternary means dir item, dir index and relative inode ref.
4656  * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4657  * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4658  * strategy:
4659  * If two of three is missing or mismatched, delete the existing one.
4660  * If one of three is missing or mismatched, add the missing one.
4661  *
4662  * returns 0 means success.
4663  * returns not 0 means on error;
4664  */
4665 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4666                           u64 index, char *name, int name_len, u8 filetype,
4667                           int err)
4668 {
4669         struct btrfs_trans_handle *trans;
4670         int stage = 0;
4671         int ret = 0;
4672
4673         /*
4674          * stage shall be one of following valild values:
4675          *      0: Fine, nothing to do.
4676          *      1: One of three is wrong, so add missing one.
4677          *      2: Two of three is wrong, so delete existed one.
4678          */
4679         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4680                 stage++;
4681         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4682                 stage++;
4683         if (err & (INODE_REF_MISSING))
4684                 stage++;
4685
4686         /* stage must be smllarer than 3 */
4687         ASSERT(stage < 3);
4688
4689         trans = btrfs_start_transaction(root, 1);
4690         if (stage == 2) {
4691                 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4692                                    name_len, 0);
4693                 goto out;
4694         }
4695         if (stage == 1) {
4696                 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4697                                filetype, &index, 1, 1);
4698                 goto out;
4699         }
4700 out:
4701         btrfs_commit_transaction(trans, root);
4702
4703         if (ret)
4704                 error("fail to repair inode %llu name %s filetype %u",
4705                       ino, name, filetype);
4706         else
4707                 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4708                        stage == 2 ? "Delete" : "Add",
4709                        ino, name, filetype);
4710
4711         return ret;
4712 }
4713
4714 /*
4715  * Traverse the given INODE_REF and call find_dir_item() to find related
4716  * DIR_ITEM/DIR_INDEX.
4717  *
4718  * @root:       the root of the fs/file tree
4719  * @ref_key:    the key of the INODE_REF
4720  * @path        the path provides node and slot
4721  * @refs:       the count of INODE_REF
4722  * @mode:       the st_mode of INODE_ITEM
4723  * @name_ret:   returns with the first ref's name
4724  * @name_len_ret:    len of the name_ret
4725  *
4726  * Return 0 if no error occurred.
4727  */
4728 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4729                            struct btrfs_path *path, char *name_ret,
4730                            u32 *namelen_ret, u64 *refs_ret, int mode)
4731 {
4732         struct btrfs_key key;
4733         struct btrfs_key location;
4734         struct btrfs_inode_ref *ref;
4735         struct extent_buffer *node;
4736         char namebuf[BTRFS_NAME_LEN] = {0};
4737         u32 total;
4738         u32 cur = 0;
4739         u32 len;
4740         u32 name_len;
4741         u64 index;
4742         int ret;
4743         int err = 0;
4744         int tmp_err;
4745         int slot;
4746         int need_research = 0;
4747         u64 refs;
4748
4749 begin:
4750         err = 0;
4751         cur = 0;
4752         refs = *refs_ret;
4753
4754         /* since after repair, path and the dir item may be changed */
4755         if (need_research) {
4756                 need_research = 0;
4757                 btrfs_release_path(path);
4758                 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4759                 /* the item was deleted, let path point to the last checked item */
4760                 if (ret > 0) {
4761                         if (path->slots[0] == 0)
4762                                 btrfs_prev_leaf(root, path);
4763                         else
4764                                 path->slots[0]--;
4765                 }
4766                 if (ret)
4767                         goto out;
4768         }
4769
4770         location.objectid = ref_key->objectid;
4771         location.type = BTRFS_INODE_ITEM_KEY;
4772         location.offset = 0;
4773         node = path->nodes[0];
4774         slot = path->slots[0];
4775
4776         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4777         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4778         total = btrfs_item_size_nr(node, slot);
4779
4780 next:
4781         /* Update inode ref count */
4782         refs++;
4783         tmp_err = 0;
4784         index = btrfs_inode_ref_index(node, ref);
4785         name_len = btrfs_inode_ref_name_len(node, ref);
4786
4787         if (name_len <= BTRFS_NAME_LEN) {
4788                 len = name_len;
4789         } else {
4790                 len = BTRFS_NAME_LEN;
4791                 warning("root %llu INODE_REF[%llu %llu] name too long",
4792                         root->objectid, ref_key->objectid, ref_key->offset);
4793         }
4794
4795         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4796
4797         /* copy the first name found to name_ret */
4798         if (refs == 1 && name_ret) {
4799                 memcpy(name_ret, namebuf, len);
4800                 *namelen_ret = len;
4801         }
4802
4803         /* Check root dir ref */
4804         if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4805                 if (index != 0 || len != strlen("..") ||
4806                     strncmp("..", namebuf, len) ||
4807                     ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4808                         /* set err bits then repair will delete the ref */
4809                         err |= DIR_INDEX_MISSING;
4810                         err |= DIR_ITEM_MISSING;
4811                 }
4812                 goto end;
4813         }
4814
4815         /* Find related DIR_INDEX */
4816         key.objectid = ref_key->offset;
4817         key.type = BTRFS_DIR_INDEX_KEY;
4818         key.offset = index;
4819         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4820                             imode_to_type(mode));
4821
4822         /* Find related dir_item */
4823         key.objectid = ref_key->offset;
4824         key.type = BTRFS_DIR_ITEM_KEY;
4825         key.offset = btrfs_name_hash(namebuf, len);
4826         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4827                             imode_to_type(mode));
4828 end:
4829         if (tmp_err && repair) {
4830                 ret = repair_ternary_lowmem(root, ref_key->offset,
4831                                             ref_key->objectid, index, namebuf,
4832                                             name_len, imode_to_type(mode),
4833                                             tmp_err);
4834                 if (!ret) {
4835                         need_research = 1;
4836                         goto begin;
4837                 }
4838         }
4839         print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4840                             imode_to_type(mode), tmp_err);
4841         err |= tmp_err;
4842         len = sizeof(*ref) + name_len;
4843         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4844         cur += len;
4845         if (cur < total)
4846                 goto next;
4847
4848 out:
4849         *refs_ret = refs;
4850         return err;
4851 }
4852
4853 /*
4854  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4855  * DIR_ITEM/DIR_INDEX.
4856  *
4857  * @root:       the root of the fs/file tree
4858  * @ref_key:    the key of the INODE_EXTREF
4859  * @refs:       the count of INODE_EXTREF
4860  * @mode:       the st_mode of INODE_ITEM
4861  *
4862  * Return 0 if no error occurred.
4863  */
4864 static int check_inode_extref(struct btrfs_root *root,
4865                               struct btrfs_key *ref_key,
4866                               struct extent_buffer *node, int slot, u64 *refs,
4867                               int mode)
4868 {
4869         struct btrfs_key key;
4870         struct btrfs_key location;
4871         struct btrfs_inode_extref *extref;
4872         char namebuf[BTRFS_NAME_LEN] = {0};
4873         u32 total;
4874         u32 cur = 0;
4875         u32 len;
4876         u32 name_len;
4877         u64 index;
4878         u64 parent;
4879         int ret;
4880         int err = 0;
4881
4882         location.objectid = ref_key->objectid;
4883         location.type = BTRFS_INODE_ITEM_KEY;
4884         location.offset = 0;
4885
4886         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4887         total = btrfs_item_size_nr(node, slot);
4888
4889 next:
4890         /* update inode ref count */
4891         (*refs)++;
4892         name_len = btrfs_inode_extref_name_len(node, extref);
4893         index = btrfs_inode_extref_index(node, extref);
4894         parent = btrfs_inode_extref_parent(node, extref);
4895         if (name_len <= BTRFS_NAME_LEN) {
4896                 len = name_len;
4897         } else {
4898                 len = BTRFS_NAME_LEN;
4899                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4900                         root->objectid, ref_key->objectid, ref_key->offset);
4901         }
4902         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4903
4904         /* Check root dir ref name */
4905         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4906                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4907                       root->objectid, ref_key->objectid, ref_key->offset,
4908                       namebuf);
4909                 err |= ROOT_DIR_ERROR;
4910         }
4911
4912         /* find related dir_index */
4913         key.objectid = parent;
4914         key.type = BTRFS_DIR_INDEX_KEY;
4915         key.offset = index;
4916         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4917         err |= ret;
4918
4919         /* find related dir_item */
4920         key.objectid = parent;
4921         key.type = BTRFS_DIR_ITEM_KEY;
4922         key.offset = btrfs_name_hash(namebuf, len);
4923         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4924         err |= ret;
4925
4926         len = sizeof(*extref) + name_len;
4927         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4928         cur += len;
4929
4930         if (cur < total)
4931                 goto next;
4932
4933         return err;
4934 }
4935
4936 /*
4937  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4938  * DIR_ITEM/DIR_INDEX match.
4939  * Return with @index_ret.
4940  *
4941  * @root:       the root of the fs/file tree
4942  * @key:        the key of the INODE_REF/INODE_EXTREF
4943  * @name:       the name in the INODE_REF/INODE_EXTREF
4944  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4945  * @index_ret:  the index in the INODE_REF/INODE_EXTREF,
4946  *              value (64)-1 means do not check index
4947  * @ext_ref:    the EXTENDED_IREF feature
4948  *
4949  * Return 0 if no error occurred.
4950  * Return >0 for error bitmap
4951  */
4952 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4953                           char *name, int namelen, u64 *index_ret,
4954                           unsigned int ext_ref)
4955 {
4956         struct btrfs_path path;
4957         struct btrfs_inode_ref *ref;
4958         struct btrfs_inode_extref *extref;
4959         struct extent_buffer *node;
4960         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4961         u32 total;
4962         u32 cur = 0;
4963         u32 len;
4964         u32 ref_namelen;
4965         u64 ref_index;
4966         u64 parent;
4967         u64 dir_id;
4968         int slot;
4969         int ret;
4970
4971         ASSERT(index_ret);
4972
4973         btrfs_init_path(&path);
4974         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4975         if (ret) {
4976                 ret = INODE_REF_MISSING;
4977                 goto extref;
4978         }
4979
4980         node = path.nodes[0];
4981         slot = path.slots[0];
4982
4983         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4984         total = btrfs_item_size_nr(node, slot);
4985
4986         /* Iterate all entry of INODE_REF */
4987         while (cur < total) {
4988                 ret = INODE_REF_MISSING;
4989
4990                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4991                 ref_index = btrfs_inode_ref_index(node, ref);
4992                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4993                         goto next_ref;
4994
4995                 if (cur + sizeof(*ref) + ref_namelen > total ||
4996                     ref_namelen > BTRFS_NAME_LEN) {
4997                         warning("root %llu INODE %s[%llu %llu] name too long",
4998                                 root->objectid,
4999                                 key->type == BTRFS_INODE_REF_KEY ?
5000                                         "REF" : "EXTREF",
5001                                 key->objectid, key->offset);
5002
5003                         if (cur + sizeof(*ref) > total)
5004                                 break;
5005                         len = min_t(u32, total - cur - sizeof(*ref),
5006                                     BTRFS_NAME_LEN);
5007                 } else {
5008                         len = ref_namelen;
5009                 }
5010
5011                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
5012                                    len);
5013
5014                 if (len != namelen || strncmp(ref_namebuf, name, len))
5015                         goto next_ref;
5016
5017                 *index_ret = ref_index;
5018                 ret = 0;
5019                 goto out;
5020 next_ref:
5021                 len = sizeof(*ref) + ref_namelen;
5022                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5023                 cur += len;
5024         }
5025
5026 extref:
5027         /* Skip if not support EXTENDED_IREF feature */
5028         if (!ext_ref)
5029                 goto out;
5030
5031         btrfs_release_path(&path);
5032         btrfs_init_path(&path);
5033
5034         dir_id = key->offset;
5035         key->type = BTRFS_INODE_EXTREF_KEY;
5036         key->offset = btrfs_extref_hash(dir_id, name, namelen);
5037
5038         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5039         if (ret) {
5040                 ret = INODE_REF_MISSING;
5041                 goto out;
5042         }
5043
5044         node = path.nodes[0];
5045         slot = path.slots[0];
5046
5047         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5048         cur = 0;
5049         total = btrfs_item_size_nr(node, slot);
5050
5051         /* Iterate all entry of INODE_EXTREF */
5052         while (cur < total) {
5053                 ret = INODE_REF_MISSING;
5054
5055                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
5056                 ref_index = btrfs_inode_extref_index(node, extref);
5057                 parent = btrfs_inode_extref_parent(node, extref);
5058                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5059                         goto next_extref;
5060
5061                 if (parent != dir_id)
5062                         goto next_extref;
5063
5064                 if (ref_namelen <= BTRFS_NAME_LEN) {
5065                         len = ref_namelen;
5066                 } else {
5067                         len = BTRFS_NAME_LEN;
5068                         warning("root %llu INODE %s[%llu %llu] name too long",
5069                                 root->objectid,
5070                                 key->type == BTRFS_INODE_REF_KEY ?
5071                                         "REF" : "EXTREF",
5072                                 key->objectid, key->offset);
5073                 }
5074                 read_extent_buffer(node, ref_namebuf,
5075                                    (unsigned long)(extref + 1), len);
5076
5077                 if (len != namelen || strncmp(ref_namebuf, name, len))
5078                         goto next_extref;
5079
5080                 *index_ret = ref_index;
5081                 ret = 0;
5082                 goto out;
5083
5084 next_extref:
5085                 len = sizeof(*extref) + ref_namelen;
5086                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5087                 cur += len;
5088
5089         }
5090 out:
5091         btrfs_release_path(&path);
5092         return ret;
5093 }
5094
5095 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
5096                                u64 ino, u64 index, const char *namebuf,
5097                                int name_len, u8 filetype, int err)
5098 {
5099         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5100                 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5101                       root->objectid, key->objectid, key->offset, namebuf,
5102                       filetype,
5103                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5104         }
5105
5106         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5107                 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5108                       root->objectid, key->objectid, index, namebuf, filetype,
5109                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5110         }
5111
5112         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5113                 error(
5114                 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5115                       root->objectid, ino, index, namebuf, filetype,
5116                       err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
5117         }
5118
5119         if (err & INODE_REF_MISSING)
5120                 error(
5121                 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5122                       root->objectid, ino, key->objectid, namebuf, filetype);
5123
5124 }
5125
5126 /*
5127  * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5128  *
5129  * Returns error after repair
5130  */
5131 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5132                            u64 index, u8 filetype, char *namebuf, u32 name_len,
5133                            int err)
5134 {
5135         int ret;
5136
5137         if (err & INODE_ITEM_MISSING) {
5138                 ret = repair_inode_item_missing(root, ino, filetype);
5139                 if (!ret)
5140                         err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
5141         }
5142
5143         if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5144                 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5145                                             name_len, filetype, err);
5146                 if (!ret) {
5147                         err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5148                         err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5149                         err &= ~(INODE_REF_MISSING);
5150                 }
5151         }
5152         return err;
5153 }
5154
5155 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5156                 u64 *size_ret)
5157 {
5158         struct btrfs_key key;
5159         struct btrfs_path path;
5160         u32 len;
5161         struct btrfs_dir_item *di;
5162         int ret;
5163         int cur = 0;
5164         int total = 0;
5165
5166         ASSERT(size_ret);
5167         *size_ret = 0;
5168
5169         key.objectid = ino;
5170         key.type = type;
5171         key.offset = (u64)-1;
5172
5173         btrfs_init_path(&path);
5174         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5175         if (ret < 0) {
5176                 ret = -EIO;
5177                 goto out;
5178         }
5179         /* if found, go to spacial case */
5180         if (ret == 0)
5181                 goto special_case;
5182
5183 loop:
5184         ret = btrfs_previous_item(root, &path, ino, type);
5185
5186         if (ret) {
5187                 ret = 0;
5188                 goto out;
5189         }
5190
5191 special_case:
5192         di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5193         cur = 0;
5194         total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5195
5196         while (cur < total) {
5197                 len = btrfs_dir_name_len(path.nodes[0], di);
5198                 if (len > BTRFS_NAME_LEN)
5199                         len = BTRFS_NAME_LEN;
5200                 *size_ret += len;
5201
5202                 len += btrfs_dir_data_len(path.nodes[0], di);
5203                 len += sizeof(*di);
5204                 di = (struct btrfs_dir_item *)((char *)di + len);
5205                 cur += len;
5206         }
5207         goto loop;
5208
5209 out:
5210         btrfs_release_path(&path);
5211         return ret;
5212 }
5213
5214 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5215 {
5216         u64 item_size;
5217         u64 index_size;
5218         int ret;
5219
5220         ASSERT(size);
5221         ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5222         if (ret)
5223                 goto out;
5224
5225         ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5226         if (ret)
5227                 goto out;
5228
5229         *size = item_size + index_size;
5230
5231 out:
5232         if (ret)
5233                 error("failed to count root %llu INODE[%llu] root size",
5234                       root->objectid, ino);
5235         return ret;
5236 }
5237
5238 /*
5239  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5240  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5241  *
5242  * @root:       the root of the fs/file tree
5243  * @key:        the key of the INODE_REF/INODE_EXTREF
5244  * @path:       the path
5245  * @size:       the st_size of the INODE_ITEM
5246  * @ext_ref:    the EXTENDED_IREF feature
5247  *
5248  * Return 0 if no error occurred.
5249  * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
5250  */
5251 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5252                           struct btrfs_path *path, u64 *size,
5253                           unsigned int ext_ref)
5254 {
5255         struct btrfs_dir_item *di;
5256         struct btrfs_inode_item *ii;
5257         struct btrfs_key key;
5258         struct btrfs_key location;
5259         struct extent_buffer *node;
5260         int slot;
5261         char namebuf[BTRFS_NAME_LEN] = {0};
5262         u32 total;
5263         u32 cur = 0;
5264         u32 len;
5265         u32 name_len;
5266         u32 data_len;
5267         u8 filetype;
5268         u32 mode = 0;
5269         u64 index;
5270         int ret;
5271         int err;
5272         int tmp_err;
5273         int need_research = 0;
5274
5275         /*
5276          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5277          * ignore index check.
5278          */
5279         if (di_key->type == BTRFS_DIR_INDEX_KEY)
5280                 index = di_key->offset;
5281         else
5282                 index = (u64)-1;
5283 begin:
5284         err = 0;
5285         cur = 0;
5286
5287         /* since after repair, path and the dir item may be changed */
5288         if (need_research) {
5289                 need_research = 0;
5290                 err |= DIR_COUNT_AGAIN;
5291                 btrfs_release_path(path);
5292                 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5293                 /* the item was deleted, let path point the last checked item */
5294                 if (ret > 0) {
5295                         if (path->slots[0] == 0)
5296                                 btrfs_prev_leaf(root, path);
5297                         else
5298                                 path->slots[0]--;
5299                 }
5300                 if (ret)
5301                         goto out;
5302         }
5303
5304         node = path->nodes[0];
5305         slot = path->slots[0];
5306
5307         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5308         total = btrfs_item_size_nr(node, slot);
5309         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5310
5311         while (cur < total) {
5312                 data_len = btrfs_dir_data_len(node, di);
5313                 tmp_err = 0;
5314                 if (data_len)
5315                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5316                               root->objectid,
5317               di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5318                               di_key->objectid, di_key->offset, data_len);
5319
5320                 name_len = btrfs_dir_name_len(node, di);
5321                 if (name_len <= BTRFS_NAME_LEN) {
5322                         len = name_len;
5323                 } else {
5324                         len = BTRFS_NAME_LEN;
5325                         warning("root %llu %s[%llu %llu] name too long",
5326                                 root->objectid,
5327                 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5328                                 di_key->objectid, di_key->offset);
5329                 }
5330                 (*size) += name_len;
5331                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5332                                    len);
5333                 filetype = btrfs_dir_type(node, di);
5334
5335                 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5336                     di_key->offset != btrfs_name_hash(namebuf, len)) {
5337                         err |= -EIO;
5338                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5339                         root->objectid, di_key->objectid, di_key->offset,
5340                         namebuf, len, filetype, di_key->offset,
5341                         btrfs_name_hash(namebuf, len));
5342                 }
5343
5344                 btrfs_dir_item_key_to_cpu(node, di, &location);
5345                 /* Ignore related ROOT_ITEM check */
5346                 if (location.type == BTRFS_ROOT_ITEM_KEY)
5347                         goto next;
5348
5349                 btrfs_release_path(path);
5350                 /* Check relative INODE_ITEM(existence/filetype) */
5351                 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5352                 if (ret) {
5353                         tmp_err |= INODE_ITEM_MISSING;
5354                         goto next;
5355                 }
5356
5357                 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5358                                     struct btrfs_inode_item);
5359                 mode = btrfs_inode_mode(path->nodes[0], ii);
5360                 if (imode_to_type(mode) != filetype) {
5361                         tmp_err |= INODE_ITEM_MISMATCH;
5362                         goto next;
5363                 }
5364
5365                 /* Check relative INODE_REF/INODE_EXTREF */
5366                 key.objectid = location.objectid;
5367                 key.type = BTRFS_INODE_REF_KEY;
5368                 key.offset = di_key->objectid;
5369                 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5370                                           &index, ext_ref);
5371
5372                 /* check relative INDEX/ITEM */
5373                 key.objectid = di_key->objectid;
5374                 if (key.type == BTRFS_DIR_ITEM_KEY) {
5375                         key.type = BTRFS_DIR_INDEX_KEY;
5376                         key.offset = index;
5377                 } else {
5378                         key.type = BTRFS_DIR_ITEM_KEY;
5379                         key.offset = btrfs_name_hash(namebuf, name_len);
5380                 }
5381
5382                 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5383                                          name_len, filetype);
5384                 /* find_dir_item may find index */
5385                 if (key.type == BTRFS_DIR_INDEX_KEY)
5386                         index = key.offset;
5387 next:
5388
5389                 if (tmp_err && repair) {
5390                         ret = repair_dir_item(root, di_key->objectid,
5391                                               location.objectid, index,
5392                                               imode_to_type(mode), namebuf,
5393                                               name_len, tmp_err);
5394                         if (ret != tmp_err) {
5395                                 need_research = 1;
5396                                 goto begin;
5397                         }
5398                 }
5399                 btrfs_release_path(path);
5400                 print_dir_item_err(root, di_key, location.objectid, index,
5401                                    namebuf, name_len, filetype, tmp_err);
5402                 err |= tmp_err;
5403                 len = sizeof(*di) + name_len + data_len;
5404                 di = (struct btrfs_dir_item *)((char *)di + len);
5405                 cur += len;
5406
5407                 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5408                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5409                               root->objectid, di_key->objectid,
5410                               di_key->offset);
5411                         break;
5412                 }
5413         }
5414 out:
5415         /* research path */
5416         btrfs_release_path(path);
5417         ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5418         if (ret)
5419                 err |= ret > 0 ? -ENOENT : ret;
5420         return err;
5421 }
5422
5423 /*
5424  * Wrapper function of btrfs_punch_hole.
5425  *
5426  * Returns 0 means success.
5427  * Returns not 0 means error.
5428  */
5429 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5430                              u64 len)
5431 {
5432         struct btrfs_trans_handle *trans;
5433         int ret = 0;
5434
5435         trans = btrfs_start_transaction(root, 1);
5436         if (IS_ERR(trans))
5437                 return PTR_ERR(trans);
5438
5439         ret = btrfs_punch_hole(trans, root, ino, start, len);
5440         if (ret)
5441                 error("failed to add hole [%llu, %llu] in inode [%llu]",
5442                       start, len, ino);
5443         else
5444                 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5445                        ino);
5446
5447         btrfs_commit_transaction(trans, root);
5448         return ret;
5449 }
5450
5451 /*
5452  * Check file extent datasum/hole, update the size of the file extents,
5453  * check and update the last offset of the file extent.
5454  *
5455  * @root:       the root of fs/file tree.
5456  * @fkey:       the key of the file extent.
5457  * @nodatasum:  INODE_NODATASUM feature.
5458  * @size:       the sum of all EXTENT_DATA items size for this inode.
5459  * @end:        the offset of the last extent.
5460  *
5461  * Return 0 if no error occurred.
5462  */
5463 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5464                              struct extent_buffer *node, int slot,
5465                              unsigned int nodatasum, u64 *size, u64 *end)
5466 {
5467         struct btrfs_file_extent_item *fi;
5468         u64 disk_bytenr;
5469         u64 disk_num_bytes;
5470         u64 extent_num_bytes;
5471         u64 extent_offset;
5472         u64 csum_found;         /* In byte size, sectorsize aligned */
5473         u64 search_start;       /* Logical range start we search for csum */
5474         u64 search_len;         /* Logical range len we search for csum */
5475         unsigned int extent_type;
5476         unsigned int is_hole;
5477         int compressed = 0;
5478         int ret;
5479         int err = 0;
5480
5481         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5482
5483         /* Check inline extent */
5484         extent_type = btrfs_file_extent_type(node, fi);
5485         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5486                 struct btrfs_item *e = btrfs_item_nr(slot);
5487                 u32 item_inline_len;
5488
5489                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5490                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5491                 compressed = btrfs_file_extent_compression(node, fi);
5492                 if (extent_num_bytes == 0) {
5493                         error(
5494                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5495                                 root->objectid, fkey->objectid, fkey->offset);
5496                         err |= FILE_EXTENT_ERROR;
5497                 }
5498                 if (!compressed && extent_num_bytes != item_inline_len) {
5499                         error(
5500                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5501                                 root->objectid, fkey->objectid, fkey->offset,
5502                                 extent_num_bytes, item_inline_len);
5503                         err |= FILE_EXTENT_ERROR;
5504                 }
5505                 *end += extent_num_bytes;
5506                 *size += extent_num_bytes;
5507                 return err;
5508         }
5509
5510         /* Check extent type */
5511         if (extent_type != BTRFS_FILE_EXTENT_REG &&
5512                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5513                 err |= FILE_EXTENT_ERROR;
5514                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5515                       root->objectid, fkey->objectid, fkey->offset);
5516                 return err;
5517         }
5518
5519         /* Check REG_EXTENT/PREALLOC_EXTENT */
5520         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5521         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5522         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5523         extent_offset = btrfs_file_extent_offset(node, fi);
5524         compressed = btrfs_file_extent_compression(node, fi);
5525         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5526
5527         /*
5528          * Check EXTENT_DATA csum
5529          *
5530          * For plain (uncompressed) extent, we should only check the range
5531          * we're referring to, as it's possible that part of prealloc extent
5532          * has been written, and has csum:
5533          *
5534          * |<--- Original large preallocated extent A ---->|
5535          * |<- Prealloc File Extent ->|<- Regular Extent ->|
5536          *      No csum                         Has csum
5537          *
5538          * For compressed extent, we should check the whole range.
5539          */
5540         if (!compressed) {
5541                 search_start = disk_bytenr + extent_offset;
5542                 search_len = extent_num_bytes;
5543         } else {
5544                 search_start = disk_bytenr;
5545                 search_len = disk_num_bytes;
5546         }
5547         ret = count_csum_range(root, search_start, search_len, &csum_found);
5548         if (csum_found > 0 && nodatasum) {
5549                 err |= ODD_CSUM_ITEM;
5550                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5551                       root->objectid, fkey->objectid, fkey->offset);
5552         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5553                    !is_hole && (ret < 0 || csum_found < search_len)) {
5554                 err |= CSUM_ITEM_MISSING;
5555                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5556                       root->objectid, fkey->objectid, fkey->offset,
5557                       csum_found, search_len);
5558         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5559                 err |= ODD_CSUM_ITEM;
5560                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5561                       root->objectid, fkey->objectid, fkey->offset, csum_found);
5562         }
5563
5564         /* Check EXTENT_DATA hole */
5565         if (!no_holes && *end != fkey->offset) {
5566                 if (repair)
5567                         ret = punch_extent_hole(root, fkey->objectid,
5568                                                 *end, fkey->offset - *end);
5569                 if (!repair || ret) {
5570                         err |= FILE_EXTENT_ERROR;
5571                         error(
5572 "root %llu EXTENT_DATA[%llu %llu] gap exists, expected: EXTENT_DATA[%llu %llu]",
5573                                 root->objectid, fkey->objectid, fkey->offset,
5574                                 fkey->objectid, *end);
5575                 }
5576         }
5577
5578         *end += extent_num_bytes;
5579         if (!is_hole)
5580                 *size += extent_num_bytes;
5581
5582         return err;
5583 }
5584
5585 /*
5586  * Set inode item nbytes to @nbytes
5587  *
5588  * Returns  0     on success
5589  * Returns  != 0  on error
5590  */
5591 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5592                                       struct btrfs_path *path,
5593                                       u64 ino, u64 nbytes)
5594 {
5595         struct btrfs_trans_handle *trans;
5596         struct btrfs_inode_item *ii;
5597         struct btrfs_key key;
5598         struct btrfs_key research_key;
5599         int err = 0;
5600         int ret;
5601
5602         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5603
5604         key.objectid = ino;
5605         key.type = BTRFS_INODE_ITEM_KEY;
5606         key.offset = 0;
5607
5608         trans = btrfs_start_transaction(root, 1);
5609         if (IS_ERR(trans)) {
5610                 ret = PTR_ERR(trans);
5611                 err |= ret;
5612                 goto out;
5613         }
5614
5615         btrfs_release_path(path);
5616         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5617         if (ret > 0)
5618                 ret = -ENOENT;
5619         if (ret) {
5620                 err |= ret;
5621                 goto fail;
5622         }
5623
5624         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5625                             struct btrfs_inode_item);
5626         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5627         btrfs_mark_buffer_dirty(path->nodes[0]);
5628 fail:
5629         btrfs_commit_transaction(trans, root);
5630 out:
5631         if (ret)
5632                 error("failed to set nbytes in inode %llu root %llu",
5633                       ino, root->root_key.objectid);
5634         else
5635                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5636                        root->root_key.objectid, nbytes);
5637
5638         /* research path */
5639         btrfs_release_path(path);
5640         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5641         err |= ret;
5642
5643         return err;
5644 }
5645
5646 /*
5647  * Set directory inode isize to @isize.
5648  *
5649  * Returns 0     on success.
5650  * Returns != 0  on error.
5651  */
5652 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5653                                    struct btrfs_path *path,
5654                                    u64 ino, u64 isize)
5655 {
5656         struct btrfs_trans_handle *trans;
5657         struct btrfs_inode_item *ii;
5658         struct btrfs_key key;
5659         struct btrfs_key research_key;
5660         int ret;
5661         int err = 0;
5662
5663         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5664
5665         key.objectid = ino;
5666         key.type = BTRFS_INODE_ITEM_KEY;
5667         key.offset = 0;
5668
5669         trans = btrfs_start_transaction(root, 1);
5670         if (IS_ERR(trans)) {
5671                 ret = PTR_ERR(trans);
5672                 err |= ret;
5673                 goto out;
5674         }
5675
5676         btrfs_release_path(path);
5677         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5678         if (ret > 0)
5679                 ret = -ENOENT;
5680         if (ret) {
5681                 err |= ret;
5682                 goto fail;
5683         }
5684
5685         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5686                             struct btrfs_inode_item);
5687         btrfs_set_inode_size(path->nodes[0], ii, isize);
5688         btrfs_mark_buffer_dirty(path->nodes[0]);
5689 fail:
5690         btrfs_commit_transaction(trans, root);
5691 out:
5692         if (ret)
5693                 error("failed to set isize in inode %llu root %llu",
5694                       ino, root->root_key.objectid);
5695         else
5696                 printf("Set isize in inode %llu root %llu to %llu\n",
5697                        ino, root->root_key.objectid, isize);
5698
5699         btrfs_release_path(path);
5700         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5701         err |= ret;
5702
5703         return err;
5704 }
5705
5706 /*
5707  * Wrapper function for btrfs_add_orphan_item().
5708  *
5709  * Returns 0     on success.
5710  * Returns != 0  on error.
5711  */
5712 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5713                                            struct btrfs_path *path, u64 ino)
5714 {
5715         struct btrfs_trans_handle *trans;
5716         struct btrfs_key research_key;
5717         int ret;
5718         int err = 0;
5719
5720         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5721
5722         trans = btrfs_start_transaction(root, 1);
5723         if (IS_ERR(trans)) {
5724                 ret = PTR_ERR(trans);
5725                 err |= ret;
5726                 goto out;
5727         }
5728
5729         btrfs_release_path(path);
5730         ret = btrfs_add_orphan_item(trans, root, path, ino);
5731         err |= ret;
5732         btrfs_commit_transaction(trans, root);
5733 out:
5734         if (ret)
5735                 error("failed to add inode %llu as orphan item root %llu",
5736                       ino, root->root_key.objectid);
5737         else
5738                 printf("Added inode %llu as orphan item root %llu\n",
5739                        ino, root->root_key.objectid);
5740
5741         btrfs_release_path(path);
5742         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5743         err |= ret;
5744
5745         return err;
5746 }
5747
5748 /* Set inode_item nlink to @ref_count.
5749  * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
5750  *
5751  * Returns 0 on success
5752  */
5753 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
5754                                       struct btrfs_path *path, u64 ino,
5755                                       const char *name, u32 namelen,
5756                                       u64 ref_count, u8 filetype, u64 *nlink)
5757 {
5758         struct btrfs_trans_handle *trans;
5759         struct btrfs_inode_item *ii;
5760         struct btrfs_key key;
5761         struct btrfs_key old_key;
5762         char namebuf[BTRFS_NAME_LEN] = {0};
5763         int name_len;
5764         int ret;
5765         int ret2;
5766
5767         /* save the key */
5768         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
5769
5770         if (name && namelen) {
5771                 ASSERT(namelen <= BTRFS_NAME_LEN);
5772                 memcpy(namebuf, name, namelen);
5773                 name_len = namelen;
5774         } else {
5775                 sprintf(namebuf, "%llu", ino);
5776                 name_len = count_digits(ino);
5777                 printf("Can't find file name for inode %llu, use %s instead\n",
5778                        ino, namebuf);
5779         }
5780
5781         trans = btrfs_start_transaction(root, 1);
5782         if (IS_ERR(trans)) {
5783                 ret = PTR_ERR(trans);
5784                 goto out;
5785         }
5786
5787         btrfs_release_path(path);
5788         /* if refs is 0, put it into lostfound */
5789         if (ref_count == 0) {
5790                 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
5791                                               name_len, filetype, &ref_count);
5792                 if (ret)
5793                         goto fail;
5794         }
5795
5796         /* reset inode_item's nlink to ref_count */
5797         key.objectid = ino;
5798         key.type = BTRFS_INODE_ITEM_KEY;
5799         key.offset = 0;
5800
5801         btrfs_release_path(path);
5802         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5803         if (ret > 0)
5804                 ret = -ENOENT;
5805         if (ret)
5806                 goto fail;
5807
5808         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5809                             struct btrfs_inode_item);
5810         btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
5811         btrfs_mark_buffer_dirty(path->nodes[0]);
5812
5813         if (nlink)
5814                 *nlink = ref_count;
5815 fail:
5816         btrfs_commit_transaction(trans, root);
5817 out:
5818         if (ret)
5819                 error(
5820         "fail to repair nlink of inode %llu root %llu name %s filetype %u",
5821                        root->objectid, ino, namebuf, filetype);
5822         else
5823                 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
5824                        root->objectid, ino, namebuf, filetype);
5825
5826         /* research */
5827         btrfs_release_path(path);
5828         ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
5829         if (ret2 < 0)
5830                 return ret |= ret2;
5831         return ret;
5832 }
5833
5834 /*
5835  * Check INODE_ITEM and related ITEMs (the same inode number)
5836  * 1. check link count
5837  * 2. check inode ref/extref
5838  * 3. check dir item/index
5839  *
5840  * @ext_ref:    the EXTENDED_IREF feature
5841  *
5842  * Return 0 if no error occurred.
5843  * Return >0 for error or hit the traversal is done(by error bitmap)
5844  */
5845 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5846                             unsigned int ext_ref)
5847 {
5848         struct extent_buffer *node;
5849         struct btrfs_inode_item *ii;
5850         struct btrfs_key key;
5851         struct btrfs_key last_key;
5852         u64 inode_id;
5853         u32 mode;
5854         u64 nlink;
5855         u64 nbytes;
5856         u64 isize;
5857         u64 size = 0;
5858         u64 refs = 0;
5859         u64 extent_end = 0;
5860         u64 extent_size = 0;
5861         unsigned int dir;
5862         unsigned int nodatasum;
5863         int slot;
5864         int ret;
5865         int err = 0;
5866         char namebuf[BTRFS_NAME_LEN] = {0};
5867         u32 name_len = 0;
5868
5869         node = path->nodes[0];
5870         slot = path->slots[0];
5871
5872         btrfs_item_key_to_cpu(node, &key, slot);
5873         inode_id = key.objectid;
5874
5875         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5876                 ret = btrfs_next_item(root, path);
5877                 if (ret > 0)
5878                         err |= LAST_ITEM;
5879                 return err;
5880         }
5881
5882         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5883         isize = btrfs_inode_size(node, ii);
5884         nbytes = btrfs_inode_nbytes(node, ii);
5885         mode = btrfs_inode_mode(node, ii);
5886         dir = imode_to_type(mode) == BTRFS_FT_DIR;
5887         nlink = btrfs_inode_nlink(node, ii);
5888         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5889
5890         while (1) {
5891                 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
5892                 ret = btrfs_next_item(root, path);
5893                 if (ret < 0) {
5894                         /* out will fill 'err' rusing current statistics */
5895                         goto out;
5896                 } else if (ret > 0) {
5897                         err |= LAST_ITEM;
5898                         goto out;
5899                 }
5900
5901                 node = path->nodes[0];
5902                 slot = path->slots[0];
5903                 btrfs_item_key_to_cpu(node, &key, slot);
5904                 if (key.objectid != inode_id)
5905                         goto out;
5906
5907                 switch (key.type) {
5908                 case BTRFS_INODE_REF_KEY:
5909                         ret = check_inode_ref(root, &key, path, namebuf,
5910                                               &name_len, &refs, mode);
5911                         err |= ret;
5912                         break;
5913                 case BTRFS_INODE_EXTREF_KEY:
5914                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5915                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
5916                                         root->objectid, key.objectid,
5917                                         key.offset);
5918                         ret = check_inode_extref(root, &key, node, slot, &refs,
5919                                                  mode);
5920                         err |= ret;
5921                         break;
5922                 case BTRFS_DIR_ITEM_KEY:
5923                 case BTRFS_DIR_INDEX_KEY:
5924                         if (!dir) {
5925                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5926                                         root->objectid, inode_id,
5927                                         imode_to_type(mode), key.objectid,
5928                                         key.offset);
5929                         }
5930                         ret = check_dir_item(root, &key, path, &size, ext_ref);
5931                         err |= ret;
5932                         break;
5933                 case BTRFS_EXTENT_DATA_KEY:
5934                         if (dir) {
5935                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5936                                         root->objectid, inode_id, key.objectid,
5937                                         key.offset);
5938                         }
5939                         ret = check_file_extent(root, &key, node, slot,
5940                                                 nodatasum, &extent_size,
5941                                                 &extent_end);
5942                         err |= ret;
5943                         break;
5944                 case BTRFS_XATTR_ITEM_KEY:
5945                         break;
5946                 default:
5947                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5948                               key.objectid, key.type, key.offset);
5949                 }
5950         }
5951
5952 out:
5953         if (err & LAST_ITEM) {
5954                 btrfs_release_path(path);
5955                 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
5956                 if (ret)
5957                         return err;
5958         }
5959
5960         /* verify INODE_ITEM nlink/isize/nbytes */
5961         if (dir) {
5962                 if (repair && (err & DIR_COUNT_AGAIN)) {
5963                         err &= ~DIR_COUNT_AGAIN;
5964                         count_dir_isize(root, inode_id, &size);
5965                 }
5966
5967                 if ((nlink != 1 || refs != 1) && repair) {
5968                         ret = repair_inode_nlinks_lowmem(root, path, inode_id,
5969                                 namebuf, name_len, refs, imode_to_type(mode),
5970                                 &nlink);
5971                 }
5972
5973                 if (nlink != 1) {
5974                         err |= LINK_COUNT_ERROR;
5975                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5976                               root->objectid, inode_id, nlink);
5977                 }
5978
5979                 /*
5980                  * Just a warning, as dir inode nbytes is just an
5981                  * instructive value.
5982                  */
5983                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5984                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5985                                 root->objectid, inode_id,
5986                                 root->fs_info->nodesize);
5987                 }
5988
5989                 if (isize != size) {
5990                         if (repair)
5991                                 ret = repair_dir_isize_lowmem(root, path,
5992                                                               inode_id, size);
5993                         if (!repair || ret) {
5994                                 err |= ISIZE_ERROR;
5995                                 error(
5996                 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5997                                       root->objectid, inode_id, isize, size);
5998                         }
5999                 }
6000         } else {
6001                 if (nlink != refs) {
6002                         if (repair)
6003                                 ret = repair_inode_nlinks_lowmem(root, path,
6004                                          inode_id, namebuf, name_len, refs,
6005                                          imode_to_type(mode), &nlink);
6006                         if (!repair || ret) {
6007                                 err |= LINK_COUNT_ERROR;
6008                                 error(
6009                 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
6010                                       root->objectid, inode_id, nlink, refs);
6011                         }
6012                 } else if (!nlink) {
6013                         if (repair)
6014                                 ret = repair_inode_orphan_item_lowmem(root,
6015                                                               path, inode_id);
6016                         if (!repair || ret) {
6017                                 err |= ORPHAN_ITEM;
6018                                 error("root %llu INODE[%llu] is orphan item",
6019                                       root->objectid, inode_id);
6020                         }
6021                 }
6022
6023                 if (!nbytes && !no_holes && extent_end < isize) {
6024                         if (repair)
6025                                 ret = punch_extent_hole(root, inode_id,
6026                                                 extent_end, isize - extent_end);
6027                         if (!repair || ret) {
6028                                 err |= NBYTES_ERROR;
6029                                 error(
6030         "root %llu INODE[%llu] size %llu should have a file extent hole",
6031                                       root->objectid, inode_id, isize);
6032                         }
6033                 }
6034
6035                 if (nbytes != extent_size) {
6036                         if (repair)
6037                                 ret = repair_inode_nbytes_lowmem(root, path,
6038                                                          inode_id, extent_size);
6039                         if (!repair || ret) {
6040                                 err |= NBYTES_ERROR;
6041                                 error(
6042         "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
6043                                       root->objectid, inode_id, nbytes,
6044                                       extent_size);
6045                         }
6046                 }
6047         }
6048
6049         if (err & LAST_ITEM)
6050                 btrfs_next_item(root, path);
6051         return err;
6052 }
6053
6054 /*
6055  * Insert the missing inode item and inode ref.
6056  *
6057  * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
6058  * Root dir should be handled specially because root dir is the root of fs.
6059  *
6060  * returns err (>0 or 0) after repair
6061  */
6062 static int repair_fs_first_inode(struct btrfs_root *root, int err)
6063 {
6064         struct btrfs_trans_handle *trans;
6065         struct btrfs_key key;
6066         struct btrfs_path path;
6067         int filetype = BTRFS_FT_DIR;
6068         int ret = 0;
6069
6070         btrfs_init_path(&path);
6071
6072         if (err & INODE_REF_MISSING) {
6073                 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6074                 key.type = BTRFS_INODE_REF_KEY;
6075                 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6076
6077                 trans = btrfs_start_transaction(root, 1);
6078                 if (IS_ERR(trans)) {
6079                         ret = PTR_ERR(trans);
6080                         goto out;
6081                 }
6082
6083                 btrfs_release_path(&path);
6084                 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
6085                 if (ret)
6086                         goto trans_fail;
6087
6088                 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
6089                                              BTRFS_FIRST_FREE_OBJECTID,
6090                                              BTRFS_FIRST_FREE_OBJECTID, 0);
6091                 if (ret)
6092                         goto trans_fail;
6093
6094                 printf("Add INODE_REF[%llu %llu] name %s\n",
6095                        BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
6096                        "..");
6097                 err &= ~INODE_REF_MISSING;
6098 trans_fail:
6099                 if (ret)
6100                         error("fail to insert first inode's ref");
6101                 btrfs_commit_transaction(trans, root);
6102         }
6103
6104         if (err & INODE_ITEM_MISSING) {
6105                 ret = repair_inode_item_missing(root,
6106                                         BTRFS_FIRST_FREE_OBJECTID, filetype);
6107                 if (ret)
6108                         goto out;
6109                 err &= ~INODE_ITEM_MISSING;
6110         }
6111 out:
6112         if (ret)
6113                 error("fail to repair first inode");
6114         btrfs_release_path(&path);
6115         return err;
6116 }
6117
6118 /*
6119  * check first root dir's inode_item and inode_ref
6120  *
6121  * returns 0 means no error
6122  * returns >0 means error
6123  * returns <0 means fatal error
6124  */
6125 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6126 {
6127         struct btrfs_path path;
6128         struct btrfs_key key;
6129         struct btrfs_inode_item *ii;
6130         u64 index;
6131         u32 mode;
6132         int err = 0;
6133         int ret;
6134
6135         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6136         key.type = BTRFS_INODE_ITEM_KEY;
6137         key.offset = 0;
6138
6139         /* For root being dropped, we don't need to check first inode */
6140         if (btrfs_root_refs(&root->root_item) == 0 &&
6141             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6142             BTRFS_FIRST_FREE_OBJECTID)
6143                 return 0;
6144
6145         btrfs_init_path(&path);
6146         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6147         if (ret < 0)
6148                 goto out;
6149         if (ret > 0) {
6150                 ret = 0;
6151                 err |= INODE_ITEM_MISSING;
6152         } else {
6153                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6154                                     struct btrfs_inode_item);
6155                 mode = btrfs_inode_mode(path.nodes[0], ii);
6156                 if (imode_to_type(mode) != BTRFS_FT_DIR)
6157                         err |= INODE_ITEM_MISMATCH;
6158         }
6159
6160         /* lookup first inode ref */
6161         key.offset = BTRFS_FIRST_FREE_OBJECTID;
6162         key.type = BTRFS_INODE_REF_KEY;
6163         /* special index value */
6164         index = 0;
6165
6166         ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6167         if (ret < 0)
6168                 goto out;
6169         err |= ret;
6170
6171 out:
6172         btrfs_release_path(&path);
6173
6174         if (err && repair)
6175                 err = repair_fs_first_inode(root, err);
6176
6177         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6178                 error("root dir INODE_ITEM is %s",
6179                       err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6180         if (err & INODE_REF_MISSING)
6181                 error("root dir INODE_REF is missing");
6182
6183         return ret < 0 ? ret : err;
6184 }
6185
6186 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6187                                                 u64 parent, u64 root)
6188 {
6189         struct rb_node *node;
6190         struct tree_backref *back = NULL;
6191         struct tree_backref match = {
6192                 .node = {
6193                         .is_data = 0,
6194                 },
6195         };
6196
6197         if (parent) {
6198                 match.parent = parent;
6199                 match.node.full_backref = 1;
6200         } else {
6201                 match.root = root;
6202         }
6203
6204         node = rb_search(&rec->backref_tree, &match.node.node,
6205                          (rb_compare_keys)compare_extent_backref, NULL);
6206         if (node)
6207                 back = to_tree_backref(rb_node_to_extent_backref(node));
6208
6209         return back;
6210 }
6211
6212 static struct data_backref *find_data_backref(struct extent_record *rec,
6213                                                 u64 parent, u64 root,
6214                                                 u64 owner, u64 offset,
6215                                                 int found_ref,
6216                                                 u64 disk_bytenr, u64 bytes)
6217 {
6218         struct rb_node *node;
6219         struct data_backref *back = NULL;
6220         struct data_backref match = {
6221                 .node = {
6222                         .is_data = 1,
6223                 },
6224                 .owner = owner,
6225                 .offset = offset,
6226                 .bytes = bytes,
6227                 .found_ref = found_ref,
6228                 .disk_bytenr = disk_bytenr,
6229         };
6230
6231         if (parent) {
6232                 match.parent = parent;
6233                 match.node.full_backref = 1;
6234         } else {
6235                 match.root = root;
6236         }
6237
6238         node = rb_search(&rec->backref_tree, &match.node.node,
6239                          (rb_compare_keys)compare_extent_backref, NULL);
6240         if (node)
6241                 back = to_data_backref(rb_node_to_extent_backref(node));
6242
6243         return back;
6244 }
6245 /*
6246  * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6247  * blocks and integrity of fs tree items.
6248  *
6249  * @root:         the root of the tree to be checked.
6250  * @ext_ref       feature EXTENDED_IREF is enable or not.
6251  * @account       if NOT 0 means check the tree (including tree)'s treeblocks.
6252  *                otherwise means check fs tree(s) items relationship and
6253  *                @root MUST be a fs tree root.
6254  * Returns 0      represents OK.
6255  * Returns not 0  represents error.
6256  */
6257 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6258                             struct btrfs_root *root, unsigned int ext_ref,
6259                             int check_all)
6260
6261 {
6262         struct btrfs_path path;
6263         struct node_refs nrefs;
6264         struct btrfs_root_item *root_item = &root->root_item;
6265         int ret;
6266         int level;
6267         int err = 0;
6268
6269         memset(&nrefs, 0, sizeof(nrefs));
6270         if (!check_all) {
6271                 /*
6272                  * We need to manually check the first inode item (256)
6273                  * As the following traversal function will only start from
6274                  * the first inode item in the leaf, if inode item (256) is
6275                  * missing we will skip it forever.
6276                  */
6277                 ret = check_fs_first_inode(root, ext_ref);
6278                 if (ret < 0)
6279                         return ret;
6280         }
6281
6282
6283         level = btrfs_header_level(root->node);
6284         btrfs_init_path(&path);
6285
6286         if (btrfs_root_refs(root_item) > 0 ||
6287             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6288                 path.nodes[level] = root->node;
6289                 path.slots[level] = 0;
6290                 extent_buffer_get(root->node);
6291         } else {
6292                 struct btrfs_key key;
6293
6294                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6295                 level = root_item->drop_level;
6296                 path.lowest_level = level;
6297                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6298                 if (ret < 0)
6299                         goto out;
6300                 ret = 0;
6301         }
6302
6303         while (1) {
6304                 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6305                                         ext_ref, check_all);
6306
6307                 err |= !!ret;
6308
6309                 /* if ret is negative, walk shall stop */
6310                 if (ret < 0) {
6311                         ret = err;
6312                         break;
6313                 }
6314
6315                 ret = walk_up_tree_v2(root, &path, &level);
6316                 if (ret != 0) {
6317                         /* Normal exit, reset ret to err */
6318                         ret = err;
6319                         break;
6320                 }
6321         }
6322
6323 out:
6324         btrfs_release_path(&path);
6325         return ret;
6326 }
6327
6328 /*
6329  * Iterate all items in the tree and call check_inode_item() to check.
6330  *
6331  * @root:       the root of the tree to be checked.
6332  * @ext_ref:    the EXTENDED_IREF feature
6333  *
6334  * Return 0 if no error found.
6335  * Return <0 for error.
6336  */
6337 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6338 {
6339         reset_cached_block_groups(root->fs_info);
6340         return check_btrfs_root(NULL, root, ext_ref, 0);
6341 }
6342
6343 /*
6344  * Find the relative ref for root_ref and root_backref.
6345  *
6346  * @root:       the root of the root tree.
6347  * @ref_key:    the key of the root ref.
6348  *
6349  * Return 0 if no error occurred.
6350  */
6351 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6352                           struct extent_buffer *node, int slot)
6353 {
6354         struct btrfs_path path;
6355         struct btrfs_key key;
6356         struct btrfs_root_ref *ref;
6357         struct btrfs_root_ref *backref;
6358         char ref_name[BTRFS_NAME_LEN] = {0};
6359         char backref_name[BTRFS_NAME_LEN] = {0};
6360         u64 ref_dirid;
6361         u64 ref_seq;
6362         u32 ref_namelen;
6363         u64 backref_dirid;
6364         u64 backref_seq;
6365         u32 backref_namelen;
6366         u32 len;
6367         int ret;
6368         int err = 0;
6369
6370         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6371         ref_dirid = btrfs_root_ref_dirid(node, ref);
6372         ref_seq = btrfs_root_ref_sequence(node, ref);
6373         ref_namelen = btrfs_root_ref_name_len(node, ref);
6374
6375         if (ref_namelen <= BTRFS_NAME_LEN) {
6376                 len = ref_namelen;
6377         } else {
6378                 len = BTRFS_NAME_LEN;
6379                 warning("%s[%llu %llu] ref_name too long",
6380                         ref_key->type == BTRFS_ROOT_REF_KEY ?
6381                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
6382                         ref_key->offset);
6383         }
6384         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6385
6386         /* Find relative root_ref */
6387         key.objectid = ref_key->offset;
6388         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6389         key.offset = ref_key->objectid;
6390
6391         btrfs_init_path(&path);
6392         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6393         if (ret) {
6394                 err |= ROOT_REF_MISSING;
6395                 error("%s[%llu %llu] couldn't find relative ref",
6396                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6397                       "ROOT_REF" : "ROOT_BACKREF",
6398                       ref_key->objectid, ref_key->offset);
6399                 goto out;
6400         }
6401
6402         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6403                                  struct btrfs_root_ref);
6404         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6405         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6406         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6407
6408         if (backref_namelen <= BTRFS_NAME_LEN) {
6409                 len = backref_namelen;
6410         } else {
6411                 len = BTRFS_NAME_LEN;
6412                 warning("%s[%llu %llu] ref_name too long",
6413                         key.type == BTRFS_ROOT_REF_KEY ?
6414                         "ROOT_REF" : "ROOT_BACKREF",
6415                         key.objectid, key.offset);
6416         }
6417         read_extent_buffer(path.nodes[0], backref_name,
6418                            (unsigned long)(backref + 1), len);
6419
6420         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6421             ref_namelen != backref_namelen ||
6422             strncmp(ref_name, backref_name, len)) {
6423                 err |= ROOT_REF_MISMATCH;
6424                 error("%s[%llu %llu] mismatch relative ref",
6425                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6426                       "ROOT_REF" : "ROOT_BACKREF",
6427                       ref_key->objectid, ref_key->offset);
6428         }
6429 out:
6430         btrfs_release_path(&path);
6431         return err;
6432 }
6433
6434 /*
6435  * Check all fs/file tree in low_memory mode.
6436  *
6437  * 1. for fs tree root item, call check_fs_root_v2()
6438  * 2. for fs tree root ref/backref, call check_root_ref()
6439  *
6440  * Return 0 if no error occurred.
6441  */
6442 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6443 {
6444         struct btrfs_root *tree_root = fs_info->tree_root;
6445         struct btrfs_root *cur_root = NULL;
6446         struct btrfs_path path;
6447         struct btrfs_key key;
6448         struct extent_buffer *node;
6449         unsigned int ext_ref;
6450         int slot;
6451         int ret;
6452         int err = 0;
6453
6454         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6455
6456         btrfs_init_path(&path);
6457         key.objectid = BTRFS_FS_TREE_OBJECTID;
6458         key.offset = 0;
6459         key.type = BTRFS_ROOT_ITEM_KEY;
6460
6461         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6462         if (ret < 0) {
6463                 err = ret;
6464                 goto out;
6465         } else if (ret > 0) {
6466                 err = -ENOENT;
6467                 goto out;
6468         }
6469
6470         while (1) {
6471                 node = path.nodes[0];
6472                 slot = path.slots[0];
6473                 btrfs_item_key_to_cpu(node, &key, slot);
6474                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6475                         goto out;
6476                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6477                     fs_root_objectid(key.objectid)) {
6478                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6479                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6480                                                                        &key);
6481                         } else {
6482                                 key.offset = (u64)-1;
6483                                 cur_root = btrfs_read_fs_root(fs_info, &key);
6484                         }
6485
6486                         if (IS_ERR(cur_root)) {
6487                                 error("Fail to read fs/subvol tree: %lld",
6488                                       key.objectid);
6489                                 err = -EIO;
6490                                 goto next;
6491                         }
6492
6493                         ret = check_fs_root_v2(cur_root, ext_ref);
6494                         err |= ret;
6495
6496                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6497                                 btrfs_free_fs_root(cur_root);
6498                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6499                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
6500                         ret = check_root_ref(tree_root, &key, node, slot);
6501                         err |= ret;
6502                 }
6503 next:
6504                 ret = btrfs_next_item(tree_root, &path);
6505                 if (ret > 0)
6506                         goto out;
6507                 if (ret < 0) {
6508                         err = ret;
6509                         goto out;
6510                 }
6511         }
6512
6513 out:
6514         btrfs_release_path(&path);
6515         return err;
6516 }
6517
6518 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6519                           struct cache_tree *root_cache)
6520 {
6521         int ret;
6522
6523         if (!ctx.progress_enabled)
6524                 fprintf(stderr, "checking fs roots\n");
6525         if (check_mode == CHECK_MODE_LOWMEM)
6526                 ret = check_fs_roots_v2(fs_info);
6527         else
6528                 ret = check_fs_roots(fs_info, root_cache);
6529
6530         return ret;
6531 }
6532
6533 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6534 {
6535         struct extent_backref *back, *tmp;
6536         struct tree_backref *tback;
6537         struct data_backref *dback;
6538         u64 found = 0;
6539         int err = 0;
6540
6541         rbtree_postorder_for_each_entry_safe(back, tmp,
6542                                              &rec->backref_tree, node) {
6543                 if (!back->found_extent_tree) {
6544                         err = 1;
6545                         if (!print_errs)
6546                                 goto out;
6547                         if (back->is_data) {
6548                                 dback = to_data_backref(back);
6549                                 fprintf(stderr, "Data backref %llu %s %llu"
6550                                         " owner %llu offset %llu num_refs %lu"
6551                                         " not found in extent tree\n",
6552                                         (unsigned long long)rec->start,
6553                                         back->full_backref ?
6554                                         "parent" : "root",
6555                                         back->full_backref ?
6556                                         (unsigned long long)dback->parent:
6557                                         (unsigned long long)dback->root,
6558                                         (unsigned long long)dback->owner,
6559                                         (unsigned long long)dback->offset,
6560                                         (unsigned long)dback->num_refs);
6561                         } else {
6562                                 tback = to_tree_backref(back);
6563                                 fprintf(stderr, "Tree backref %llu parent %llu"
6564                                         " root %llu not found in extent tree\n",
6565                                         (unsigned long long)rec->start,
6566                                         (unsigned long long)tback->parent,
6567                                         (unsigned long long)tback->root);
6568                         }
6569                 }
6570                 if (!back->is_data && !back->found_ref) {
6571                         err = 1;
6572                         if (!print_errs)
6573                                 goto out;
6574                         tback = to_tree_backref(back);
6575                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6576                                 (unsigned long long)rec->start,
6577                                 back->full_backref ? "parent" : "root",
6578                                 back->full_backref ?
6579                                 (unsigned long long)tback->parent :
6580                                 (unsigned long long)tback->root, back);
6581                 }
6582                 if (back->is_data) {
6583                         dback = to_data_backref(back);
6584                         if (dback->found_ref != dback->num_refs) {
6585                                 err = 1;
6586                                 if (!print_errs)
6587                                         goto out;
6588                                 fprintf(stderr, "Incorrect local backref count"
6589                                         " on %llu %s %llu owner %llu"
6590                                         " offset %llu found %u wanted %u back %p\n",
6591                                         (unsigned long long)rec->start,
6592                                         back->full_backref ?
6593                                         "parent" : "root",
6594                                         back->full_backref ?
6595                                         (unsigned long long)dback->parent:
6596                                         (unsigned long long)dback->root,
6597                                         (unsigned long long)dback->owner,
6598                                         (unsigned long long)dback->offset,
6599                                         dback->found_ref, dback->num_refs, back);
6600                         }
6601                         if (dback->disk_bytenr != rec->start) {
6602                                 err = 1;
6603                                 if (!print_errs)
6604                                         goto out;
6605                                 fprintf(stderr, "Backref disk bytenr does not"
6606                                         " match extent record, bytenr=%llu, "
6607                                         "ref bytenr=%llu\n",
6608                                         (unsigned long long)rec->start,
6609                                         (unsigned long long)dback->disk_bytenr);
6610                         }
6611
6612                         if (dback->bytes != rec->nr) {
6613                                 err = 1;
6614                                 if (!print_errs)
6615                                         goto out;
6616                                 fprintf(stderr, "Backref bytes do not match "
6617                                         "extent backref, bytenr=%llu, ref "
6618                                         "bytes=%llu, backref bytes=%llu\n",
6619                                         (unsigned long long)rec->start,
6620                                         (unsigned long long)rec->nr,
6621                                         (unsigned long long)dback->bytes);
6622                         }
6623                 }
6624                 if (!back->is_data) {
6625                         found += 1;
6626                 } else {
6627                         dback = to_data_backref(back);
6628                         found += dback->found_ref;
6629                 }
6630         }
6631         if (found != rec->refs) {
6632                 err = 1;
6633                 if (!print_errs)
6634                         goto out;
6635                 fprintf(stderr, "Incorrect global backref count "
6636                         "on %llu found %llu wanted %llu\n",
6637                         (unsigned long long)rec->start,
6638                         (unsigned long long)found,
6639                         (unsigned long long)rec->refs);
6640         }
6641 out:
6642         return err;
6643 }
6644
/*
 * Release callback handed to rb_free_nodes(): free the extent_backref that
 * rb_node_to_extent_backref() yields for @node.
 */
static void __free_one_backref(struct rb_node *node)
{
        free(rb_node_to_extent_backref(node));
}
6651
/*
 * Release the backrefs of @rec: rec->backref_tree is handed to
 * rb_free_nodes() with __free_one_backref() as the per-node callback.
 * @rec itself is not freed here.
 */
static void free_all_extent_backrefs(struct extent_record *rec)
{
        rb_free_nodes(&rec->backref_tree, __free_one_backref);
}
6656
6657 static void free_extent_record_cache(struct cache_tree *extent_cache)
6658 {
6659         struct cache_extent *cache;
6660         struct extent_record *rec;
6661
6662         while (1) {
6663                 cache = first_cache_extent(extent_cache);
6664                 if (!cache)
6665                         break;
6666                 rec = container_of(cache, struct extent_record, cache);
6667                 remove_cache_extent(extent_cache, cache);
6668                 free_all_extent_backrefs(rec);
6669                 free(rec);
6670         }
6671 }
6672
6673 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6674                                  struct extent_record *rec)
6675 {
6676         if (rec->content_checked && rec->owner_ref_checked &&
6677             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6678             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6679             !rec->bad_full_backref && !rec->crossing_stripes &&
6680             !rec->wrong_chunk_type) {
6681                 remove_cache_extent(extent_cache, &rec->cache);
6682                 free_all_extent_backrefs(rec);
6683                 list_del_init(&rec->list);
6684                 free(rec);
6685         }
6686         return 0;
6687 }
6688
6689 static int check_owner_ref(struct btrfs_root *root,
6690                             struct extent_record *rec,
6691                             struct extent_buffer *buf)
6692 {
6693         struct extent_backref *node, *tmp;
6694         struct tree_backref *back;
6695         struct btrfs_root *ref_root;
6696         struct btrfs_key key;
6697         struct btrfs_path path;
6698         struct extent_buffer *parent;
6699         int level;
6700         int found = 0;
6701         int ret;
6702
6703         rbtree_postorder_for_each_entry_safe(node, tmp,
6704                                              &rec->backref_tree, node) {
6705                 if (node->is_data)
6706                         continue;
6707                 if (!node->found_ref)
6708                         continue;
6709                 if (node->full_backref)
6710                         continue;
6711                 back = to_tree_backref(node);
6712                 if (btrfs_header_owner(buf) == back->root)
6713                         return 0;
6714         }
6715         BUG_ON(rec->is_root);
6716
6717         /* try to find the block by search corresponding fs tree */
6718         key.objectid = btrfs_header_owner(buf);
6719         key.type = BTRFS_ROOT_ITEM_KEY;
6720         key.offset = (u64)-1;
6721
6722         ref_root = btrfs_read_fs_root(root->fs_info, &key);
6723         if (IS_ERR(ref_root))
6724                 return 1;
6725
6726         level = btrfs_header_level(buf);
6727         if (level == 0)
6728                 btrfs_item_key_to_cpu(buf, &key, 0);
6729         else
6730                 btrfs_node_key_to_cpu(buf, &key, 0);
6731
6732         btrfs_init_path(&path);
6733         path.lowest_level = level + 1;
6734         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6735         if (ret < 0)
6736                 return 0;
6737
6738         parent = path.nodes[level + 1];
6739         if (parent && buf->start == btrfs_node_blockptr(parent,
6740                                                         path.slots[level + 1]))
6741                 found = 1;
6742
6743         btrfs_release_path(&path);
6744         return found ? 0 : 1;
6745 }
6746
6747 static int is_extent_tree_record(struct extent_record *rec)
6748 {
6749         struct extent_backref *node, *tmp;
6750         struct tree_backref *back;
6751         int is_extent = 0;
6752
6753         rbtree_postorder_for_each_entry_safe(node, tmp,
6754                                              &rec->backref_tree, node) {
6755                 if (node->is_data)
6756                         return 0;
6757                 back = to_tree_backref(node);
6758                 if (node->full_backref)
6759                         return 0;
6760                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
6761                         is_extent = 1;
6762         }
6763         return is_extent;
6764 }
6765
6766
6767 static int record_bad_block_io(struct btrfs_fs_info *info,
6768                                struct cache_tree *extent_cache,
6769                                u64 start, u64 len)
6770 {
6771         struct extent_record *rec;
6772         struct cache_extent *cache;
6773         struct btrfs_key key;
6774
6775         cache = lookup_cache_extent(extent_cache, start, len);
6776         if (!cache)
6777                 return 0;
6778
6779         rec = container_of(cache, struct extent_record, cache);
6780         if (!is_extent_tree_record(rec))
6781                 return 0;
6782
6783         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6784         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
6785 }
6786
6787 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6788                        struct extent_buffer *buf, int slot)
6789 {
6790         if (btrfs_header_level(buf)) {
6791                 struct btrfs_key_ptr ptr1, ptr2;
6792
6793                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6794                                    sizeof(struct btrfs_key_ptr));
6795                 read_extent_buffer(buf, &ptr2,
6796                                    btrfs_node_key_ptr_offset(slot + 1),
6797                                    sizeof(struct btrfs_key_ptr));
6798                 write_extent_buffer(buf, &ptr1,
6799                                     btrfs_node_key_ptr_offset(slot + 1),
6800                                     sizeof(struct btrfs_key_ptr));
6801                 write_extent_buffer(buf, &ptr2,
6802                                     btrfs_node_key_ptr_offset(slot),
6803                                     sizeof(struct btrfs_key_ptr));
6804                 if (slot == 0) {
6805                         struct btrfs_disk_key key;
6806                         btrfs_node_key(buf, &key, 0);
6807                         btrfs_fixup_low_keys(root, path, &key,
6808                                              btrfs_header_level(buf) + 1);
6809                 }
6810         } else {
6811                 struct btrfs_item *item1, *item2;
6812                 struct btrfs_key k1, k2;
6813                 char *item1_data, *item2_data;
6814                 u32 item1_offset, item2_offset, item1_size, item2_size;
6815
6816                 item1 = btrfs_item_nr(slot);
6817                 item2 = btrfs_item_nr(slot + 1);
6818                 btrfs_item_key_to_cpu(buf, &k1, slot);
6819                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6820                 item1_offset = btrfs_item_offset(buf, item1);
6821                 item2_offset = btrfs_item_offset(buf, item2);
6822                 item1_size = btrfs_item_size(buf, item1);
6823                 item2_size = btrfs_item_size(buf, item2);
6824
6825                 item1_data = malloc(item1_size);
6826                 if (!item1_data)
6827                         return -ENOMEM;
6828                 item2_data = malloc(item2_size);
6829                 if (!item2_data) {
6830                         free(item1_data);
6831                         return -ENOMEM;
6832                 }
6833
6834                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6835                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
6836
6837                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6838                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6839                 free(item1_data);
6840                 free(item2_data);
6841
6842                 btrfs_set_item_offset(buf, item1, item2_offset);
6843                 btrfs_set_item_offset(buf, item2, item1_offset);
6844                 btrfs_set_item_size(buf, item1, item2_size);
6845                 btrfs_set_item_size(buf, item2, item1_size);
6846
6847                 path->slots[0] = slot;
6848                 btrfs_set_item_key_unsafe(root, path, &k2);
6849                 path->slots[0] = slot + 1;
6850                 btrfs_set_item_key_unsafe(root, path, &k1);
6851         }
6852         return 0;
6853 }
6854
6855 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6856 {
6857         struct extent_buffer *buf;
6858         struct btrfs_key k1, k2;
6859         int i;
6860         int level = path->lowest_level;
6861         int ret = -EIO;
6862
6863         buf = path->nodes[level];
6864         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6865                 if (level) {
6866                         btrfs_node_key_to_cpu(buf, &k1, i);
6867                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
6868                 } else {
6869                         btrfs_item_key_to_cpu(buf, &k1, i);
6870                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
6871                 }
6872                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6873                         continue;
6874                 ret = swap_values(root, path, buf, i);
6875                 if (ret)
6876                         break;
6877                 btrfs_mark_buffer_dirty(buf);
6878                 i = 0;
6879         }
6880         return ret;
6881 }
6882
6883 static int delete_bogus_item(struct btrfs_root *root,
6884                              struct btrfs_path *path,
6885                              struct extent_buffer *buf, int slot)
6886 {
6887         struct btrfs_key key;
6888         int nritems = btrfs_header_nritems(buf);
6889
6890         btrfs_item_key_to_cpu(buf, &key, slot);
6891
6892         /* These are all the keys we can deal with missing. */
6893         if (key.type != BTRFS_DIR_INDEX_KEY &&
6894             key.type != BTRFS_EXTENT_ITEM_KEY &&
6895             key.type != BTRFS_METADATA_ITEM_KEY &&
6896             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6897             key.type != BTRFS_EXTENT_DATA_REF_KEY)
6898                 return -1;
6899
6900         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6901                (unsigned long long)key.objectid, key.type,
6902                (unsigned long long)key.offset, slot, buf->start);
6903         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6904                               btrfs_item_nr_offset(slot + 1),
6905                               sizeof(struct btrfs_item) *
6906                               (nritems - slot - 1));
6907         btrfs_set_header_nritems(buf, nritems - 1);
6908         if (slot == 0) {
6909                 struct btrfs_disk_key disk_key;
6910
6911                 btrfs_item_key(buf, &disk_key, 0);
6912                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6913         }
6914         btrfs_mark_buffer_dirty(buf);
6915         return 0;
6916 }
6917
6918 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6919 {
6920         struct extent_buffer *buf;
6921         int i;
6922         int ret = 0;
6923
6924         /* We should only get this for leaves */
6925         BUG_ON(path->lowest_level);
6926         buf = path->nodes[0];
6927 again:
6928         for (i = 0; i < btrfs_header_nritems(buf); i++) {
6929                 unsigned int shift = 0, offset;
6930
6931                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6932                     BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
6933                         if (btrfs_item_end_nr(buf, i) >
6934                             BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
6935                                 ret = delete_bogus_item(root, path, buf, i);
6936                                 if (!ret)
6937                                         goto again;
6938                                 fprintf(stderr, "item is off the end of the "
6939                                         "leaf, can't fix\n");
6940                                 ret = -EIO;
6941                                 break;
6942                         }
6943                         shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
6944                                 btrfs_item_end_nr(buf, i);
6945                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6946                            btrfs_item_offset_nr(buf, i - 1)) {
6947                         if (btrfs_item_end_nr(buf, i) >
6948                             btrfs_item_offset_nr(buf, i - 1)) {
6949                                 ret = delete_bogus_item(root, path, buf, i);
6950                                 if (!ret)
6951                                         goto again;
6952                                 fprintf(stderr, "items overlap, can't fix\n");
6953                                 ret = -EIO;
6954                                 break;
6955                         }
6956                         shift = btrfs_item_offset_nr(buf, i - 1) -
6957                                 btrfs_item_end_nr(buf, i);
6958                 }
6959                 if (!shift)
6960                         continue;
6961
6962                 printf("Shifting item nr %d by %u bytes in block %llu\n",
6963                        i, shift, (unsigned long long)buf->start);
6964                 offset = btrfs_item_offset_nr(buf, i);
6965                 memmove_extent_buffer(buf,
6966                                       btrfs_leaf_data(buf) + offset + shift,
6967                                       btrfs_leaf_data(buf) + offset,
6968                                       btrfs_item_size_nr(buf, i));
6969                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6970                                       offset + shift);
6971                 btrfs_mark_buffer_dirty(buf);
6972         }
6973
6974         /*
6975          * We may have moved things, in which case we want to exit so we don't
6976          * write those changes out.  Once we have proper abort functionality in
6977          * progs this can be changed to something nicer.
6978          */
6979         BUG_ON(ret);
6980         return ret;
6981 }
6982
6983 /*
6984  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
6985  * then just return -EIO.
6986  */
6987 static int try_to_fix_bad_block(struct btrfs_root *root,
6988                                 struct extent_buffer *buf,
6989                                 enum btrfs_tree_block_status status)
6990 {
6991         struct btrfs_trans_handle *trans;
6992         struct ulist *roots;
6993         struct ulist_node *node;
6994         struct btrfs_root *search_root;
6995         struct btrfs_path path;
6996         struct ulist_iterator iter;
6997         struct btrfs_key root_key, key;
6998         int ret;
6999
7000         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
7001             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7002                 return -EIO;
7003
7004         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
7005         if (ret)
7006                 return -EIO;
7007
7008         btrfs_init_path(&path);
7009         ULIST_ITER_INIT(&iter);
7010         while ((node = ulist_next(roots, &iter))) {
7011                 root_key.objectid = node->val;
7012                 root_key.type = BTRFS_ROOT_ITEM_KEY;
7013                 root_key.offset = (u64)-1;
7014
7015                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
7016                 if (IS_ERR(root)) {
7017                         ret = -EIO;
7018                         break;
7019                 }
7020
7021
7022                 trans = btrfs_start_transaction(search_root, 0);
7023                 if (IS_ERR(trans)) {
7024                         ret = PTR_ERR(trans);
7025                         break;
7026                 }
7027
7028                 path.lowest_level = btrfs_header_level(buf);
7029                 path.skip_check_block = 1;
7030                 if (path.lowest_level)
7031                         btrfs_node_key_to_cpu(buf, &key, 0);
7032                 else
7033                         btrfs_item_key_to_cpu(buf, &key, 0);
7034                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
7035                 if (ret) {
7036                         ret = -EIO;
7037                         btrfs_commit_transaction(trans, search_root);
7038                         break;
7039                 }
7040                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
7041                         ret = fix_key_order(search_root, &path);
7042                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7043                         ret = fix_item_offset(search_root, &path);
7044                 if (ret) {
7045                         btrfs_commit_transaction(trans, search_root);
7046                         break;
7047                 }
7048                 btrfs_release_path(&path);
7049                 btrfs_commit_transaction(trans, search_root);
7050         }
7051         ulist_free(roots);
7052         btrfs_release_path(&path);
7053         return ret;
7054 }
7055
7056 static int check_block(struct btrfs_root *root,
7057                        struct cache_tree *extent_cache,
7058                        struct extent_buffer *buf, u64 flags)
7059 {
7060         struct extent_record *rec;
7061         struct cache_extent *cache;
7062         struct btrfs_key key;
7063         enum btrfs_tree_block_status status;
7064         int ret = 0;
7065         int level;
7066
7067         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
7068         if (!cache)
7069                 return 1;
7070         rec = container_of(cache, struct extent_record, cache);
7071         rec->generation = btrfs_header_generation(buf);
7072
7073         level = btrfs_header_level(buf);
7074         if (btrfs_header_nritems(buf) > 0) {
7075
7076                 if (level == 0)
7077                         btrfs_item_key_to_cpu(buf, &key, 0);
7078                 else
7079                         btrfs_node_key_to_cpu(buf, &key, 0);
7080
7081                 rec->info_objectid = key.objectid;
7082         }
7083         rec->info_level = level;
7084
7085         if (btrfs_is_leaf(buf))
7086                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
7087         else
7088                 status = btrfs_check_node(root, &rec->parent_key, buf);
7089
7090         if (status != BTRFS_TREE_BLOCK_CLEAN) {
7091                 if (repair)
7092                         status = try_to_fix_bad_block(root, buf, status);
7093                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7094                         ret = -EIO;
7095                         fprintf(stderr, "bad block %llu\n",
7096                                 (unsigned long long)buf->start);
7097                 } else {
7098                         /*
7099                          * Signal to callers we need to start the scan over
7100                          * again since we'll have cowed blocks.
7101                          */
7102                         ret = -EAGAIN;
7103                 }
7104         } else {
7105                 rec->content_checked = 1;
7106                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7107                         rec->owner_ref_checked = 1;
7108                 else {
7109                         ret = check_owner_ref(root, rec, buf);
7110                         if (!ret)
7111                                 rec->owner_ref_checked = 1;
7112                 }
7113         }
7114         if (!ret)
7115                 maybe_free_extent_rec(extent_cache, rec);
7116         return ret;
7117 }
7118
#if 0
/*
 * Compiled out: list based lookup of a tree backref on @rec.
 *
 * A non-zero @parent selects full backrefs and matches on the parent
 * bytenr; otherwise normal backrefs are matched on @root.
 * Returns the matching backref or NULL.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
						u64 parent, u64 root)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs;
	     pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct tree_backref *back;

		if (node->is_data)
			continue;

		back = to_tree_backref(node);
		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
		} else if (!node->full_backref && back->root == root) {
			return back;
		}
	}
	return NULL;
}
#endif
7148
7149 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7150                                                 u64 parent, u64 root)
7151 {
7152         struct tree_backref *ref = malloc(sizeof(*ref));
7153
7154         if (!ref)
7155                 return NULL;
7156         memset(&ref->node, 0, sizeof(ref->node));
7157         if (parent > 0) {
7158                 ref->parent = parent;
7159                 ref->node.full_backref = 1;
7160         } else {
7161                 ref->root = root;
7162                 ref->node.full_backref = 0;
7163         }
7164
7165         return ref;
7166 }
7167
#if 0
/*
 * Compiled out: list based lookup of a data backref on @rec.
 *
 * A non-zero @parent selects full backrefs and matches on the parent
 * bytenr.  Otherwise normal backrefs are matched on @root, @owner and
 * @offset; when both the caller and the candidate come from a found
 * reference, the candidate must also agree on @bytes and @disk_bytenr.
 * Returns the matching backref or NULL.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
						u64 parent, u64 root,
						u64 owner, u64 offset,
						int found_ref,
						u64 disk_bytenr, u64 bytes)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs;
	     pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct data_backref *back;

		if (!node->is_data)
			continue;

		back = to_data_backref(node);
		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
			continue;
		}

		if (node->full_backref)
			continue;
		if (back->root != root || back->owner != owner ||
		    back->offset != offset)
			continue;
		if (found_ref && node->found_ref &&
		    (back->bytes != bytes ||
		     back->disk_bytenr != disk_bytenr))
			continue;
		return back;
	}
	return NULL;
}
#endif
7206
7207 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7208                                                 u64 parent, u64 root,
7209                                                 u64 owner, u64 offset,
7210                                                 u64 max_size)
7211 {
7212         struct data_backref *ref = malloc(sizeof(*ref));
7213
7214         if (!ref)
7215                 return NULL;
7216         memset(&ref->node, 0, sizeof(ref->node));
7217         ref->node.is_data = 1;
7218
7219         if (parent > 0) {
7220                 ref->parent = parent;
7221                 ref->owner = 0;
7222                 ref->offset = 0;
7223                 ref->node.full_backref = 1;
7224         } else {
7225                 ref->root = root;
7226                 ref->owner = owner;
7227                 ref->offset = offset;
7228                 ref->node.full_backref = 0;
7229         }
7230         ref->bytes = max_size;
7231         ref->found_ref = 0;
7232         ref->num_refs = 0;
7233         if (max_size > rec->max_size)
7234                 rec->max_size = max_size;
7235         return ref;
7236 }
7237
7238 /* Check if the type of extent matches with its chunk */
7239 static void check_extent_type(struct extent_record *rec)
7240 {
7241         struct btrfs_block_group_cache *bg_cache;
7242
7243         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7244         if (!bg_cache)
7245                 return;
7246
7247         /* data extent, check chunk directly*/
7248         if (!rec->metadata) {
7249                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7250                         rec->wrong_chunk_type = 1;
7251                 return;
7252         }
7253
7254         /* metadata extent, check the obvious case first */
7255         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7256                                  BTRFS_BLOCK_GROUP_METADATA))) {
7257                 rec->wrong_chunk_type = 1;
7258                 return;
7259         }
7260
7261         /*
7262          * Check SYSTEM extent, as it's also marked as metadata, we can only
7263          * make sure it's a SYSTEM extent by its backref
7264          */
7265         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7266                 struct extent_backref *node;
7267                 struct tree_backref *tback;
7268                 u64 bg_type;
7269
7270                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7271                 if (node->is_data) {
7272                         /* tree block shouldn't have data backref */
7273                         rec->wrong_chunk_type = 1;
7274                         return;
7275                 }
7276                 tback = container_of(node, struct tree_backref, node);
7277
7278                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7279                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7280                 else
7281                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
7282                 if (!(bg_cache->flags & bg_type))
7283                         rec->wrong_chunk_type = 1;
7284         }
7285 }
7286
7287 /*
7288  * Allocate a new extent record, fill default values from @tmpl and insert int
7289  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7290  * the cache, otherwise it fails.
7291  */
7292 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7293                 struct extent_record *tmpl)
7294 {
7295         struct extent_record *rec;
7296         int ret = 0;
7297
7298         BUG_ON(tmpl->max_size == 0);
7299         rec = malloc(sizeof(*rec));
7300         if (!rec)
7301                 return -ENOMEM;
7302         rec->start = tmpl->start;
7303         rec->max_size = tmpl->max_size;
7304         rec->nr = max(tmpl->nr, tmpl->max_size);
7305         rec->found_rec = tmpl->found_rec;
7306         rec->content_checked = tmpl->content_checked;
7307         rec->owner_ref_checked = tmpl->owner_ref_checked;
7308         rec->num_duplicates = 0;
7309         rec->metadata = tmpl->metadata;
7310         rec->flag_block_full_backref = FLAG_UNSET;
7311         rec->bad_full_backref = 0;
7312         rec->crossing_stripes = 0;
7313         rec->wrong_chunk_type = 0;
7314         rec->is_root = tmpl->is_root;
7315         rec->refs = tmpl->refs;
7316         rec->extent_item_refs = tmpl->extent_item_refs;
7317         rec->parent_generation = tmpl->parent_generation;
7318         INIT_LIST_HEAD(&rec->backrefs);
7319         INIT_LIST_HEAD(&rec->dups);
7320         INIT_LIST_HEAD(&rec->list);
7321         rec->backref_tree = RB_ROOT;
7322         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7323         rec->cache.start = tmpl->start;
7324         rec->cache.size = tmpl->nr;
7325         ret = insert_cache_extent(extent_cache, &rec->cache);
7326         if (ret) {
7327                 free(rec);
7328                 return ret;
7329         }
7330         bytes_used += rec->nr;
7331
7332         if (tmpl->metadata)
7333                 rec->crossing_stripes = check_crossing_stripes(global_info,
7334                                 rec->start, global_info->nodesize);
7335         check_extent_type(rec);
7336         return ret;
7337 }
7338
7339 /*
7340  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7341  * some are hints:
7342  * - refs              - if found, increase refs
7343  * - is_root           - if found, set
7344  * - content_checked   - if found, set
7345  * - owner_ref_checked - if found, set
7346  *
7347  * If not found, create a new one, initialize and insert.
7348  */
7349 static int add_extent_rec(struct cache_tree *extent_cache,
7350                 struct extent_record *tmpl)
7351 {
7352         struct extent_record *rec;
7353         struct cache_extent *cache;
7354         int ret = 0;
7355         int dup = 0;
7356
7357         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7358         if (cache) {
7359                 rec = container_of(cache, struct extent_record, cache);
7360                 if (tmpl->refs)
7361                         rec->refs++;
7362                 if (rec->nr == 1)
7363                         rec->nr = max(tmpl->nr, tmpl->max_size);
7364
7365                 /*
7366                  * We need to make sure to reset nr to whatever the extent
7367                  * record says was the real size, this way we can compare it to
7368                  * the backrefs.
7369                  */
7370                 if (tmpl->found_rec) {
7371                         if (tmpl->start != rec->start || rec->found_rec) {
7372                                 struct extent_record *tmp;
7373
7374                                 dup = 1;
7375                                 if (list_empty(&rec->list))
7376                                         list_add_tail(&rec->list,
7377                                                       &duplicate_extents);
7378
7379                                 /*
7380                                  * We have to do this song and dance in case we
7381                                  * find an extent record that falls inside of
7382                                  * our current extent record but does not have
7383                                  * the same objectid.
7384                                  */
7385                                 tmp = malloc(sizeof(*tmp));
7386                                 if (!tmp)
7387                                         return -ENOMEM;
7388                                 tmp->start = tmpl->start;
7389                                 tmp->max_size = tmpl->max_size;
7390                                 tmp->nr = tmpl->nr;
7391                                 tmp->found_rec = 1;
7392                                 tmp->metadata = tmpl->metadata;
7393                                 tmp->extent_item_refs = tmpl->extent_item_refs;
7394                                 INIT_LIST_HEAD(&tmp->list);
7395                                 list_add_tail(&tmp->list, &rec->dups);
7396                                 rec->num_duplicates++;
7397                         } else {
7398                                 rec->nr = tmpl->nr;
7399                                 rec->found_rec = 1;
7400                         }
7401                 }
7402
7403                 if (tmpl->extent_item_refs && !dup) {
7404                         if (rec->extent_item_refs) {
7405                                 fprintf(stderr, "block %llu rec "
7406                                         "extent_item_refs %llu, passed %llu\n",
7407                                         (unsigned long long)tmpl->start,
7408                                         (unsigned long long)
7409                                                         rec->extent_item_refs,
7410                                         (unsigned long long)tmpl->extent_item_refs);
7411                         }
7412                         rec->extent_item_refs = tmpl->extent_item_refs;
7413                 }
7414                 if (tmpl->is_root)
7415                         rec->is_root = 1;
7416                 if (tmpl->content_checked)
7417                         rec->content_checked = 1;
7418                 if (tmpl->owner_ref_checked)
7419                         rec->owner_ref_checked = 1;
7420                 memcpy(&rec->parent_key, &tmpl->parent_key,
7421                                 sizeof(tmpl->parent_key));
7422                 if (tmpl->parent_generation)
7423                         rec->parent_generation = tmpl->parent_generation;
7424                 if (rec->max_size < tmpl->max_size)
7425                         rec->max_size = tmpl->max_size;
7426
7427                 /*
7428                  * A metadata extent can't cross stripe_len boundary, otherwise
7429                  * kernel scrub won't be able to handle it.
7430                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7431                  * it.
7432                  */
7433                 if (tmpl->metadata)
7434                         rec->crossing_stripes = check_crossing_stripes(
7435                                         global_info, rec->start,
7436                                         global_info->nodesize);
7437                 check_extent_type(rec);
7438                 maybe_free_extent_rec(extent_cache, rec);
7439                 return ret;
7440         }
7441
7442         ret = add_extent_rec_nolookup(extent_cache, tmpl);
7443
7444         return ret;
7445 }
7446
7447 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7448                             u64 parent, u64 root, int found_ref)
7449 {
7450         struct extent_record *rec;
7451         struct tree_backref *back;
7452         struct cache_extent *cache;
7453         int ret;
7454         bool insert = false;
7455
7456         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7457         if (!cache) {
7458                 struct extent_record tmpl;
7459
7460                 memset(&tmpl, 0, sizeof(tmpl));
7461                 tmpl.start = bytenr;
7462                 tmpl.nr = 1;
7463                 tmpl.metadata = 1;
7464                 tmpl.max_size = 1;
7465
7466                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7467                 if (ret)
7468                         return ret;
7469
7470                 /* really a bug in cache_extent implement now */
7471                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7472                 if (!cache)
7473                         return -ENOENT;
7474         }
7475
7476         rec = container_of(cache, struct extent_record, cache);
7477         if (rec->start != bytenr) {
7478                 /*
7479                  * Several cause, from unaligned bytenr to over lapping extents
7480                  */
7481                 return -EEXIST;
7482         }
7483
7484         back = find_tree_backref(rec, parent, root);
7485         if (!back) {
7486                 back = alloc_tree_backref(rec, parent, root);
7487                 if (!back)
7488                         return -ENOMEM;
7489                 insert = true;
7490         }
7491
7492         if (found_ref) {
7493                 if (back->node.found_ref) {
7494                         fprintf(stderr, "Extent back ref already exists "
7495                                 "for %llu parent %llu root %llu \n",
7496                                 (unsigned long long)bytenr,
7497                                 (unsigned long long)parent,
7498                                 (unsigned long long)root);
7499                 }
7500                 back->node.found_ref = 1;
7501         } else {
7502                 if (back->node.found_extent_tree) {
7503                         fprintf(stderr, "Extent back ref already exists "
7504                                 "for %llu parent %llu root %llu \n",
7505                                 (unsigned long long)bytenr,
7506                                 (unsigned long long)parent,
7507                                 (unsigned long long)root);
7508                 }
7509                 back->node.found_extent_tree = 1;
7510         }
7511         if (insert)
7512                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7513                         compare_extent_backref));
7514         check_extent_type(rec);
7515         maybe_free_extent_rec(extent_cache, rec);
7516         return 0;
7517 }
7518
7519 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7520                             u64 parent, u64 root, u64 owner, u64 offset,
7521                             u32 num_refs, int found_ref, u64 max_size)
7522 {
7523         struct extent_record *rec;
7524         struct data_backref *back;
7525         struct cache_extent *cache;
7526         int ret;
7527         bool insert = false;
7528
7529         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7530         if (!cache) {
7531                 struct extent_record tmpl;
7532
7533                 memset(&tmpl, 0, sizeof(tmpl));
7534                 tmpl.start = bytenr;
7535                 tmpl.nr = 1;
7536                 tmpl.max_size = max_size;
7537
7538                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7539                 if (ret)
7540                         return ret;
7541
7542                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7543                 if (!cache)
7544                         abort();
7545         }
7546
7547         rec = container_of(cache, struct extent_record, cache);
7548         if (rec->max_size < max_size)
7549                 rec->max_size = max_size;
7550
7551         /*
7552          * If found_ref is set then max_size is the real size and must match the
7553          * existing refs.  So if we have already found a ref then we need to
7554          * make sure that this ref matches the existing one, otherwise we need
7555          * to add a new backref so we can notice that the backrefs don't match
7556          * and we need to figure out who is telling the truth.  This is to
7557          * account for that awful fsync bug I introduced where we'd end up with
7558          * a btrfs_file_extent_item that would have its length include multiple
7559          * prealloc extents or point inside of a prealloc extent.
7560          */
7561         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7562                                  bytenr, max_size);
7563         if (!back) {
7564                 back = alloc_data_backref(rec, parent, root, owner, offset,
7565                                           max_size);
7566                 BUG_ON(!back);
7567                 insert = true;
7568         }
7569
7570         if (found_ref) {
7571                 BUG_ON(num_refs != 1);
7572                 if (back->node.found_ref)
7573                         BUG_ON(back->bytes != max_size);
7574                 back->node.found_ref = 1;
7575                 back->found_ref += 1;
7576                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7577                         back->bytes = max_size;
7578                         back->disk_bytenr = bytenr;
7579
7580                         /* Need to reinsert if not already in the tree */
7581                         if (!insert) {
7582                                 rb_erase(&back->node.node, &rec->backref_tree);
7583                                 insert = true;
7584                         }
7585                 }
7586                 rec->refs += 1;
7587                 rec->content_checked = 1;
7588                 rec->owner_ref_checked = 1;
7589         } else {
7590                 if (back->node.found_extent_tree) {
7591                         fprintf(stderr, "Extent back ref already exists "
7592                                 "for %llu parent %llu root %llu "
7593                                 "owner %llu offset %llu num_refs %lu\n",
7594                                 (unsigned long long)bytenr,
7595                                 (unsigned long long)parent,
7596                                 (unsigned long long)root,
7597                                 (unsigned long long)owner,
7598                                 (unsigned long long)offset,
7599                                 (unsigned long)num_refs);
7600                 }
7601                 back->num_refs = num_refs;
7602                 back->node.found_extent_tree = 1;
7603         }
7604         if (insert)
7605                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7606                         compare_extent_backref));
7607
7608         maybe_free_extent_rec(extent_cache, rec);
7609         return 0;
7610 }
7611
7612 static int add_pending(struct cache_tree *pending,
7613                        struct cache_tree *seen, u64 bytenr, u32 size)
7614 {
7615         int ret;
7616         ret = add_cache_extent(seen, bytenr, size);
7617         if (ret)
7618                 return ret;
7619         add_cache_extent(pending, bytenr, size);
7620         return 0;
7621 }
7622
7623 static int pick_next_pending(struct cache_tree *pending,
7624                         struct cache_tree *reada,
7625                         struct cache_tree *nodes,
7626                         u64 last, struct block_info *bits, int bits_nr,
7627                         int *reada_bits)
7628 {
7629         unsigned long node_start = last;
7630         struct cache_extent *cache;
7631         int ret;
7632
7633         cache = search_cache_extent(reada, 0);
7634         if (cache) {
7635                 bits[0].start = cache->start;
7636                 bits[0].size = cache->size;
7637                 *reada_bits = 1;
7638                 return 1;
7639         }
7640         *reada_bits = 0;
7641         if (node_start > 32768)
7642                 node_start -= 32768;
7643
7644         cache = search_cache_extent(nodes, node_start);
7645         if (!cache)
7646                 cache = search_cache_extent(nodes, 0);
7647
7648         if (!cache) {
7649                  cache = search_cache_extent(pending, 0);
7650                  if (!cache)
7651                          return 0;
7652                  ret = 0;
7653                  do {
7654                          bits[ret].start = cache->start;
7655                          bits[ret].size = cache->size;
7656                          cache = next_cache_extent(cache);
7657                          ret++;
7658                  } while (cache && ret < bits_nr);
7659                  return ret;
7660         }
7661
7662         ret = 0;
7663         do {
7664                 bits[ret].start = cache->start;
7665                 bits[ret].size = cache->size;
7666                 cache = next_cache_extent(cache);
7667                 ret++;
7668         } while (cache && ret < bits_nr);
7669
7670         if (bits_nr - ret > 8) {
7671                 u64 lookup = bits[0].start + bits[0].size;
7672                 struct cache_extent *next;
7673                 next = search_cache_extent(pending, lookup);
7674                 while(next) {
7675                         if (next->start - lookup > 32768)
7676                                 break;
7677                         bits[ret].start = next->start;
7678                         bits[ret].size = next->size;
7679                         lookup = next->start + next->size;
7680                         ret++;
7681                         if (ret == bits_nr)
7682                                 break;
7683                         next = next_cache_extent(next);
7684                         if (!next)
7685                                 break;
7686                 }
7687         }
7688         return ret;
7689 }
7690
7691 static void free_chunk_record(struct cache_extent *cache)
7692 {
7693         struct chunk_record *rec;
7694
7695         rec = container_of(cache, struct chunk_record, cache);
7696         list_del_init(&rec->list);
7697         list_del_init(&rec->dextents);
7698         free(rec);
7699 }
7700
/*
 * Tear down @chunk_cache through cache_tree_free_extents(), supplying
 * free_chunk_record() as the release function for its entries.
 */
void free_chunk_cache_tree(struct cache_tree *chunk_cache)
{
        cache_tree_free_extents(chunk_cache, free_chunk_record);
}
7705
7706 static void free_device_record(struct rb_node *node)
7707 {
7708         struct device_record *rec;
7709
7710         rec = container_of(node, struct device_record, node);
7711         free(rec);
7712 }
7713
/*
 * Instantiate FREE_RB_BASED_TREE() for "device_cache", with
 * free_device_record() as the release function.
 * NOTE(review): the name and signature of the generated helper come from the
 * macro definition, which is not in this file -- confirm there.
 */
FREE_RB_BASED_TREE(device_cache, free_device_record);
7715
7716 int insert_block_group_record(struct block_group_tree *tree,
7717                               struct block_group_record *bg_rec)
7718 {
7719         int ret;
7720
7721         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7722         if (ret)
7723                 return ret;
7724
7725         list_add_tail(&bg_rec->list, &tree->block_groups);
7726         return 0;
7727 }
7728
7729 static void free_block_group_record(struct cache_extent *cache)
7730 {
7731         struct block_group_record *rec;
7732
7733         rec = container_of(cache, struct block_group_record, cache);
7734         list_del_init(&rec->list);
7735         free(rec);
7736 }
7737
/*
 * Tear down the extent cache of @tree through cache_tree_free_extents(),
 * supplying free_block_group_record() as the release function for its
 * entries.
 */
void free_block_group_tree(struct block_group_tree *tree)
{
        cache_tree_free_extents(&tree->tree, free_block_group_record);
}
7742
7743 int insert_device_extent_record(struct device_extent_tree *tree,
7744                                 struct device_extent_record *de_rec)
7745 {
7746         int ret;
7747
7748         /*
7749          * Device extent is a bit different from the other extents, because
7750          * the extents which belong to the different devices may have the
7751          * same start and size, so we need use the special extent cache
7752          * search/insert functions.
7753          */
7754         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7755         if (ret)
7756                 return ret;
7757
7758         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7759         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
7760         return 0;
7761 }
7762
7763 static void free_device_extent_record(struct cache_extent *cache)
7764 {
7765         struct device_extent_record *rec;
7766
7767         rec = container_of(cache, struct device_extent_record, cache);
7768         if (!list_empty(&rec->chunk_list))
7769                 list_del_init(&rec->chunk_list);
7770         if (!list_empty(&rec->device_list))
7771                 list_del_init(&rec->device_list);
7772         free(rec);
7773 }
7774
/*
 * Tear down the extent cache of @tree through cache_tree_free_extents(),
 * supplying free_device_extent_record() as the release function for its
 * entries.
 */
void free_device_extent_tree(struct device_extent_tree *tree)
{
        cache_tree_free_extents(&tree->tree, free_device_extent_record);
}
7779
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent reference item into a backref in @extent_cache.
 *
 * @extent_cache: cache receiving the backref
 * @leaf:         leaf holding the item
 * @slot:         slot of the item inside @leaf
 *
 * References whose objectid is below BTRFS_FIRST_FREE_OBJECTID are added as
 * tree backrefs, all others as data backrefs carrying the v0 ref count.
 *
 * Returns the result of add_tree_backref() / add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                        key.offset, 0, 0, 0,
                                        btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
}
#endif
7800
7801 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7802                                             struct btrfs_key *key,
7803                                             int slot)
7804 {
7805         struct btrfs_chunk *ptr;
7806         struct chunk_record *rec;
7807         int num_stripes, i;
7808
7809         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7810         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7811
7812         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7813         if (!rec) {
7814                 fprintf(stderr, "memory allocation failed\n");
7815                 exit(-1);
7816         }
7817
7818         INIT_LIST_HEAD(&rec->list);
7819         INIT_LIST_HEAD(&rec->dextents);
7820         rec->bg_rec = NULL;
7821
7822         rec->cache.start = key->offset;
7823         rec->cache.size = btrfs_chunk_length(leaf, ptr);
7824
7825         rec->generation = btrfs_header_generation(leaf);
7826
7827         rec->objectid = key->objectid;
7828         rec->type = key->type;
7829         rec->offset = key->offset;
7830
7831         rec->length = rec->cache.size;
7832         rec->owner = btrfs_chunk_owner(leaf, ptr);
7833         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7834         rec->type_flags = btrfs_chunk_type(leaf, ptr);
7835         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7836         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7837         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7838         rec->num_stripes = num_stripes;
7839         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
7840
7841         for (i = 0; i < rec->num_stripes; ++i) {
7842                 rec->stripes[i].devid =
7843                         btrfs_stripe_devid_nr(leaf, ptr, i);
7844                 rec->stripes[i].offset =
7845                         btrfs_stripe_offset_nr(leaf, ptr, i);
7846                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7847                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
7848                                 BTRFS_UUID_SIZE);
7849         }
7850
7851         return rec;
7852 }
7853
7854 static int process_chunk_item(struct cache_tree *chunk_cache,
7855                               struct btrfs_key *key, struct extent_buffer *eb,
7856                               int slot)
7857 {
7858         struct chunk_record *rec;
7859         struct btrfs_chunk *chunk;
7860         int ret = 0;
7861
7862         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7863         /*
7864          * Do extra check for this chunk item,
7865          *
7866          * It's still possible one can craft a leaf with CHUNK_ITEM, with
7867          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7868          * and owner<->key_type check.
7869          */
7870         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7871                                       key->offset);
7872         if (ret < 0) {
7873                 error("chunk(%llu, %llu) is not valid, ignore it",
7874                       key->offset, btrfs_chunk_length(eb, chunk));
7875                 return 0;
7876         }
7877         rec = btrfs_new_chunk_record(eb, key, slot);
7878         ret = insert_cache_extent(chunk_cache, &rec->cache);
7879         if (ret) {
7880                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7881                         rec->offset, rec->length);
7882                 free(rec);
7883         }
7884
7885         return ret;
7886 }
7887
7888 static int process_device_item(struct rb_root *dev_cache,
7889                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7890 {
7891         struct btrfs_dev_item *ptr;
7892         struct device_record *rec;
7893         int ret = 0;
7894
7895         ptr = btrfs_item_ptr(eb,
7896                 slot, struct btrfs_dev_item);
7897
7898         rec = malloc(sizeof(*rec));
7899         if (!rec) {
7900                 fprintf(stderr, "memory allocation failed\n");
7901                 return -ENOMEM;
7902         }
7903
7904         rec->devid = key->offset;
7905         rec->generation = btrfs_header_generation(eb);
7906
7907         rec->objectid = key->objectid;
7908         rec->type = key->type;
7909         rec->offset = key->offset;
7910
7911         rec->devid = btrfs_device_id(eb, ptr);
7912         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7913         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7914
7915         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7916         if (ret) {
7917                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
7918                 free(rec);
7919         }
7920
7921         return ret;
7922 }
7923
7924 struct block_group_record *
7925 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7926                              int slot)
7927 {
7928         struct btrfs_block_group_item *ptr;
7929         struct block_group_record *rec;
7930
7931         rec = calloc(1, sizeof(*rec));
7932         if (!rec) {
7933                 fprintf(stderr, "memory allocation failed\n");
7934                 exit(-1);
7935         }
7936
7937         rec->cache.start = key->objectid;
7938         rec->cache.size = key->offset;
7939
7940         rec->generation = btrfs_header_generation(leaf);
7941
7942         rec->objectid = key->objectid;
7943         rec->type = key->type;
7944         rec->offset = key->offset;
7945
7946         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7947         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7948
7949         INIT_LIST_HEAD(&rec->list);
7950
7951         return rec;
7952 }
7953
7954 static int process_block_group_item(struct block_group_tree *block_group_cache,
7955                                     struct btrfs_key *key,
7956                                     struct extent_buffer *eb, int slot)
7957 {
7958         struct block_group_record *rec;
7959         int ret = 0;
7960
7961         rec = btrfs_new_block_group_record(eb, key, slot);
7962         ret = insert_block_group_record(block_group_cache, rec);
7963         if (ret) {
7964                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7965                         rec->objectid, rec->offset);
7966                 free(rec);
7967         }
7968
7969         return ret;
7970 }
7971
7972 struct device_extent_record *
7973 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7974                                struct btrfs_key *key, int slot)
7975 {
7976         struct device_extent_record *rec;
7977         struct btrfs_dev_extent *ptr;
7978
7979         rec = calloc(1, sizeof(*rec));
7980         if (!rec) {
7981                 fprintf(stderr, "memory allocation failed\n");
7982                 exit(-1);
7983         }
7984
7985         rec->cache.objectid = key->objectid;
7986         rec->cache.start = key->offset;
7987
7988         rec->generation = btrfs_header_generation(leaf);
7989
7990         rec->objectid = key->objectid;
7991         rec->type = key->type;
7992         rec->offset = key->offset;
7993
7994         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7995         rec->chunk_objecteid =
7996                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7997         rec->chunk_offset =
7998                 btrfs_dev_extent_chunk_offset(leaf, ptr);
7999         rec->length = btrfs_dev_extent_length(leaf, ptr);
8000         rec->cache.size = rec->length;
8001
8002         INIT_LIST_HEAD(&rec->chunk_list);
8003         INIT_LIST_HEAD(&rec->device_list);
8004
8005         return rec;
8006 }
8007
8008 static int
8009 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
8010                            struct btrfs_key *key, struct extent_buffer *eb,
8011                            int slot)
8012 {
8013         struct device_extent_record *rec;
8014         int ret;
8015
8016         rec = btrfs_new_device_extent_record(eb, key, slot);
8017         ret = insert_device_extent_record(dev_extent_cache, rec);
8018         if (ret) {
8019                 fprintf(stderr,
8020                         "Device extent[%llu, %llu, %llu] existed.\n",
8021                         rec->objectid, rec->offset, rec->length);
8022                 free(rec);
8023         }
8024
8025         return ret;
8026 }
8027
/*
 * Record the EXTENT_ITEM / METADATA_ITEM at @slot of @eb in @extent_cache
 * and add a backref for every inline reference stored in the item.
 *
 * @root:         used only to reach fs_info (nodesize / sectorsize)
 * @extent_cache: cache receiving the extent record and its backrefs
 * @eb:           leaf holding the item
 * @slot:         slot of the item inside @eb
 *
 * Returns -EIO when the extent bytenr is not sector aligned, when a
 * metadata extent length differs from nodesize, or when a data extent
 * length is not sector aligned.  For an item smaller than
 * struct btrfs_extent_item the result of add_extent_rec() is returned.
 * In every other case 0 is returned.
 *
 * NOTE(review): on the regular path the return values of add_extent_rec()
 * and add_data_backref() are ignored, add_tree_backref() failures are only
 * logged, and an unknown inline ref type still ends in "return 0".
 */
static int process_extent_item(struct btrfs_root *root,
                               struct cache_tree *extent_cache,
                               struct extent_buffer *eb, int slot)
{
        struct btrfs_extent_item *ei;
        struct btrfs_extent_inline_ref *iref;
        struct btrfs_extent_data_ref *dref;
        struct btrfs_shared_data_ref *sref;
        struct btrfs_key key;
        struct extent_record tmpl;
        unsigned long end;
        unsigned long ptr;
        int ret;
        int type;
        u32 item_size = btrfs_item_size_nr(eb, slot);
        u64 refs = 0;
        u64 offset;
        u64 num_bytes;
        int metadata = 0;

        btrfs_item_key_to_cpu(eb, &key, slot);

        /*
         * For a METADATA_ITEM key the length is taken to be nodesize; for
         * any other key the length is read from key.offset.
         */
        if (key.type == BTRFS_METADATA_ITEM_KEY) {
                metadata = 1;
                num_bytes = root->fs_info->nodesize;
        } else {
                num_bytes = key.offset;
        }

        if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
                error("ignoring invalid extent, bytenr %llu is not aligned to %u",
                      key.objectid, root->fs_info->sectorsize);
                return -EIO;
        }
        /*
         * Item too small for a struct btrfs_extent_item: only accepted as a
         * v0 item when BTRFS_COMPAT_EXTENT_TREE_V0 is defined, otherwise
         * BUG().
         */
        if (item_size < sizeof(*ei)) {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
                struct btrfs_extent_item_v0 *ei0;
                if (item_size != sizeof(*ei0)) {
                        error(
        "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
                                key.objectid, key.type, key.offset,
                                btrfs_header_bytenr(eb), slot);
                        BUG();
                }
                ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
                refs = btrfs_extent_refs_v0(eb, ei0);
#else
                BUG();
#endif
                memset(&tmpl, 0, sizeof(tmpl));
                tmpl.start = key.objectid;
                tmpl.nr = num_bytes;
                tmpl.extent_item_refs = refs;
                tmpl.metadata = metadata;
                tmpl.found_rec = 1;
                tmpl.max_size = num_bytes;

                return add_extent_rec(extent_cache, &tmpl);
        }

        ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
        refs = btrfs_extent_refs(eb, ei);
        /* From here on the item flags, not the key type, decide 'metadata'. */
        if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
                metadata = 1;
        else
                metadata = 0;
        if (metadata && num_bytes != root->fs_info->nodesize) {
                error("ignore invalid metadata extent, length %llu does not equal to %u",
                      num_bytes, root->fs_info->nodesize);
                return -EIO;
        }
        if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
                error("ignore invalid data extent, length %llu is not aligned to %u",
                      num_bytes, root->fs_info->sectorsize);
                return -EIO;
        }

        memset(&tmpl, 0, sizeof(tmpl));
        tmpl.start = key.objectid;
        tmpl.nr = num_bytes;
        tmpl.extent_item_refs = refs;
        tmpl.metadata = metadata;
        tmpl.found_rec = 1;
        tmpl.max_size = num_bytes;
        /* NOTE(review): the result of add_extent_rec() is not checked here. */
        add_extent_rec(extent_cache, &tmpl);

        /*
         * Inline refs start right after the extent item; a tree block with
         * an EXTENT_ITEM key has a struct btrfs_tree_block_info to skip
         * first.
         */
        ptr = (unsigned long)(ei + 1);
        if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
            key.type == BTRFS_EXTENT_ITEM_KEY)
                ptr += sizeof(struct btrfs_tree_block_info);

        end = (unsigned long)ei + item_size;
        /* Walk the inline refs until the end of the item. */
        while (ptr < end) {
                iref = (struct btrfs_extent_inline_ref *)ptr;
                type = btrfs_extent_inline_ref_type(eb, iref);
                offset = btrfs_extent_inline_ref_offset(eb, iref);
                switch (type) {
                case BTRFS_TREE_BLOCK_REF_KEY:
                        /* inline offset is passed as the 4th argument */
                        ret = add_tree_backref(extent_cache, key.objectid,
                                        0, offset, 0);
                        if (ret < 0)
                                error(
                        "add_tree_backref failed (extent items tree block): %s",
                                      strerror(-ret));
                        break;
                case BTRFS_SHARED_BLOCK_REF_KEY:
                        /* inline offset is passed as the 3rd argument */
                        ret = add_tree_backref(extent_cache, key.objectid,
                                        offset, 0, 0);
                        if (ret < 0)
                                error(
                        "add_tree_backref failed (extent items shared block): %s",
                                      strerror(-ret));
                        break;
                case BTRFS_EXTENT_DATA_REF_KEY:
                        /* the data ref overlays the inline ref's offset field */
                        dref = (struct btrfs_extent_data_ref *)(&iref->offset);
                        add_data_backref(extent_cache, key.objectid, 0,
                                        btrfs_extent_data_ref_root(eb, dref),
                                        btrfs_extent_data_ref_objectid(eb,
                                                                       dref),
                                        btrfs_extent_data_ref_offset(eb, dref),
                                        btrfs_extent_data_ref_count(eb, dref),
                                        0, num_bytes);
                        break;
                case BTRFS_SHARED_DATA_REF_KEY:
                        /* the shared data ref follows the inline ref header */
                        sref = (struct btrfs_shared_data_ref *)(iref + 1);
                        add_data_backref(extent_cache, key.objectid, offset,
                                        0, 0, 0,
                                        btrfs_shared_data_ref_count(eb, sref),
                                        0, num_bytes);
                        break;
                default:
                        /* unknown ref type: stop parsing this item */
                        fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
                                key.objectid, key.type, num_bytes);
                        goto out;
                }
                ptr += btrfs_extent_inline_ref_size(type);
        }
        /* The last inline ref must not run past the end of the item. */
        WARN_ON(ptr > end);
out:
        return 0;
}
8169
/*
 * Check that the range [@offset, @offset + @bytes) of block group @cache is
 * described by exactly one free space entry, after cutting out the parts of
 * the range that btrfs_rmap_block() reports for each superblock mirror.
 *
 * @root:   used to reach fs_info
 * @cache:  block group whose free_space_ctl is inspected
 * @offset: start of the range to check
 * @bytes:  length of the range to check
 *
 * A matching entry is unlinked from the free space ctl and freed.
 *
 * Returns 0 when the (trimmed) range is fully covered by a super stripe or
 * matches a free space entry exactly, -EINVAL when no entry is found or its
 * offset/size differ, or the error returned by btrfs_rmap_block() or by the
 * recursive check of the left-hand part.
 */
static int check_cache_range(struct btrfs_root *root,
                             struct btrfs_block_group_cache *cache,
                             u64 offset, u64 bytes)
{
        struct btrfs_free_space *entry;
        u64 *logical;
        u64 bytenr;
        int stripe_len;
        int i, nr, ret;

        for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
                bytenr = btrfs_sb_offset(i);
                /*
                 * presumably logical[0..nr) are the addresses inside this
                 * block group that map to superblock copy i -- TODO confirm
                 * against btrfs_rmap_block()
                 */
                ret = btrfs_rmap_block(root->fs_info,
                                       cache->key.objectid, bytenr, 0,
                                       &logical, &nr, &stripe_len);
                if (ret)
                        return ret;

                while (nr--) {
                        /* stripe ends before the range starts */
                        if (logical[nr] + stripe_len <= offset)
                                continue;
                        /* stripe starts after the range ends */
                        if (offset + bytes <= logical[nr])
                                continue;
                        if (logical[nr] == offset) {
                                /* stripe starts exactly at the range start */
                                if (stripe_len >= bytes) {
                                        free(logical);
                                        return 0;
                                }
                                bytes -= stripe_len;
                                offset += stripe_len;
                        } else if (logical[nr] < offset) {
                                /* stripe starts before the range start */
                                if (logical[nr] + stripe_len >=
                                    offset + bytes) {
                                        free(logical);
                                        return 0;
                                }
                                bytes = (offset + bytes) -
                                        (logical[nr] + stripe_len);
                                offset = logical[nr] + stripe_len;
                        } else {
                                /*
                                 * Could be tricky, the super may land in the
                                 * middle of the area we're checking.  First
                                 * check the easiest case, it's at the end.
                                 */
                                if (logical[nr] + stripe_len >=
                                    bytes + offset) {
                                        bytes = logical[nr] - offset;
                                        continue;
                                }

                                /* Check the left side */
                                ret = check_cache_range(root, cache,
                                                        offset,
                                                        logical[nr] - offset);
                                if (ret) {
                                        free(logical);
                                        return ret;
                                }

                                /* Now we continue with the right side */
                                bytes = (offset + bytes) -
                                        (logical[nr] + stripe_len);
                                offset = logical[nr] + stripe_len;
                        }
                }

                free(logical);
        }

        /* What is left of the range must match one free space entry exactly. */
        entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
        if (!entry) {
                fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
                        offset, offset+bytes);
                return -EINVAL;
        }

        if (entry->offset != offset) {
                fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
                        entry->offset);
                return -EINVAL;
        }

        if (entry->bytes != bytes) {
                fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
                        bytes, entry->bytes, offset);
                return -EINVAL;
        }

        /* Consume the entry so the caller can detect leftover entries. */
        unlink_free_space(cache->free_space_ctl, entry);
        free(entry);
        return 0;
}
8263
8264 static int verify_space_cache(struct btrfs_root *root,
8265                               struct btrfs_block_group_cache *cache)
8266 {
8267         struct btrfs_path path;
8268         struct extent_buffer *leaf;
8269         struct btrfs_key key;
8270         u64 last;
8271         int ret = 0;
8272
8273         root = root->fs_info->extent_root;
8274
8275         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8276
8277         btrfs_init_path(&path);
8278         key.objectid = last;
8279         key.offset = 0;
8280         key.type = BTRFS_EXTENT_ITEM_KEY;
8281         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8282         if (ret < 0)
8283                 goto out;
8284         ret = 0;
8285         while (1) {
8286                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8287                         ret = btrfs_next_leaf(root, &path);
8288                         if (ret < 0)
8289                                 goto out;
8290                         if (ret > 0) {
8291                                 ret = 0;
8292                                 break;
8293                         }
8294                 }
8295                 leaf = path.nodes[0];
8296                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8297                 if (key.objectid >= cache->key.offset + cache->key.objectid)
8298                         break;
8299                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8300                     key.type != BTRFS_METADATA_ITEM_KEY) {
8301                         path.slots[0]++;
8302                         continue;
8303                 }
8304
8305                 if (last == key.objectid) {
8306                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
8307                                 last = key.objectid + key.offset;
8308                         else
8309                                 last = key.objectid + root->fs_info->nodesize;
8310                         path.slots[0]++;
8311                         continue;
8312                 }
8313
8314                 ret = check_cache_range(root, cache, last,
8315                                         key.objectid - last);
8316                 if (ret)
8317                         break;
8318                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8319                         last = key.objectid + key.offset;
8320                 else
8321                         last = key.objectid + root->fs_info->nodesize;
8322                 path.slots[0]++;
8323         }
8324
8325         if (last < cache->key.objectid + cache->key.offset)
8326                 ret = check_cache_range(root, cache, last,
8327                                         cache->key.objectid +
8328                                         cache->key.offset - last);
8329
8330 out:
8331         btrfs_release_path(&path);
8332
8333         if (!ret &&
8334             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8335                 fprintf(stderr, "There are still entries left in the space "
8336                         "cache\n");
8337                 ret = -EINVAL;
8338         }
8339
8340         return ret;
8341 }
8342
8343 static int check_space_cache(struct btrfs_root *root)
8344 {
8345         struct btrfs_block_group_cache *cache;
8346         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8347         int ret;
8348         int error = 0;
8349
8350         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8351             btrfs_super_generation(root->fs_info->super_copy) !=
8352             btrfs_super_cache_generation(root->fs_info->super_copy)) {
8353                 printf("cache and super generation don't match, space cache "
8354                        "will be invalidated\n");
8355                 return 0;
8356         }
8357
8358         if (ctx.progress_enabled) {
8359                 ctx.tp = TASK_FREE_SPACE;
8360                 task_start(ctx.info);
8361         }
8362
8363         while (1) {
8364                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
8365                 if (!cache)
8366                         break;
8367
8368                 start = cache->key.objectid + cache->key.offset;
8369                 if (!cache->free_space_ctl) {
8370                         if (btrfs_init_free_space_ctl(cache,
8371                                                 root->fs_info->sectorsize)) {
8372                                 ret = -ENOMEM;
8373                                 break;
8374                         }
8375                 } else {
8376                         btrfs_remove_free_space_cache(cache);
8377                 }
8378
8379                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8380                         ret = exclude_super_stripes(root, cache);
8381                         if (ret) {
8382                                 fprintf(stderr, "could not exclude super stripes: %s\n",
8383                                         strerror(-ret));
8384                                 error++;
8385                                 continue;
8386                         }
8387                         ret = load_free_space_tree(root->fs_info, cache);
8388                         free_excluded_extents(root, cache);
8389                         if (ret < 0) {
8390                                 fprintf(stderr, "could not load free space tree: %s\n",
8391                                         strerror(-ret));
8392                                 error++;
8393                                 continue;
8394                         }
8395                         error += ret;
8396                 } else {
8397                         ret = load_free_space_cache(root->fs_info, cache);
8398                         if (!ret)
8399                                 continue;
8400                 }
8401
8402                 ret = verify_space_cache(root, cache);
8403                 if (ret) {
8404                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
8405                                 cache->key.objectid);
8406                         error++;
8407                 }
8408         }
8409
8410         task_stop(ctx.info);
8411
8412         return error ? -EINVAL : 0;
8413 }
8414
/*
 * Read the data range [@bytenr, @bytenr + @num_bytes) and compare the
 * checksum of every sectorsize block with the checksums stored in @eb.
 *
 * @root:        used to reach fs_info
 * @bytenr:      start of the data range
 * @num_bytes:   length of the range; must be a multiple of sectorsize
 * @leaf_offset: offset inside @eb of the checksum for the first sector
 * @eb:          extent buffer holding the stored checksums
 *
 * On a mismatch the details are printed and, while another copy exists
 * according to btrfs_num_copies(), the remaining range is re-read from the
 * next mirror.
 *
 * Returns -EINVAL for an unaligned @num_bytes, -ENOMEM if the data buffer
 * cannot be allocated, or the result of the last read_extent_data() call.
 *
 * NOTE(review): a checksum mismatch that survives all mirrors is only
 * printed; it does not change the return value.
 * NOTE(review): csum_size bytes are read into a u32, so this relies on the
 * checksum size not exceeding 4 bytes -- confirm.
 */
static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
                        u64 num_bytes, unsigned long leaf_offset,
                        struct extent_buffer *eb) {

        struct btrfs_fs_info *fs_info = root->fs_info;
        u64 offset = 0;
        u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
        char *data;
        unsigned long csum_offset;
        u32 csum;
        u32 csum_expected;
        u64 read_len;
        u64 data_checked = 0;
        u64 tmp;
        int ret = 0;
        int mirror;
        int num_copies;

        if (num_bytes % fs_info->sectorsize)
                return -EINVAL;

        data = malloc(num_bytes);
        if (!data)
                return -ENOMEM;

        while (offset < num_bytes) {
                mirror = 0;
again:
                /* (re)read everything from 'offset' on, using 'mirror' */
                read_len = num_bytes - offset;
                /* read as much as possible in one go */
                ret = read_extent_data(fs_info, data + offset,
                                bytenr + offset, &read_len, mirror);
                if (ret)
                        goto out;
                data_checked = 0;
                /* verify the checksum of every sectorsize block just read */
                while (data_checked < read_len) {
                        csum = ~(u32)0;
                        /* offset of this sector relative to bytenr */
                        tmp = offset + data_checked;

                        csum = btrfs_csum_data((char *)data + tmp,
                                               csum, fs_info->sectorsize);
                        btrfs_csum_final(csum, (u8 *)&csum);

                        /* one csum_size slot per sector, starting at leaf_offset */
                        csum_offset = leaf_offset +
                                 tmp / fs_info->sectorsize * csum_size;
                        read_extent_buffer(eb, (char *)&csum_expected,
                                           csum_offset, csum_size);
                        /* try another mirror */
                        if (csum != csum_expected) {
                                fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
                                                mirror, bytenr + tmp,
                                                csum, csum_expected);
                                num_copies = btrfs_num_copies(root->fs_info,
                                                bytenr, num_bytes);
                                if (mirror < num_copies - 1) {
                                        mirror += 1;
                                        goto again;
                                }
                        }
                        data_checked += fs_info->sectorsize;
                }
                offset += read_len;
        }
out:
        free(data);
        return ret;
}
8483
8484 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8485                                u64 num_bytes)
8486 {
8487         struct btrfs_path path;
8488         struct extent_buffer *leaf;
8489         struct btrfs_key key;
8490         int ret;
8491
8492         btrfs_init_path(&path);
8493         key.objectid = bytenr;
8494         key.type = BTRFS_EXTENT_ITEM_KEY;
8495         key.offset = (u64)-1;
8496
8497 again:
8498         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8499                                 0, 0);
8500         if (ret < 0) {
8501                 fprintf(stderr, "Error looking up extent record %d\n", ret);
8502                 btrfs_release_path(&path);
8503                 return ret;
8504         } else if (ret) {
8505                 if (path.slots[0] > 0) {
8506                         path.slots[0]--;
8507                 } else {
8508                         ret = btrfs_prev_leaf(root, &path);
8509                         if (ret < 0) {
8510                                 goto out;
8511                         } else if (ret > 0) {
8512                                 ret = 0;
8513                                 goto out;
8514                         }
8515                 }
8516         }
8517
8518         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8519
8520         /*
8521          * Block group items come before extent items if they have the same
8522          * bytenr, so walk back one more just in case.  Dear future traveller,
8523          * first congrats on mastering time travel.  Now if it's not too much
8524          * trouble could you go back to 2006 and tell Chris to make the
8525          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8526          * EXTENT_ITEM_KEY please?
8527          */
8528         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8529                 if (path.slots[0] > 0) {
8530                         path.slots[0]--;
8531                 } else {
8532                         ret = btrfs_prev_leaf(root, &path);
8533                         if (ret < 0) {
8534                                 goto out;
8535                         } else if (ret > 0) {
8536                                 ret = 0;
8537                                 goto out;
8538                         }
8539                 }
8540                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8541         }
8542
8543         while (num_bytes) {
8544                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8545                         ret = btrfs_next_leaf(root, &path);
8546                         if (ret < 0) {
8547                                 fprintf(stderr, "Error going to next leaf "
8548                                         "%d\n", ret);
8549                                 btrfs_release_path(&path);
8550                                 return ret;
8551                         } else if (ret) {
8552                                 break;
8553                         }
8554                 }
8555                 leaf = path.nodes[0];
8556                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8557                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8558                         path.slots[0]++;
8559                         continue;
8560                 }
8561                 if (key.objectid + key.offset < bytenr) {
8562                         path.slots[0]++;
8563                         continue;
8564                 }
8565                 if (key.objectid > bytenr + num_bytes)
8566                         break;
8567
8568                 if (key.objectid == bytenr) {
8569                         if (key.offset >= num_bytes) {
8570                                 num_bytes = 0;
8571                                 break;
8572                         }
8573                         num_bytes -= key.offset;
8574                         bytenr += key.offset;
8575                 } else if (key.objectid < bytenr) {
8576                         if (key.objectid + key.offset >= bytenr + num_bytes) {
8577                                 num_bytes = 0;
8578                                 break;
8579                         }
8580                         num_bytes = (bytenr + num_bytes) -
8581                                 (key.objectid + key.offset);
8582                         bytenr = key.objectid + key.offset;
8583                 } else {
8584                         if (key.objectid + key.offset < bytenr + num_bytes) {
8585                                 u64 new_start = key.objectid + key.offset;
8586                                 u64 new_bytes = bytenr + num_bytes - new_start;
8587
8588                                 /*
8589                                  * Weird case, the extent is in the middle of
8590                                  * our range, we'll have to search one side
8591                                  * and then the other.  Not sure if this happens
8592                                  * in real life, but no harm in coding it up
8593                                  * anyway just in case.
8594                                  */
8595                                 btrfs_release_path(&path);
8596                                 ret = check_extent_exists(root, new_start,
8597                                                           new_bytes);
8598                                 if (ret) {
8599                                         fprintf(stderr, "Right section didn't "
8600                                                 "have a record\n");
8601                                         break;
8602                                 }
8603                                 num_bytes = key.objectid - bytenr;
8604                                 goto again;
8605                         }
8606                         num_bytes = key.objectid - bytenr;
8607                 }
8608                 path.slots[0]++;
8609         }
8610         ret = 0;
8611
8612 out:
8613         if (num_bytes && !ret) {
8614                 fprintf(stderr, "There are no extents for csum range "
8615                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8616                 ret = 1;
8617         }
8618
8619         btrfs_release_path(&path);
8620         return ret;
8621 }
8622
8623 static int check_csums(struct btrfs_root *root)
8624 {
8625         struct btrfs_path path;
8626         struct extent_buffer *leaf;
8627         struct btrfs_key key;
8628         u64 offset = 0, num_bytes = 0;
8629         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8630         int errors = 0;
8631         int ret;
8632         u64 data_len;
8633         unsigned long leaf_offset;
8634
8635         root = root->fs_info->csum_root;
8636         if (!extent_buffer_uptodate(root->node)) {
8637                 fprintf(stderr, "No valid csum tree found\n");
8638                 return -ENOENT;
8639         }
8640
8641         btrfs_init_path(&path);
8642         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8643         key.type = BTRFS_EXTENT_CSUM_KEY;
8644         key.offset = 0;
8645         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8646         if (ret < 0) {
8647                 fprintf(stderr, "Error searching csum tree %d\n", ret);
8648                 btrfs_release_path(&path);
8649                 return ret;
8650         }
8651
8652         if (ret > 0 && path.slots[0])
8653                 path.slots[0]--;
8654         ret = 0;
8655
8656         while (1) {
8657                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8658                         ret = btrfs_next_leaf(root, &path);
8659                         if (ret < 0) {
8660                                 fprintf(stderr, "Error going to next leaf "
8661                                         "%d\n", ret);
8662                                 break;
8663                         }
8664                         if (ret)
8665                                 break;
8666                 }
8667                 leaf = path.nodes[0];
8668
8669                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8670                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
8671                         path.slots[0]++;
8672                         continue;
8673                 }
8674
8675                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8676                               csum_size) * root->fs_info->sectorsize;
8677                 if (!check_data_csum)
8678                         goto skip_csum_check;
8679                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8680                 ret = check_extent_csums(root, key.offset, data_len,
8681                                          leaf_offset, leaf);
8682                 if (ret)
8683                         break;
8684 skip_csum_check:
8685                 if (!num_bytes) {
8686                         offset = key.offset;
8687                 } else if (key.offset != offset + num_bytes) {
8688                         ret = check_extent_exists(root, offset, num_bytes);
8689                         if (ret) {
8690                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8691                                         "there is no extent record\n",
8692                                         offset, offset+num_bytes);
8693                                 errors++;
8694                         }
8695                         offset = key.offset;
8696                         num_bytes = 0;
8697                 }
8698                 num_bytes += data_len;
8699                 path.slots[0]++;
8700         }
8701
8702         btrfs_release_path(&path);
8703         return errors;
8704 }
8705
8706 static int is_dropped_key(struct btrfs_key *key,
8707                           struct btrfs_key *drop_key) {
8708         if (key->objectid < drop_key->objectid)
8709                 return 1;
8710         else if (key->objectid == drop_key->objectid) {
8711                 if (key->type < drop_key->type)
8712                         return 1;
8713                 else if (key->type == drop_key->type) {
8714                         if (key->offset < drop_key->offset)
8715                                 return 1;
8716                 }
8717         }
8718         return 0;
8719 }
8720
8721 /*
8722  * Here are the rules for FULL_BACKREF.
8723  *
8724  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8725  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8726  *      FULL_BACKREF set.
8727  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
8728  *    if it happened after the relocation occurred since we'll have dropped the
8729  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8730  *    have no real way to know for sure.
8731  *
8732  * We process the blocks one root at a time, and we start from the lowest root
8733  * objectid and go to the highest.  So we can just lookup the owner backref for
8734  * the record and if we don't find it then we know it doesn't exist and we have
8735  * a FULL BACKREF.
8736  *
8737  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
8738  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8739  * be set or not and then we can check later once we've gathered all the refs.
8740  */
8741 static int calc_extent_flag(struct cache_tree *extent_cache,
8742                            struct extent_buffer *buf,
8743                            struct root_item_record *ri,
8744                            u64 *flags)
8745 {
8746         struct extent_record *rec;
8747         struct cache_extent *cache;
8748         struct tree_backref *tback;
8749         u64 owner = 0;
8750
8751         cache = lookup_cache_extent(extent_cache, buf->start, 1);
8752         /* we have added this extent before */
8753         if (!cache)
8754                 return -ENOENT;
8755
8756         rec = container_of(cache, struct extent_record, cache);
8757
8758         /*
8759          * Except file/reloc tree, we can not have
8760          * FULL BACKREF MODE
8761          */
8762         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8763                 goto normal;
8764         /*
8765          * root node
8766          */
8767         if (buf->start == ri->bytenr)
8768                 goto normal;
8769
8770         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8771                 goto full_backref;
8772
8773         owner = btrfs_header_owner(buf);
8774         if (owner == ri->objectid)
8775                 goto normal;
8776
8777         tback = find_tree_backref(rec, 0, owner);
8778         if (!tback)
8779                 goto full_backref;
8780 normal:
8781         *flags = 0;
8782         if (rec->flag_block_full_backref != FLAG_UNSET &&
8783             rec->flag_block_full_backref != 0)
8784                 rec->bad_full_backref = 1;
8785         return 0;
8786 full_backref:
8787         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8788         if (rec->flag_block_full_backref != FLAG_UNSET &&
8789             rec->flag_block_full_backref != 1)
8790                 rec->bad_full_backref = 1;
8791         return 0;
8792 }
8793
8794 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8795 {
8796         fprintf(stderr, "Invalid key type(");
8797         print_key_type(stderr, 0, key_type);
8798         fprintf(stderr, ") found in root(");
8799         print_objectid(stderr, rootid, 0);
8800         fprintf(stderr, ")\n");
8801 }
8802
8803 /*
8804  * Check if the key is valid with its extent buffer.
8805  *
8806  * This is a early check in case invalid key exists in a extent buffer
8807  * This is not comprehensive yet, but should prevent wrong key/item passed
8808  * further
8809  */
8810 static int check_type_with_root(u64 rootid, u8 key_type)
8811 {
8812         switch (key_type) {
8813         /* Only valid in chunk tree */
8814         case BTRFS_DEV_ITEM_KEY:
8815         case BTRFS_CHUNK_ITEM_KEY:
8816                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8817                         goto err;
8818                 break;
8819         /* valid in csum and log tree */
8820         case BTRFS_CSUM_TREE_OBJECTID:
8821                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8822                       is_fstree(rootid)))
8823                         goto err;
8824                 break;
8825         case BTRFS_EXTENT_ITEM_KEY:
8826         case BTRFS_METADATA_ITEM_KEY:
8827         case BTRFS_BLOCK_GROUP_ITEM_KEY:
8828                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8829                         goto err;
8830                 break;
8831         case BTRFS_ROOT_ITEM_KEY:
8832                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8833                         goto err;
8834                 break;
8835         case BTRFS_DEV_EXTENT_KEY:
8836                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8837                         goto err;
8838                 break;
8839         }
8840         return 0;
8841 err:
8842         report_mismatch_key_root(key_type, rootid);
8843         return -EINVAL;
8844 }
8845
8846 static int run_next_block(struct btrfs_root *root,
8847                           struct block_info *bits,
8848                           int bits_nr,
8849                           u64 *last,
8850                           struct cache_tree *pending,
8851                           struct cache_tree *seen,
8852                           struct cache_tree *reada,
8853                           struct cache_tree *nodes,
8854                           struct cache_tree *extent_cache,
8855                           struct cache_tree *chunk_cache,
8856                           struct rb_root *dev_cache,
8857                           struct block_group_tree *block_group_cache,
8858                           struct device_extent_tree *dev_extent_cache,
8859                           struct root_item_record *ri)
8860 {
8861         struct btrfs_fs_info *fs_info = root->fs_info;
8862         struct extent_buffer *buf;
8863         struct extent_record *rec = NULL;
8864         u64 bytenr;
8865         u32 size;
8866         u64 parent;
8867         u64 owner;
8868         u64 flags;
8869         u64 ptr;
8870         u64 gen = 0;
8871         int ret = 0;
8872         int i;
8873         int nritems;
8874         struct btrfs_key key;
8875         struct cache_extent *cache;
8876         int reada_bits;
8877
8878         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8879                                     bits_nr, &reada_bits);
8880         if (nritems == 0)
8881                 return 1;
8882
8883         if (!reada_bits) {
8884                 for(i = 0; i < nritems; i++) {
8885                         ret = add_cache_extent(reada, bits[i].start,
8886                                                bits[i].size);
8887                         if (ret == -EEXIST)
8888                                 continue;
8889
8890                         /* fixme, get the parent transid */
8891                         readahead_tree_block(fs_info, bits[i].start, 0);
8892                 }
8893         }
8894         *last = bits[0].start;
8895         bytenr = bits[0].start;
8896         size = bits[0].size;
8897
8898         cache = lookup_cache_extent(pending, bytenr, size);
8899         if (cache) {
8900                 remove_cache_extent(pending, cache);
8901                 free(cache);
8902         }
8903         cache = lookup_cache_extent(reada, bytenr, size);
8904         if (cache) {
8905                 remove_cache_extent(reada, cache);
8906                 free(cache);
8907         }
8908         cache = lookup_cache_extent(nodes, bytenr, size);
8909         if (cache) {
8910                 remove_cache_extent(nodes, cache);
8911                 free(cache);
8912         }
8913         cache = lookup_cache_extent(extent_cache, bytenr, size);
8914         if (cache) {
8915                 rec = container_of(cache, struct extent_record, cache);
8916                 gen = rec->parent_generation;
8917         }
8918
8919         /* fixme, get the real parent transid */
8920         buf = read_tree_block(root->fs_info, bytenr, gen);
8921         if (!extent_buffer_uptodate(buf)) {
8922                 record_bad_block_io(root->fs_info,
8923                                     extent_cache, bytenr, size);
8924                 goto out;
8925         }
8926
8927         nritems = btrfs_header_nritems(buf);
8928
8929         flags = 0;
8930         if (!init_extent_tree) {
8931                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8932                                        btrfs_header_level(buf), 1, NULL,
8933                                        &flags);
8934                 if (ret < 0) {
8935                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8936                         if (ret < 0) {
8937                                 fprintf(stderr, "Couldn't calc extent flags\n");
8938                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8939                         }
8940                 }
8941         } else {
8942                 flags = 0;
8943                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8944                 if (ret < 0) {
8945                         fprintf(stderr, "Couldn't calc extent flags\n");
8946                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8947                 }
8948         }
8949
8950         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8951                 if (ri != NULL &&
8952                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8953                     ri->objectid == btrfs_header_owner(buf)) {
8954                         /*
8955                          * Ok we got to this block from it's original owner and
8956                          * we have FULL_BACKREF set.  Relocation can leave
8957                          * converted blocks over so this is altogether possible,
8958                          * however it's not possible if the generation > the
8959                          * last snapshot, so check for this case.
8960                          */
8961                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8962                             btrfs_header_generation(buf) > ri->last_snapshot) {
8963                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8964                                 rec->bad_full_backref = 1;
8965                         }
8966                 }
8967         } else {
8968                 if (ri != NULL &&
8969                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8970                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8971                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8972                         rec->bad_full_backref = 1;
8973                 }
8974         }
8975
8976         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8977                 rec->flag_block_full_backref = 1;
8978                 parent = bytenr;
8979                 owner = 0;
8980         } else {
8981                 rec->flag_block_full_backref = 0;
8982                 parent = 0;
8983                 owner = btrfs_header_owner(buf);
8984         }
8985
8986         ret = check_block(root, extent_cache, buf, flags);
8987         if (ret)
8988                 goto out;
8989
8990         if (btrfs_is_leaf(buf)) {
8991                 btree_space_waste += btrfs_leaf_free_space(root, buf);
8992                 for (i = 0; i < nritems; i++) {
8993                         struct btrfs_file_extent_item *fi;
8994                         btrfs_item_key_to_cpu(buf, &key, i);
8995                         /*
8996                          * Check key type against the leaf owner.
8997                          * Could filter quite a lot of early error if
8998                          * owner is correct
8999                          */
9000                         if (check_type_with_root(btrfs_header_owner(buf),
9001                                                  key.type)) {
9002                                 fprintf(stderr, "ignoring invalid key\n");
9003                                 continue;
9004                         }
9005                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
9006                                 process_extent_item(root, extent_cache, buf,
9007                                                     i);
9008                                 continue;
9009                         }
9010                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9011                                 process_extent_item(root, extent_cache, buf,
9012                                                     i);
9013                                 continue;
9014                         }
9015                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
9016                                 total_csum_bytes +=
9017                                         btrfs_item_size_nr(buf, i);
9018                                 continue;
9019                         }
9020                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
9021                                 process_chunk_item(chunk_cache, &key, buf, i);
9022                                 continue;
9023                         }
9024                         if (key.type == BTRFS_DEV_ITEM_KEY) {
9025                                 process_device_item(dev_cache, &key, buf, i);
9026                                 continue;
9027                         }
9028                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9029                                 process_block_group_item(block_group_cache,
9030                                         &key, buf, i);
9031                                 continue;
9032                         }
9033                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
9034                                 process_device_extent_item(dev_extent_cache,
9035                                         &key, buf, i);
9036                                 continue;
9037
9038                         }
9039                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
9040 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
9041                                 process_extent_ref_v0(extent_cache, buf, i);
9042 #else
9043                                 BUG();
9044 #endif
9045                                 continue;
9046                         }
9047
9048                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
9049                                 ret = add_tree_backref(extent_cache,
9050                                                 key.objectid, 0, key.offset, 0);
9051                                 if (ret < 0)
9052                                         error(
9053                                 "add_tree_backref failed (leaf tree block): %s",
9054                                               strerror(-ret));
9055                                 continue;
9056                         }
9057                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
9058                                 ret = add_tree_backref(extent_cache,
9059                                                 key.objectid, key.offset, 0, 0);
9060                                 if (ret < 0)
9061                                         error(
9062                                 "add_tree_backref failed (leaf shared block): %s",
9063                                               strerror(-ret));
9064                                 continue;
9065                         }
9066                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
9067                                 struct btrfs_extent_data_ref *ref;
9068                                 ref = btrfs_item_ptr(buf, i,
9069                                                 struct btrfs_extent_data_ref);
9070                                 add_data_backref(extent_cache,
9071                                         key.objectid, 0,
9072                                         btrfs_extent_data_ref_root(buf, ref),
9073                                         btrfs_extent_data_ref_objectid(buf,
9074                                                                        ref),
9075                                         btrfs_extent_data_ref_offset(buf, ref),
9076                                         btrfs_extent_data_ref_count(buf, ref),
9077                                         0, root->fs_info->sectorsize);
9078                                 continue;
9079                         }
9080                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
9081                                 struct btrfs_shared_data_ref *ref;
9082                                 ref = btrfs_item_ptr(buf, i,
9083                                                 struct btrfs_shared_data_ref);
9084                                 add_data_backref(extent_cache,
9085                                         key.objectid, key.offset, 0, 0, 0,
9086                                         btrfs_shared_data_ref_count(buf, ref),
9087                                         0, root->fs_info->sectorsize);
9088                                 continue;
9089                         }
9090                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
9091                                 struct bad_item *bad;
9092
9093                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
9094                                         continue;
9095                                 if (!owner)
9096                                         continue;
9097                                 bad = malloc(sizeof(struct bad_item));
9098                                 if (!bad)
9099                                         continue;
9100                                 INIT_LIST_HEAD(&bad->list);
9101                                 memcpy(&bad->key, &key,
9102                                        sizeof(struct btrfs_key));
9103                                 bad->root_id = owner;
9104                                 list_add_tail(&bad->list, &delete_items);
9105                                 continue;
9106                         }
9107                         if (key.type != BTRFS_EXTENT_DATA_KEY)
9108                                 continue;
9109                         fi = btrfs_item_ptr(buf, i,
9110                                             struct btrfs_file_extent_item);
9111                         if (btrfs_file_extent_type(buf, fi) ==
9112                             BTRFS_FILE_EXTENT_INLINE)
9113                                 continue;
9114                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
9115                                 continue;
9116
9117                         data_bytes_allocated +=
9118                                 btrfs_file_extent_disk_num_bytes(buf, fi);
9119                         if (data_bytes_allocated < root->fs_info->sectorsize) {
9120                                 abort();
9121                         }
9122                         data_bytes_referenced +=
9123                                 btrfs_file_extent_num_bytes(buf, fi);
9124                         add_data_backref(extent_cache,
9125                                 btrfs_file_extent_disk_bytenr(buf, fi),
9126                                 parent, owner, key.objectid, key.offset -
9127                                 btrfs_file_extent_offset(buf, fi), 1, 1,
9128                                 btrfs_file_extent_disk_num_bytes(buf, fi));
9129                 }
9130         } else {
9131                 int level;
9132                 struct btrfs_key first_key;
9133
9134                 first_key.objectid = 0;
9135
9136                 if (nritems > 0)
9137                         btrfs_item_key_to_cpu(buf, &first_key, 0);
9138                 level = btrfs_header_level(buf);
9139                 for (i = 0; i < nritems; i++) {
9140                         struct extent_record tmpl;
9141
9142                         ptr = btrfs_node_blockptr(buf, i);
9143                         size = root->fs_info->nodesize;
9144                         btrfs_node_key_to_cpu(buf, &key, i);
9145                         if (ri != NULL) {
9146                                 if ((level == ri->drop_level)
9147                                     && is_dropped_key(&key, &ri->drop_key)) {
9148                                         continue;
9149                                 }
9150                         }
9151
9152                         memset(&tmpl, 0, sizeof(tmpl));
9153                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9154                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9155                         tmpl.start = ptr;
9156                         tmpl.nr = size;
9157                         tmpl.refs = 1;
9158                         tmpl.metadata = 1;
9159                         tmpl.max_size = size;
9160                         ret = add_extent_rec(extent_cache, &tmpl);
9161                         if (ret < 0)
9162                                 goto out;
9163
9164                         ret = add_tree_backref(extent_cache, ptr, parent,
9165                                         owner, 1);
9166                         if (ret < 0) {
9167                                 error(
9168                                 "add_tree_backref failed (non-leaf block): %s",
9169                                       strerror(-ret));
9170                                 continue;
9171                         }
9172
9173                         if (level > 1) {
9174                                 add_pending(nodes, seen, ptr, size);
9175                         } else {
9176                                 add_pending(pending, seen, ptr, size);
9177                         }
9178                 }
9179                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
9180                                       nritems) * sizeof(struct btrfs_key_ptr);
9181         }
9182         total_btree_bytes += buf->len;
9183         if (fs_root_objectid(btrfs_header_owner(buf)))
9184                 total_fs_tree_bytes += buf->len;
9185         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9186                 total_extent_tree_bytes += buf->len;
9187 out:
9188         free_extent_buffer(buf);
9189         return ret;
9190 }
9191
9192 static int add_root_to_pending(struct extent_buffer *buf,
9193                                struct cache_tree *extent_cache,
9194                                struct cache_tree *pending,
9195                                struct cache_tree *seen,
9196                                struct cache_tree *nodes,
9197                                u64 objectid)
9198 {
9199         struct extent_record tmpl;
9200         int ret;
9201
9202         if (btrfs_header_level(buf) > 0)
9203                 add_pending(nodes, seen, buf->start, buf->len);
9204         else
9205                 add_pending(pending, seen, buf->start, buf->len);
9206
9207         memset(&tmpl, 0, sizeof(tmpl));
9208         tmpl.start = buf->start;
9209         tmpl.nr = buf->len;
9210         tmpl.is_root = 1;
9211         tmpl.refs = 1;
9212         tmpl.metadata = 1;
9213         tmpl.max_size = buf->len;
9214         add_extent_rec(extent_cache, &tmpl);
9215
9216         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9217             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9218                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9219                                 0, 1);
9220         else
9221                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9222                                 1);
9223         return ret;
9224 }
9225
9226 /* as we fix the tree, we might be deleting blocks that
9227  * we're tracking for repair.  This hook makes sure we
9228  * remove any backrefs for blocks as we are fixing them.
9229  */
9230 static int free_extent_hook(struct btrfs_trans_handle *trans,
9231                             struct btrfs_root *root,
9232                             u64 bytenr, u64 num_bytes, u64 parent,
9233                             u64 root_objectid, u64 owner, u64 offset,
9234                             int refs_to_drop)
9235 {
9236         struct extent_record *rec;
9237         struct cache_extent *cache;
9238         int is_data;
9239         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9240
9241         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9242         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9243         if (!cache)
9244                 return 0;
9245
9246         rec = container_of(cache, struct extent_record, cache);
9247         if (is_data) {
9248                 struct data_backref *back;
9249                 back = find_data_backref(rec, parent, root_objectid, owner,
9250                                          offset, 1, bytenr, num_bytes);
9251                 if (!back)
9252                         goto out;
9253                 if (back->node.found_ref) {
9254                         back->found_ref -= refs_to_drop;
9255                         if (rec->refs)
9256                                 rec->refs -= refs_to_drop;
9257                 }
9258                 if (back->node.found_extent_tree) {
9259                         back->num_refs -= refs_to_drop;
9260                         if (rec->extent_item_refs)
9261                                 rec->extent_item_refs -= refs_to_drop;
9262                 }
9263                 if (back->found_ref == 0)
9264                         back->node.found_ref = 0;
9265                 if (back->num_refs == 0)
9266                         back->node.found_extent_tree = 0;
9267
9268                 if (!back->node.found_extent_tree && back->node.found_ref) {
9269                         rb_erase(&back->node.node, &rec->backref_tree);
9270                         free(back);
9271                 }
9272         } else {
9273                 struct tree_backref *back;
9274                 back = find_tree_backref(rec, parent, root_objectid);
9275                 if (!back)
9276                         goto out;
9277                 if (back->node.found_ref) {
9278                         if (rec->refs)
9279                                 rec->refs--;
9280                         back->node.found_ref = 0;
9281                 }
9282                 if (back->node.found_extent_tree) {
9283                         if (rec->extent_item_refs)
9284                                 rec->extent_item_refs--;
9285                         back->node.found_extent_tree = 0;
9286                 }
9287                 if (!back->node.found_extent_tree && back->node.found_ref) {
9288                         rb_erase(&back->node.node, &rec->backref_tree);
9289                         free(back);
9290                 }
9291         }
9292         maybe_free_extent_rec(extent_cache, rec);
9293 out:
9294         return 0;
9295 }
9296
9297 static int delete_extent_records(struct btrfs_trans_handle *trans,
9298                                  struct btrfs_root *root,
9299                                  struct btrfs_path *path,
9300                                  u64 bytenr)
9301 {
9302         struct btrfs_key key;
9303         struct btrfs_key found_key;
9304         struct extent_buffer *leaf;
9305         int ret;
9306         int slot;
9307
9308
9309         key.objectid = bytenr;
9310         key.type = (u8)-1;
9311         key.offset = (u64)-1;
9312
9313         while(1) {
9314                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9315                                         &key, path, 0, 1);
9316                 if (ret < 0)
9317                         break;
9318
9319                 if (ret > 0) {
9320                         ret = 0;
9321                         if (path->slots[0] == 0)
9322                                 break;
9323                         path->slots[0]--;
9324                 }
9325                 ret = 0;
9326
9327                 leaf = path->nodes[0];
9328                 slot = path->slots[0];
9329
9330                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9331                 if (found_key.objectid != bytenr)
9332                         break;
9333
9334                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9335                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
9336                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9337                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9338                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9339                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9340                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9341                         btrfs_release_path(path);
9342                         if (found_key.type == 0) {
9343                                 if (found_key.offset == 0)
9344                                         break;
9345                                 key.offset = found_key.offset - 1;
9346                                 key.type = found_key.type;
9347                         }
9348                         key.type = found_key.type - 1;
9349                         key.offset = (u64)-1;
9350                         continue;
9351                 }
9352
9353                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9354                         found_key.objectid, found_key.type, found_key.offset);
9355
9356                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9357                 if (ret)
9358                         break;
9359                 btrfs_release_path(path);
9360
9361                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9362                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
9363                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9364                                 found_key.offset : root->fs_info->nodesize;
9365
9366                         ret = btrfs_update_block_group(root, bytenr,
9367                                                        bytes, 0, 0);
9368                         if (ret)
9369                                 break;
9370                 }
9371         }
9372
9373         btrfs_release_path(path);
9374         return ret;
9375 }
9376
9377 /*
9378  * for a single backref, this will allocate a new extent
9379  * and add the backref to it.
9380  */
9381 static int record_extent(struct btrfs_trans_handle *trans,
9382                          struct btrfs_fs_info *info,
9383                          struct btrfs_path *path,
9384                          struct extent_record *rec,
9385                          struct extent_backref *back,
9386                          int allocated, u64 flags)
9387 {
9388         int ret = 0;
9389         struct btrfs_root *extent_root = info->extent_root;
9390         struct extent_buffer *leaf;
9391         struct btrfs_key ins_key;
9392         struct btrfs_extent_item *ei;
9393         struct data_backref *dback;
9394         struct btrfs_tree_block_info *bi;
9395
9396         if (!back->is_data)
9397                 rec->max_size = max_t(u64, rec->max_size,
9398                                     info->nodesize);
9399
9400         if (!allocated) {
9401                 u32 item_size = sizeof(*ei);
9402
9403                 if (!back->is_data)
9404                         item_size += sizeof(*bi);
9405
9406                 ins_key.objectid = rec->start;
9407                 ins_key.offset = rec->max_size;
9408                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9409
9410                 ret = btrfs_insert_empty_item(trans, extent_root, path,
9411                                         &ins_key, item_size);
9412                 if (ret)
9413                         goto fail;
9414
9415                 leaf = path->nodes[0];
9416                 ei = btrfs_item_ptr(leaf, path->slots[0],
9417                                     struct btrfs_extent_item);
9418
9419                 btrfs_set_extent_refs(leaf, ei, 0);
9420                 btrfs_set_extent_generation(leaf, ei, rec->generation);
9421
9422                 if (back->is_data) {
9423                         btrfs_set_extent_flags(leaf, ei,
9424                                                BTRFS_EXTENT_FLAG_DATA);
9425                 } else {
9426                         struct btrfs_disk_key copy_key;;
9427
9428                         bi = (struct btrfs_tree_block_info *)(ei + 1);
9429                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
9430                                              sizeof(*bi));
9431
9432                         btrfs_set_disk_key_objectid(&copy_key,
9433                                                     rec->info_objectid);
9434                         btrfs_set_disk_key_type(&copy_key, 0);
9435                         btrfs_set_disk_key_offset(&copy_key, 0);
9436
9437                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9438                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
9439
9440                         btrfs_set_extent_flags(leaf, ei,
9441                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9442                 }
9443
9444                 btrfs_mark_buffer_dirty(leaf);
9445                 ret = btrfs_update_block_group(extent_root, rec->start,
9446                                                rec->max_size, 1, 0);
9447                 if (ret)
9448                         goto fail;
9449                 btrfs_release_path(path);
9450         }
9451
9452         if (back->is_data) {
9453                 u64 parent;
9454                 int i;
9455
9456                 dback = to_data_backref(back);
9457                 if (back->full_backref)
9458                         parent = dback->parent;
9459                 else
9460                         parent = 0;
9461
9462                 for (i = 0; i < dback->found_ref; i++) {
9463                         /* if parent != 0, we're doing a full backref
9464                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9465                          * just makes the backref allocator create a data
9466                          * backref
9467                          */
9468                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
9469                                                    rec->start, rec->max_size,
9470                                                    parent,
9471                                                    dback->root,
9472                                                    parent ?
9473                                                    BTRFS_FIRST_FREE_OBJECTID :
9474                                                    dback->owner,
9475                                                    dback->offset);
9476                         if (ret)
9477                                 break;
9478                 }
9479                 fprintf(stderr, "adding new data backref"
9480                                 " on %llu %s %llu owner %llu"
9481                                 " offset %llu found %d\n",
9482                                 (unsigned long long)rec->start,
9483                                 back->full_backref ?
9484                                 "parent" : "root",
9485                                 back->full_backref ?
9486                                 (unsigned long long)parent :
9487                                 (unsigned long long)dback->root,
9488                                 (unsigned long long)dback->owner,
9489                                 (unsigned long long)dback->offset,
9490                                 dback->found_ref);
9491         } else {
9492                 u64 parent;
9493                 struct tree_backref *tback;
9494
9495                 tback = to_tree_backref(back);
9496                 if (back->full_backref)
9497                         parent = tback->parent;
9498                 else
9499                         parent = 0;
9500
9501                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9502                                            rec->start, rec->max_size,
9503                                            parent, tback->root, 0, 0);
9504                 fprintf(stderr, "adding new tree backref on "
9505                         "start %llu len %llu parent %llu root %llu\n",
9506                         rec->start, rec->max_size, parent, tback->root);
9507         }
9508 fail:
9509         btrfs_release_path(path);
9510         return ret;
9511 }
9512
9513 static struct extent_entry *find_entry(struct list_head *entries,
9514                                        u64 bytenr, u64 bytes)
9515 {
9516         struct extent_entry *entry = NULL;
9517
9518         list_for_each_entry(entry, entries, list) {
9519                 if (entry->bytenr == bytenr && entry->bytes == bytes)
9520                         return entry;
9521         }
9522
9523         return NULL;
9524 }
9525
9526 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9527 {
9528         struct extent_entry *entry, *best = NULL, *prev = NULL;
9529
9530         list_for_each_entry(entry, entries, list) {
9531                 /*
9532                  * If there are as many broken entries as entries then we know
9533                  * not to trust this particular entry.
9534                  */
9535                 if (entry->broken == entry->count)
9536                         continue;
9537
9538                 /*
9539                  * Special case, when there are only two entries and 'best' is
9540                  * the first one
9541                  */
9542                 if (!prev) {
9543                         best = entry;
9544                         prev = entry;
9545                         continue;
9546                 }
9547
9548                 /*
9549                  * If our current entry == best then we can't be sure our best
9550                  * is really the best, so we need to keep searching.
9551                  */
9552                 if (best && best->count == entry->count) {
9553                         prev = entry;
9554                         best = NULL;
9555                         continue;
9556                 }
9557
9558                 /* Prev == entry, not good enough, have to keep searching */
9559                 if (!prev->broken && prev->count == entry->count)
9560                         continue;
9561
9562                 if (!best)
9563                         best = (prev->count > entry->count) ? prev : entry;
9564                 else if (best->count < entry->count)
9565                         best = entry;
9566                 prev = entry;
9567         }
9568
9569         return best;
9570 }
9571
9572 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9573                       struct data_backref *dback, struct extent_entry *entry)
9574 {
9575         struct btrfs_trans_handle *trans;
9576         struct btrfs_root *root;
9577         struct btrfs_file_extent_item *fi;
9578         struct extent_buffer *leaf;
9579         struct btrfs_key key;
9580         u64 bytenr, bytes;
9581         int ret, err;
9582
9583         key.objectid = dback->root;
9584         key.type = BTRFS_ROOT_ITEM_KEY;
9585         key.offset = (u64)-1;
9586         root = btrfs_read_fs_root(info, &key);
9587         if (IS_ERR(root)) {
9588                 fprintf(stderr, "Couldn't find root for our ref\n");
9589                 return -EINVAL;
9590         }
9591
9592         /*
9593          * The backref points to the original offset of the extent if it was
9594          * split, so we need to search down to the offset we have and then walk
9595          * forward until we find the backref we're looking for.
9596          */
9597         key.objectid = dback->owner;
9598         key.type = BTRFS_EXTENT_DATA_KEY;
9599         key.offset = dback->offset;
9600         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9601         if (ret < 0) {
9602                 fprintf(stderr, "Error looking up ref %d\n", ret);
9603                 return ret;
9604         }
9605
9606         while (1) {
9607                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9608                         ret = btrfs_next_leaf(root, path);
9609                         if (ret) {
9610                                 fprintf(stderr, "Couldn't find our ref, next\n");
9611                                 return -EINVAL;
9612                         }
9613                 }
9614                 leaf = path->nodes[0];
9615                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9616                 if (key.objectid != dback->owner ||
9617                     key.type != BTRFS_EXTENT_DATA_KEY) {
9618                         fprintf(stderr, "Couldn't find our ref, search\n");
9619                         return -EINVAL;
9620                 }
9621                 fi = btrfs_item_ptr(leaf, path->slots[0],
9622                                     struct btrfs_file_extent_item);
9623                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9624                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9625
9626                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9627                         break;
9628                 path->slots[0]++;
9629         }
9630
9631         btrfs_release_path(path);
9632
9633         trans = btrfs_start_transaction(root, 1);
9634         if (IS_ERR(trans))
9635                 return PTR_ERR(trans);
9636
9637         /*
9638          * Ok we have the key of the file extent we want to fix, now we can cow
9639          * down to the thing and fix it.
9640          */
9641         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9642         if (ret < 0) {
9643                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9644                         key.objectid, key.type, key.offset, ret);
9645                 goto out;
9646         }
9647         if (ret > 0) {
9648                 fprintf(stderr, "Well that's odd, we just found this key "
9649                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9650                         key.offset);
9651                 ret = -EINVAL;
9652                 goto out;
9653         }
9654         leaf = path->nodes[0];
9655         fi = btrfs_item_ptr(leaf, path->slots[0],
9656                             struct btrfs_file_extent_item);
9657
9658         if (btrfs_file_extent_compression(leaf, fi) &&
9659             dback->disk_bytenr != entry->bytenr) {
9660                 fprintf(stderr, "Ref doesn't match the record start and is "
9661                         "compressed, please take a btrfs-image of this file "
9662                         "system and send it to a btrfs developer so they can "
9663                         "complete this functionality for bytenr %Lu\n",
9664                         dback->disk_bytenr);
9665                 ret = -EINVAL;
9666                 goto out;
9667         }
9668
9669         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9670                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9671         } else if (dback->disk_bytenr > entry->bytenr) {
9672                 u64 off_diff, offset;
9673
9674                 off_diff = dback->disk_bytenr - entry->bytenr;
9675                 offset = btrfs_file_extent_offset(leaf, fi);
9676                 if (dback->disk_bytenr + offset +
9677                     btrfs_file_extent_num_bytes(leaf, fi) >
9678                     entry->bytenr + entry->bytes) {
9679                         fprintf(stderr, "Ref is past the entry end, please "
9680                                 "take a btrfs-image of this file system and "
9681                                 "send it to a btrfs developer, ref %Lu\n",
9682                                 dback->disk_bytenr);
9683                         ret = -EINVAL;
9684                         goto out;
9685                 }
9686                 offset += off_diff;
9687                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9688                 btrfs_set_file_extent_offset(leaf, fi, offset);
9689         } else if (dback->disk_bytenr < entry->bytenr) {
9690                 u64 offset;
9691
9692                 offset = btrfs_file_extent_offset(leaf, fi);
9693                 if (dback->disk_bytenr + offset < entry->bytenr) {
9694                         fprintf(stderr, "Ref is before the entry start, please"
9695                                 " take a btrfs-image of this file system and "
9696                                 "send it to a btrfs developer, ref %Lu\n",
9697                                 dback->disk_bytenr);
9698                         ret = -EINVAL;
9699                         goto out;
9700                 }
9701
9702                 offset += dback->disk_bytenr;
9703                 offset -= entry->bytenr;
9704                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9705                 btrfs_set_file_extent_offset(leaf, fi, offset);
9706         }
9707
9708         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9709
9710         /*
9711          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9712          * only do this if we aren't using compression, otherwise it's a
9713          * trickier case.
9714          */
9715         if (!btrfs_file_extent_compression(leaf, fi))
9716                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9717         else
9718                 printf("ram bytes may be wrong?\n");
9719         btrfs_mark_buffer_dirty(leaf);
9720 out:
9721         err = btrfs_commit_transaction(trans, root);
9722         btrfs_release_path(path);
9723         return ret ? ret : err;
9724 }
9725
9726 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9727                            struct extent_record *rec)
9728 {
9729         struct extent_backref *back, *tmp;
9730         struct data_backref *dback;
9731         struct extent_entry *entry, *best = NULL;
9732         LIST_HEAD(entries);
9733         int nr_entries = 0;
9734         int broken_entries = 0;
9735         int ret = 0;
9736         short mismatch = 0;
9737
9738         /*
9739          * Metadata is easy and the backrefs should always agree on bytenr and
9740          * size, if not we've got bigger issues.
9741          */
9742         if (rec->metadata)
9743                 return 0;
9744
9745         rbtree_postorder_for_each_entry_safe(back, tmp,
9746                                              &rec->backref_tree, node) {
9747                 if (back->full_backref || !back->is_data)
9748                         continue;
9749
9750                 dback = to_data_backref(back);
9751
9752                 /*
9753                  * We only pay attention to backrefs that we found a real
9754                  * backref for.
9755                  */
9756                 if (dback->found_ref == 0)
9757                         continue;
9758
9759                 /*
9760                  * For now we only catch when the bytes don't match, not the
9761                  * bytenr.  We can easily do this at the same time, but I want
9762                  * to have a fs image to test on before we just add repair
9763                  * functionality willy-nilly so we know we won't screw up the
9764                  * repair.
9765                  */
9766
9767                 entry = find_entry(&entries, dback->disk_bytenr,
9768                                    dback->bytes);
9769                 if (!entry) {
9770                         entry = malloc(sizeof(struct extent_entry));
9771                         if (!entry) {
9772                                 ret = -ENOMEM;
9773                                 goto out;
9774                         }
9775                         memset(entry, 0, sizeof(*entry));
9776                         entry->bytenr = dback->disk_bytenr;
9777                         entry->bytes = dback->bytes;
9778                         list_add_tail(&entry->list, &entries);
9779                         nr_entries++;
9780                 }
9781
9782                 /*
9783                  * If we only have on entry we may think the entries agree when
9784                  * in reality they don't so we have to do some extra checking.
9785                  */
9786                 if (dback->disk_bytenr != rec->start ||
9787                     dback->bytes != rec->nr || back->broken)
9788                         mismatch = 1;
9789
9790                 if (back->broken) {
9791                         entry->broken++;
9792                         broken_entries++;
9793                 }
9794
9795                 entry->count++;
9796         }
9797
9798         /* Yay all the backrefs agree, carry on good sir */
9799         if (nr_entries <= 1 && !mismatch)
9800                 goto out;
9801
9802         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9803                 "%Lu\n", rec->start);
9804
9805         /*
9806          * First we want to see if the backrefs can agree amongst themselves who
9807          * is right, so figure out which one of the entries has the highest
9808          * count.
9809          */
9810         best = find_most_right_entry(&entries);
9811
9812         /*
9813          * Ok so we may have an even split between what the backrefs think, so
9814          * this is where we use the extent ref to see what it thinks.
9815          */
9816         if (!best) {
9817                 entry = find_entry(&entries, rec->start, rec->nr);
9818                 if (!entry && (!broken_entries || !rec->found_rec)) {
9819                         fprintf(stderr, "Backrefs don't agree with each other "
9820                                 "and extent record doesn't agree with anybody,"
9821                                 " so we can't fix bytenr %Lu bytes %Lu\n",
9822                                 rec->start, rec->nr);
9823                         ret = -EINVAL;
9824                         goto out;
9825                 } else if (!entry) {
9826                         /*
9827                          * Ok our backrefs were broken, we'll assume this is the
9828                          * correct value and add an entry for this range.
9829                          */
9830                         entry = malloc(sizeof(struct extent_entry));
9831                         if (!entry) {
9832                                 ret = -ENOMEM;
9833                                 goto out;
9834                         }
9835                         memset(entry, 0, sizeof(*entry));
9836                         entry->bytenr = rec->start;
9837                         entry->bytes = rec->nr;
9838                         list_add_tail(&entry->list, &entries);
9839                         nr_entries++;
9840                 }
9841                 entry->count++;
9842                 best = find_most_right_entry(&entries);
9843                 if (!best) {
9844                         fprintf(stderr, "Backrefs and extent record evenly "
9845                                 "split on who is right, this is going to "
9846                                 "require user input to fix bytenr %Lu bytes "
9847                                 "%Lu\n", rec->start, rec->nr);
9848                         ret = -EINVAL;
9849                         goto out;
9850                 }
9851         }
9852
9853         /*
9854          * I don't think this can happen currently as we'll abort() if we catch
9855          * this case higher up, but in case somebody removes that we still can't
9856          * deal with it properly here yet, so just bail out of that's the case.
9857          */
9858         if (best->bytenr != rec->start) {
9859                 fprintf(stderr, "Extent start and backref starts don't match, "
9860                         "please use btrfs-image on this file system and send "
9861                         "it to a btrfs developer so they can make fsck fix "
9862                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
9863                         rec->start, rec->nr);
9864                 ret = -EINVAL;
9865                 goto out;
9866         }
9867
9868         /*
9869          * Ok great we all agreed on an extent record, let's go find the real
9870          * references and fix up the ones that don't match.
9871          */
9872         rbtree_postorder_for_each_entry_safe(back, tmp,
9873                                              &rec->backref_tree, node) {
9874                 if (back->full_backref || !back->is_data)
9875                         continue;
9876
9877                 dback = to_data_backref(back);
9878
9879                 /*
9880                  * Still ignoring backrefs that don't have a real ref attached
9881                  * to them.
9882                  */
9883                 if (dback->found_ref == 0)
9884                         continue;
9885
9886                 if (dback->bytes == best->bytes &&
9887                     dback->disk_bytenr == best->bytenr)
9888                         continue;
9889
9890                 ret = repair_ref(info, path, dback, best);
9891                 if (ret)
9892                         goto out;
9893         }
9894
9895         /*
9896          * Ok we messed with the actual refs, which means we need to drop our
9897          * entire cache and go back and rescan.  I know this is a huge pain and
9898          * adds a lot of extra work, but it's the only way to be safe.  Once all
9899          * the backrefs agree we may not need to do anything to the extent
9900          * record itself.
9901          */
9902         ret = -EAGAIN;
9903 out:
9904         while (!list_empty(&entries)) {
9905                 entry = list_entry(entries.next, struct extent_entry, list);
9906                 list_del_init(&entry->list);
9907                 free(entry);
9908         }
9909         return ret;
9910 }
9911
9912 static int process_duplicates(struct cache_tree *extent_cache,
9913                               struct extent_record *rec)
9914 {
9915         struct extent_record *good, *tmp;
9916         struct cache_extent *cache;
9917         int ret;
9918
9919         /*
9920          * If we found a extent record for this extent then return, or if we
9921          * have more than one duplicate we are likely going to need to delete
9922          * something.
9923          */
9924         if (rec->found_rec || rec->num_duplicates > 1)
9925                 return 0;
9926
9927         /* Shouldn't happen but just in case */
9928         BUG_ON(!rec->num_duplicates);
9929
9930         /*
9931          * So this happens if we end up with a backref that doesn't match the
9932          * actual extent entry.  So either the backref is bad or the extent
9933          * entry is bad.  Either way we want to have the extent_record actually
9934          * reflect what we found in the extent_tree, so we need to take the
9935          * duplicate out and use that as the extent_record since the only way we
9936          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9937          */
9938         remove_cache_extent(extent_cache, &rec->cache);
9939
9940         good = to_extent_record(rec->dups.next);
9941         list_del_init(&good->list);
9942         INIT_LIST_HEAD(&good->backrefs);
9943         INIT_LIST_HEAD(&good->dups);
9944         good->cache.start = good->start;
9945         good->cache.size = good->nr;
9946         good->content_checked = 0;
9947         good->owner_ref_checked = 0;
9948         good->num_duplicates = 0;
9949         good->refs = rec->refs;
9950         list_splice_init(&rec->backrefs, &good->backrefs);
9951         while (1) {
9952                 cache = lookup_cache_extent(extent_cache, good->start,
9953                                             good->nr);
9954                 if (!cache)
9955                         break;
9956                 tmp = container_of(cache, struct extent_record, cache);
9957
9958                 /*
9959                  * If we find another overlapping extent and it's found_rec is
9960                  * set then it's a duplicate and we need to try and delete
9961                  * something.
9962                  */
9963                 if (tmp->found_rec || tmp->num_duplicates > 0) {
9964                         if (list_empty(&good->list))
9965                                 list_add_tail(&good->list,
9966                                               &duplicate_extents);
9967                         good->num_duplicates += tmp->num_duplicates + 1;
9968                         list_splice_init(&tmp->dups, &good->dups);
9969                         list_del_init(&tmp->list);
9970                         list_add_tail(&tmp->list, &good->dups);
9971                         remove_cache_extent(extent_cache, &tmp->cache);
9972                         continue;
9973                 }
9974
9975                 /*
9976                  * Ok we have another non extent item backed extent rec, so lets
9977                  * just add it to this extent and carry on like we did above.
9978                  */
9979                 good->refs += tmp->refs;
9980                 list_splice_init(&tmp->backrefs, &good->backrefs);
9981                 remove_cache_extent(extent_cache, &tmp->cache);
9982                 free(tmp);
9983         }
9984         ret = insert_cache_extent(extent_cache, &good->cache);
9985         BUG_ON(ret);
9986         free(rec);
9987         return good->num_duplicates ? 0 : 1;
9988 }
9989
9990 static int delete_duplicate_records(struct btrfs_root *root,
9991                                     struct extent_record *rec)
9992 {
9993         struct btrfs_trans_handle *trans;
9994         LIST_HEAD(delete_list);
9995         struct btrfs_path path;
9996         struct extent_record *tmp, *good, *n;
9997         int nr_del = 0;
9998         int ret = 0, err;
9999         struct btrfs_key key;
10000
10001         btrfs_init_path(&path);
10002
10003         good = rec;
10004         /* Find the record that covers all of the duplicates. */
10005         list_for_each_entry(tmp, &rec->dups, list) {
10006                 if (good->start < tmp->start)
10007                         continue;
10008                 if (good->nr > tmp->nr)
10009                         continue;
10010
10011                 if (tmp->start + tmp->nr < good->start + good->nr) {
10012                         fprintf(stderr, "Ok we have overlapping extents that "
10013                                 "aren't completely covered by each other, this "
10014                                 "is going to require more careful thought.  "
10015                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
10016                                 tmp->start, tmp->nr, good->start, good->nr);
10017                         abort();
10018                 }
10019                 good = tmp;
10020         }
10021
10022         if (good != rec)
10023                 list_add_tail(&rec->list, &delete_list);
10024
10025         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
10026                 if (tmp == good)
10027                         continue;
10028                 list_move_tail(&tmp->list, &delete_list);
10029         }
10030
10031         root = root->fs_info->extent_root;
10032         trans = btrfs_start_transaction(root, 1);
10033         if (IS_ERR(trans)) {
10034                 ret = PTR_ERR(trans);
10035                 goto out;
10036         }
10037
10038         list_for_each_entry(tmp, &delete_list, list) {
10039                 if (tmp->found_rec == 0)
10040                         continue;
10041                 key.objectid = tmp->start;
10042                 key.type = BTRFS_EXTENT_ITEM_KEY;
10043                 key.offset = tmp->nr;
10044
10045                 /* Shouldn't happen but just in case */
10046                 if (tmp->metadata) {
10047                         fprintf(stderr, "Well this shouldn't happen, extent "
10048                                 "record overlaps but is metadata? "
10049                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
10050                         abort();
10051                 }
10052
10053                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10054                 if (ret) {
10055                         if (ret > 0)
10056                                 ret = -EINVAL;
10057                         break;
10058                 }
10059                 ret = btrfs_del_item(trans, root, &path);
10060                 if (ret)
10061                         break;
10062                 btrfs_release_path(&path);
10063                 nr_del++;
10064         }
10065         err = btrfs_commit_transaction(trans, root);
10066         if (err && !ret)
10067                 ret = err;
10068 out:
10069         while (!list_empty(&delete_list)) {
10070                 tmp = to_extent_record(delete_list.next);
10071                 list_del_init(&tmp->list);
10072                 if (tmp == rec)
10073                         continue;
10074                 free(tmp);
10075         }
10076
10077         while (!list_empty(&rec->dups)) {
10078                 tmp = to_extent_record(rec->dups.next);
10079                 list_del_init(&tmp->list);
10080                 free(tmp);
10081         }
10082
10083         btrfs_release_path(&path);
10084
10085         if (!ret && !nr_del)
10086                 rec->num_duplicates = 0;
10087
10088         return ret ? ret : nr_del;
10089 }
10090
10091 static int find_possible_backrefs(struct btrfs_fs_info *info,
10092                                   struct btrfs_path *path,
10093                                   struct cache_tree *extent_cache,
10094                                   struct extent_record *rec)
10095 {
10096         struct btrfs_root *root;
10097         struct extent_backref *back, *tmp;
10098         struct data_backref *dback;
10099         struct cache_extent *cache;
10100         struct btrfs_file_extent_item *fi;
10101         struct btrfs_key key;
10102         u64 bytenr, bytes;
10103         int ret;
10104
10105         rbtree_postorder_for_each_entry_safe(back, tmp,
10106                                              &rec->backref_tree, node) {
10107                 /* Don't care about full backrefs (poor unloved backrefs) */
10108                 if (back->full_backref || !back->is_data)
10109                         continue;
10110
10111                 dback = to_data_backref(back);
10112
10113                 /* We found this one, we don't need to do a lookup */
10114                 if (dback->found_ref)
10115                         continue;
10116
10117                 key.objectid = dback->root;
10118                 key.type = BTRFS_ROOT_ITEM_KEY;
10119                 key.offset = (u64)-1;
10120
10121                 root = btrfs_read_fs_root(info, &key);
10122
10123                 /* No root, definitely a bad ref, skip */
10124                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
10125                         continue;
10126                 /* Other err, exit */
10127                 if (IS_ERR(root))
10128                         return PTR_ERR(root);
10129
10130                 key.objectid = dback->owner;
10131                 key.type = BTRFS_EXTENT_DATA_KEY;
10132                 key.offset = dback->offset;
10133                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
10134                 if (ret) {
10135                         btrfs_release_path(path);
10136                         if (ret < 0)
10137                                 return ret;
10138                         /* Didn't find it, we can carry on */
10139                         ret = 0;
10140                         continue;
10141                 }
10142
10143                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10144                                     struct btrfs_file_extent_item);
10145                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10146                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10147                 btrfs_release_path(path);
10148                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
10149                 if (cache) {
10150                         struct extent_record *tmp;
10151                         tmp = container_of(cache, struct extent_record, cache);
10152
10153                         /*
10154                          * If we found an extent record for the bytenr for this
10155                          * particular backref then we can't add it to our
10156                          * current extent record.  We only want to add backrefs
10157                          * that don't have a corresponding extent item in the
10158                          * extent tree since they likely belong to this record
10159                          * and we need to fix it if it doesn't match bytenrs.
10160                          */
10161                         if  (tmp->found_rec)
10162                                 continue;
10163                 }
10164
10165                 dback->found_ref += 1;
10166                 dback->disk_bytenr = bytenr;
10167                 dback->bytes = bytes;
10168
10169                 /*
10170                  * Set this so the verify backref code knows not to trust the
10171                  * values in this backref.
10172                  */
10173                 back->broken = 1;
10174         }
10175
10176         return 0;
10177 }
10178
10179 /*
10180  * Record orphan data ref into corresponding root.
10181  *
10182  * Return 0 if the extent item contains data ref and recorded.
10183  * Return 1 if the extent item contains no useful data ref
10184  *   On that case, it may contains only shared_dataref or metadata backref
10185  *   or the file extent exists(this should be handled by the extent bytenr
10186  *   recovery routine)
10187  * Return <0 if something goes wrong.
10188  */
10189 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10190                                       struct extent_record *rec)
10191 {
10192         struct btrfs_key key;
10193         struct btrfs_root *dest_root;
10194         struct extent_backref *back, *tmp;
10195         struct data_backref *dback;
10196         struct orphan_data_extent *orphan;
10197         struct btrfs_path path;
10198         int recorded_data_ref = 0;
10199         int ret = 0;
10200
10201         if (rec->metadata)
10202                 return 1;
10203         btrfs_init_path(&path);
10204         rbtree_postorder_for_each_entry_safe(back, tmp,
10205                                              &rec->backref_tree, node) {
10206                 if (back->full_backref || !back->is_data ||
10207                     !back->found_extent_tree)
10208                         continue;
10209                 dback = to_data_backref(back);
10210                 if (dback->found_ref)
10211                         continue;
10212                 key.objectid = dback->root;
10213                 key.type = BTRFS_ROOT_ITEM_KEY;
10214                 key.offset = (u64)-1;
10215
10216                 dest_root = btrfs_read_fs_root(fs_info, &key);
10217
10218                 /* For non-exist root we just skip it */
10219                 if (IS_ERR(dest_root) || !dest_root)
10220                         continue;
10221
10222                 key.objectid = dback->owner;
10223                 key.type = BTRFS_EXTENT_DATA_KEY;
10224                 key.offset = dback->offset;
10225
10226                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10227                 btrfs_release_path(&path);
10228                 /*
10229                  * For ret < 0, it's OK since the fs-tree may be corrupted,
10230                  * we need to record it for inode/file extent rebuild.
10231                  * For ret > 0, we record it only for file extent rebuild.
10232                  * For ret == 0, the file extent exists but only bytenr
10233                  * mismatch, let the original bytenr fix routine to handle,
10234                  * don't record it.
10235                  */
10236                 if (ret == 0)
10237                         continue;
10238                 ret = 0;
10239                 orphan = malloc(sizeof(*orphan));
10240                 if (!orphan) {
10241                         ret = -ENOMEM;
10242                         goto out;
10243                 }
10244                 INIT_LIST_HEAD(&orphan->list);
10245                 orphan->root = dback->root;
10246                 orphan->objectid = dback->owner;
10247                 orphan->offset = dback->offset;
10248                 orphan->disk_bytenr = rec->cache.start;
10249                 orphan->disk_len = rec->cache.size;
10250                 list_add(&dest_root->orphan_data_extents, &orphan->list);
10251                 recorded_data_ref = 1;
10252         }
10253 out:
10254         btrfs_release_path(&path);
10255         if (!ret)
10256                 return !recorded_data_ref;
10257         else
10258                 return ret;
10259 }
10260
10261 /*
10262  * when an incorrect extent item is found, this will delete
10263  * all of the existing entries for it and recreate them
10264  * based on what the tree scan found.
10265  */
10266 static int fixup_extent_refs(struct btrfs_fs_info *info,
10267                              struct cache_tree *extent_cache,
10268                              struct extent_record *rec)
10269 {
10270         struct btrfs_trans_handle *trans = NULL;
10271         int ret;
10272         struct btrfs_path path;
10273         struct cache_extent *cache;
10274         struct extent_backref *back, *tmp;
10275         int allocated = 0;
10276         u64 flags = 0;
10277
10278         if (rec->flag_block_full_backref)
10279                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10280
10281         btrfs_init_path(&path);
10282         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10283                 /*
10284                  * Sometimes the backrefs themselves are so broken they don't
10285                  * get attached to any meaningful rec, so first go back and
10286                  * check any of our backrefs that we couldn't find and throw
10287                  * them into the list if we find the backref so that
10288                  * verify_backrefs can figure out what to do.
10289                  */
10290                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10291                 if (ret < 0)
10292                         goto out;
10293         }
10294
10295         /* step one, make sure all of the backrefs agree */
10296         ret = verify_backrefs(info, &path, rec);
10297         if (ret < 0)
10298                 goto out;
10299
10300         trans = btrfs_start_transaction(info->extent_root, 1);
10301         if (IS_ERR(trans)) {
10302                 ret = PTR_ERR(trans);
10303                 goto out;
10304         }
10305
10306         /* step two, delete all the existing records */
10307         ret = delete_extent_records(trans, info->extent_root, &path,
10308                                     rec->start);
10309
10310         if (ret < 0)
10311                 goto out;
10312
10313         /* was this block corrupt?  If so, don't add references to it */
10314         cache = lookup_cache_extent(info->corrupt_blocks,
10315                                     rec->start, rec->max_size);
10316         if (cache) {
10317                 ret = 0;
10318                 goto out;
10319         }
10320
10321         /* step three, recreate all the refs we did find */
10322         rbtree_postorder_for_each_entry_safe(back, tmp,
10323                                              &rec->backref_tree, node) {
10324                 /*
10325                  * if we didn't find any references, don't create a
10326                  * new extent record
10327                  */
10328                 if (!back->found_ref)
10329                         continue;
10330
10331                 rec->bad_full_backref = 0;
10332                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10333                 allocated = 1;
10334
10335                 if (ret)
10336                         goto out;
10337         }
10338 out:
10339         if (trans) {
10340                 int err = btrfs_commit_transaction(trans, info->extent_root);
10341                 if (!ret)
10342                         ret = err;
10343         }
10344
10345         if (!ret)
10346                 fprintf(stderr, "Repaired extent references for %llu\n",
10347                                 (unsigned long long)rec->start);
10348
10349         btrfs_release_path(&path);
10350         return ret;
10351 }
10352
10353 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10354                               struct extent_record *rec)
10355 {
10356         struct btrfs_trans_handle *trans;
10357         struct btrfs_root *root = fs_info->extent_root;
10358         struct btrfs_path path;
10359         struct btrfs_extent_item *ei;
10360         struct btrfs_key key;
10361         u64 flags;
10362         int ret = 0;
10363
10364         key.objectid = rec->start;
10365         if (rec->metadata) {
10366                 key.type = BTRFS_METADATA_ITEM_KEY;
10367                 key.offset = rec->info_level;
10368         } else {
10369                 key.type = BTRFS_EXTENT_ITEM_KEY;
10370                 key.offset = rec->max_size;
10371         }
10372
10373         trans = btrfs_start_transaction(root, 0);
10374         if (IS_ERR(trans))
10375                 return PTR_ERR(trans);
10376
10377         btrfs_init_path(&path);
10378         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10379         if (ret < 0) {
10380                 btrfs_release_path(&path);
10381                 btrfs_commit_transaction(trans, root);
10382                 return ret;
10383         } else if (ret) {
10384                 fprintf(stderr, "Didn't find extent for %llu\n",
10385                         (unsigned long long)rec->start);
10386                 btrfs_release_path(&path);
10387                 btrfs_commit_transaction(trans, root);
10388                 return -ENOENT;
10389         }
10390
10391         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10392                             struct btrfs_extent_item);
10393         flags = btrfs_extent_flags(path.nodes[0], ei);
10394         if (rec->flag_block_full_backref) {
10395                 fprintf(stderr, "setting full backref on %llu\n",
10396                         (unsigned long long)key.objectid);
10397                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10398         } else {
10399                 fprintf(stderr, "clearing full backref on %llu\n",
10400                         (unsigned long long)key.objectid);
10401                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10402         }
10403         btrfs_set_extent_flags(path.nodes[0], ei, flags);
10404         btrfs_mark_buffer_dirty(path.nodes[0]);
10405         btrfs_release_path(&path);
10406         ret = btrfs_commit_transaction(trans, root);
10407         if (!ret)
10408                 fprintf(stderr, "Repaired extent flags for %llu\n",
10409                                 (unsigned long long)rec->start);
10410
10411         return ret;
10412 }
10413
10414 /* right now we only prune from the extent allocation tree */
10415 static int prune_one_block(struct btrfs_trans_handle *trans,
10416                            struct btrfs_fs_info *info,
10417                            struct btrfs_corrupt_block *corrupt)
10418 {
10419         int ret;
10420         struct btrfs_path path;
10421         struct extent_buffer *eb;
10422         u64 found;
10423         int slot;
10424         int nritems;
10425         int level = corrupt->level + 1;
10426
10427         btrfs_init_path(&path);
10428 again:
10429         /* we want to stop at the parent to our busted block */
10430         path.lowest_level = level;
10431
10432         ret = btrfs_search_slot(trans, info->extent_root,
10433                                 &corrupt->key, &path, -1, 1);
10434
10435         if (ret < 0)
10436                 goto out;
10437
10438         eb = path.nodes[level];
10439         if (!eb) {
10440                 ret = -ENOENT;
10441                 goto out;
10442         }
10443
10444         /*
10445          * hopefully the search gave us the block we want to prune,
10446          * lets try that first
10447          */
10448         slot = path.slots[level];
10449         found =  btrfs_node_blockptr(eb, slot);
10450         if (found == corrupt->cache.start)
10451                 goto del_ptr;
10452
10453         nritems = btrfs_header_nritems(eb);
10454
10455         /* the search failed, lets scan this node and hope we find it */
10456         for (slot = 0; slot < nritems; slot++) {
10457                 found =  btrfs_node_blockptr(eb, slot);
10458                 if (found == corrupt->cache.start)
10459                         goto del_ptr;
10460         }
10461         /*
10462          * we couldn't find the bad block.  TODO, search all the nodes for pointers
10463          * to this block
10464          */
10465         if (eb == info->extent_root->node) {
10466                 ret = -ENOENT;
10467                 goto out;
10468         } else {
10469                 level++;
10470                 btrfs_release_path(&path);
10471                 goto again;
10472         }
10473
10474 del_ptr:
10475         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10476         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10477
10478 out:
10479         btrfs_release_path(&path);
10480         return ret;
10481 }
10482
10483 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10484 {
10485         struct btrfs_trans_handle *trans = NULL;
10486         struct cache_extent *cache;
10487         struct btrfs_corrupt_block *corrupt;
10488
10489         while (1) {
10490                 cache = search_cache_extent(info->corrupt_blocks, 0);
10491                 if (!cache)
10492                         break;
10493                 if (!trans) {
10494                         trans = btrfs_start_transaction(info->extent_root, 1);
10495                         if (IS_ERR(trans))
10496                                 return PTR_ERR(trans);
10497                 }
10498                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10499                 prune_one_block(trans, info, corrupt);
10500                 remove_cache_extent(info->corrupt_blocks, cache);
10501         }
10502         if (trans)
10503                 return btrfs_commit_transaction(trans, info->extent_root);
10504         return 0;
10505 }
10506
10507 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10508 {
10509         struct btrfs_block_group_cache *cache;
10510         u64 start, end;
10511         int ret;
10512
10513         while (1) {
10514                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10515                                             &start, &end, EXTENT_DIRTY);
10516                 if (ret)
10517                         break;
10518                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10519         }
10520
10521         start = 0;
10522         while (1) {
10523                 cache = btrfs_lookup_first_block_group(fs_info, start);
10524                 if (!cache)
10525                         break;
10526                 if (cache->cached)
10527                         cache->cached = 0;
10528                 start = cache->key.objectid + cache->key.offset;
10529         }
10530 }
10531
10532 static int check_extent_refs(struct btrfs_root *root,
10533                              struct cache_tree *extent_cache)
10534 {
10535         struct extent_record *rec;
10536         struct cache_extent *cache;
10537         int ret = 0;
10538         int had_dups = 0;
10539         int err = 0;
10540
10541         if (repair) {
10542                 /*
10543                  * if we're doing a repair, we have to make sure
10544                  * we don't allocate from the problem extents.
10545                  * In the worst case, this will be all the
10546                  * extents in the FS
10547                  */
10548                 cache = search_cache_extent(extent_cache, 0);
10549                 while(cache) {
10550                         rec = container_of(cache, struct extent_record, cache);
10551                         set_extent_dirty(root->fs_info->excluded_extents,
10552                                          rec->start,
10553                                          rec->start + rec->max_size - 1);
10554                         cache = next_cache_extent(cache);
10555                 }
10556
10557                 /* pin down all the corrupted blocks too */
10558                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10559                 while(cache) {
10560                         set_extent_dirty(root->fs_info->excluded_extents,
10561                                          cache->start,
10562                                          cache->start + cache->size - 1);
10563                         cache = next_cache_extent(cache);
10564                 }
10565                 prune_corrupt_blocks(root->fs_info);
10566                 reset_cached_block_groups(root->fs_info);
10567         }
10568
10569         reset_cached_block_groups(root->fs_info);
10570
10571         /*
10572          * We need to delete any duplicate entries we find first otherwise we
10573          * could mess up the extent tree when we have backrefs that actually
10574          * belong to a different extent item and not the weird duplicate one.
10575          */
10576         while (repair && !list_empty(&duplicate_extents)) {
10577                 rec = to_extent_record(duplicate_extents.next);
10578                 list_del_init(&rec->list);
10579
10580                 /* Sometimes we can find a backref before we find an actual
10581                  * extent, so we need to process it a little bit to see if there
10582                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10583                  * if this is a backref screwup.  If we need to delete stuff
10584                  * process_duplicates() will return 0, otherwise it will return
10585                  * 1 and we move on to the next queued record.
10586                  */
10587                 if (process_duplicates(extent_cache, rec))
10588                         continue;
10589                 ret = delete_duplicate_records(root, rec);
10590                 if (ret < 0)
10591                         return ret;
10592                 /*
10593                  * delete_duplicate_records will return the number of entries
10594                  * deleted, so if it's greater than 0 then we know we actually
10595                  * did something and we need to remove.
10596                  */
10597                 if (ret)
10598                         had_dups = 1;
10599         }
10600
10601         if (had_dups)
10602                 return -EAGAIN;
10603
10604         while(1) {
10605                 int cur_err = 0;
10606                 int fix = 0;
10607
10608                 cache = search_cache_extent(extent_cache, 0);
10609                 if (!cache)
10610                         break;
10611                 rec = container_of(cache, struct extent_record, cache);
10612                 if (rec->num_duplicates) {
10613                         fprintf(stderr, "extent item %llu has multiple extent "
10614                                 "items\n", (unsigned long long)rec->start);
10615                         cur_err = 1;
10616                 }
10617
10618                 if (rec->refs != rec->extent_item_refs) {
10619                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
10620                                 (unsigned long long)rec->start,
10621                                 (unsigned long long)rec->nr);
10622                         fprintf(stderr, "extent item %llu, found %llu\n",
10623                                 (unsigned long long)rec->extent_item_refs,
10624                                 (unsigned long long)rec->refs);
10625                         ret = record_orphan_data_extents(root->fs_info, rec);
10626                         if (ret < 0)
10627                                 goto repair_abort;
10628                         fix = ret;
10629                         cur_err = 1;
10630                 }
10631                 if (all_backpointers_checked(rec, 1)) {
10632                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10633                                 (unsigned long long)rec->start,
10634                                 (unsigned long long)rec->nr);
10635                         fix = 1;
10636                         cur_err = 1;
10637                 }
10638                 if (!rec->owner_ref_checked) {
10639                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10640                                 (unsigned long long)rec->start,
10641                                 (unsigned long long)rec->nr);
10642                         fix = 1;
10643                         cur_err = 1;
10644                 }
10645
10646                 if (repair && fix) {
10647                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10648                         if (ret)
10649                                 goto repair_abort;
10650                 }
10651
10652
10653                 if (rec->bad_full_backref) {
10654                         fprintf(stderr, "bad full backref, on [%llu]\n",
10655                                 (unsigned long long)rec->start);
10656                         if (repair) {
10657                                 ret = fixup_extent_flags(root->fs_info, rec);
10658                                 if (ret)
10659                                         goto repair_abort;
10660                                 fix = 1;
10661                         }
10662                         cur_err = 1;
10663                 }
10664                 /*
10665                  * Although it's not a extent ref's problem, we reuse this
10666                  * routine for error reporting.
10667                  * No repair function yet.
10668                  */
10669                 if (rec->crossing_stripes) {
10670                         fprintf(stderr,
10671                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10672                                 rec->start, rec->start + rec->max_size);
10673                         cur_err = 1;
10674                 }
10675
10676                 if (rec->wrong_chunk_type) {
10677                         fprintf(stderr,
10678                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
10679                                 rec->start, rec->start + rec->max_size);
10680                         cur_err = 1;
10681                 }
10682
10683                 err = cur_err;
10684                 remove_cache_extent(extent_cache, cache);
10685                 free_all_extent_backrefs(rec);
10686                 if (!init_extent_tree && repair && (!cur_err || fix))
10687                         clear_extent_dirty(root->fs_info->excluded_extents,
10688                                            rec->start,
10689                                            rec->start + rec->max_size - 1);
10690                 free(rec);
10691         }
10692 repair_abort:
10693         if (repair) {
10694                 if (ret && ret != -EAGAIN) {
10695                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10696                         exit(1);
10697                 } else if (!ret) {
10698                         struct btrfs_trans_handle *trans;
10699
10700                         root = root->fs_info->extent_root;
10701                         trans = btrfs_start_transaction(root, 1);
10702                         if (IS_ERR(trans)) {
10703                                 ret = PTR_ERR(trans);
10704                                 goto repair_abort;
10705                         }
10706
10707                         ret = btrfs_fix_block_accounting(trans, root);
10708                         if (ret)
10709                                 goto repair_abort;
10710                         ret = btrfs_commit_transaction(trans, root);
10711                         if (ret)
10712                                 goto repair_abort;
10713                 }
10714                 return ret;
10715         }
10716
10717         if (err)
10718                 err = -EIO;
10719         return err;
10720 }
10721
10722 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10723 {
10724         u64 stripe_size;
10725
10726         if (type & BTRFS_BLOCK_GROUP_RAID0) {
10727                 stripe_size = length;
10728                 stripe_size /= num_stripes;
10729         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10730                 stripe_size = length * 2;
10731                 stripe_size /= num_stripes;
10732         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10733                 stripe_size = length;
10734                 stripe_size /= (num_stripes - 1);
10735         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10736                 stripe_size = length;
10737                 stripe_size /= (num_stripes - 2);
10738         } else {
10739                 stripe_size = length;
10740         }
10741         return stripe_size;
10742 }
10743
10744 /*
10745  * Check the chunk with its block group/dev list ref:
10746  * Return 0 if all refs seems valid.
10747  * Return 1 if part of refs seems valid, need later check for rebuild ref
10748  * like missing block group and needs to search extent tree to rebuild them.
10749  * Return -1 if essential refs are missing and unable to rebuild.
10750  */
10751 static int check_chunk_refs(struct chunk_record *chunk_rec,
10752                             struct block_group_tree *block_group_cache,
10753                             struct device_extent_tree *dev_extent_cache,
10754                             int silent)
10755 {
10756         struct cache_extent *block_group_item;
10757         struct block_group_record *block_group_rec;
10758         struct cache_extent *dev_extent_item;
10759         struct device_extent_record *dev_extent_rec;
10760         u64 devid;
10761         u64 offset;
10762         u64 length;
10763         int metadump_v2 = 0;
10764         int i;
10765         int ret = 0;
10766
10767         block_group_item = lookup_cache_extent(&block_group_cache->tree,
10768                                                chunk_rec->offset,
10769                                                chunk_rec->length);
10770         if (block_group_item) {
10771                 block_group_rec = container_of(block_group_item,
10772                                                struct block_group_record,
10773                                                cache);
10774                 if (chunk_rec->length != block_group_rec->offset ||
10775                     chunk_rec->offset != block_group_rec->objectid ||
10776                     (!metadump_v2 &&
10777                      chunk_rec->type_flags != block_group_rec->flags)) {
10778                         if (!silent)
10779                                 fprintf(stderr,
10780                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10781                                         chunk_rec->objectid,
10782                                         chunk_rec->type,
10783                                         chunk_rec->offset,
10784                                         chunk_rec->length,
10785                                         chunk_rec->offset,
10786                                         chunk_rec->type_flags,
10787                                         block_group_rec->objectid,
10788                                         block_group_rec->type,
10789                                         block_group_rec->offset,
10790                                         block_group_rec->offset,
10791                                         block_group_rec->objectid,
10792                                         block_group_rec->flags);
10793                         ret = -1;
10794                 } else {
10795                         list_del_init(&block_group_rec->list);
10796                         chunk_rec->bg_rec = block_group_rec;
10797                 }
10798         } else {
10799                 if (!silent)
10800                         fprintf(stderr,
10801                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10802                                 chunk_rec->objectid,
10803                                 chunk_rec->type,
10804                                 chunk_rec->offset,
10805                                 chunk_rec->length,
10806                                 chunk_rec->offset,
10807                                 chunk_rec->type_flags);
10808                 ret = 1;
10809         }
10810
10811         if (metadump_v2)
10812                 return ret;
10813
10814         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10815                                     chunk_rec->num_stripes);
10816         for (i = 0; i < chunk_rec->num_stripes; ++i) {
10817                 devid = chunk_rec->stripes[i].devid;
10818                 offset = chunk_rec->stripes[i].offset;
10819                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10820                                                        devid, offset, length);
10821                 if (dev_extent_item) {
10822                         dev_extent_rec = container_of(dev_extent_item,
10823                                                 struct device_extent_record,
10824                                                 cache);
10825                         if (dev_extent_rec->objectid != devid ||
10826                             dev_extent_rec->offset != offset ||
10827                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
10828                             dev_extent_rec->length != length) {
10829                                 if (!silent)
10830                                         fprintf(stderr,
10831                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10832                                                 chunk_rec->objectid,
10833                                                 chunk_rec->type,
10834                                                 chunk_rec->offset,
10835                                                 chunk_rec->stripes[i].devid,
10836                                                 chunk_rec->stripes[i].offset,
10837                                                 dev_extent_rec->objectid,
10838                                                 dev_extent_rec->offset,
10839                                                 dev_extent_rec->length);
10840                                 ret = -1;
10841                         } else {
10842                                 list_move(&dev_extent_rec->chunk_list,
10843                                           &chunk_rec->dextents);
10844                         }
10845                 } else {
10846                         if (!silent)
10847                                 fprintf(stderr,
10848                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10849                                         chunk_rec->objectid,
10850                                         chunk_rec->type,
10851                                         chunk_rec->offset,
10852                                         chunk_rec->stripes[i].devid,
10853                                         chunk_rec->stripes[i].offset);
10854                         ret = -1;
10855                 }
10856         }
10857         return ret;
10858 }
10859
10860 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
10861 int check_chunks(struct cache_tree *chunk_cache,
10862                  struct block_group_tree *block_group_cache,
10863                  struct device_extent_tree *dev_extent_cache,
10864                  struct list_head *good, struct list_head *bad,
10865                  struct list_head *rebuild, int silent)
10866 {
10867         struct cache_extent *chunk_item;
10868         struct chunk_record *chunk_rec;
10869         struct block_group_record *bg_rec;
10870         struct device_extent_record *dext_rec;
10871         int err;
10872         int ret = 0;
10873
10874         chunk_item = first_cache_extent(chunk_cache);
10875         while (chunk_item) {
10876                 chunk_rec = container_of(chunk_item, struct chunk_record,
10877                                          cache);
10878                 err = check_chunk_refs(chunk_rec, block_group_cache,
10879                                        dev_extent_cache, silent);
10880                 if (err < 0)
10881                         ret = err;
10882                 if (err == 0 && good)
10883                         list_add_tail(&chunk_rec->list, good);
10884                 if (err > 0 && rebuild)
10885                         list_add_tail(&chunk_rec->list, rebuild);
10886                 if (err < 0 && bad)
10887                         list_add_tail(&chunk_rec->list, bad);
10888                 chunk_item = next_cache_extent(chunk_item);
10889         }
10890
10891         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10892                 if (!silent)
10893                         fprintf(stderr,
10894                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10895                                 bg_rec->objectid,
10896                                 bg_rec->offset,
10897                                 bg_rec->flags);
10898                 if (!ret)
10899                         ret = 1;
10900         }
10901
10902         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10903                             chunk_list) {
10904                 if (!silent)
10905                         fprintf(stderr,
10906                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10907                                 dext_rec->objectid,
10908                                 dext_rec->offset,
10909                                 dext_rec->length);
10910                 if (!ret)
10911                         ret = 1;
10912         }
10913         return ret;
10914 }
10915
10916
10917 static int check_device_used(struct device_record *dev_rec,
10918                              struct device_extent_tree *dext_cache)
10919 {
10920         struct cache_extent *cache;
10921         struct device_extent_record *dev_extent_rec;
10922         u64 total_byte = 0;
10923
10924         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10925         while (cache) {
10926                 dev_extent_rec = container_of(cache,
10927                                               struct device_extent_record,
10928                                               cache);
10929                 if (dev_extent_rec->objectid != dev_rec->devid)
10930                         break;
10931
10932                 list_del_init(&dev_extent_rec->device_list);
10933                 total_byte += dev_extent_rec->length;
10934                 cache = next_cache_extent(cache);
10935         }
10936
10937         if (total_byte != dev_rec->byte_used) {
10938                 fprintf(stderr,
10939                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10940                         total_byte, dev_rec->byte_used, dev_rec->objectid,
10941                         dev_rec->type, dev_rec->offset);
10942                 return -1;
10943         } else {
10944                 return 0;
10945         }
10946 }
10947
10948 /*
10949  * Extra (optional) check for dev_item size to report possbile problem on a new
10950  * kernel.
10951  */
10952 static void check_dev_size_alignment(u64 devid, u64 total_bytes, u32 sectorsize)
10953 {
10954         if (!IS_ALIGNED(total_bytes, sectorsize)) {
10955                 warning(
10956 "unaligned total_bytes detected for devid %llu, have %llu should be aligned to %u",
10957                         devid, total_bytes, sectorsize);
10958                 warning(
10959 "this is OK for older kernel, but may cause kernel warning for newer kernels");
10960                 warning("this can be fixed by 'btrfs rescue fix-device-size'");
10961         }
10962 }
10963
10964 /*
10965  * Unlike device size alignment check above, some super total_bytes check
10966  * failure can lead to mount failure for newer kernel.
10967  *
10968  * So this function will return the error for a fatal super total_bytes problem.
10969  */
10970 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
10971 {
10972         struct btrfs_device *dev;
10973         struct list_head *dev_list = &fs_info->fs_devices->devices;
10974         u64 total_bytes = 0;
10975         u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
10976
10977         list_for_each_entry(dev, dev_list, dev_list)
10978                 total_bytes += dev->total_bytes;
10979
10980         /* Important check, which can cause unmountable fs */
10981         if (super_bytes < total_bytes) {
10982                 error("super total bytes %llu smaller than real device(s) size %llu",
10983                         super_bytes, total_bytes);
10984                 error("mounting this fs may fail for newer kernels");
10985                 error("this can be fixed by 'btrfs rescue fix-device-size'");
10986                 return false;
10987         }
10988
10989         /*
10990          * Optional check, just to make everything aligned and match with each
10991          * other.
10992          *
10993          * For a btrfs-image restored fs, we don't need to check it anyway.
10994          */
10995         if (btrfs_super_flags(fs_info->super_copy) &
10996             (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
10997                 return true;
10998         if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
10999             !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
11000             super_bytes != total_bytes) {
11001                 warning("minor unaligned/mismatch device size detected");
11002                 warning(
11003                 "recommended to use 'btrfs rescue fix-device-size' to fix it");
11004         }
11005         return true;
11006 }
11007
11008 /* check btrfs_dev_item -> btrfs_dev_extent */
11009 static int check_devices(struct rb_root *dev_cache,
11010                          struct device_extent_tree *dev_extent_cache)
11011 {
11012         struct rb_node *dev_node;
11013         struct device_record *dev_rec;
11014         struct device_extent_record *dext_rec;
11015         int err;
11016         int ret = 0;
11017
11018         dev_node = rb_first(dev_cache);
11019         while (dev_node) {
11020                 dev_rec = container_of(dev_node, struct device_record, node);
11021                 err = check_device_used(dev_rec, dev_extent_cache);
11022                 if (err)
11023                         ret = err;
11024
11025                 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
11026                                          global_info->sectorsize);
11027                 dev_node = rb_next(dev_node);
11028         }
11029         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
11030                             device_list) {
11031                 fprintf(stderr,
11032                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
11033                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
11034                 if (!ret)
11035                         ret = 1;
11036         }
11037         return ret;
11038 }
11039
11040 static int add_root_item_to_list(struct list_head *head,
11041                                   u64 objectid, u64 bytenr, u64 last_snapshot,
11042                                   u8 level, u8 drop_level,
11043                                   struct btrfs_key *drop_key)
11044 {
11045
11046         struct root_item_record *ri_rec;
11047         ri_rec = malloc(sizeof(*ri_rec));
11048         if (!ri_rec)
11049                 return -ENOMEM;
11050         ri_rec->bytenr = bytenr;
11051         ri_rec->objectid = objectid;
11052         ri_rec->level = level;
11053         ri_rec->drop_level = drop_level;
11054         ri_rec->last_snapshot = last_snapshot;
11055         if (drop_key)
11056                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
11057         list_add_tail(&ri_rec->list, head);
11058
11059         return 0;
11060 }
11061
11062 static void free_root_item_list(struct list_head *list)
11063 {
11064         struct root_item_record *ri_rec;
11065
11066         while (!list_empty(list)) {
11067                 ri_rec = list_first_entry(list, struct root_item_record,
11068                                           list);
11069                 list_del_init(&ri_rec->list);
11070                 free(ri_rec);
11071         }
11072 }
11073
11074 static int deal_root_from_list(struct list_head *list,
11075                                struct btrfs_root *root,
11076                                struct block_info *bits,
11077                                int bits_nr,
11078                                struct cache_tree *pending,
11079                                struct cache_tree *seen,
11080                                struct cache_tree *reada,
11081                                struct cache_tree *nodes,
11082                                struct cache_tree *extent_cache,
11083                                struct cache_tree *chunk_cache,
11084                                struct rb_root *dev_cache,
11085                                struct block_group_tree *block_group_cache,
11086                                struct device_extent_tree *dev_extent_cache)
11087 {
11088         int ret = 0;
11089         u64 last;
11090
11091         while (!list_empty(list)) {
11092                 struct root_item_record *rec;
11093                 struct extent_buffer *buf;
11094                 rec = list_entry(list->next,
11095                                  struct root_item_record, list);
11096                 last = 0;
11097                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
11098                 if (!extent_buffer_uptodate(buf)) {
11099                         free_extent_buffer(buf);
11100                         ret = -EIO;
11101                         break;
11102                 }
11103                 ret = add_root_to_pending(buf, extent_cache, pending,
11104                                     seen, nodes, rec->objectid);
11105                 if (ret < 0)
11106                         break;
11107                 /*
11108                  * To rebuild extent tree, we need deal with snapshot
11109                  * one by one, otherwise we deal with node firstly which
11110                  * can maximize readahead.
11111                  */
11112                 while (1) {
11113                         ret = run_next_block(root, bits, bits_nr, &last,
11114                                              pending, seen, reada, nodes,
11115                                              extent_cache, chunk_cache,
11116                                              dev_cache, block_group_cache,
11117                                              dev_extent_cache, rec);
11118                         if (ret != 0)
11119                                 break;
11120                 }
11121                 free_extent_buffer(buf);
11122                 list_del(&rec->list);
11123                 free(rec);
11124                 if (ret < 0)
11125                         break;
11126         }
11127         while (ret >= 0) {
11128                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
11129                                      reada, nodes, extent_cache, chunk_cache,
11130                                      dev_cache, block_group_cache,
11131                                      dev_extent_cache, NULL);
11132                 if (ret != 0) {
11133                         if (ret > 0)
11134                                 ret = 0;
11135                         break;
11136                 }
11137         }
11138         return ret;
11139 }
11140
11141 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11142 {
11143         struct rb_root dev_cache;
11144         struct cache_tree chunk_cache;
11145         struct block_group_tree block_group_cache;
11146         struct device_extent_tree dev_extent_cache;
11147         struct cache_tree extent_cache;
11148         struct cache_tree seen;
11149         struct cache_tree pending;
11150         struct cache_tree reada;
11151         struct cache_tree nodes;
11152         struct extent_io_tree excluded_extents;
11153         struct cache_tree corrupt_blocks;
11154         struct btrfs_path path;
11155         struct btrfs_key key;
11156         struct btrfs_key found_key;
11157         int ret, err = 0;
11158         struct block_info *bits;
11159         int bits_nr;
11160         struct extent_buffer *leaf;
11161         int slot;
11162         struct btrfs_root_item ri;
11163         struct list_head dropping_trees;
11164         struct list_head normal_trees;
11165         struct btrfs_root *root1;
11166         struct btrfs_root *root;
11167         u64 objectid;
11168         u8 level;
11169
11170         root = fs_info->fs_root;
11171         dev_cache = RB_ROOT;
11172         cache_tree_init(&chunk_cache);
11173         block_group_tree_init(&block_group_cache);
11174         device_extent_tree_init(&dev_extent_cache);
11175
11176         cache_tree_init(&extent_cache);
11177         cache_tree_init(&seen);
11178         cache_tree_init(&pending);
11179         cache_tree_init(&nodes);
11180         cache_tree_init(&reada);
11181         cache_tree_init(&corrupt_blocks);
11182         extent_io_tree_init(&excluded_extents);
11183         INIT_LIST_HEAD(&dropping_trees);
11184         INIT_LIST_HEAD(&normal_trees);
11185
11186         if (repair) {
11187                 fs_info->excluded_extents = &excluded_extents;
11188                 fs_info->fsck_extent_cache = &extent_cache;
11189                 fs_info->free_extent_hook = free_extent_hook;
11190                 fs_info->corrupt_blocks = &corrupt_blocks;
11191         }
11192
11193         bits_nr = 1024;
11194         bits = malloc(bits_nr * sizeof(struct block_info));
11195         if (!bits) {
11196                 perror("malloc");
11197                 exit(1);
11198         }
11199
11200         if (ctx.progress_enabled) {
11201                 ctx.tp = TASK_EXTENTS;
11202                 task_start(ctx.info);
11203         }
11204
11205 again:
11206         root1 = fs_info->tree_root;
11207         level = btrfs_header_level(root1->node);
11208         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11209                                     root1->node->start, 0, level, 0, NULL);
11210         if (ret < 0)
11211                 goto out;
11212         root1 = fs_info->chunk_root;
11213         level = btrfs_header_level(root1->node);
11214         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11215                                     root1->node->start, 0, level, 0, NULL);
11216         if (ret < 0)
11217                 goto out;
11218         btrfs_init_path(&path);
11219         key.offset = 0;
11220         key.objectid = 0;
11221         key.type = BTRFS_ROOT_ITEM_KEY;
11222         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11223         if (ret < 0)
11224                 goto out;
11225         while(1) {
11226                 leaf = path.nodes[0];
11227                 slot = path.slots[0];
11228                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
11229                         ret = btrfs_next_leaf(root, &path);
11230                         if (ret != 0)
11231                                 break;
11232                         leaf = path.nodes[0];
11233                         slot = path.slots[0];
11234                 }
11235                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11236                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11237                         unsigned long offset;
11238                         u64 last_snapshot;
11239
11240                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11241                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11242                         last_snapshot = btrfs_root_last_snapshot(&ri);
11243                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11244                                 level = btrfs_root_level(&ri);
11245                                 ret = add_root_item_to_list(&normal_trees,
11246                                                 found_key.objectid,
11247                                                 btrfs_root_bytenr(&ri),
11248                                                 last_snapshot, level,
11249                                                 0, NULL);
11250                                 if (ret < 0)
11251                                         goto out;
11252                         } else {
11253                                 level = btrfs_root_level(&ri);
11254                                 objectid = found_key.objectid;
11255                                 btrfs_disk_key_to_cpu(&found_key,
11256                                                       &ri.drop_progress);
11257                                 ret = add_root_item_to_list(&dropping_trees,
11258                                                 objectid,
11259                                                 btrfs_root_bytenr(&ri),
11260                                                 last_snapshot, level,
11261                                                 ri.drop_level, &found_key);
11262                                 if (ret < 0)
11263                                         goto out;
11264                         }
11265                 }
11266                 path.slots[0]++;
11267         }
11268         btrfs_release_path(&path);
11269
11270         /*
11271          * check_block can return -EAGAIN if it fixes something, please keep
11272          * this in mind when dealing with return values from these functions, if
11273          * we get -EAGAIN we want to fall through and restart the loop.
11274          */
11275         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11276                                   &seen, &reada, &nodes, &extent_cache,
11277                                   &chunk_cache, &dev_cache, &block_group_cache,
11278                                   &dev_extent_cache);
11279         if (ret < 0) {
11280                 if (ret == -EAGAIN)
11281                         goto loop;
11282                 goto out;
11283         }
11284         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11285                                   &pending, &seen, &reada, &nodes,
11286                                   &extent_cache, &chunk_cache, &dev_cache,
11287                                   &block_group_cache, &dev_extent_cache);
11288         if (ret < 0) {
11289                 if (ret == -EAGAIN)
11290                         goto loop;
11291                 goto out;
11292         }
11293
11294         ret = check_chunks(&chunk_cache, &block_group_cache,
11295                            &dev_extent_cache, NULL, NULL, NULL, 0);
11296         if (ret) {
11297                 if (ret == -EAGAIN)
11298                         goto loop;
11299                 err = ret;
11300         }
11301
11302         ret = check_extent_refs(root, &extent_cache);
11303         if (ret < 0) {
11304                 if (ret == -EAGAIN)
11305                         goto loop;
11306                 goto out;
11307         }
11308
11309         ret = check_devices(&dev_cache, &dev_extent_cache);
11310         if (ret && err)
11311                 ret = err;
11312
11313 out:
11314         task_stop(ctx.info);
11315         if (repair) {
11316                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11317                 extent_io_tree_cleanup(&excluded_extents);
11318                 fs_info->fsck_extent_cache = NULL;
11319                 fs_info->free_extent_hook = NULL;
11320                 fs_info->corrupt_blocks = NULL;
11321                 fs_info->excluded_extents = NULL;
11322         }
11323         free(bits);
11324         free_chunk_cache_tree(&chunk_cache);
11325         free_device_cache_tree(&dev_cache);
11326         free_block_group_tree(&block_group_cache);
11327         free_device_extent_tree(&dev_extent_cache);
11328         free_extent_cache_tree(&seen);
11329         free_extent_cache_tree(&pending);
11330         free_extent_cache_tree(&reada);
11331         free_extent_cache_tree(&nodes);
11332         free_root_item_list(&normal_trees);
11333         free_root_item_list(&dropping_trees);
11334         return ret;
11335 loop:
11336         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11337         free_extent_cache_tree(&seen);
11338         free_extent_cache_tree(&pending);
11339         free_extent_cache_tree(&reada);
11340         free_extent_cache_tree(&nodes);
11341         free_chunk_cache_tree(&chunk_cache);
11342         free_block_group_tree(&block_group_cache);
11343         free_device_cache_tree(&dev_cache);
11344         free_device_extent_tree(&dev_extent_cache);
11345         free_extent_record_cache(&extent_cache);
11346         free_root_item_list(&normal_trees);
11347         free_root_item_list(&dropping_trees);
11348         extent_io_tree_cleanup(&excluded_extents);
11349         goto again;
11350 }
11351
11352 static int check_extent_inline_ref(struct extent_buffer *eb,
11353                    struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11354 {
11355         int ret;
11356         u8 type = btrfs_extent_inline_ref_type(eb, iref);
11357
11358         switch (type) {
11359         case BTRFS_TREE_BLOCK_REF_KEY:
11360         case BTRFS_EXTENT_DATA_REF_KEY:
11361         case BTRFS_SHARED_BLOCK_REF_KEY:
11362         case BTRFS_SHARED_DATA_REF_KEY:
11363                 ret = 0;
11364                 break;
11365         default:
11366                 error("extent[%llu %u %llu] has unknown ref type: %d",
11367                       key->objectid, key->type, key->offset, type);
11368                 ret = UNKNOWN_TYPE;
11369                 break;
11370         }
11371
11372         return ret;
11373 }
11374
11375 /*
11376  * Check backrefs of a tree block given by @bytenr or @eb.
11377  *
11378  * @root:       the root containing the @bytenr or @eb
11379  * @eb:         tree block extent buffer, can be NULL
11380  * @bytenr:     bytenr of the tree block to search
11381  * @level:      tree level of the tree block
11382  * @owner:      owner of the tree block
11383  *
11384  * Return >0 for any error found and output error message
11385  * Return 0 for no error found
11386  */
11387 static int check_tree_block_ref(struct btrfs_root *root,
11388                                 struct extent_buffer *eb, u64 bytenr,
11389                                 int level, u64 owner, struct node_refs *nrefs)
11390 {
11391         struct btrfs_key key;
11392         struct btrfs_root *extent_root = root->fs_info->extent_root;
11393         struct btrfs_path path;
11394         struct btrfs_extent_item *ei;
11395         struct btrfs_extent_inline_ref *iref;
11396         struct extent_buffer *leaf;
11397         unsigned long end;
11398         unsigned long ptr;
11399         int slot;
11400         int skinny_level;
11401         int root_level = btrfs_header_level(root->node);
11402         int type;
11403         u32 nodesize = root->fs_info->nodesize;
11404         u32 item_size;
11405         u64 offset;
11406         int found_ref = 0;
11407         int err = 0;
11408         int ret;
11409         int strict = 1;
11410         int parent = 0;
11411
11412         btrfs_init_path(&path);
11413         key.objectid = bytenr;
11414         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11415                 key.type = BTRFS_METADATA_ITEM_KEY;
11416         else
11417                 key.type = BTRFS_EXTENT_ITEM_KEY;
11418         key.offset = (u64)-1;
11419
11420         /* Search for the backref in extent tree */
11421         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11422         if (ret < 0) {
11423                 err |= BACKREF_MISSING;
11424                 goto out;
11425         }
11426         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11427         if (ret) {
11428                 err |= BACKREF_MISSING;
11429                 goto out;
11430         }
11431
11432         leaf = path.nodes[0];
11433         slot = path.slots[0];
11434         btrfs_item_key_to_cpu(leaf, &key, slot);
11435
11436         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11437
11438         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11439                 skinny_level = (int)key.offset;
11440                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11441         } else {
11442                 struct btrfs_tree_block_info *info;
11443
11444                 info = (struct btrfs_tree_block_info *)(ei + 1);
11445                 skinny_level = btrfs_tree_block_level(leaf, info);
11446                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11447         }
11448
11449
11450         if (eb) {
11451                 u64 header_gen;
11452                 u64 extent_gen;
11453
11454                 /*
11455                  * Due to the feature of shared tree blocks, if the upper node
11456                  * is a fs root or shared node, the extent of checked node may
11457                  * not be updated until the next CoW.
11458                  */
11459                 if (nrefs)
11460                         strict = should_check_extent_strictly(root, nrefs,
11461                                         level);
11462                 if (!(btrfs_extent_flags(leaf, ei) &
11463                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11464                         error(
11465                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11466                                 key.objectid, nodesize,
11467                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11468                         err = BACKREF_MISMATCH;
11469                 }
11470                 header_gen = btrfs_header_generation(eb);
11471                 extent_gen = btrfs_extent_generation(leaf, ei);
11472                 if (header_gen != extent_gen) {
11473                         error(
11474         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11475                                 key.objectid, nodesize, header_gen,
11476                                 extent_gen);
11477                         err = BACKREF_MISMATCH;
11478                 }
11479                 if (level != skinny_level) {
11480                         error(
11481                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11482                                 key.objectid, nodesize, level, skinny_level);
11483                         err = BACKREF_MISMATCH;
11484                 }
11485                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11486                         error(
11487                         "extent[%llu %u] is referred by other roots than %llu",
11488                                 key.objectid, nodesize, root->objectid);
11489                         err = BACKREF_MISMATCH;
11490                 }
11491         }
11492
11493         /*
11494          * Iterate the extent/metadata item to find the exact backref
11495          */
11496         item_size = btrfs_item_size_nr(leaf, slot);
11497         ptr = (unsigned long)iref;
11498         end = (unsigned long)ei + item_size;
11499
11500         while (ptr < end) {
11501                 iref = (struct btrfs_extent_inline_ref *)ptr;
11502                 type = btrfs_extent_inline_ref_type(leaf, iref);
11503                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11504
11505                 ret = check_extent_inline_ref(leaf, &key, iref);
11506                 if (ret) {
11507                         err |= ret;
11508                         break;
11509                 }
11510                 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11511                         if (offset == root->objectid)
11512                                 found_ref = 1;
11513                         if (!strict && owner == offset)
11514                                 found_ref = 1;
11515                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11516                         /*
11517                          * Backref of tree reloc root points to itself, no need
11518                          * to check backref any more.
11519                          *
11520                          * This may be an error of loop backref, but extent tree
11521                          * checker should have already handled it.
11522                          * Here we only need to avoid infinite iteration.
11523                          */
11524                         if (offset == bytenr) {
11525                                 found_ref = 1;
11526                         } else {
11527                                 /*
11528                                  * Check if the backref points to valid
11529                                  * referencer
11530                                  */
11531                                 found_ref = !check_tree_block_ref( root, NULL,
11532                                                 offset, level + 1, owner,
11533                                                 NULL);
11534                         }
11535                 }
11536
11537                 if (found_ref)
11538                         break;
11539                 ptr += btrfs_extent_inline_ref_size(type);
11540         }
11541
11542         /*
11543          * Inlined extent item doesn't have what we need, check
11544          * TREE_BLOCK_REF_KEY
11545          */
11546         if (!found_ref) {
11547                 btrfs_release_path(&path);
11548                 key.objectid = bytenr;
11549                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11550                 key.offset = root->objectid;
11551
11552                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11553                 if (!ret)
11554                         found_ref = 1;
11555         }
11556         /*
11557          * Finally check SHARED BLOCK REF, any found will be good
11558          * Here we're not doing comprehensive extent backref checking,
11559          * only need to ensure there is some extent referring to this
11560          * tree block.
11561          */
11562         if (!found_ref) {
11563                 btrfs_release_path(&path);
11564                 key.objectid = bytenr;
11565                 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
11566                 key.offset = (u64)-1;
11567
11568                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11569                 if (ret < 0) {
11570                         err |= BACKREF_MISSING;
11571                         goto out;
11572                 }
11573                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11574                 if (ret) {
11575                         err |= BACKREF_MISSING;
11576                         goto out;
11577                 }
11578                 found_ref = 1;
11579         }
11580         if (!found_ref)
11581                 err |= BACKREF_MISSING;
11582 out:
11583         btrfs_release_path(&path);
11584         if (nrefs && strict &&
11585             level < root_level && nrefs->full_backref[level + 1])
11586                 parent = nrefs->bytenr[level + 1];
11587         if (eb && (err & BACKREF_MISSING))
11588                 error(
11589         "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11590                       bytenr, nodesize, owner, level,
11591                       parent ? "parent" : "root",
11592                       parent ? parent : root->objectid);
11593         return err;
11594 }
11595
11596 /*
11597  * If @err contains BACKREF_MISSING then add extent of the
11598  * file_extent_data_item.
11599  *
11600  * Returns error bits after reapir.
11601  */
11602 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11603                                    struct btrfs_root *root,
11604                                    struct btrfs_path *pathp,
11605                                    struct node_refs *nrefs,
11606                                    int err)
11607 {
11608         struct btrfs_file_extent_item *fi;
11609         struct btrfs_key fi_key;
11610         struct btrfs_key key;
11611         struct btrfs_extent_item *ei;
11612         struct btrfs_path path;
11613         struct btrfs_root *extent_root = root->fs_info->extent_root;
11614         struct extent_buffer *eb;
11615         u64 size;
11616         u64 disk_bytenr;
11617         u64 num_bytes;
11618         u64 parent;
11619         u64 offset;
11620         u64 extent_offset;
11621         u64 file_offset;
11622         int generation;
11623         int slot;
11624         int ret = 0;
11625
11626         eb = pathp->nodes[0];
11627         slot = pathp->slots[0];
11628         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11629         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11630
11631         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11632             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11633                 return err;
11634
11635         file_offset = fi_key.offset;
11636         generation = btrfs_file_extent_generation(eb, fi);
11637         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11638         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11639         extent_offset = btrfs_file_extent_offset(eb, fi);
11640         offset = file_offset - extent_offset;
11641
11642         /* now repair only adds backref */
11643         if ((err & BACKREF_MISSING) == 0)
11644                 return err;
11645
11646         /* search extent item */
11647         key.objectid = disk_bytenr;
11648         key.type = BTRFS_EXTENT_ITEM_KEY;
11649         key.offset = num_bytes;
11650
11651         btrfs_init_path(&path);
11652         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11653         if (ret < 0) {
11654                 ret = -EIO;
11655                 goto out;
11656         }
11657
11658         /* insert an extent item */
11659         if (ret > 0) {
11660                 key.objectid = disk_bytenr;
11661                 key.type = BTRFS_EXTENT_ITEM_KEY;
11662                 key.offset = num_bytes;
11663                 size = sizeof(*ei);
11664
11665                 btrfs_release_path(&path);
11666                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11667                                               size);
11668                 if (ret)
11669                         goto out;
11670                 eb = path.nodes[0];
11671                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
11672
11673                 btrfs_set_extent_refs(eb, ei, 0);
11674                 btrfs_set_extent_generation(eb, ei, generation);
11675                 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11676
11677                 btrfs_mark_buffer_dirty(eb);
11678                 ret = btrfs_update_block_group(extent_root, disk_bytenr,
11679                                                num_bytes, 1, 0);
11680                 btrfs_release_path(&path);
11681         }
11682
11683         if (nrefs->full_backref[0])
11684                 parent = btrfs_header_bytenr(eb);
11685         else
11686                 parent = 0;
11687
11688         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11689                                    root->objectid,
11690                    parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11691                                    offset);
11692         if (ret) {
11693                 error(
11694                 "failed to increase extent data backref[%llu %llu] root %llu",
11695                       disk_bytenr, num_bytes, root->objectid);
11696                 goto out;
11697         } else {
11698                 printf("Add one extent data backref [%llu %llu]\n",
11699                        disk_bytenr, num_bytes);
11700         }
11701
11702         err &= ~BACKREF_MISSING;
11703 out:
11704         if (ret)
11705                 error("can't repair root %llu extent data item[%llu %llu]",
11706                       root->objectid, disk_bytenr, num_bytes);
11707         return err;
11708 }
11709
11710 /*
11711  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11712  *
11713  * Return >0 any error found and output error message
11714  * Return 0 for no error found
11715  */
11716 static int check_extent_data_item(struct btrfs_root *root,
11717                                   struct btrfs_path *pathp,
11718                                   struct node_refs *nrefs,  int account_bytes)
11719 {
11720         struct btrfs_file_extent_item *fi;
11721         struct extent_buffer *eb = pathp->nodes[0];
11722         struct btrfs_path path;
11723         struct btrfs_root *extent_root = root->fs_info->extent_root;
11724         struct btrfs_key fi_key;
11725         struct btrfs_key dbref_key;
11726         struct extent_buffer *leaf;
11727         struct btrfs_extent_item *ei;
11728         struct btrfs_extent_inline_ref *iref;
11729         struct btrfs_extent_data_ref *dref;
11730         u64 owner;
11731         u64 disk_bytenr;
11732         u64 disk_num_bytes;
11733         u64 extent_num_bytes;
11734         u64 extent_flags;
11735         u64 offset;
11736         u32 item_size;
11737         unsigned long end;
11738         unsigned long ptr;
11739         int type;
11740         int found_dbackref = 0;
11741         int slot = pathp->slots[0];
11742         int err = 0;
11743         int ret;
11744         int strict;
11745
11746         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11747         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11748
11749         /* Nothing to check for hole and inline data extents */
11750         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11751             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11752                 return 0;
11753
11754         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11755         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11756         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11757         offset = btrfs_file_extent_offset(eb, fi);
11758
11759         /* Check unaligned disk_num_bytes and num_bytes */
11760         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11761                 error(
11762 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11763                         fi_key.objectid, fi_key.offset, disk_num_bytes,
11764                         root->fs_info->sectorsize);
11765                 err |= BYTES_UNALIGNED;
11766         } else if (account_bytes) {
11767                 data_bytes_allocated += disk_num_bytes;
11768         }
11769         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11770                 error(
11771 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11772                         fi_key.objectid, fi_key.offset, extent_num_bytes,
11773                         root->fs_info->sectorsize);
11774                 err |= BYTES_UNALIGNED;
11775         } else if (account_bytes) {
11776                 data_bytes_referenced += extent_num_bytes;
11777         }
11778         owner = btrfs_header_owner(eb);
11779
11780         /* Check the extent item of the file extent in extent tree */
11781         btrfs_init_path(&path);
11782         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11783         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11784         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11785
11786         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11787         if (ret)
11788                 goto out;
11789
11790         leaf = path.nodes[0];
11791         slot = path.slots[0];
11792         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11793
11794         extent_flags = btrfs_extent_flags(leaf, ei);
11795
11796         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11797                 error(
11798                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11799                     disk_bytenr, disk_num_bytes,
11800                     BTRFS_EXTENT_FLAG_DATA);
11801                 err |= BACKREF_MISMATCH;
11802         }
11803
11804         /* Check data backref inside that extent item */
11805         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11806         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11807         ptr = (unsigned long)iref;
11808         end = (unsigned long)ei + item_size;
11809         strict = should_check_extent_strictly(root, nrefs, -1);
11810
11811         while (ptr < end) {
11812                 u64 ref_root;
11813                 u64 ref_objectid;
11814                 u64 ref_offset;
11815                 bool match = false;
11816
11817                 iref = (struct btrfs_extent_inline_ref *)ptr;
11818                 type = btrfs_extent_inline_ref_type(leaf, iref);
11819                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11820
11821                 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
11822                 if (ret) {
11823                         err |= ret;
11824                         break;
11825                 }
11826                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11827                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
11828                         ref_objectid = btrfs_extent_data_ref_objectid(leaf, dref);
11829                         ref_offset = btrfs_extent_data_ref_offset(leaf, dref);
11830
11831                         if (ref_objectid == fi_key.objectid &&
11832                             ref_offset == fi_key.offset - offset)
11833                                 match = true;
11834                         if (ref_root == root->objectid && match)
11835                                 found_dbackref = 1;
11836                         else if (!strict && owner == ref_root && match)
11837                                 found_dbackref = 1;
11838                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
11839                         found_dbackref = !check_tree_block_ref(root, NULL,
11840                                 btrfs_extent_inline_ref_offset(leaf, iref),
11841                                 0, owner, NULL);
11842                 }
11843
11844                 if (found_dbackref)
11845                         break;
11846                 ptr += btrfs_extent_inline_ref_size(type);
11847         }
11848
11849         if (!found_dbackref) {
11850                 btrfs_release_path(&path);
11851
11852                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11853                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11854                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
11855                 dbref_key.offset = hash_extent_data_ref(root->objectid,
11856                                 fi_key.objectid, fi_key.offset - offset);
11857
11858                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11859                                         &dbref_key, &path, 0, 0);
11860                 if (!ret) {
11861                         found_dbackref = 1;
11862                         goto out;
11863                 }
11864
11865                 btrfs_release_path(&path);
11866
11867                 /*
11868                  * Neither inlined nor EXTENT_DATA_REF found, try
11869                  * SHARED_DATA_REF as last chance.
11870                  */
11871                 dbref_key.objectid = disk_bytenr;
11872                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11873                 dbref_key.offset = eb->start;
11874
11875                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11876                                         &dbref_key, &path, 0, 0);
11877                 if (!ret) {
11878                         found_dbackref = 1;
11879                         goto out;
11880                 }
11881         }
11882
11883 out:
11884         if (!found_dbackref)
11885                 err |= BACKREF_MISSING;
11886         btrfs_release_path(&path);
11887         if (err & BACKREF_MISSING) {
11888                 error("data extent[%llu %llu] backref lost",
11889                       disk_bytenr, disk_num_bytes);
11890         }
11891         return err;
11892 }
11893
11894 /*
11895  * Get real tree block level for the case like shared block
11896  * Return >= 0 as tree level
11897  * Return <0 for error
11898  */
11899 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11900 {
11901         struct extent_buffer *eb;
11902         struct btrfs_path path;
11903         struct btrfs_key key;
11904         struct btrfs_extent_item *ei;
11905         u64 flags;
11906         u64 transid;
11907         u8 backref_level;
11908         u8 header_level;
11909         int ret;
11910
11911         /* Search extent tree for extent generation and level */
11912         key.objectid = bytenr;
11913         key.type = BTRFS_METADATA_ITEM_KEY;
11914         key.offset = (u64)-1;
11915
11916         btrfs_init_path(&path);
11917         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11918         if (ret < 0)
11919                 goto release_out;
11920         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11921         if (ret < 0)
11922                 goto release_out;
11923         if (ret > 0) {
11924                 ret = -ENOENT;
11925                 goto release_out;
11926         }
11927
11928         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11929         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11930                             struct btrfs_extent_item);
11931         flags = btrfs_extent_flags(path.nodes[0], ei);
11932         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11933                 ret = -ENOENT;
11934                 goto release_out;
11935         }
11936
11937         /* Get transid for later read_tree_block() check */
11938         transid = btrfs_extent_generation(path.nodes[0], ei);
11939
11940         /* Get backref level as one source */
11941         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11942                 backref_level = key.offset;
11943         } else {
11944                 struct btrfs_tree_block_info *info;
11945
11946                 info = (struct btrfs_tree_block_info *)(ei + 1);
11947                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11948         }
11949         btrfs_release_path(&path);
11950
11951         /* Get level from tree block as an alternative source */
11952         eb = read_tree_block(fs_info, bytenr, transid);
11953         if (!extent_buffer_uptodate(eb)) {
11954                 free_extent_buffer(eb);
11955                 return -EIO;
11956         }
11957         header_level = btrfs_header_level(eb);
11958         free_extent_buffer(eb);
11959
11960         if (header_level != backref_level)
11961                 return -EIO;
11962         return header_level;
11963
11964 release_out:
11965         btrfs_release_path(&path);
11966         return ret;
11967 }
11968
11969 /*
11970  * Check if a tree block backref is valid (points to a valid tree block)
11971  * if level == -1, level will be resolved
11972  * Return >0 for any error found and print error message
11973  */
11974 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11975                                     u64 bytenr, int level)
11976 {
11977         struct btrfs_root *root;
11978         struct btrfs_key key;
11979         struct btrfs_path path;
11980         struct extent_buffer *eb;
11981         struct extent_buffer *node;
11982         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11983         int err = 0;
11984         int ret;
11985
11986         /* Query level for level == -1 special case */
11987         if (level == -1)
11988                 level = query_tree_block_level(fs_info, bytenr);
11989         if (level < 0) {
11990                 err |= REFERENCER_MISSING;
11991                 goto out;
11992         }
11993
11994         key.objectid = root_id;
11995         key.type = BTRFS_ROOT_ITEM_KEY;
11996         key.offset = (u64)-1;
11997
11998         root = btrfs_read_fs_root(fs_info, &key);
11999         if (IS_ERR(root)) {
12000                 err |= REFERENCER_MISSING;
12001                 goto out;
12002         }
12003
12004         /* Read out the tree block to get item/node key */
12005         eb = read_tree_block(fs_info, bytenr, 0);
12006         if (!extent_buffer_uptodate(eb)) {
12007                 err |= REFERENCER_MISSING;
12008                 free_extent_buffer(eb);
12009                 goto out;
12010         }
12011
12012         /* Empty tree, no need to check key */
12013         if (!btrfs_header_nritems(eb) && !level) {
12014                 free_extent_buffer(eb);
12015                 goto out;
12016         }
12017
12018         if (level)
12019                 btrfs_node_key_to_cpu(eb, &key, 0);
12020         else
12021                 btrfs_item_key_to_cpu(eb, &key, 0);
12022
12023         free_extent_buffer(eb);
12024
12025         btrfs_init_path(&path);
12026         path.lowest_level = level;
12027         /* Search with the first key, to ensure we can reach it */
12028         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12029         if (ret < 0) {
12030                 err |= REFERENCER_MISSING;
12031                 goto release_out;
12032         }
12033
12034         node = path.nodes[level];
12035         if (btrfs_header_bytenr(node) != bytenr) {
12036                 error(
12037         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
12038                         bytenr, nodesize, bytenr,
12039                         btrfs_header_bytenr(node));
12040                 err |= REFERENCER_MISMATCH;
12041         }
12042         if (btrfs_header_level(node) != level) {
12043                 error(
12044         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
12045                         bytenr, nodesize, level,
12046                         btrfs_header_level(node));
12047                 err |= REFERENCER_MISMATCH;
12048         }
12049
12050 release_out:
12051         btrfs_release_path(&path);
12052 out:
12053         if (err & REFERENCER_MISSING) {
12054                 if (level < 0)
12055                         error("extent [%llu %d] lost referencer (owner: %llu)",
12056                                 bytenr, nodesize, root_id);
12057                 else
12058                         error(
12059                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
12060                                 bytenr, nodesize, root_id, level);
12061         }
12062
12063         return err;
12064 }
12065
12066 /*
12067  * Check if tree block @eb is tree reloc root.
12068  * Return 0 if it's not or any problem happens
12069  * Return 1 if it's a tree reloc root
12070  */
12071 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
12072                                  struct extent_buffer *eb)
12073 {
12074         struct btrfs_root *tree_reloc_root;
12075         struct btrfs_key key;
12076         u64 bytenr = btrfs_header_bytenr(eb);
12077         u64 owner = btrfs_header_owner(eb);
12078         int ret = 0;
12079
12080         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12081         key.offset = owner;
12082         key.type = BTRFS_ROOT_ITEM_KEY;
12083
12084         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
12085         if (IS_ERR(tree_reloc_root))
12086                 return 0;
12087
12088         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
12089                 ret = 1;
12090         btrfs_free_fs_root(tree_reloc_root);
12091         return ret;
12092 }
12093
12094 /*
12095  * Check referencer for shared block backref
12096  * If level == -1, this function will resolve the level.
12097  */
12098 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
12099                                      u64 parent, u64 bytenr, int level)
12100 {
12101         struct extent_buffer *eb;
12102         u32 nr;
12103         int found_parent = 0;
12104         int i;
12105
12106         eb = read_tree_block(fs_info, parent, 0);
12107         if (!extent_buffer_uptodate(eb))
12108                 goto out;
12109
12110         if (level == -1)
12111                 level = query_tree_block_level(fs_info, bytenr);
12112         if (level < 0)
12113                 goto out;
12114
12115         /* It's possible it's a tree reloc root */
12116         if (parent == bytenr) {
12117                 if (is_tree_reloc_root(fs_info, eb))
12118                         found_parent = 1;
12119                 goto out;
12120         }
12121
12122         if (level + 1 != btrfs_header_level(eb))
12123                 goto out;
12124
12125         nr = btrfs_header_nritems(eb);
12126         for (i = 0; i < nr; i++) {
12127                 if (bytenr == btrfs_node_blockptr(eb, i)) {
12128                         found_parent = 1;
12129                         break;
12130                 }
12131         }
12132 out:
12133         free_extent_buffer(eb);
12134         if (!found_parent) {
12135                 error(
12136         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
12137                         bytenr, fs_info->nodesize, parent, level);
12138                 return REFERENCER_MISSING;
12139         }
12140         return 0;
12141 }
12142
/*
 * Check referencer for normal (inlined) data ref
 * If len == 0, it will be resolved by searching in extent tree
 *
 * @root_id:   objectid of the root that should contain the file extents
 * @objectid:  key objectid of the EXTENT_DATA items to scan
 * @offset:    backref offset, matched against
 *             (file extent key offset - file extent offset)
 * @bytenr:    disk bytenr of the data extent
 * @len:       disk num bytes of the data extent, 0 to look it up
 * @count:     number of references the backref claims
 *
 * Return 0 if exactly @count matching file extents were found.
 * Return REFERENCER_MISSING otherwise.  Any lookup failure along the way
 * jumps to the final comparison with the matches counted so far, so it is
 * reported as a count mismatch unless @count equals that number.
 */
static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
                                     u64 root_id, u64 objectid, u64 offset,
                                     u64 bytenr, u64 len, u32 count)
{
        struct btrfs_root *root;
        struct btrfs_root *extent_root = fs_info->extent_root;
        struct btrfs_key key;
        struct btrfs_path path;
        struct extent_buffer *leaf;
        struct btrfs_file_extent_item *fi;
        u32 found_count = 0;
        int slot;
        int ret = 0;

        /* Length not supplied: take it from the EXTENT_ITEM key offset */
        if (!len) {
                key.objectid = bytenr;
                key.type = BTRFS_EXTENT_ITEM_KEY;
                key.offset = (u64)-1;

                btrfs_init_path(&path);
                ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
                if (ret < 0)
                        goto out;
                ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
                if (ret)
                        goto out;
                btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
                if (key.objectid != bytenr ||
                    key.type != BTRFS_EXTENT_ITEM_KEY)
                        goto out;
                len = key.offset;
                btrfs_release_path(&path);
        }
        key.objectid = root_id;
        key.type = BTRFS_ROOT_ITEM_KEY;
        key.offset = (u64)-1;
        /*
         * Initialize the path before the first "goto out" below, as the out
         * label releases it unconditionally.
         */
        btrfs_init_path(&path);

        root = btrfs_read_fs_root(fs_info, &key);
        if (IS_ERR(root))
                goto out;

        key.objectid = objectid;
        key.type = BTRFS_EXTENT_DATA_KEY;
        /*
         * It can be nasty as data backref offset is
         * file offset - file extent offset, which is smaller or
         * equal to original backref offset.  The only special case is
         * overflow.  So we need to special check and do further search.
         */
        key.offset = offset & (1ULL << 63) ? 0 : offset;

        ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
        if (ret < 0)
                goto out;

        /*
         * Search afterwards to get correct one
         * NOTE: As we must do a comprehensive check on the data backref to
         * make sure the dref count also matches, we must iterate all file
         * extents for that inode.
         */
        while (1) {
                leaf = path.nodes[0];
                slot = path.slots[0];

                /*
                 * Skip a slot past the end of the leaf, and any leaf whose
                 * header owner is not @root_id.
                 */
                if (slot >= btrfs_header_nritems(leaf) ||
                    btrfs_header_owner(leaf) != root_id)
                        goto next;
                btrfs_item_key_to_cpu(leaf, &key, slot);
                /* Stop at the first item that is not a file extent of @objectid */
                if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
                        break;
                fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
                /*
                 * Except normal disk bytenr and disk num bytes, we still
                 * need to do extra check on dbackref offset as
                 * dbackref offset = file_offset - file_extent_offset
                 *
                 * Also, we must check the leaf owner.
                 * In case of shared tree blocks (snapshots) we can inherit
                 * leaves from source snapshot.
                 * In that case, reference from source snapshot should not
                 * count.
                 */
                if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
                    btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
                    (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
                    offset && btrfs_header_owner(leaf) == root_id)
                        found_count++;

next:
                /* Any non-zero return (end of tree or error) ends the scan */
                ret = btrfs_next_item(root, &path);
                if (ret)
                        break;
        }
out:
        btrfs_release_path(&path);
        if (found_count != count) {
                error(
"extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
                        bytenr, len, root_id, objectid, offset, count, found_count);
                return REFERENCER_MISSING;
        }
        return 0;
}
12252
12253 /*
12254  * Check if the referencer of a shared data backref exists
12255  */
12256 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12257                                      u64 parent, u64 bytenr)
12258 {
12259         struct extent_buffer *eb;
12260         struct btrfs_key key;
12261         struct btrfs_file_extent_item *fi;
12262         u32 nr;
12263         int found_parent = 0;
12264         int i;
12265
12266         eb = read_tree_block(fs_info, parent, 0);
12267         if (!extent_buffer_uptodate(eb))
12268                 goto out;
12269
12270         nr = btrfs_header_nritems(eb);
12271         for (i = 0; i < nr; i++) {
12272                 btrfs_item_key_to_cpu(eb, &key, i);
12273                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12274                         continue;
12275
12276                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
12277                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12278                         continue;
12279
12280                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12281                         found_parent = 1;
12282                         break;
12283                 }
12284         }
12285
12286 out:
12287         free_extent_buffer(eb);
12288         if (!found_parent) {
12289                 error("shared extent %llu referencer lost (parent: %llu)",
12290                         bytenr, parent);
12291                 return REFERENCER_MISSING;
12292         }
12293         return 0;
12294 }
12295
12296 /*
12297  * Only delete backref if REFERENCER_MISSING now
12298  *
12299  * Returns <0   the extent was deleted
12300  * Returns >0   the backref was deleted but extent still exists, returned value
12301  *               means error after repair
12302  * Returns  0   nothing happened
12303  */
12304 static int repair_extent_item(struct btrfs_trans_handle *trans,
12305                       struct btrfs_root *root, struct btrfs_path *path,
12306                       u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12307                       u64 owner, u64 offset, int err)
12308 {
12309         struct btrfs_key old_key;
12310         int freed = 0;
12311         int ret;
12312
12313         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
12314
12315         if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12316                 /* delete the backref */
12317                 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12318                           num_bytes, parent, root_objectid, owner, offset);
12319                 if (!ret) {
12320                         freed = 1;
12321                         err &= ~REFERENCER_MISSING;
12322                         printf("Delete backref in extent [%llu %llu]\n",
12323                                bytenr, num_bytes);
12324                 } else {
12325                         error("fail to delete backref in extent [%llu %llu]",
12326                                bytenr, num_bytes);
12327                 }
12328         }
12329
12330         /* btrfs_free_extent may delete the extent */
12331         btrfs_release_path(path);
12332         ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12333
12334         if (ret)
12335                 ret = -ENOENT;
12336         else if (freed)
12337                 ret = err;
12338         return ret;
12339 }
12340
12341 /*
12342  * This function will check a given extent item, including its backref and
12343  * itself (like crossing stripe boundary and type)
12344  *
12345  * Since we don't use extent_record anymore, introduce new error bit
12346  */
12347 static int check_extent_item(struct btrfs_trans_handle *trans,
12348                              struct btrfs_fs_info *fs_info,
12349                              struct btrfs_path *path)
12350 {
12351         struct btrfs_extent_item *ei;
12352         struct btrfs_extent_inline_ref *iref;
12353         struct btrfs_extent_data_ref *dref;
12354         struct extent_buffer *eb = path->nodes[0];
12355         unsigned long end;
12356         unsigned long ptr;
12357         int slot = path->slots[0];
12358         int type;
12359         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12360         u32 item_size = btrfs_item_size_nr(eb, slot);
12361         u64 flags;
12362         u64 offset;
12363         u64 parent;
12364         u64 num_bytes;
12365         u64 root_objectid;
12366         u64 owner;
12367         u64 owner_offset;
12368         int metadata = 0;
12369         int level;
12370         struct btrfs_key key;
12371         int ret;
12372         int err = 0;
12373
12374         btrfs_item_key_to_cpu(eb, &key, slot);
12375         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12376                 bytes_used += key.offset;
12377                 num_bytes = key.offset;
12378         } else {
12379                 bytes_used += nodesize;
12380                 num_bytes = nodesize;
12381         }
12382
12383         if (item_size < sizeof(*ei)) {
12384                 /*
12385                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12386                  * old thing when on disk format is still un-determined.
12387                  * No need to care about it anymore
12388                  */
12389                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12390                 return -ENOTTY;
12391         }
12392
12393         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12394         flags = btrfs_extent_flags(eb, ei);
12395
12396         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12397                 metadata = 1;
12398         if (metadata && check_crossing_stripes(global_info, key.objectid,
12399                                                eb->len)) {
12400                 error("bad metadata [%llu, %llu) crossing stripe boundary",
12401                       key.objectid, key.objectid + nodesize);
12402                 err |= CROSSING_STRIPE_BOUNDARY;
12403         }
12404
12405         ptr = (unsigned long)(ei + 1);
12406
12407         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12408                 /* Old EXTENT_ITEM metadata */
12409                 struct btrfs_tree_block_info *info;
12410
12411                 info = (struct btrfs_tree_block_info *)ptr;
12412                 level = btrfs_tree_block_level(eb, info);
12413                 ptr += sizeof(struct btrfs_tree_block_info);
12414         } else {
12415                 /* New METADATA_ITEM */
12416                 level = key.offset;
12417         }
12418         end = (unsigned long)ei + item_size;
12419
12420 next:
12421         /* Reached extent item end normally */
12422         if (ptr == end)
12423                 goto out;
12424
12425         /* Beyond extent item end, wrong item size */
12426         if (ptr > end) {
12427                 err |= ITEM_SIZE_MISMATCH;
12428                 error("extent item at bytenr %llu slot %d has wrong size",
12429                         eb->start, slot);
12430                 goto out;
12431         }
12432
12433         parent = 0;
12434         root_objectid = 0;
12435         owner = 0;
12436         owner_offset = 0;
12437         /* Now check every backref in this extent item */
12438         iref = (struct btrfs_extent_inline_ref *)ptr;
12439         type = btrfs_extent_inline_ref_type(eb, iref);
12440         offset = btrfs_extent_inline_ref_offset(eb, iref);
12441         switch (type) {
12442         case BTRFS_TREE_BLOCK_REF_KEY:
12443                 root_objectid = offset;
12444                 owner = level;
12445                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12446                                                level);
12447                 err |= ret;
12448                 break;
12449         case BTRFS_SHARED_BLOCK_REF_KEY:
12450                 parent = offset;
12451                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12452                                                  level);
12453                 err |= ret;
12454                 break;
12455         case BTRFS_EXTENT_DATA_REF_KEY:
12456                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12457                 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12458                 owner = btrfs_extent_data_ref_objectid(eb, dref);
12459                 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12460                 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12461                                         owner_offset, key.objectid, key.offset,
12462                                         btrfs_extent_data_ref_count(eb, dref));
12463                 err |= ret;
12464                 break;
12465         case BTRFS_SHARED_DATA_REF_KEY:
12466                 parent = offset;
12467                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12468                 err |= ret;
12469                 break;
12470         default:
12471                 error("extent[%llu %d %llu] has unknown ref type: %d",
12472                         key.objectid, key.type, key.offset, type);
12473                 ret = UNKNOWN_TYPE;
12474                 err |= ret;
12475                 goto out;
12476         }
12477
12478         if (err && repair) {
12479                 ret = repair_extent_item(trans, fs_info->extent_root, path,
12480                          key.objectid, num_bytes, parent, root_objectid,
12481                          owner, owner_offset, ret);
12482                 if (ret < 0)
12483                         goto out;
12484                 if (ret) {
12485                         goto next;
12486                         err = ret;
12487                 }
12488         }
12489
12490         ptr += btrfs_extent_inline_ref_size(type);
12491         goto next;
12492
12493 out:
12494         return err;
12495 }
12496
12497 /*
12498  * Check if a dev extent item is referred correctly by its chunk
12499  */
12500 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12501                                  struct extent_buffer *eb, int slot)
12502 {
12503         struct btrfs_root *chunk_root = fs_info->chunk_root;
12504         struct btrfs_dev_extent *ptr;
12505         struct btrfs_path path;
12506         struct btrfs_key chunk_key;
12507         struct btrfs_key devext_key;
12508         struct btrfs_chunk *chunk;
12509         struct extent_buffer *l;
12510         int num_stripes;
12511         u64 length;
12512         int i;
12513         int found_chunk = 0;
12514         int ret;
12515
12516         btrfs_item_key_to_cpu(eb, &devext_key, slot);
12517         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12518         length = btrfs_dev_extent_length(eb, ptr);
12519
12520         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12521         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12522         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12523
12524         btrfs_init_path(&path);
12525         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12526         if (ret)
12527                 goto out;
12528
12529         l = path.nodes[0];
12530         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12531         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12532                                       chunk_key.offset);
12533         if (ret < 0)
12534                 goto out;
12535
12536         if (btrfs_stripe_length(fs_info, l, chunk) != length)
12537                 goto out;
12538
12539         num_stripes = btrfs_chunk_num_stripes(l, chunk);
12540         for (i = 0; i < num_stripes; i++) {
12541                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12542                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12543
12544                 if (devid == devext_key.objectid &&
12545                     offset == devext_key.offset) {
12546                         found_chunk = 1;
12547                         break;
12548                 }
12549         }
12550 out:
12551         btrfs_release_path(&path);
12552         if (!found_chunk) {
12553                 error(
12554                 "device extent[%llu, %llu, %llu] did not find the related chunk",
12555                         devext_key.objectid, devext_key.offset, length);
12556                 return REFERENCER_MISSING;
12557         }
12558         return 0;
12559 }
12560
12561 /*
12562  * Check if the used space is correct with the dev item
12563  */
12564 static int check_dev_item(struct btrfs_fs_info *fs_info,
12565                           struct extent_buffer *eb, int slot)
12566 {
12567         struct btrfs_root *dev_root = fs_info->dev_root;
12568         struct btrfs_dev_item *dev_item;
12569         struct btrfs_path path;
12570         struct btrfs_key key;
12571         struct btrfs_dev_extent *ptr;
12572         u64 total_bytes;
12573         u64 dev_id;
12574         u64 used;
12575         u64 total = 0;
12576         int ret;
12577
12578         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12579         dev_id = btrfs_device_id(eb, dev_item);
12580         used = btrfs_device_bytes_used(eb, dev_item);
12581         total_bytes = btrfs_device_total_bytes(eb, dev_item);
12582
12583         key.objectid = dev_id;
12584         key.type = BTRFS_DEV_EXTENT_KEY;
12585         key.offset = 0;
12586
12587         btrfs_init_path(&path);
12588         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
12589         if (ret < 0) {
12590                 btrfs_item_key_to_cpu(eb, &key, slot);
12591                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12592                         key.objectid, key.type, key.offset);
12593                 btrfs_release_path(&path);
12594                 return REFERENCER_MISSING;
12595         }
12596
12597         /* Iterate dev_extents to calculate the used space of a device */
12598         while (1) {
12599                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12600                         goto next;
12601
12602                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12603                 if (key.objectid > dev_id)
12604                         break;
12605                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12606                         goto next;
12607
12608                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12609                                      struct btrfs_dev_extent);
12610                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12611 next:
12612                 ret = btrfs_next_item(dev_root, &path);
12613                 if (ret)
12614                         break;
12615         }
12616         btrfs_release_path(&path);
12617
12618         if (used != total) {
12619                 btrfs_item_key_to_cpu(eb, &key, slot);
12620                 error(
12621 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12622                         total, used, BTRFS_ROOT_TREE_OBJECTID,
12623                         BTRFS_DEV_EXTENT_KEY, dev_id);
12624                 return ACCOUNTING_MISMATCH;
12625         }
12626         check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize);
12627
12628         return 0;
12629 }
12630
12631 /*
12632  * Check a block group item with its referener (chunk) and its used space
12633  * with extent/metadata item
12634  */
12635 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12636                                   struct extent_buffer *eb, int slot)
12637 {
12638         struct btrfs_root *extent_root = fs_info->extent_root;
12639         struct btrfs_root *chunk_root = fs_info->chunk_root;
12640         struct btrfs_block_group_item *bi;
12641         struct btrfs_block_group_item bg_item;
12642         struct btrfs_path path;
12643         struct btrfs_key bg_key;
12644         struct btrfs_key chunk_key;
12645         struct btrfs_key extent_key;
12646         struct btrfs_chunk *chunk;
12647         struct extent_buffer *leaf;
12648         struct btrfs_extent_item *ei;
12649         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12650         u64 flags;
12651         u64 bg_flags;
12652         u64 used;
12653         u64 total = 0;
12654         int ret;
12655         int err = 0;
12656
12657         btrfs_item_key_to_cpu(eb, &bg_key, slot);
12658         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
12659         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12660         used = btrfs_block_group_used(&bg_item);
12661         bg_flags = btrfs_block_group_flags(&bg_item);
12662
12663         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12664         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12665         chunk_key.offset = bg_key.objectid;
12666
12667         btrfs_init_path(&path);
12668         /* Search for the referencer chunk */
12669         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12670         if (ret) {
12671                 error(
12672                 "block group[%llu %llu] did not find the related chunk item",
12673                         bg_key.objectid, bg_key.offset);
12674                 err |= REFERENCER_MISSING;
12675         } else {
12676                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12677                                         struct btrfs_chunk);
12678                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12679                                                 bg_key.offset) {
12680                         error(
12681         "block group[%llu %llu] related chunk item length does not match",
12682                                 bg_key.objectid, bg_key.offset);
12683                         err |= REFERENCER_MISMATCH;
12684                 }
12685         }
12686         btrfs_release_path(&path);
12687
12688         /* Search from the block group bytenr */
12689         extent_key.objectid = bg_key.objectid;
12690         extent_key.type = 0;
12691         extent_key.offset = 0;
12692
12693         btrfs_init_path(&path);
12694         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12695         if (ret < 0)
12696                 goto out;
12697
12698         /* Iterate extent tree to account used space */
12699         while (1) {
12700                 leaf = path.nodes[0];
12701
12702                 /* Search slot can point to the last item beyond leaf nritems */
12703                 if (path.slots[0] >= btrfs_header_nritems(leaf))
12704                         goto next;
12705
12706                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
12707                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12708                         break;
12709
12710                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12711                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12712                         goto next;
12713                 if (extent_key.objectid < bg_key.objectid)
12714                         goto next;
12715
12716                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12717                         total += nodesize;
12718                 else
12719                         total += extent_key.offset;
12720
12721                 ei = btrfs_item_ptr(leaf, path.slots[0],
12722                                     struct btrfs_extent_item);
12723                 flags = btrfs_extent_flags(leaf, ei);
12724                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12725                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12726                                 error(
12727                         "bad extent[%llu, %llu) type mismatch with chunk",
12728                                         extent_key.objectid,
12729                                         extent_key.objectid + extent_key.offset);
12730                                 err |= CHUNK_TYPE_MISMATCH;
12731                         }
12732                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12733                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12734                                     BTRFS_BLOCK_GROUP_METADATA))) {
12735                                 error(
12736                         "bad extent[%llu, %llu) type mismatch with chunk",
12737                                         extent_key.objectid,
12738                                         extent_key.objectid + nodesize);
12739                                 err |= CHUNK_TYPE_MISMATCH;
12740                         }
12741                 }
12742 next:
12743                 ret = btrfs_next_item(extent_root, &path);
12744                 if (ret)
12745                         break;
12746         }
12747
12748 out:
12749         btrfs_release_path(&path);
12750
12751         if (total != used) {
12752                 error(
12753                 "block group[%llu %llu] used %llu but extent items used %llu",
12754                         bg_key.objectid, bg_key.offset, used, total);
12755                 err |= BG_ACCOUNTING_ERROR;
12756         }
12757         return err;
12758 }
12759
12760 /*
12761  * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
12762  * FIXME: We still need to repair error of dev_item.
12763  *
12764  * Returns error after repair.
12765  */
12766 static int repair_chunk_item(struct btrfs_trans_handle *trans,
12767                              struct btrfs_root *chunk_root,
12768                              struct btrfs_path *path, int err)
12769 {
12770         struct btrfs_chunk *chunk;
12771         struct btrfs_key chunk_key;
12772         struct extent_buffer *eb = path->nodes[0];
12773         u64 length;
12774         int slot = path->slots[0];
12775         u64 type;
12776         int ret = 0;
12777
12778         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12779         if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
12780                 return err;
12781         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12782         type = btrfs_chunk_type(path->nodes[0], chunk);
12783         length = btrfs_chunk_length(eb, chunk);
12784
12785         if (err & REFERENCER_MISSING) {
12786                 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
12787                                              type, chunk_key.offset, length);
12788                 if (ret) {
12789                         error("fail to add block group item[%llu %llu]",
12790                               chunk_key.offset, length);
12791                         goto out;
12792                 } else {
12793                         err &= ~REFERENCER_MISSING;
12794                         printf("Added block group item[%llu %llu]\n",
12795                                chunk_key.offset, length);
12796                 }
12797         }
12798
12799 out:
12800         return err;
12801 }
12802
12803 /*
12804  * Check a chunk item.
12805  * Including checking all referred dev_extents and block group
12806  */
12807 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12808                             struct extent_buffer *eb, int slot)
12809 {
12810         struct btrfs_root *extent_root = fs_info->extent_root;
12811         struct btrfs_root *dev_root = fs_info->dev_root;
12812         struct btrfs_path path;
12813         struct btrfs_key chunk_key;
12814         struct btrfs_key bg_key;
12815         struct btrfs_key devext_key;
12816         struct btrfs_chunk *chunk;
12817         struct extent_buffer *leaf;
12818         struct btrfs_block_group_item *bi;
12819         struct btrfs_block_group_item bg_item;
12820         struct btrfs_dev_extent *ptr;
12821         u64 length;
12822         u64 chunk_end;
12823         u64 stripe_len;
12824         u64 type;
12825         int num_stripes;
12826         u64 offset;
12827         u64 objectid;
12828         int i;
12829         int ret;
12830         int err = 0;
12831
12832         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12833         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12834         length = btrfs_chunk_length(eb, chunk);
12835         chunk_end = chunk_key.offset + length;
12836         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12837                                       chunk_key.offset);
12838         if (ret < 0) {
12839                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12840                         chunk_end);
12841                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12842                 goto out;
12843         }
12844         type = btrfs_chunk_type(eb, chunk);
12845
12846         bg_key.objectid = chunk_key.offset;
12847         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12848         bg_key.offset = length;
12849
12850         btrfs_init_path(&path);
12851         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12852         if (ret) {
12853                 error(
12854                 "chunk[%llu %llu) did not find the related block group item",
12855                         chunk_key.offset, chunk_end);
12856                 err |= REFERENCER_MISSING;
12857         } else{
12858                 leaf = path.nodes[0];
12859                 bi = btrfs_item_ptr(leaf, path.slots[0],
12860                                     struct btrfs_block_group_item);
12861                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
12862                                    sizeof(bg_item));
12863                 if (btrfs_block_group_flags(&bg_item) != type) {
12864                         error(
12865 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12866                                 chunk_key.offset, chunk_end, type,
12867                                 btrfs_block_group_flags(&bg_item));
12868                         err |= REFERENCER_MISSING;
12869                 }
12870         }
12871
12872         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12873         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
12874         for (i = 0; i < num_stripes; i++) {
12875                 btrfs_release_path(&path);
12876                 btrfs_init_path(&path);
12877                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12878                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12879                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12880
12881                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12882                                         0, 0);
12883                 if (ret)
12884                         goto not_match_dev;
12885
12886                 leaf = path.nodes[0];
12887                 ptr = btrfs_item_ptr(leaf, path.slots[0],
12888                                      struct btrfs_dev_extent);
12889                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12890                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
12891                 if (objectid != chunk_key.objectid ||
12892                     offset != chunk_key.offset ||
12893                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12894                         goto not_match_dev;
12895                 continue;
12896 not_match_dev:
12897                 err |= BACKREF_MISSING;
12898                 error(
12899                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12900                         chunk_key.objectid, chunk_end, i);
12901                 continue;
12902         }
12903         btrfs_release_path(&path);
12904 out:
12905         return err;
12906 }
12907
12908 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
12909                                    struct btrfs_root *root,
12910                                    struct btrfs_path *path)
12911 {
12912         struct btrfs_key key;
12913         int ret = 0;
12914
12915         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
12916         btrfs_release_path(path);
12917         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
12918         if (ret) {
12919                 ret = -ENOENT;
12920                 goto out;
12921         }
12922
12923         ret = btrfs_del_item(trans, root, path);
12924         if (ret)
12925                 goto out;
12926
12927         if (path->slots[0] == 0)
12928                 btrfs_prev_leaf(root, path);
12929         else
12930                 path->slots[0]--;
12931 out:
12932         if (ret)
12933                 error("failed to delete root %llu item[%llu, %u, %llu]",
12934                       root->objectid, key.objectid, key.type, key.offset);
12935         else
12936                 printf("Deleted root %llu item[%llu, %u, %llu]\n",
12937                        root->objectid, key.objectid, key.type, key.offset);
12938         return ret;
12939 }
12940
12941 /*
12942  * Main entry function to check known items and update related accounting info
12943  */
12944 static int check_leaf_items(struct btrfs_trans_handle *trans,
12945                             struct btrfs_root *root, struct btrfs_path *path,
12946                             struct node_refs *nrefs, int account_bytes)
12947 {
12948         struct btrfs_fs_info *fs_info = root->fs_info;
12949         struct btrfs_key key;
12950         struct extent_buffer *eb;
12951         int slot;
12952         int type;
12953         struct btrfs_extent_data_ref *dref;
12954         int ret = 0;
12955         int err = 0;
12956
12957 again:
12958         eb = path->nodes[0];
12959         slot = path->slots[0];
12960         if (slot >= btrfs_header_nritems(eb)) {
12961                 if (slot == 0) {
12962                         error("empty leaf [%llu %u] root %llu", eb->start,
12963                                 root->fs_info->nodesize, root->objectid);
12964                         err |= EIO;
12965                 }
12966                 goto out;
12967         }
12968
12969         btrfs_item_key_to_cpu(eb, &key, slot);
12970         type = key.type;
12971
12972         switch (type) {
12973         case BTRFS_EXTENT_DATA_KEY:
12974                 ret = check_extent_data_item(root, path, nrefs, account_bytes);
12975                 if (repair && ret)
12976                         ret = repair_extent_data_item(trans, root, path, nrefs,
12977                                                       ret);
12978                 err |= ret;
12979                 break;
12980         case BTRFS_BLOCK_GROUP_ITEM_KEY:
12981                 ret = check_block_group_item(fs_info, eb, slot);
12982                 if (repair &&
12983                     ret & REFERENCER_MISSING)
12984                         ret = delete_extent_tree_item(trans, root, path);
12985                 err |= ret;
12986                 break;
12987         case BTRFS_DEV_ITEM_KEY:
12988                 ret = check_dev_item(fs_info, eb, slot);
12989                 err |= ret;
12990                 break;
12991         case BTRFS_CHUNK_ITEM_KEY:
12992                 ret = check_chunk_item(fs_info, eb, slot);
12993                 if (repair && ret)
12994                         ret = repair_chunk_item(trans, root, path, ret);
12995                 err |= ret;
12996                 break;
12997         case BTRFS_DEV_EXTENT_KEY:
12998                 ret = check_dev_extent_item(fs_info, eb, slot);
12999                 err |= ret;
13000                 break;
13001         case BTRFS_EXTENT_ITEM_KEY:
13002         case BTRFS_METADATA_ITEM_KEY:
13003                 ret = check_extent_item(trans, fs_info, path);
13004                 err |= ret;
13005                 break;
13006         case BTRFS_EXTENT_CSUM_KEY:
13007                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
13008                 err |= ret;
13009                 break;
13010         case BTRFS_TREE_BLOCK_REF_KEY:
13011                 ret = check_tree_block_backref(fs_info, key.offset,
13012                                                key.objectid, -1);
13013                 if (repair &&
13014                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13015                         ret = delete_extent_tree_item(trans, root, path);
13016                 err |= ret;
13017                 break;
13018         case BTRFS_EXTENT_DATA_REF_KEY:
13019                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
13020                 ret = check_extent_data_backref(fs_info,
13021                                 btrfs_extent_data_ref_root(eb, dref),
13022                                 btrfs_extent_data_ref_objectid(eb, dref),
13023                                 btrfs_extent_data_ref_offset(eb, dref),
13024                                 key.objectid, 0,
13025                                 btrfs_extent_data_ref_count(eb, dref));
13026                 if (repair &&
13027                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13028                         ret = delete_extent_tree_item(trans, root, path);
13029                 err |= ret;
13030                 break;
13031         case BTRFS_SHARED_BLOCK_REF_KEY:
13032                 ret = check_shared_block_backref(fs_info, key.offset,
13033                                                  key.objectid, -1);
13034                 if (repair &&
13035                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13036                         ret = delete_extent_tree_item(trans, root, path);
13037                 err |= ret;
13038                 break;
13039         case BTRFS_SHARED_DATA_REF_KEY:
13040                 ret = check_shared_data_backref(fs_info, key.offset,
13041                                                 key.objectid);
13042                 if (repair &&
13043                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13044                         ret = delete_extent_tree_item(trans, root, path);
13045                 err |= ret;
13046                 break;
13047         default:
13048                 break;
13049         }
13050
13051         ++path->slots[0];
13052         goto again;
13053 out:
13054         return err;
13055 }
13056
13057 /*
13058  * Low memory usage version check_chunks_and_extents.
13059  */
13060 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
13061 {
13062         struct btrfs_trans_handle *trans = NULL;
13063         struct btrfs_path path;
13064         struct btrfs_key old_key;
13065         struct btrfs_key key;
13066         struct btrfs_root *root1;
13067         struct btrfs_root *root;
13068         struct btrfs_root *cur_root;
13069         int err = 0;
13070         int ret;
13071
13072         root = fs_info->fs_root;
13073
13074         if (repair) {
13075                 trans = btrfs_start_transaction(fs_info->extent_root, 1);
13076                 if (IS_ERR(trans)) {
13077                         error("failed to start transaction before check");
13078                         return PTR_ERR(trans);
13079                 }
13080         }
13081
13082         root1 = root->fs_info->chunk_root;
13083         ret = check_btrfs_root(trans, root1, 0, 1);
13084         err |= ret;
13085
13086         root1 = root->fs_info->tree_root;
13087         ret = check_btrfs_root(trans, root1, 0, 1);
13088         err |= ret;
13089
13090         btrfs_init_path(&path);
13091         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
13092         key.offset = 0;
13093         key.type = BTRFS_ROOT_ITEM_KEY;
13094
13095         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
13096         if (ret) {
13097                 error("cannot find extent tree in tree_root");
13098                 goto out;
13099         }
13100
13101         while (1) {
13102                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13103                 if (key.type != BTRFS_ROOT_ITEM_KEY)
13104                         goto next;
13105                 old_key = key;
13106                 key.offset = (u64)-1;
13107
13108                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13109                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
13110                                         &key);
13111                 else
13112                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
13113                 if (IS_ERR(cur_root) || !cur_root) {
13114                         error("failed to read tree: %lld", key.objectid);
13115                         goto next;
13116                 }
13117
13118                 ret = check_btrfs_root(trans, cur_root, 0, 1);
13119                 err |= ret;
13120
13121                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13122                         btrfs_free_fs_root(cur_root);
13123
13124                 btrfs_release_path(&path);
13125                 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
13126                                         &old_key, &path, 0, 0);
13127                 if (ret)
13128                         goto out;
13129 next:
13130                 ret = btrfs_next_item(root1, &path);
13131                 if (ret)
13132                         goto out;
13133         }
13134 out:
13135
13136         /* if repair, update block accounting */
13137         if (repair) {
13138                 ret = btrfs_fix_block_accounting(trans, root);
13139                 if (ret)
13140                         err |= ret;
13141                 else
13142                         err &= ~BG_ACCOUNTING_ERROR;
13143         }
13144
13145         if (trans)
13146                 btrfs_commit_transaction(trans, root->fs_info->extent_root);
13147
13148         btrfs_release_path(&path);
13149
13150         return err;
13151 }
13152
13153 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
13154 {
13155         int ret;
13156
13157         if (!ctx.progress_enabled)
13158                 fprintf(stderr, "checking extents\n");
13159         if (check_mode == CHECK_MODE_LOWMEM)
13160                 ret = check_chunks_and_extents_v2(fs_info);
13161         else
13162                 ret = check_chunks_and_extents(fs_info);
13163
13164         /* Also repair device size related problems */
13165         if (repair && !ret) {
13166                 ret = btrfs_fix_device_and_super_size(fs_info);
13167                 if (ret > 0)
13168                         ret = 0;
13169         }
13170         return ret;
13171 }
13172
13173 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
13174                            struct btrfs_root *root, int overwrite)
13175 {
13176         struct extent_buffer *c;
13177         struct extent_buffer *old = root->node;
13178         int level;
13179         int ret;
13180         struct btrfs_disk_key disk_key = {0,0,0};
13181
13182         level = 0;
13183
13184         if (overwrite) {
13185                 c = old;
13186                 extent_buffer_get(c);
13187                 goto init;
13188         }
13189         c = btrfs_alloc_free_block(trans, root,
13190                                    root->fs_info->nodesize,
13191                                    root->root_key.objectid,
13192                                    &disk_key, level, 0, 0);
13193         if (IS_ERR(c)) {
13194                 c = old;
13195                 extent_buffer_get(c);
13196                 overwrite = 1;
13197         }
13198 init:
13199         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
13200         btrfs_set_header_level(c, level);
13201         btrfs_set_header_bytenr(c, c->start);
13202         btrfs_set_header_generation(c, trans->transid);
13203         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
13204         btrfs_set_header_owner(c, root->root_key.objectid);
13205
13206         write_extent_buffer(c, root->fs_info->fsid,
13207                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
13208
13209         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
13210                             btrfs_header_chunk_tree_uuid(c),
13211                             BTRFS_UUID_SIZE);
13212
13213         btrfs_mark_buffer_dirty(c);
13214         /*
13215          * this case can happen in the following case:
13216          *
13217          * 1.overwrite previous root.
13218          *
13219          * 2.reinit reloc data root, this is because we skip pin
13220          * down reloc data tree before which means we can allocate
13221          * same block bytenr here.
13222          */
13223         if (old->start == c->start) {
13224                 btrfs_set_root_generation(&root->root_item,
13225                                           trans->transid);
13226                 root->root_item.level = btrfs_header_level(root->node);
13227                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
13228                                         &root->root_key, &root->root_item);
13229                 if (ret) {
13230                         free_extent_buffer(c);
13231                         return ret;
13232                 }
13233         }
13234         free_extent_buffer(old);
13235         root->node = c;
13236         add_root_to_dirty_list(root);
13237         return 0;
13238 }
13239
13240 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
13241                                 struct extent_buffer *eb, int tree_root)
13242 {
13243         struct extent_buffer *tmp;
13244         struct btrfs_root_item *ri;
13245         struct btrfs_key key;
13246         u64 bytenr;
13247         int level = btrfs_header_level(eb);
13248         int nritems;
13249         int ret;
13250         int i;
13251
13252         /*
13253          * If we have pinned this block before, don't pin it again.
13254          * This can not only avoid forever loop with broken filesystem
13255          * but also give us some speedups.
13256          */
13257         if (test_range_bit(&fs_info->pinned_extents, eb->start,
13258                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
13259                 return 0;
13260
13261         btrfs_pin_extent(fs_info, eb->start, eb->len);
13262
13263         nritems = btrfs_header_nritems(eb);
13264         for (i = 0; i < nritems; i++) {
13265                 if (level == 0) {
13266                         btrfs_item_key_to_cpu(eb, &key, i);
13267                         if (key.type != BTRFS_ROOT_ITEM_KEY)
13268                                 continue;
13269                         /* Skip the extent root and reloc roots */
13270                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13271                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13272                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
13273                                 continue;
13274                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13275                         bytenr = btrfs_disk_root_bytenr(eb, ri);
13276
13277                         /*
13278                          * If at any point we start needing the real root we
13279                          * will have to build a stump root for the root we are
13280                          * in, but for now this doesn't actually use the root so
13281                          * just pass in extent_root.
13282                          */
13283                         tmp = read_tree_block(fs_info, bytenr, 0);
13284                         if (!extent_buffer_uptodate(tmp)) {
13285                                 fprintf(stderr, "Error reading root block\n");
13286                                 return -EIO;
13287                         }
13288                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
13289                         free_extent_buffer(tmp);
13290                         if (ret)
13291                                 return ret;
13292                 } else {
13293                         bytenr = btrfs_node_blockptr(eb, i);
13294
13295                         /* If we aren't the tree root don't read the block */
13296                         if (level == 1 && !tree_root) {
13297                                 btrfs_pin_extent(fs_info, bytenr,
13298                                                 fs_info->nodesize);
13299                                 continue;
13300                         }
13301
13302                         tmp = read_tree_block(fs_info, bytenr, 0);
13303                         if (!extent_buffer_uptodate(tmp)) {
13304                                 fprintf(stderr, "Error reading tree block\n");
13305                                 return -EIO;
13306                         }
13307                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13308                         free_extent_buffer(tmp);
13309                         if (ret)
13310                                 return ret;
13311                 }
13312         }
13313
13314         return 0;
13315 }
13316
13317 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13318 {
13319         int ret;
13320
13321         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13322         if (ret)
13323                 return ret;
13324
13325         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
13326 }
13327
13328 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13329 {
13330         struct btrfs_block_group_cache *cache;
13331         struct btrfs_path path;
13332         struct extent_buffer *leaf;
13333         struct btrfs_chunk *chunk;
13334         struct btrfs_key key;
13335         int ret;
13336         u64 start;
13337
13338         btrfs_init_path(&path);
13339         key.objectid = 0;
13340         key.type = BTRFS_CHUNK_ITEM_KEY;
13341         key.offset = 0;
13342         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13343         if (ret < 0) {
13344                 btrfs_release_path(&path);
13345                 return ret;
13346         }
13347
13348         /*
13349          * We do this in case the block groups were screwed up and had alloc
13350          * bits that aren't actually set on the chunks.  This happens with
13351          * restored images every time and could happen in real life I guess.
13352          */
13353         fs_info->avail_data_alloc_bits = 0;
13354         fs_info->avail_metadata_alloc_bits = 0;
13355         fs_info->avail_system_alloc_bits = 0;
13356
13357         /* First we need to create the in-memory block groups */
13358         while (1) {
13359                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13360                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13361                         if (ret < 0) {
13362                                 btrfs_release_path(&path);
13363                                 return ret;
13364                         }
13365                         if (ret) {
13366                                 ret = 0;
13367                                 break;
13368                         }
13369                 }
13370                 leaf = path.nodes[0];
13371                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13372                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13373                         path.slots[0]++;
13374                         continue;
13375                 }
13376
13377                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13378                 btrfs_add_block_group(fs_info, 0,
13379                                       btrfs_chunk_type(leaf, chunk), key.offset,
13380                                       btrfs_chunk_length(leaf, chunk));
13381                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13382                                  key.offset + btrfs_chunk_length(leaf, chunk));
13383                 path.slots[0]++;
13384         }
13385         start = 0;
13386         while (1) {
13387                 cache = btrfs_lookup_first_block_group(fs_info, start);
13388                 if (!cache)
13389                         break;
13390                 cache->cached = 1;
13391                 start = cache->key.objectid + cache->key.offset;
13392         }
13393
13394         btrfs_release_path(&path);
13395         return 0;
13396 }
13397
13398 static int reset_balance(struct btrfs_trans_handle *trans,
13399                          struct btrfs_fs_info *fs_info)
13400 {
13401         struct btrfs_root *root = fs_info->tree_root;
13402         struct btrfs_path path;
13403         struct extent_buffer *leaf;
13404         struct btrfs_key key;
13405         int del_slot, del_nr = 0;
13406         int ret;
13407         int found = 0;
13408
13409         btrfs_init_path(&path);
13410         key.objectid = BTRFS_BALANCE_OBJECTID;
13411         key.type = BTRFS_BALANCE_ITEM_KEY;
13412         key.offset = 0;
13413         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13414         if (ret) {
13415                 if (ret > 0)
13416                         ret = 0;
13417                 if (!ret)
13418                         goto reinit_data_reloc;
13419                 else
13420                         goto out;
13421         }
13422
13423         ret = btrfs_del_item(trans, root, &path);
13424         if (ret)
13425                 goto out;
13426         btrfs_release_path(&path);
13427
13428         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13429         key.type = BTRFS_ROOT_ITEM_KEY;
13430         key.offset = 0;
13431         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13432         if (ret < 0)
13433                 goto out;
13434         while (1) {
13435                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13436                         if (!found)
13437                                 break;
13438
13439                         if (del_nr) {
13440                                 ret = btrfs_del_items(trans, root, &path,
13441                                                       del_slot, del_nr);
13442                                 del_nr = 0;
13443                                 if (ret)
13444                                         goto out;
13445                         }
13446                         key.offset++;
13447                         btrfs_release_path(&path);
13448
13449                         found = 0;
13450                         ret = btrfs_search_slot(trans, root, &key, &path,
13451                                                 -1, 1);
13452                         if (ret < 0)
13453                                 goto out;
13454                         continue;
13455                 }
13456                 found = 1;
13457                 leaf = path.nodes[0];
13458                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13459                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13460                         break;
13461                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13462                         path.slots[0]++;
13463                         continue;
13464                 }
13465                 if (!del_nr) {
13466                         del_slot = path.slots[0];
13467                         del_nr = 1;
13468                 } else {
13469                         del_nr++;
13470                 }
13471                 path.slots[0]++;
13472         }
13473
13474         if (del_nr) {
13475                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13476                 if (ret)
13477                         goto out;
13478         }
13479         btrfs_release_path(&path);
13480
13481 reinit_data_reloc:
13482         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13483         key.type = BTRFS_ROOT_ITEM_KEY;
13484         key.offset = (u64)-1;
13485         root = btrfs_read_fs_root(fs_info, &key);
13486         if (IS_ERR(root)) {
13487                 fprintf(stderr, "Error reading data reloc tree\n");
13488                 ret = PTR_ERR(root);
13489                 goto out;
13490         }
13491         record_root_in_trans(trans, root);
13492         ret = btrfs_fsck_reinit_root(trans, root, 0);
13493         if (ret)
13494                 goto out;
13495         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13496 out:
13497         btrfs_release_path(&path);
13498         return ret;
13499 }
13500
13501 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13502                               struct btrfs_fs_info *fs_info)
13503 {
13504         u64 start = 0;
13505         int ret;
13506
13507         /*
13508          * The only reason we don't do this is because right now we're just
13509          * walking the trees we find and pinning down their bytes, we don't look
13510          * at any of the leaves.  In order to do mixed groups we'd have to check
13511          * the leaves of any fs roots and pin down the bytes for any file
13512          * extents we find.  Not hard but why do it if we don't have to?
13513          */
13514         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13515                 fprintf(stderr, "We don't support re-initing the extent tree "
13516                         "for mixed block groups yet, please notify a btrfs "
13517                         "developer you want to do this so they can add this "
13518                         "functionality.\n");
13519                 return -EINVAL;
13520         }
13521
13522         /*
13523          * first we need to walk all of the trees except the extent tree and pin
13524          * down the bytes that are in use so we don't overwrite any existing
13525          * metadata.
13526          */
13527         ret = pin_metadata_blocks(fs_info);
13528         if (ret) {
13529                 fprintf(stderr, "error pinning down used bytes\n");
13530                 return ret;
13531         }
13532
13533         /*
13534          * Need to drop all the block groups since we're going to recreate all
13535          * of them again.
13536          */
13537         btrfs_free_block_groups(fs_info);
13538         ret = reset_block_groups(fs_info);
13539         if (ret) {
13540                 fprintf(stderr, "error resetting the block groups\n");
13541                 return ret;
13542         }
13543
13544         /* Ok we can allocate now, reinit the extent root */
13545         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13546         if (ret) {
13547                 fprintf(stderr, "extent root initialization failed\n");
13548                 /*
13549                  * When the transaction code is updated we should end the
13550                  * transaction, but for now progs only knows about commit so
13551                  * just return an error.
13552                  */
13553                 return ret;
13554         }
13555
13556         /*
13557          * Now we have all the in-memory block groups setup so we can make
13558          * allocations properly, and the metadata we care about is safe since we
13559          * pinned all of it above.
13560          */
13561         while (1) {
13562                 struct btrfs_block_group_cache *cache;
13563
13564                 cache = btrfs_lookup_first_block_group(fs_info, start);
13565                 if (!cache)
13566                         break;
13567                 start = cache->key.objectid + cache->key.offset;
13568                 ret = btrfs_insert_item(trans, fs_info->extent_root,
13569                                         &cache->key, &cache->item,
13570                                         sizeof(cache->item));
13571                 if (ret) {
13572                         fprintf(stderr, "Error adding block group\n");
13573                         return ret;
13574                 }
13575                 btrfs_extent_post_op(trans, fs_info->extent_root);
13576         }
13577
13578         ret = reset_balance(trans, fs_info);
13579         if (ret)
13580                 fprintf(stderr, "error resetting the pending balance\n");
13581
13582         return ret;
13583 }
13584
13585 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13586 {
13587         struct btrfs_path path;
13588         struct btrfs_trans_handle *trans;
13589         struct btrfs_key key;
13590         int ret;
13591
13592         printf("Recowing metadata block %llu\n", eb->start);
13593         key.objectid = btrfs_header_owner(eb);
13594         key.type = BTRFS_ROOT_ITEM_KEY;
13595         key.offset = (u64)-1;
13596
13597         root = btrfs_read_fs_root(root->fs_info, &key);
13598         if (IS_ERR(root)) {
13599                 fprintf(stderr, "Couldn't find owner root %llu\n",
13600                         key.objectid);
13601                 return PTR_ERR(root);
13602         }
13603
13604         trans = btrfs_start_transaction(root, 1);
13605         if (IS_ERR(trans))
13606                 return PTR_ERR(trans);
13607
13608         btrfs_init_path(&path);
13609         path.lowest_level = btrfs_header_level(eb);
13610         if (path.lowest_level)
13611                 btrfs_node_key_to_cpu(eb, &key, 0);
13612         else
13613                 btrfs_item_key_to_cpu(eb, &key, 0);
13614
13615         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13616         btrfs_commit_transaction(trans, root);
13617         btrfs_release_path(&path);
13618         return ret;
13619 }
13620
13621 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13622 {
13623         struct btrfs_path path;
13624         struct btrfs_trans_handle *trans;
13625         struct btrfs_key key;
13626         int ret;
13627
13628         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13629                bad->key.type, bad->key.offset);
13630         key.objectid = bad->root_id;
13631         key.type = BTRFS_ROOT_ITEM_KEY;
13632         key.offset = (u64)-1;
13633
13634         root = btrfs_read_fs_root(root->fs_info, &key);
13635         if (IS_ERR(root)) {
13636                 fprintf(stderr, "Couldn't find owner root %llu\n",
13637                         key.objectid);
13638                 return PTR_ERR(root);
13639         }
13640
13641         trans = btrfs_start_transaction(root, 1);
13642         if (IS_ERR(trans))
13643                 return PTR_ERR(trans);
13644
13645         btrfs_init_path(&path);
13646         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13647         if (ret) {
13648                 if (ret > 0)
13649                         ret = 0;
13650                 goto out;
13651         }
13652         ret = btrfs_del_item(trans, root, &path);
13653 out:
13654         btrfs_commit_transaction(trans, root);
13655         btrfs_release_path(&path);
13656         return ret;
13657 }
13658
13659 static int zero_log_tree(struct btrfs_root *root)
13660 {
13661         struct btrfs_trans_handle *trans;
13662         int ret;
13663
13664         trans = btrfs_start_transaction(root, 1);
13665         if (IS_ERR(trans)) {
13666                 ret = PTR_ERR(trans);
13667                 return ret;
13668         }
13669         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13670         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13671         ret = btrfs_commit_transaction(trans, root);
13672         return ret;
13673 }
13674
13675 static int populate_csum(struct btrfs_trans_handle *trans,
13676                          struct btrfs_root *csum_root, char *buf, u64 start,
13677                          u64 len)
13678 {
13679         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13680         u64 offset = 0;
13681         u64 sectorsize;
13682         int ret = 0;
13683
13684         while (offset < len) {
13685                 sectorsize = fs_info->sectorsize;
13686                 ret = read_extent_data(fs_info, buf, start + offset,
13687                                        &sectorsize, 0);
13688                 if (ret)
13689                         break;
13690                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13691                                             start + offset, buf, sectorsize);
13692                 if (ret)
13693                         break;
13694                 offset += sectorsize;
13695         }
13696         return ret;
13697 }
13698
13699 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13700                                       struct btrfs_root *csum_root,
13701                                       struct btrfs_root *cur_root)
13702 {
13703         struct btrfs_path path;
13704         struct btrfs_key key;
13705         struct extent_buffer *node;
13706         struct btrfs_file_extent_item *fi;
13707         char *buf = NULL;
13708         u64 start = 0;
13709         u64 len = 0;
13710         int slot = 0;
13711         int ret = 0;
13712
13713         buf = malloc(cur_root->fs_info->sectorsize);
13714         if (!buf)
13715                 return -ENOMEM;
13716
13717         btrfs_init_path(&path);
13718         key.objectid = 0;
13719         key.offset = 0;
13720         key.type = 0;
13721         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13722         if (ret < 0)
13723                 goto out;
13724         /* Iterate all regular file extents and fill its csum */
13725         while (1) {
13726                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13727
13728                 if (key.type != BTRFS_EXTENT_DATA_KEY)
13729                         goto next;
13730                 node = path.nodes[0];
13731                 slot = path.slots[0];
13732                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13733                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
13734                         goto next;
13735                 start = btrfs_file_extent_disk_bytenr(node, fi);
13736                 len = btrfs_file_extent_disk_num_bytes(node, fi);
13737
13738                 ret = populate_csum(trans, csum_root, buf, start, len);
13739                 if (ret == -EEXIST)
13740                         ret = 0;
13741                 if (ret < 0)
13742                         goto out;
13743 next:
13744                 /*
13745                  * TODO: if next leaf is corrupted, jump to nearest next valid
13746                  * leaf.
13747                  */
13748                 ret = btrfs_next_item(cur_root, &path);
13749                 if (ret < 0)
13750                         goto out;
13751                 if (ret > 0) {
13752                         ret = 0;
13753                         goto out;
13754                 }
13755         }
13756
13757 out:
13758         btrfs_release_path(&path);
13759         free(buf);
13760         return ret;
13761 }
13762
13763 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13764                                   struct btrfs_root *csum_root)
13765 {
13766         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13767         struct btrfs_path path;
13768         struct btrfs_root *tree_root = fs_info->tree_root;
13769         struct btrfs_root *cur_root;
13770         struct extent_buffer *node;
13771         struct btrfs_key key;
13772         int slot = 0;
13773         int ret = 0;
13774
13775         btrfs_init_path(&path);
13776         key.objectid = BTRFS_FS_TREE_OBJECTID;
13777         key.offset = 0;
13778         key.type = BTRFS_ROOT_ITEM_KEY;
13779         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13780         if (ret < 0)
13781                 goto out;
13782         if (ret > 0) {
13783                 ret = -ENOENT;
13784                 goto out;
13785         }
13786
13787         while (1) {
13788                 node = path.nodes[0];
13789                 slot = path.slots[0];
13790                 btrfs_item_key_to_cpu(node, &key, slot);
13791                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13792                         goto out;
13793                 if (key.type != BTRFS_ROOT_ITEM_KEY)
13794                         goto next;
13795                 if (!is_fstree(key.objectid))
13796                         goto next;
13797                 key.offset = (u64)-1;
13798
13799                 cur_root = btrfs_read_fs_root(fs_info, &key);
13800                 if (IS_ERR(cur_root) || !cur_root) {
13801                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13802                                 key.objectid);
13803                         goto out;
13804                 }
13805                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13806                                 cur_root);
13807                 if (ret < 0)
13808                         goto out;
13809 next:
13810                 ret = btrfs_next_item(tree_root, &path);
13811                 if (ret > 0) {
13812                         ret = 0;
13813                         goto out;
13814                 }
13815                 if (ret < 0)
13816                         goto out;
13817         }
13818
13819 out:
13820         btrfs_release_path(&path);
13821         return ret;
13822 }
13823
13824 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13825                                       struct btrfs_root *csum_root)
13826 {
13827         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13828         struct btrfs_path path;
13829         struct btrfs_extent_item *ei;
13830         struct extent_buffer *leaf;
13831         char *buf;
13832         struct btrfs_key key;
13833         int ret;
13834
13835         btrfs_init_path(&path);
13836         key.objectid = 0;
13837         key.type = BTRFS_EXTENT_ITEM_KEY;
13838         key.offset = 0;
13839         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13840         if (ret < 0) {
13841                 btrfs_release_path(&path);
13842                 return ret;
13843         }
13844
13845         buf = malloc(csum_root->fs_info->sectorsize);
13846         if (!buf) {
13847                 btrfs_release_path(&path);
13848                 return -ENOMEM;
13849         }
13850
13851         while (1) {
13852                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13853                         ret = btrfs_next_leaf(extent_root, &path);
13854                         if (ret < 0)
13855                                 break;
13856                         if (ret) {
13857                                 ret = 0;
13858                                 break;
13859                         }
13860                 }
13861                 leaf = path.nodes[0];
13862
13863                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13864                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13865                         path.slots[0]++;
13866                         continue;
13867                 }
13868
13869                 ei = btrfs_item_ptr(leaf, path.slots[0],
13870                                     struct btrfs_extent_item);
13871                 if (!(btrfs_extent_flags(leaf, ei) &
13872                       BTRFS_EXTENT_FLAG_DATA)) {
13873                         path.slots[0]++;
13874                         continue;
13875                 }
13876
13877                 ret = populate_csum(trans, csum_root, buf, key.objectid,
13878                                     key.offset);
13879                 if (ret)
13880                         break;
13881                 path.slots[0]++;
13882         }
13883
13884         btrfs_release_path(&path);
13885         free(buf);
13886         return ret;
13887 }
13888
/*
 * Recompute data checksums and store them in the csum tree.
 *
 * Re-initialising the extent tree wipes all extent info, so in that case the
 * extent tree cannot be used as the source.  When @search_fs_tree is
 * non-zero the fs/subvolume trees are walked instead; otherwise the extent
 * tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        if (!search_fs_tree)
                return fill_csum_tree_from_extent(trans, csum_root);

        return fill_csum_tree_from_fs(trans, csum_root);
}
13905
13906 static void free_roots_info_cache(void)
13907 {
13908         if (!roots_info_cache)
13909                 return;
13910
13911         while (!cache_tree_empty(roots_info_cache)) {
13912                 struct cache_extent *entry;
13913                 struct root_item_info *rii;
13914
13915                 entry = first_cache_extent(roots_info_cache);
13916                 if (!entry)
13917                         break;
13918                 remove_cache_extent(roots_info_cache, entry);
13919                 rii = container_of(entry, struct root_item_info, cache_extent);
13920                 free(rii);
13921         }
13922
13923         free(roots_info_cache);
13924         roots_info_cache = NULL;
13925 }
13926
/*
 * Scan the extent tree and record, per root id, the highest-level tree block
 * that references that root through a TREE_BLOCK_REF inline ref.
 *
 * Results are stored in the global roots_info_cache (allocated here if
 * needed), keyed by root id.  For each root the entry keeps the level,
 * bytenr and generation of the highest-level block seen, plus node_count:
 * how many blocks were seen at that highest level.
 *
 * Returns 0 on success or a negative error (-ENOMEM, or an error from the
 * extent tree search / leaf iteration).
 */
static int build_roots_info_cache(struct btrfs_fs_info *info)
{
        int ret = 0;
        struct btrfs_key key;
        struct extent_buffer *leaf;
        struct btrfs_path path;

        if (!roots_info_cache) {
                roots_info_cache = malloc(sizeof(*roots_info_cache));
                if (!roots_info_cache)
                        return -ENOMEM;
                cache_tree_init(roots_info_cache);
        }

        /* Start from the smallest possible extent item key. */
        btrfs_init_path(&path);
        key.objectid = 0;
        key.type = BTRFS_EXTENT_ITEM_KEY;
        key.offset = 0;
        ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
        if (ret < 0)
                goto out;
        leaf = path.nodes[0];

        while (1) {
                struct btrfs_key found_key;
                struct btrfs_extent_item *ei;
                struct btrfs_extent_inline_ref *iref;
                int slot = path.slots[0];
                int type;
                u64 flags;
                u64 root_id;
                u8 level;
                struct cache_extent *entry;
                struct root_item_info *rii;

                /* Past the last item of this leaf: move to the next one. */
                if (slot >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(info->extent_root, &path);
                        if (ret < 0) {
                                break;
                        } else if (ret) {
                                /* No more leaves: the scan is complete. */
                                ret = 0;
                                break;
                        }
                        leaf = path.nodes[0];
                        slot = path.slots[0];
                }

                btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);

                /* Only extent items and metadata items are of interest. */
                if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
                    found_key.type != BTRFS_METADATA_ITEM_KEY)
                        goto next;

                ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
                flags = btrfs_extent_flags(leaf, ei);

                /* Skip extent items that are not flagged as tree blocks. */
                if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
                    !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
                        goto next;

                /*
                 * METADATA_ITEM: the level is taken from the key offset and
                 * the inline ref follows the extent item directly.
                 * EXTENT_ITEM: a btrfs_tree_block_info sits between the
                 * extent item and the inline ref and carries the level.
                 */
                if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
                        iref = (struct btrfs_extent_inline_ref *)(ei + 1);
                        level = found_key.offset;
                } else {
                        struct btrfs_tree_block_info *binfo;

                        binfo = (struct btrfs_tree_block_info *)(ei + 1);
                        iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
                        level = btrfs_tree_block_level(leaf, binfo);
                }

                /*
                 * For a root extent, it must be of the following type and the
                 * first (and only one) iref in the item.
                 */
                type = btrfs_extent_inline_ref_type(leaf, iref);
                if (type != BTRFS_TREE_BLOCK_REF_KEY)
                        goto next;

                /* The inline ref offset is used as the owning root id. */
                root_id = btrfs_extent_inline_ref_offset(leaf, iref);
                entry = lookup_cache_extent(roots_info_cache, root_id, 1);
                if (!entry) {
                        rii = malloc(sizeof(struct root_item_info));
                        if (!rii) {
                                ret = -ENOMEM;
                                goto out;
                        }
                        rii->cache_extent.start = root_id;
                        rii->cache_extent.size = 1;
                        /* (u8)-1 marks "no level recorded yet". */
                        rii->level = (u8)-1;
                        entry = &rii->cache_extent;
                        ret = insert_cache_extent(roots_info_cache, entry);
                        ASSERT(ret == 0);
                } else {
                        rii = container_of(entry, struct root_item_info,
                                           cache_extent);
                }

                ASSERT(rii->cache_extent.start == root_id);
                ASSERT(rii->cache_extent.size == 1);

                /*
                 * A higher level (or the first block seen for this root)
                 * replaces the candidate; another block at the same level
                 * only bumps node_count.
                 */
                if (level > rii->level || rii->level == (u8)-1) {
                        rii->level = level;
                        rii->bytenr = found_key.objectid;
                        rii->gen = btrfs_extent_generation(leaf, ei);
                        rii->node_count = 1;
                } else if (level == rii->level) {
                        rii->node_count++;
                }
next:
                path.slots[0]++;
        }

out:
        btrfs_release_path(&path);

        return ret;
}
14045
14046 static int maybe_repair_root_item(struct btrfs_path *path,
14047                                   const struct btrfs_key *root_key,
14048                                   const int read_only_mode)
14049 {
14050         const u64 root_id = root_key->objectid;
14051         struct cache_extent *entry;
14052         struct root_item_info *rii;
14053         struct btrfs_root_item ri;
14054         unsigned long offset;
14055
14056         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14057         if (!entry) {
14058                 fprintf(stderr,
14059                         "Error: could not find extent items for root %llu\n",
14060                         root_key->objectid);
14061                 return -ENOENT;
14062         }
14063
14064         rii = container_of(entry, struct root_item_info, cache_extent);
14065         ASSERT(rii->cache_extent.start == root_id);
14066         ASSERT(rii->cache_extent.size == 1);
14067
14068         if (rii->node_count != 1) {
14069                 fprintf(stderr,
14070                         "Error: could not find btree root extent for root %llu\n",
14071                         root_id);
14072                 return -ENOENT;
14073         }
14074
14075         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
14076         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
14077
14078         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
14079             btrfs_root_level(&ri) != rii->level ||
14080             btrfs_root_generation(&ri) != rii->gen) {
14081
14082                 /*
14083                  * If we're in repair mode but our caller told us to not update
14084                  * the root item, i.e. just check if it needs to be updated, don't
14085                  * print this message, since the caller will call us again shortly
14086                  * for the same root item without read only mode (the caller will
14087                  * open a transaction first).
14088                  */
14089                 if (!(read_only_mode && repair))
14090                         fprintf(stderr,
14091                                 "%sroot item for root %llu,"
14092                                 " current bytenr %llu, current gen %llu, current level %u,"
14093                                 " new bytenr %llu, new gen %llu, new level %u\n",
14094                                 (read_only_mode ? "" : "fixing "),
14095                                 root_id,
14096                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
14097                                 btrfs_root_level(&ri),
14098                                 rii->bytenr, rii->gen, rii->level);
14099
14100                 if (btrfs_root_generation(&ri) > rii->gen) {
14101                         fprintf(stderr,
14102                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
14103                                 root_id, btrfs_root_generation(&ri), rii->gen);
14104                         return -EINVAL;
14105                 }
14106
14107                 if (!read_only_mode) {
14108                         btrfs_set_root_bytenr(&ri, rii->bytenr);
14109                         btrfs_set_root_level(&ri, rii->level);
14110                         btrfs_set_root_generation(&ri, rii->gen);
14111                         write_extent_buffer(path->nodes[0], &ri,
14112                                             offset, sizeof(ri));
14113                 }
14114
14115                 return 1;
14116         }
14117
14118         return 0;
14119 }
14120
/*
 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
 * caused read-only snapshots to be corrupted if they were created at a moment
 * when the source subvolume/snapshot had orphan items. The issue was that the
 * on-disk root items became incorrect, referring to the pre orphan cleanup root
 * node instead of the post orphan cleanup root node.
 * So this function, and its callees, just detects and fixes those cases. Even
 * though the regression was for read-only snapshots, this function applies to
 * any snapshot/subvolume root.
 * This must be run before any other repair code - not doing it so, makes other
 * repair code delete or modify backrefs in the extent tree for example, which
 * will result in an inconsistent fs after repairing the root items.
 *
 * Returns a negative error on failure, otherwise the number of root items
 * counted as bad: in repair mode these are the items that were rewritten,
 * in read-only mode the items that were found to differ.
 */
static int repair_root_items(struct btrfs_fs_info *info)
{
        struct btrfs_path path;
        struct btrfs_key key;
        struct extent_buffer *leaf;
        struct btrfs_trans_handle *trans = NULL;
        int ret = 0;
        int bad_roots = 0;
        int need_trans = 0;

        btrfs_init_path(&path);

        ret = build_roots_info_cache(info);
        if (ret)
                goto out;

        key.objectid = BTRFS_FIRST_FREE_OBJECTID;
        key.type = BTRFS_ROOT_ITEM_KEY;
        key.offset = 0;

again:
        /*
         * Avoid opening and committing transactions if a leaf doesn't have
         * any root items that need to be fixed, so that we avoid rotating
         * backup roots unnecessarily.
         */
        if (need_trans) {
                trans = btrfs_start_transaction(info->tree_root, 1);
                if (IS_ERR(trans)) {
                        ret = PTR_ERR(trans);
                        goto out;
                }
        }

        /* The search only COWs when a transaction is open. */
        ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
                                0, trans ? 1 : 0);
        if (ret < 0)
                goto out;
        leaf = path.nodes[0];

        while (1) {
                struct btrfs_key found_key;

                /*
                 * End of the current leaf: commit any open transaction and
                 * restart the search from the next key.
                 * NOTE(review): find_next_key() presumably stores the next
                 * key in 'key' and returns non-zero when there is none -
                 * confirm against its definition.
                 */
                if (path.slots[0] >= btrfs_header_nritems(leaf)) {
                        int no_more_keys = find_next_key(&path, &key);

                        btrfs_release_path(&path);
                        if (trans) {
                                ret = btrfs_commit_transaction(trans,
                                                               info->tree_root);
                                trans = NULL;
                                if (ret < 0)
                                        goto out;
                        }
                        need_trans = 0;
                        if (no_more_keys)
                                break;
                        goto again;
                }

                btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);

                /* Only root items are checked; the reloc tree is skipped. */
                if (found_key.type != BTRFS_ROOT_ITEM_KEY)
                        goto next;
                if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
                        goto next;

                /* Read-only probe unless a transaction is already open. */
                ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
                if (ret < 0)
                        goto out;
                if (ret) {
                        /*
                         * A bad item found by the probe in repair mode:
                         * redo the search for this key with a transaction
                         * so the item can be rewritten.
                         */
                        if (!trans && repair) {
                                need_trans = 1;
                                key = found_key;
                                btrfs_release_path(&path);
                                goto again;
                        }
                        bad_roots++;
                }
next:
                path.slots[0]++;
        }
        ret = 0;
out:
        free_roots_info_cache();
        btrfs_release_path(&path);
        /* A transaction may still be open when bailing out on an error. */
        if (trans)
                btrfs_commit_transaction(trans, info->tree_root);
        if (ret < 0)
                return ret;

        return bad_roots;
}
14227
14228 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
14229 {
14230         struct btrfs_trans_handle *trans;
14231         struct btrfs_block_group_cache *bg_cache;
14232         u64 current = 0;
14233         int ret = 0;
14234
14235         /* Clear all free space cache inodes and its extent data */
14236         while (1) {
14237                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
14238                 if (!bg_cache)
14239                         break;
14240                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
14241                 if (ret < 0)
14242                         return ret;
14243                 current = bg_cache->key.objectid + bg_cache->key.offset;
14244         }
14245
14246         /* Don't forget to set cache_generation to -1 */
14247         trans = btrfs_start_transaction(fs_info->tree_root, 0);
14248         if (IS_ERR(trans)) {
14249                 error("failed to update super block cache generation");
14250                 return PTR_ERR(trans);
14251         }
14252         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
14253         btrfs_commit_transaction(trans, fs_info->tree_root);
14254
14255         return ret;
14256 }
14257
14258 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
14259                 int clear_version)
14260 {
14261         int ret = 0;
14262
14263         if (clear_version == 1) {
14264                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14265                         error(
14266                 "free space cache v2 detected, use --clear-space-cache v2");
14267                         ret = 1;
14268                         goto close_out;
14269                 }
14270                 printf("Clearing free space cache\n");
14271                 ret = clear_free_space_cache(fs_info);
14272                 if (ret) {
14273                         error("failed to clear free space cache");
14274                         ret = 1;
14275                 } else {
14276                         printf("Free space cache cleared\n");
14277                 }
14278         } else if (clear_version == 2) {
14279                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14280                         printf("no free space cache v2 to clear\n");
14281                         ret = 0;
14282                         goto close_out;
14283                 }
14284                 printf("Clear free space cache v2\n");
14285                 ret = btrfs_clear_free_space_tree(fs_info);
14286                 if (ret) {
14287                         error("failed to clear free space cache v2: %d", ret);
14288                         ret = 1;
14289                 } else {
14290                         printf("free space cache v2 cleared\n");
14291                 }
14292         }
14293 close_out:
14294         return ret;
14295 }
14296
/*
 * NULL-terminated table of help strings for "btrfs check": the synopsis
 * line, a short and a longer description, then one entry per option line.
 * The empty string separates the description from the option list.
 * NOTE(review): how these entries are rendered depends on the help code that
 * consumes this table, which is not visible here.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--force                     skip mount checks, repair is not possible",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
14328
14329 int cmd_check(int argc, char **argv)
14330 {
14331         struct cache_tree root_cache;
14332         struct btrfs_root *root;
14333         struct btrfs_fs_info *info;
14334         u64 bytenr = 0;
14335         u64 subvolid = 0;
14336         u64 tree_root_bytenr = 0;
14337         u64 chunk_root_bytenr = 0;
14338         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14339         int ret = 0;
14340         int err = 0;
14341         u64 num;
14342         int init_csum_tree = 0;
14343         int readonly = 0;
14344         int clear_space_cache = 0;
14345         int qgroup_report = 0;
14346         int qgroups_repaired = 0;
14347         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
14348         int force = 0;
14349
14350         while(1) {
14351                 int c;
14352                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14353                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14354                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14355                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14356                         GETOPT_VAL_FORCE };
14357                 static const struct option long_options[] = {
14358                         { "super", required_argument, NULL, 's' },
14359                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14360                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14361                         { "init-csum-tree", no_argument, NULL,
14362                                 GETOPT_VAL_INIT_CSUM },
14363                         { "init-extent-tree", no_argument, NULL,
14364                                 GETOPT_VAL_INIT_EXTENT },
14365                         { "check-data-csum", no_argument, NULL,
14366                                 GETOPT_VAL_CHECK_CSUM },
14367                         { "backup", no_argument, NULL, 'b' },
14368                         { "subvol-extents", required_argument, NULL, 'E' },
14369                         { "qgroup-report", no_argument, NULL, 'Q' },
14370                         { "tree-root", required_argument, NULL, 'r' },
14371                         { "chunk-root", required_argument, NULL,
14372                                 GETOPT_VAL_CHUNK_TREE },
14373                         { "progress", no_argument, NULL, 'p' },
14374                         { "mode", required_argument, NULL,
14375                                 GETOPT_VAL_MODE },
14376                         { "clear-space-cache", required_argument, NULL,
14377                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
14378                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14379                         { NULL, 0, NULL, 0}
14380                 };
14381
14382                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14383                 if (c < 0)
14384                         break;
14385                 switch(c) {
14386                         case 'a': /* ignored */ break;
14387                         case 'b':
14388                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14389                                 break;
14390                         case 's':
14391                                 num = arg_strtou64(optarg);
14392                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14393                                         error(
14394                                         "super mirror should be less than %d",
14395                                                 BTRFS_SUPER_MIRROR_MAX);
14396                                         exit(1);
14397                                 }
14398                                 bytenr = btrfs_sb_offset(((int)num));
14399                                 printf("using SB copy %llu, bytenr %llu\n", num,
14400                                        (unsigned long long)bytenr);
14401                                 break;
14402                         case 'Q':
14403                                 qgroup_report = 1;
14404                                 break;
14405                         case 'E':
14406                                 subvolid = arg_strtou64(optarg);
14407                                 break;
14408                         case 'r':
14409                                 tree_root_bytenr = arg_strtou64(optarg);
14410                                 break;
14411                         case GETOPT_VAL_CHUNK_TREE:
14412                                 chunk_root_bytenr = arg_strtou64(optarg);
14413                                 break;
14414                         case 'p':
14415                                 ctx.progress_enabled = true;
14416                                 break;
14417                         case '?':
14418                         case 'h':
14419                                 usage(cmd_check_usage);
14420                         case GETOPT_VAL_REPAIR:
14421                                 printf("enabling repair mode\n");
14422                                 repair = 1;
14423                                 ctree_flags |= OPEN_CTREE_WRITES;
14424                                 break;
14425                         case GETOPT_VAL_READONLY:
14426                                 readonly = 1;
14427                                 break;
14428                         case GETOPT_VAL_INIT_CSUM:
14429                                 printf("Creating a new CRC tree\n");
14430                                 init_csum_tree = 1;
14431                                 repair = 1;
14432                                 ctree_flags |= OPEN_CTREE_WRITES;
14433                                 break;
14434                         case GETOPT_VAL_INIT_EXTENT:
14435                                 init_extent_tree = 1;
14436                                 ctree_flags |= (OPEN_CTREE_WRITES |
14437                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
14438                                 repair = 1;
14439                                 break;
14440                         case GETOPT_VAL_CHECK_CSUM:
14441                                 check_data_csum = 1;
14442                                 break;
14443                         case GETOPT_VAL_MODE:
14444                                 check_mode = parse_check_mode(optarg);
14445                                 if (check_mode == CHECK_MODE_UNKNOWN) {
14446                                         error("unknown mode: %s", optarg);
14447                                         exit(1);
14448                                 }
14449                                 break;
14450                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
14451                                 if (strcmp(optarg, "v1") == 0) {
14452                                         clear_space_cache = 1;
14453                                 } else if (strcmp(optarg, "v2") == 0) {
14454                                         clear_space_cache = 2;
14455                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14456                                 } else {
14457                                         error(
14458                 "invalid argument to --clear-space-cache, must be v1 or v2");
14459                                         exit(1);
14460                                 }
14461                                 ctree_flags |= OPEN_CTREE_WRITES;
14462                                 break;
14463                         case GETOPT_VAL_FORCE:
14464                                 force = 1;
14465                                 break;
14466                 }
14467         }
14468
14469         if (check_argc_exact(argc - optind, 1))
14470                 usage(cmd_check_usage);
14471
14472         if (ctx.progress_enabled) {
14473                 ctx.tp = TASK_NOTHING;
14474                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14475         }
14476
14477         /* This check is the only reason for --readonly to exist */
14478         if (readonly && repair) {
14479                 error("repair options are not compatible with --readonly");
14480                 exit(1);
14481         }
14482
14483         /*
14484          * experimental and dangerous
14485          */
14486         if (repair && check_mode == CHECK_MODE_LOWMEM)
14487                 warning("low-memory mode repair support is only partial");
14488
14489         radix_tree_init();
14490         cache_tree_init(&root_cache);
14491
14492         ret = check_mounted(argv[optind]);
14493         if (!force) {
14494                 if (ret < 0) {
14495                         error("could not check mount status: %s",
14496                                         strerror(-ret));
14497                         err |= !!ret;
14498                         goto err_out;
14499                 } else if (ret) {
14500                         error(
14501 "%s is currently mounted, use --force if you really intend to check the filesystem",
14502                                 argv[optind]);
14503                         ret = -EBUSY;
14504                         err |= !!ret;
14505                         goto err_out;
14506                 }
14507         } else {
14508                 if (repair) {
14509                         error("repair and --force is not yet supported");
14510                         ret = 1;
14511                         err |= !!ret;
14512                         goto err_out;
14513                 }
14514                 if (ret < 0) {
14515                         warning(
14516 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14517                                 argv[optind]);
14518                 } else if (ret) {
14519                         warning(
14520                         "filesystem mounted, continuing because of --force");
14521                 }
14522                 /* A block device is mounted in exclusive mode by kernel */
14523                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14524         }
14525
14526         /* only allow partial opening under repair mode */
14527         if (repair)
14528                 ctree_flags |= OPEN_CTREE_PARTIAL;
14529
14530         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14531                                   chunk_root_bytenr, ctree_flags);
14532         if (!info) {
14533                 error("cannot open file system");
14534                 ret = -EIO;
14535                 err |= !!ret;
14536                 goto err_out;
14537         }
14538
14539         global_info = info;
14540         root = info->fs_root;
14541         uuid_unparse(info->super_copy->fsid, uuidbuf);
14542
14543         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14544
14545         /*
14546          * Check the bare minimum before starting anything else that could rely
14547          * on it, namely the tree roots, any local consistency checks
14548          */
14549         if (!extent_buffer_uptodate(info->tree_root->node) ||
14550             !extent_buffer_uptodate(info->dev_root->node) ||
14551             !extent_buffer_uptodate(info->chunk_root->node)) {
14552                 error("critical roots corrupted, unable to check the filesystem");
14553                 err |= !!ret;
14554                 ret = -EIO;
14555                 goto close_out;
14556         }
14557
14558         if (clear_space_cache) {
14559                 ret = do_clear_free_space_cache(info, clear_space_cache);
14560                 err |= !!ret;
14561                 goto close_out;
14562         }
14563
14564         /*
14565          * repair mode will force us to commit transaction which
14566          * will make us fail to load log tree when mounting.
14567          */
14568         if (repair && btrfs_super_log_root(info->super_copy)) {
14569                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14570                 if (!ret) {
14571                         ret = 1;
14572                         err |= !!ret;
14573                         goto close_out;
14574                 }
14575                 ret = zero_log_tree(root);
14576                 err |= !!ret;
14577                 if (ret) {
14578                         error("failed to zero log tree: %d", ret);
14579                         goto close_out;
14580                 }
14581         }
14582
14583         if (qgroup_report) {
14584                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14585                        uuidbuf);
14586                 ret = qgroup_verify_all(info);
14587                 err |= !!ret;
14588                 if (ret == 0)
14589                         report_qgroups(1);
14590                 goto close_out;
14591         }
14592         if (subvolid) {
14593                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14594                        subvolid, argv[optind], uuidbuf);
14595                 ret = print_extent_state(info, subvolid);
14596                 err |= !!ret;
14597                 goto close_out;
14598         }
14599
14600         if (init_extent_tree || init_csum_tree) {
14601                 struct btrfs_trans_handle *trans;
14602
14603                 trans = btrfs_start_transaction(info->extent_root, 0);
14604                 if (IS_ERR(trans)) {
14605                         error("error starting transaction");
14606                         ret = PTR_ERR(trans);
14607                         err |= !!ret;
14608                         goto close_out;
14609                 }
14610
14611                 if (init_extent_tree) {
14612                         printf("Creating a new extent tree\n");
14613                         ret = reinit_extent_tree(trans, info);
14614                         err |= !!ret;
14615                         if (ret)
14616                                 goto close_out;
14617                 }
14618
14619                 if (init_csum_tree) {
14620                         printf("Reinitialize checksum tree\n");
14621                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14622                         if (ret) {
14623                                 error("checksum tree initialization failed: %d",
14624                                                 ret);
14625                                 ret = -EIO;
14626                                 err |= !!ret;
14627                                 goto close_out;
14628                         }
14629
14630                         ret = fill_csum_tree(trans, info->csum_root,
14631                                              init_extent_tree);
14632                         err |= !!ret;
14633                         if (ret) {
14634                                 error("checksum tree refilling failed: %d", ret);
14635                                 return -EIO;
14636                         }
14637                 }
14638                 /*
14639                  * Ok now we commit and run the normal fsck, which will add
14640                  * extent entries for all of the items it finds.
14641                  */
14642                 ret = btrfs_commit_transaction(trans, info->extent_root);
14643                 err |= !!ret;
14644                 if (ret)
14645                         goto close_out;
14646         }
14647         if (!extent_buffer_uptodate(info->extent_root->node)) {
14648                 error("critical: extent_root, unable to check the filesystem");
14649                 ret = -EIO;
14650                 err |= !!ret;
14651                 goto close_out;
14652         }
14653         if (!extent_buffer_uptodate(info->csum_root->node)) {
14654                 error("critical: csum_root, unable to check the filesystem");
14655                 ret = -EIO;
14656                 err |= !!ret;
14657                 goto close_out;
14658         }
14659
14660         if (!init_extent_tree) {
14661                 ret = repair_root_items(info);
14662                 if (ret < 0) {
14663                         err = !!ret;
14664                         error("failed to repair root items: %s", strerror(-ret));
14665                         goto close_out;
14666                 }
14667                 if (repair) {
14668                         fprintf(stderr, "Fixed %d roots.\n", ret);
14669                         ret = 0;
14670                 } else if (ret > 0) {
14671                         fprintf(stderr,
14672                                 "Found %d roots with an outdated root item.\n",
14673                                 ret);
14674                         fprintf(stderr,
14675         "Please run a filesystem check with the option --repair to fix them.\n");
14676                         ret = 1;
14677                         err |= ret;
14678                         goto close_out;
14679                 }
14680         }
14681
14682         ret = do_check_chunks_and_extents(info);
14683         err |= !!ret;
14684         if (ret)
14685                 error(
14686                 "errors found in extent allocation tree or chunk allocation");
14687
14688         /* Only re-check super size after we checked and repaired the fs */
14689         err |= !is_super_size_valid(info);
14690
14691         if (!ctx.progress_enabled) {
14692                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14693                         fprintf(stderr, "checking free space tree\n");
14694                 else
14695                         fprintf(stderr, "checking free space cache\n");
14696         }
14697         ret = check_space_cache(root);
14698         err |= !!ret;
14699         if (ret) {
14700                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14701                         error("errors found in free space tree");
14702                 else
14703                         error("errors found in free space cache");
14704                 goto out;
14705         }
14706
14707         /*
14708          * We used to have to have these hole extents in between our real
14709          * extents so if we don't have this flag set we need to make sure there
14710          * are no gaps in the file extents for inodes, otherwise we can just
14711          * ignore it when this happens.
14712          */
14713         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14714         ret = do_check_fs_roots(info, &root_cache);
14715         err |= !!ret;
14716         if (ret) {
14717                 error("errors found in fs roots");
14718                 goto out;
14719         }
14720
14721         fprintf(stderr, "checking csums\n");
14722         ret = check_csums(root);
14723         err |= !!ret;
14724         if (ret) {
14725                 error("errors found in csum tree");
14726                 goto out;
14727         }
14728
14729         fprintf(stderr, "checking root refs\n");
14730         /* For low memory mode, check_fs_roots_v2 handles root refs */
14731         if (check_mode != CHECK_MODE_LOWMEM) {
14732                 ret = check_root_refs(root, &root_cache);
14733                 err |= !!ret;
14734                 if (ret) {
14735                         error("errors found in root refs");
14736                         goto out;
14737                 }
14738         }
14739
14740         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14741                 struct extent_buffer *eb;
14742
14743                 eb = list_first_entry(&root->fs_info->recow_ebs,
14744                                       struct extent_buffer, recow);
14745                 list_del_init(&eb->recow);
14746                 ret = recow_extent_buffer(root, eb);
14747                 err |= !!ret;
14748                 if (ret) {
14749                         error("fails to fix transid errors");
14750                         break;
14751                 }
14752         }
14753
14754         while (!list_empty(&delete_items)) {
14755                 struct bad_item *bad;
14756
14757                 bad = list_first_entry(&delete_items, struct bad_item, list);
14758                 list_del_init(&bad->list);
14759                 if (repair) {
14760                         ret = delete_bad_item(root, bad);
14761                         err |= !!ret;
14762                 }
14763                 free(bad);
14764         }
14765
14766         if (info->quota_enabled) {
14767                 fprintf(stderr, "checking quota groups\n");
14768                 ret = qgroup_verify_all(info);
14769                 err |= !!ret;
14770                 if (ret) {
14771                         error("failed to check quota groups");
14772                         goto out;
14773                 }
14774                 report_qgroups(0);
14775                 ret = repair_qgroups(info, &qgroups_repaired);
14776                 err |= !!ret;
14777                 if (err) {
14778                         error("failed to repair quota groups");
14779                         goto out;
14780                 }
14781                 ret = 0;
14782         }
14783
14784         if (!list_empty(&root->fs_info->recow_ebs)) {
14785                 error("transid errors in file system");
14786                 ret = 1;
14787                 err |= !!ret;
14788         }
14789 out:
14790         printf("found %llu bytes used, ",
14791                (unsigned long long)bytes_used);
14792         if (err)
14793                 printf("error(s) found\n");
14794         else
14795                 printf("no error found\n");
14796         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14797         printf("total tree bytes: %llu\n",
14798                (unsigned long long)total_btree_bytes);
14799         printf("total fs tree bytes: %llu\n",
14800                (unsigned long long)total_fs_tree_bytes);
14801         printf("total extent tree bytes: %llu\n",
14802                (unsigned long long)total_extent_tree_bytes);
14803         printf("btree space waste bytes: %llu\n",
14804                (unsigned long long)btree_space_waste);
14805         printf("file data blocks allocated: %llu\n referenced %llu\n",
14806                 (unsigned long long)data_bytes_allocated,
14807                 (unsigned long long)data_bytes_referenced);
14808
14809         free_qgroup_counts();
14810         free_root_recs_tree(&root_cache);
14811 close_out:
14812         close_ctree(root);
14813 err_out:
14814         if (ctx.progress_enabled)
14815                 task_deinit(ctx.info);
14816
14817         return err;
14818 }