btrfs-progs: check: Export check global variables to check/common.h
[platform/upstream/btrfs-progs.git] / check / main.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46 #include "check/original.h"
47 #include "check/lowmem.h"
48 #include "check/common.h"
49
/*
 * Phases shown by the progress indicator.
 *
 * The values are used as indexes into task_position_string[] in
 * print_status_check(), so the order here has to match the order of the
 * strings there.
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	/* Sentinel with no status string; has to be the last element. */
	TASK_NOTHING,
};
56
57 struct task_ctx {
58         int progress_enabled;
59         enum task_position tp;
60
61         struct task_info *info;
62 };
63
64 u64 bytes_used = 0;
65 u64 total_csum_bytes = 0;
66 u64 total_btree_bytes = 0;
67 u64 total_fs_tree_bytes = 0;
68 u64 total_extent_tree_bytes = 0;
69 u64 btree_space_waste = 0;
70 u64 data_bytes_allocated = 0;
71 u64 data_bytes_referenced = 0;
72 LIST_HEAD(duplicate_extents);
73 LIST_HEAD(delete_items);
74 int no_holes = 0;
75 int init_extent_tree = 0;
76 int check_data_csum = 0;
77 struct btrfs_fs_info *global_info;
78 struct task_ctx ctx = { 0 };
79 struct cache_tree *roots_info_cache = NULL;
80
/* Check implementations that can be selected by name, see parse_check_mode(). */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	/* Returned by parse_check_mode() for a name it does not recognize */
	CHECK_MODE_UNKNOWN,
	/* Alias: the original mode is the default */
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Currently selected mode, initialized to the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
89
90 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
91 {
92         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
93         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
94         struct data_backref *back1 = to_data_backref(ext1);
95         struct data_backref *back2 = to_data_backref(ext2);
96
97         WARN_ON(!ext1->is_data);
98         WARN_ON(!ext2->is_data);
99
100         /* parent and root are a union, so this covers both */
101         if (back1->parent > back2->parent)
102                 return 1;
103         if (back1->parent < back2->parent)
104                 return -1;
105
106         /* This is a full backref and the parents match. */
107         if (back1->node.full_backref)
108                 return 0;
109
110         if (back1->owner > back2->owner)
111                 return 1;
112         if (back1->owner < back2->owner)
113                 return -1;
114
115         if (back1->offset > back2->offset)
116                 return 1;
117         if (back1->offset < back2->offset)
118                 return -1;
119
120         if (back1->found_ref && back2->found_ref) {
121                 if (back1->disk_bytenr > back2->disk_bytenr)
122                         return 1;
123                 if (back1->disk_bytenr < back2->disk_bytenr)
124                         return -1;
125
126                 if (back1->bytes > back2->bytes)
127                         return 1;
128                 if (back1->bytes < back2->bytes)
129                         return -1;
130         }
131
132         return 0;
133 }
134
135 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
136 {
137         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
138         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
139         struct tree_backref *back1 = to_tree_backref(ext1);
140         struct tree_backref *back2 = to_tree_backref(ext2);
141
142         WARN_ON(ext1->is_data);
143         WARN_ON(ext2->is_data);
144
145         /* parent and root are a union, so this covers both */
146         if (back1->parent > back2->parent)
147                 return 1;
148         if (back1->parent < back2->parent)
149                 return -1;
150
151         return 0;
152 }
153
154 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
155 {
156         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
157         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
158
159         if (ext1->is_data > ext2->is_data)
160                 return 1;
161
162         if (ext1->is_data < ext2->is_data)
163                 return -1;
164
165         if (ext1->full_backref > ext2->full_backref)
166                 return 1;
167         if (ext1->full_backref < ext2->full_backref)
168                 return -1;
169
170         if (ext1->is_data)
171                 return compare_data_backref(node1, node2);
172         else
173                 return compare_tree_backref(node1, node2);
174 }
175
176
177 static void *print_status_check(void *p)
178 {
179         struct task_ctx *priv = p;
180         const char work_indicator[] = { '.', 'o', 'O', 'o' };
181         uint32_t count = 0;
182         static char *task_position_string[] = {
183                 "checking extents",
184                 "checking free space cache",
185                 "checking fs roots",
186         };
187
188         task_period_start(priv->info, 1000 /* 1s */);
189
190         if (priv->tp == TASK_NOTHING)
191                 return NULL;
192
193         while (1) {
194                 printf("%s [%c]\r", task_position_string[priv->tp],
195                                 work_indicator[count % 4]);
196                 count++;
197                 fflush(stdout);
198                 task_period_wait(priv->info);
199         }
200         return NULL;
201 }
202
/*
 * Terminate the progress line: write a newline to stdout and flush it.
 *
 * @p is not used.  Always returns 0.
 */
static int print_status_return(void *p)
{
	putchar('\n');
	fflush(stdout);

	return 0;
}
210
211 static enum btrfs_check_mode parse_check_mode(const char *str)
212 {
213         if (strcmp(str, "lowmem") == 0)
214                 return CHECK_MODE_LOWMEM;
215         if (strcmp(str, "orig") == 0)
216                 return CHECK_MODE_ORIGINAL;
217         if (strcmp(str, "original") == 0)
218                 return CHECK_MODE_ORIGINAL;
219
220         return CHECK_MODE_UNKNOWN;
221 }
222
223 /* Compatible function to allow reuse of old codes */
224 static u64 first_extent_gap(struct rb_root *holes)
225 {
226         struct file_extent_hole *hole;
227
228         if (RB_EMPTY_ROOT(holes))
229                 return (u64)-1;
230
231         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
232         return hole->start;
233 }
234
235 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
236 {
237         struct file_extent_hole *hole1;
238         struct file_extent_hole *hole2;
239
240         hole1 = rb_entry(node1, struct file_extent_hole, node);
241         hole2 = rb_entry(node2, struct file_extent_hole, node);
242
243         if (hole1->start > hole2->start)
244                 return -1;
245         if (hole1->start < hole2->start)
246                 return 1;
247         /* Now hole1->start == hole2->start */
248         if (hole1->len >= hole2->len)
249                 /*
250                  * Hole 1 will be merge center
251                  * Same hole will be merged later
252                  */
253                 return -1;
254         /* Hole 2 will be merge center */
255         return 1;
256 }
257
258 /*
259  * Add a hole to the record
260  *
261  * This will do hole merge for copy_file_extent_holes(),
262  * which will ensure there won't be continuous holes.
263  */
264 static int add_file_extent_hole(struct rb_root *holes,
265                                 u64 start, u64 len)
266 {
267         struct file_extent_hole *hole;
268         struct file_extent_hole *prev = NULL;
269         struct file_extent_hole *next = NULL;
270
271         hole = malloc(sizeof(*hole));
272         if (!hole)
273                 return -ENOMEM;
274         hole->start = start;
275         hole->len = len;
276         /* Since compare will not return 0, no -EEXIST will happen */
277         rb_insert(holes, &hole->node, compare_hole);
278
279         /* simple merge with previous hole */
280         if (rb_prev(&hole->node))
281                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
282                                 node);
283         if (prev && prev->start + prev->len >= hole->start) {
284                 hole->len = hole->start + hole->len - prev->start;
285                 hole->start = prev->start;
286                 rb_erase(&prev->node, holes);
287                 free(prev);
288                 prev = NULL;
289         }
290
291         /* iterate merge with next holes */
292         while (1) {
293                 if (!rb_next(&hole->node))
294                         break;
295                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
296                                         node);
297                 if (hole->start + hole->len >= next->start) {
298                         if (hole->start + hole->len <= next->start + next->len)
299                                 hole->len = next->start + next->len -
300                                             hole->start;
301                         rb_erase(&next->node, holes);
302                         free(next);
303                         next = NULL;
304                 } else
305                         break;
306         }
307         return 0;
308 }
309
310 static int compare_hole_range(struct rb_node *node, void *data)
311 {
312         struct file_extent_hole *hole;
313         u64 start;
314
315         hole = (struct file_extent_hole *)data;
316         start = hole->start;
317
318         hole = rb_entry(node, struct file_extent_hole, node);
319         if (start < hole->start)
320                 return -1;
321         if (start >= hole->start && start < hole->start + hole->len)
322                 return 0;
323         return 1;
324 }
325
326 /*
327  * Delete a hole in the record
328  *
329  * This will do the hole split and is much restrict than add.
330  */
331 static int del_file_extent_hole(struct rb_root *holes,
332                                 u64 start, u64 len)
333 {
334         struct file_extent_hole *hole;
335         struct file_extent_hole tmp;
336         u64 prev_start = 0;
337         u64 prev_len = 0;
338         u64 next_start = 0;
339         u64 next_len = 0;
340         struct rb_node *node;
341         int have_prev = 0;
342         int have_next = 0;
343         int ret = 0;
344
345         tmp.start = start;
346         tmp.len = len;
347         node = rb_search(holes, &tmp, compare_hole_range, NULL);
348         if (!node)
349                 return -EEXIST;
350         hole = rb_entry(node, struct file_extent_hole, node);
351         if (start + len > hole->start + hole->len)
352                 return -EEXIST;
353
354         /*
355          * Now there will be no overlap, delete the hole and re-add the
356          * split(s) if they exists.
357          */
358         if (start > hole->start) {
359                 prev_start = hole->start;
360                 prev_len = start - hole->start;
361                 have_prev = 1;
362         }
363         if (hole->start + hole->len > start + len) {
364                 next_start = start + len;
365                 next_len = hole->start + hole->len - start - len;
366                 have_next = 1;
367         }
368         rb_erase(node, holes);
369         free(hole);
370         if (have_prev) {
371                 ret = add_file_extent_hole(holes, prev_start, prev_len);
372                 if (ret < 0)
373                         return ret;
374         }
375         if (have_next) {
376                 ret = add_file_extent_hole(holes, next_start, next_len);
377                 if (ret < 0)
378                         return ret;
379         }
380         return 0;
381 }
382
383 static int copy_file_extent_holes(struct rb_root *dst,
384                                   struct rb_root *src)
385 {
386         struct file_extent_hole *hole;
387         struct rb_node *node;
388         int ret = 0;
389
390         node = rb_first(src);
391         while (node) {
392                 hole = rb_entry(node, struct file_extent_hole, node);
393                 ret = add_file_extent_hole(dst, hole->start, hole->len);
394                 if (ret)
395                         break;
396                 node = rb_next(node);
397         }
398         return ret;
399 }
400
401 static void free_file_extent_holes(struct rb_root *holes)
402 {
403         struct rb_node *node;
404         struct file_extent_hole *hole;
405
406         node = rb_first(holes);
407         while (node) {
408                 hole = rb_entry(node, struct file_extent_hole, node);
409                 rb_erase(node, holes);
410                 free(hole);
411                 node = rb_first(holes);
412         }
413 }
414
415 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
416
417 static void record_root_in_trans(struct btrfs_trans_handle *trans,
418                                  struct btrfs_root *root)
419 {
420         if (root->last_trans != trans->transid) {
421                 root->track_dirty = 1;
422                 root->last_trans = trans->transid;
423                 root->commit_root = root->node;
424                 extent_buffer_get(root->node);
425         }
426 }
427
428 static u8 imode_to_type(u32 imode)
429 {
430 #define S_SHIFT 12
431         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
432                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
433                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
434                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
435                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
436                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
437                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
438                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
439         };
440
441         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
442 #undef S_SHIFT
443 }
444
445 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
446 {
447         struct device_record *rec1;
448         struct device_record *rec2;
449
450         rec1 = rb_entry(node1, struct device_record, node);
451         rec2 = rb_entry(node2, struct device_record, node);
452         if (rec1->devid > rec2->devid)
453                 return -1;
454         else if (rec1->devid < rec2->devid)
455                 return 1;
456         else
457                 return 0;
458 }
459
460 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
461 {
462         struct inode_record *rec;
463         struct inode_backref *backref;
464         struct inode_backref *orig;
465         struct inode_backref *tmp;
466         struct orphan_data_extent *src_orphan;
467         struct orphan_data_extent *dst_orphan;
468         struct rb_node *rb;
469         size_t size;
470         int ret;
471
472         rec = malloc(sizeof(*rec));
473         if (!rec)
474                 return ERR_PTR(-ENOMEM);
475         memcpy(rec, orig_rec, sizeof(*rec));
476         rec->refs = 1;
477         INIT_LIST_HEAD(&rec->backrefs);
478         INIT_LIST_HEAD(&rec->orphan_extents);
479         rec->holes = RB_ROOT;
480
481         list_for_each_entry(orig, &orig_rec->backrefs, list) {
482                 size = sizeof(*orig) + orig->namelen + 1;
483                 backref = malloc(size);
484                 if (!backref) {
485                         ret = -ENOMEM;
486                         goto cleanup;
487                 }
488                 memcpy(backref, orig, size);
489                 list_add_tail(&backref->list, &rec->backrefs);
490         }
491         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
492                 dst_orphan = malloc(sizeof(*dst_orphan));
493                 if (!dst_orphan) {
494                         ret = -ENOMEM;
495                         goto cleanup;
496                 }
497                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
498                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
499         }
500         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
501         if (ret < 0)
502                 goto cleanup_rb;
503
504         return rec;
505
506 cleanup_rb:
507         rb = rb_first(&rec->holes);
508         while (rb) {
509                 struct file_extent_hole *hole;
510
511                 hole = rb_entry(rb, struct file_extent_hole, node);
512                 rb = rb_next(rb);
513                 free(hole);
514         }
515
516 cleanup:
517         if (!list_empty(&rec->backrefs))
518                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
519                         list_del(&orig->list);
520                         free(orig);
521                 }
522
523         if (!list_empty(&rec->orphan_extents))
524                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
525                         list_del(&orig->list);
526                         free(orig);
527                 }
528
529         free(rec);
530
531         return ERR_PTR(ret);
532 }
533
534 static void print_orphan_data_extents(struct list_head *orphan_extents,
535                                       u64 objectid)
536 {
537         struct orphan_data_extent *orphan;
538
539         if (list_empty(orphan_extents))
540                 return;
541         printf("The following data extent is lost in tree %llu:\n",
542                objectid);
543         list_for_each_entry(orphan, orphan_extents, list) {
544                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
545                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
546                        orphan->disk_len);
547         }
548 }
549
550 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
551 {
552         u64 root_objectid = root->root_key.objectid;
553         int errors = rec->errors;
554
555         if (!errors)
556                 return;
557         /* reloc root errors, we print its corresponding fs root objectid*/
558         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
559                 root_objectid = root->root_key.offset;
560                 fprintf(stderr, "reloc");
561         }
562         fprintf(stderr, "root %llu inode %llu errors %x",
563                 (unsigned long long) root_objectid,
564                 (unsigned long long) rec->ino, rec->errors);
565
566         if (errors & I_ERR_NO_INODE_ITEM)
567                 fprintf(stderr, ", no inode item");
568         if (errors & I_ERR_NO_ORPHAN_ITEM)
569                 fprintf(stderr, ", no orphan item");
570         if (errors & I_ERR_DUP_INODE_ITEM)
571                 fprintf(stderr, ", dup inode item");
572         if (errors & I_ERR_DUP_DIR_INDEX)
573                 fprintf(stderr, ", dup dir index");
574         if (errors & I_ERR_ODD_DIR_ITEM)
575                 fprintf(stderr, ", odd dir item");
576         if (errors & I_ERR_ODD_FILE_EXTENT)
577                 fprintf(stderr, ", odd file extent");
578         if (errors & I_ERR_BAD_FILE_EXTENT)
579                 fprintf(stderr, ", bad file extent");
580         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
581                 fprintf(stderr, ", file extent overlap");
582         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
583                 fprintf(stderr, ", file extent discount");
584         if (errors & I_ERR_DIR_ISIZE_WRONG)
585                 fprintf(stderr, ", dir isize wrong");
586         if (errors & I_ERR_FILE_NBYTES_WRONG)
587                 fprintf(stderr, ", nbytes wrong");
588         if (errors & I_ERR_ODD_CSUM_ITEM)
589                 fprintf(stderr, ", odd csum item");
590         if (errors & I_ERR_SOME_CSUM_MISSING)
591                 fprintf(stderr, ", some csum missing");
592         if (errors & I_ERR_LINK_COUNT_WRONG)
593                 fprintf(stderr, ", link count wrong");
594         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
595                 fprintf(stderr, ", orphan file extent");
596         fprintf(stderr, "\n");
597         /* Print the orphan extents if needed */
598         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
599                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
600
601         /* Print the holes if needed */
602         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
603                 struct file_extent_hole *hole;
604                 struct rb_node *node;
605                 int found = 0;
606
607                 node = rb_first(&rec->holes);
608                 fprintf(stderr, "Found file extent holes:\n");
609                 while (node) {
610                         found = 1;
611                         hole = rb_entry(node, struct file_extent_hole, node);
612                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
613                                 hole->start, hole->len);
614                         node = rb_next(node);
615                 }
616                 if (!found)
617                         fprintf(stderr, "\tstart: 0, len: %llu\n",
618                                 round_up(rec->isize,
619                                          root->fs_info->sectorsize));
620         }
621 }
622
623 static void print_ref_error(int errors)
624 {
625         if (errors & REF_ERR_NO_DIR_ITEM)
626                 fprintf(stderr, ", no dir item");
627         if (errors & REF_ERR_NO_DIR_INDEX)
628                 fprintf(stderr, ", no dir index");
629         if (errors & REF_ERR_NO_INODE_REF)
630                 fprintf(stderr, ", no inode ref");
631         if (errors & REF_ERR_DUP_DIR_ITEM)
632                 fprintf(stderr, ", dup dir item");
633         if (errors & REF_ERR_DUP_DIR_INDEX)
634                 fprintf(stderr, ", dup dir index");
635         if (errors & REF_ERR_DUP_INODE_REF)
636                 fprintf(stderr, ", dup inode ref");
637         if (errors & REF_ERR_INDEX_UNMATCH)
638                 fprintf(stderr, ", index mismatch");
639         if (errors & REF_ERR_FILETYPE_UNMATCH)
640                 fprintf(stderr, ", filetype mismatch");
641         if (errors & REF_ERR_NAME_TOO_LONG)
642                 fprintf(stderr, ", name too long");
643         if (errors & REF_ERR_NO_ROOT_REF)
644                 fprintf(stderr, ", no root ref");
645         if (errors & REF_ERR_NO_ROOT_BACKREF)
646                 fprintf(stderr, ", no root backref");
647         if (errors & REF_ERR_DUP_ROOT_REF)
648                 fprintf(stderr, ", dup root ref");
649         if (errors & REF_ERR_DUP_ROOT_BACKREF)
650                 fprintf(stderr, ", dup root backref");
651         fprintf(stderr, "\n");
652 }
653
654 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
655                                           u64 ino, int mod)
656 {
657         struct ptr_node *node;
658         struct cache_extent *cache;
659         struct inode_record *rec = NULL;
660         int ret;
661
662         cache = lookup_cache_extent(inode_cache, ino, 1);
663         if (cache) {
664                 node = container_of(cache, struct ptr_node, cache);
665                 rec = node->data;
666                 if (mod && rec->refs > 1) {
667                         node->data = clone_inode_rec(rec);
668                         if (IS_ERR(node->data))
669                                 return node->data;
670                         rec->refs--;
671                         rec = node->data;
672                 }
673         } else if (mod) {
674                 rec = calloc(1, sizeof(*rec));
675                 if (!rec)
676                         return ERR_PTR(-ENOMEM);
677                 rec->ino = ino;
678                 rec->extent_start = (u64)-1;
679                 rec->refs = 1;
680                 INIT_LIST_HEAD(&rec->backrefs);
681                 INIT_LIST_HEAD(&rec->orphan_extents);
682                 rec->holes = RB_ROOT;
683
684                 node = malloc(sizeof(*node));
685                 if (!node) {
686                         free(rec);
687                         return ERR_PTR(-ENOMEM);
688                 }
689                 node->cache.start = ino;
690                 node->cache.size = 1;
691                 node->data = rec;
692
693                 if (ino == BTRFS_FREE_INO_OBJECTID)
694                         rec->found_link = 1;
695
696                 ret = insert_cache_extent(inode_cache, &node->cache);
697                 if (ret)
698                         return ERR_PTR(-EEXIST);
699         }
700         return rec;
701 }
702
703 static void free_orphan_data_extents(struct list_head *orphan_extents)
704 {
705         struct orphan_data_extent *orphan;
706
707         while (!list_empty(orphan_extents)) {
708                 orphan = list_entry(orphan_extents->next,
709                                     struct orphan_data_extent, list);
710                 list_del(&orphan->list);
711                 free(orphan);
712         }
713 }
714
715 static void free_inode_rec(struct inode_record *rec)
716 {
717         struct inode_backref *backref;
718
719         if (--rec->refs > 0)
720                 return;
721
722         while (!list_empty(&rec->backrefs)) {
723                 backref = to_inode_backref(rec->backrefs.next);
724                 list_del(&backref->list);
725                 free(backref);
726         }
727         free_orphan_data_extents(&rec->orphan_extents);
728         free_file_extent_holes(&rec->holes);
729         free(rec);
730 }
731
732 static int can_free_inode_rec(struct inode_record *rec)
733 {
734         if (!rec->errors && rec->checked && rec->found_inode_item &&
735             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
736                 return 1;
737         return 0;
738 }
739
740 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
741                                  struct inode_record *rec)
742 {
743         struct cache_extent *cache;
744         struct inode_backref *tmp, *backref;
745         struct ptr_node *node;
746         u8 filetype;
747
748         if (!rec->found_inode_item)
749                 return;
750
751         filetype = imode_to_type(rec->imode);
752         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
753                 if (backref->found_dir_item && backref->found_dir_index) {
754                         if (backref->filetype != filetype)
755                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
756                         if (!backref->errors && backref->found_inode_ref &&
757                             rec->nlink == rec->found_link) {
758                                 list_del(&backref->list);
759                                 free(backref);
760                         }
761                 }
762         }
763
764         if (!rec->checked || rec->merging)
765                 return;
766
767         if (S_ISDIR(rec->imode)) {
768                 if (rec->found_size != rec->isize)
769                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
770                 if (rec->found_file_extent)
771                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
772         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
773                 if (rec->found_dir_item)
774                         rec->errors |= I_ERR_ODD_DIR_ITEM;
775                 if (rec->found_size != rec->nbytes)
776                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
777                 if (rec->nlink > 0 && !no_holes &&
778                     (rec->extent_end < rec->isize ||
779                      first_extent_gap(&rec->holes) < rec->isize))
780                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
781         }
782
783         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
784                 if (rec->found_csum_item && rec->nodatasum)
785                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
786                 if (rec->some_csum_missing && !rec->nodatasum)
787                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
788         }
789
790         BUG_ON(rec->refs != 1);
791         if (can_free_inode_rec(rec)) {
792                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
793                 node = container_of(cache, struct ptr_node, cache);
794                 BUG_ON(node->data != rec);
795                 remove_cache_extent(inode_cache, &node->cache);
796                 free(node);
797                 free_inode_rec(rec);
798         }
799 }
800
801 static int check_orphan_item(struct btrfs_root *root, u64 ino)
802 {
803         struct btrfs_path path;
804         struct btrfs_key key;
805         int ret;
806
807         key.objectid = BTRFS_ORPHAN_OBJECTID;
808         key.type = BTRFS_ORPHAN_ITEM_KEY;
809         key.offset = ino;
810
811         btrfs_init_path(&path);
812         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
813         btrfs_release_path(&path);
814         if (ret > 0)
815                 ret = -ENOENT;
816         return ret;
817 }
818
819 static int process_inode_item(struct extent_buffer *eb,
820                               int slot, struct btrfs_key *key,
821                               struct shared_node *active_node)
822 {
823         struct inode_record *rec;
824         struct btrfs_inode_item *item;
825
826         rec = active_node->current;
827         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
828         if (rec->found_inode_item) {
829                 rec->errors |= I_ERR_DUP_INODE_ITEM;
830                 return 1;
831         }
832         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
833         rec->nlink = btrfs_inode_nlink(eb, item);
834         rec->isize = btrfs_inode_size(eb, item);
835         rec->nbytes = btrfs_inode_nbytes(eb, item);
836         rec->imode = btrfs_inode_mode(eb, item);
837         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
838                 rec->nodatasum = 1;
839         rec->found_inode_item = 1;
840         if (rec->nlink == 0)
841                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
842         maybe_free_inode_rec(&active_node->inode_cache, rec);
843         return 0;
844 }
845
846 static struct inode_backref *get_inode_backref(struct inode_record *rec,
847                                                 const char *name,
848                                                 int namelen, u64 dir)
849 {
850         struct inode_backref *backref;
851
852         list_for_each_entry(backref, &rec->backrefs, list) {
853                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
854                         break;
855                 if (backref->dir != dir || backref->namelen != namelen)
856                         continue;
857                 if (memcmp(name, backref->name, namelen))
858                         continue;
859                 return backref;
860         }
861
862         backref = malloc(sizeof(*backref) + namelen + 1);
863         if (!backref)
864                 return NULL;
865         memset(backref, 0, sizeof(*backref));
866         backref->dir = dir;
867         backref->namelen = namelen;
868         memcpy(backref->name, name, namelen);
869         backref->name[namelen] = '\0';
870         list_add_tail(&backref->list, &rec->backrefs);
871         return backref;
872 }
873
874 static int add_inode_backref(struct cache_tree *inode_cache,
875                              u64 ino, u64 dir, u64 index,
876                              const char *name, int namelen,
877                              u8 filetype, u8 itemtype, int errors)
878 {
879         struct inode_record *rec;
880         struct inode_backref *backref;
881
882         rec = get_inode_rec(inode_cache, ino, 1);
883         BUG_ON(IS_ERR(rec));
884         backref = get_inode_backref(rec, name, namelen, dir);
885         BUG_ON(!backref);
886         if (errors)
887                 backref->errors |= errors;
888         if (itemtype == BTRFS_DIR_INDEX_KEY) {
889                 if (backref->found_dir_index)
890                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
891                 if (backref->found_inode_ref && backref->index != index)
892                         backref->errors |= REF_ERR_INDEX_UNMATCH;
893                 if (backref->found_dir_item && backref->filetype != filetype)
894                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
895
896                 backref->index = index;
897                 backref->filetype = filetype;
898                 backref->found_dir_index = 1;
899         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
900                 rec->found_link++;
901                 if (backref->found_dir_item)
902                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
903                 if (backref->found_dir_index && backref->filetype != filetype)
904                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
905
906                 backref->filetype = filetype;
907                 backref->found_dir_item = 1;
908         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
909                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
910                 if (backref->found_inode_ref)
911                         backref->errors |= REF_ERR_DUP_INODE_REF;
912                 if (backref->found_dir_index && backref->index != index)
913                         backref->errors |= REF_ERR_INDEX_UNMATCH;
914                 else
915                         backref->index = index;
916
917                 backref->ref_type = itemtype;
918                 backref->found_inode_ref = 1;
919         } else {
920                 BUG_ON(1);
921         }
922
923         maybe_free_inode_rec(inode_cache, rec);
924         return 0;
925 }
926
927 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
928                             struct cache_tree *dst_cache)
929 {
930         struct inode_backref *backref;
931         u32 dir_count = 0;
932         int ret = 0;
933
934         dst->merging = 1;
935         list_for_each_entry(backref, &src->backrefs, list) {
936                 if (backref->found_dir_index) {
937                         add_inode_backref(dst_cache, dst->ino, backref->dir,
938                                         backref->index, backref->name,
939                                         backref->namelen, backref->filetype,
940                                         BTRFS_DIR_INDEX_KEY, backref->errors);
941                 }
942                 if (backref->found_dir_item) {
943                         dir_count++;
944                         add_inode_backref(dst_cache, dst->ino,
945                                         backref->dir, 0, backref->name,
946                                         backref->namelen, backref->filetype,
947                                         BTRFS_DIR_ITEM_KEY, backref->errors);
948                 }
949                 if (backref->found_inode_ref) {
950                         add_inode_backref(dst_cache, dst->ino,
951                                         backref->dir, backref->index,
952                                         backref->name, backref->namelen, 0,
953                                         backref->ref_type, backref->errors);
954                 }
955         }
956
957         if (src->found_dir_item)
958                 dst->found_dir_item = 1;
959         if (src->found_file_extent)
960                 dst->found_file_extent = 1;
961         if (src->found_csum_item)
962                 dst->found_csum_item = 1;
963         if (src->some_csum_missing)
964                 dst->some_csum_missing = 1;
965         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
966                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
967                 if (ret < 0)
968                         return ret;
969         }
970
971         BUG_ON(src->found_link < dir_count);
972         dst->found_link += src->found_link - dir_count;
973         dst->found_size += src->found_size;
974         if (src->extent_start != (u64)-1) {
975                 if (dst->extent_start == (u64)-1) {
976                         dst->extent_start = src->extent_start;
977                         dst->extent_end = src->extent_end;
978                 } else {
979                         if (dst->extent_end > src->extent_start)
980                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
981                         else if (dst->extent_end < src->extent_start) {
982                                 ret = add_file_extent_hole(&dst->holes,
983                                         dst->extent_end,
984                                         src->extent_start - dst->extent_end);
985                         }
986                         if (dst->extent_end < src->extent_end)
987                                 dst->extent_end = src->extent_end;
988                 }
989         }
990
991         dst->errors |= src->errors;
992         if (src->found_inode_item) {
993                 if (!dst->found_inode_item) {
994                         dst->nlink = src->nlink;
995                         dst->isize = src->isize;
996                         dst->nbytes = src->nbytes;
997                         dst->imode = src->imode;
998                         dst->nodatasum = src->nodatasum;
999                         dst->found_inode_item = 1;
1000                 } else {
1001                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1002                 }
1003         }
1004         dst->merging = 0;
1005
1006         return 0;
1007 }
1008
1009 static int splice_shared_node(struct shared_node *src_node,
1010                               struct shared_node *dst_node)
1011 {
1012         struct cache_extent *cache;
1013         struct ptr_node *node, *ins;
1014         struct cache_tree *src, *dst;
1015         struct inode_record *rec, *conflict;
1016         u64 current_ino = 0;
1017         int splice = 0;
1018         int ret;
1019
1020         if (--src_node->refs == 0)
1021                 splice = 1;
1022         if (src_node->current)
1023                 current_ino = src_node->current->ino;
1024
1025         src = &src_node->root_cache;
1026         dst = &dst_node->root_cache;
1027 again:
1028         cache = search_cache_extent(src, 0);
1029         while (cache) {
1030                 node = container_of(cache, struct ptr_node, cache);
1031                 rec = node->data;
1032                 cache = next_cache_extent(cache);
1033
1034                 if (splice) {
1035                         remove_cache_extent(src, &node->cache);
1036                         ins = node;
1037                 } else {
1038                         ins = malloc(sizeof(*ins));
1039                         BUG_ON(!ins);
1040                         ins->cache.start = node->cache.start;
1041                         ins->cache.size = node->cache.size;
1042                         ins->data = rec;
1043                         rec->refs++;
1044                 }
1045                 ret = insert_cache_extent(dst, &ins->cache);
1046                 if (ret == -EEXIST) {
1047                         conflict = get_inode_rec(dst, rec->ino, 1);
1048                         BUG_ON(IS_ERR(conflict));
1049                         merge_inode_recs(rec, conflict, dst);
1050                         if (rec->checked) {
1051                                 conflict->checked = 1;
1052                                 if (dst_node->current == conflict)
1053                                         dst_node->current = NULL;
1054                         }
1055                         maybe_free_inode_rec(dst, conflict);
1056                         free_inode_rec(rec);
1057                         free(ins);
1058                 } else {
1059                         BUG_ON(ret);
1060                 }
1061         }
1062
1063         if (src == &src_node->root_cache) {
1064                 src = &src_node->inode_cache;
1065                 dst = &dst_node->inode_cache;
1066                 goto again;
1067         }
1068
1069         if (current_ino > 0 && (!dst_node->current ||
1070             current_ino > dst_node->current->ino)) {
1071                 if (dst_node->current) {
1072                         dst_node->current->checked = 1;
1073                         maybe_free_inode_rec(dst, dst_node->current);
1074                 }
1075                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1076                 BUG_ON(IS_ERR(dst_node->current));
1077         }
1078         return 0;
1079 }
1080
1081 static void free_inode_ptr(struct cache_extent *cache)
1082 {
1083         struct ptr_node *node;
1084         struct inode_record *rec;
1085
1086         node = container_of(cache, struct ptr_node, cache);
1087         rec = node->data;
1088         free_inode_rec(rec);
1089         free(node);
1090 }
1091
1092 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1093
1094 static struct shared_node *find_shared_node(struct cache_tree *shared,
1095                                             u64 bytenr)
1096 {
1097         struct cache_extent *cache;
1098         struct shared_node *node;
1099
1100         cache = lookup_cache_extent(shared, bytenr, 1);
1101         if (cache) {
1102                 node = container_of(cache, struct shared_node, cache);
1103                 return node;
1104         }
1105         return NULL;
1106 }
1107
1108 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1109 {
1110         int ret;
1111         struct shared_node *node;
1112
1113         node = calloc(1, sizeof(*node));
1114         if (!node)
1115                 return -ENOMEM;
1116         node->cache.start = bytenr;
1117         node->cache.size = 1;
1118         cache_tree_init(&node->root_cache);
1119         cache_tree_init(&node->inode_cache);
1120         node->refs = refs;
1121
1122         ret = insert_cache_extent(shared, &node->cache);
1123
1124         return ret;
1125 }
1126
1127 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1128                              struct walk_control *wc, int level)
1129 {
1130         struct shared_node *node;
1131         struct shared_node *dest;
1132         int ret;
1133
1134         if (level == wc->active_node)
1135                 return 0;
1136
1137         BUG_ON(wc->active_node <= level);
1138         node = find_shared_node(&wc->shared, bytenr);
1139         if (!node) {
1140                 ret = add_shared_node(&wc->shared, bytenr, refs);
1141                 BUG_ON(ret);
1142                 node = find_shared_node(&wc->shared, bytenr);
1143                 wc->nodes[level] = node;
1144                 wc->active_node = level;
1145                 return 0;
1146         }
1147
1148         if (wc->root_level == wc->active_node &&
1149             btrfs_root_refs(&root->root_item) == 0) {
1150                 if (--node->refs == 0) {
1151                         free_inode_recs_tree(&node->root_cache);
1152                         free_inode_recs_tree(&node->inode_cache);
1153                         remove_cache_extent(&wc->shared, &node->cache);
1154                         free(node);
1155                 }
1156                 return 1;
1157         }
1158
1159         dest = wc->nodes[wc->active_node];
1160         splice_shared_node(node, dest);
1161         if (node->refs == 0) {
1162                 remove_cache_extent(&wc->shared, &node->cache);
1163                 free(node);
1164         }
1165         return 1;
1166 }
1167
1168 static int leave_shared_node(struct btrfs_root *root,
1169                              struct walk_control *wc, int level)
1170 {
1171         struct shared_node *node;
1172         struct shared_node *dest;
1173         int i;
1174
1175         if (level == wc->root_level)
1176                 return 0;
1177
1178         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1179                 if (wc->nodes[i])
1180                         break;
1181         }
1182         BUG_ON(i >= BTRFS_MAX_LEVEL);
1183
1184         node = wc->nodes[wc->active_node];
1185         wc->nodes[wc->active_node] = NULL;
1186         wc->active_node = i;
1187
1188         dest = wc->nodes[wc->active_node];
1189         if (wc->active_node < wc->root_level ||
1190             btrfs_root_refs(&root->root_item) > 0) {
1191                 BUG_ON(node->refs <= 1);
1192                 splice_shared_node(node, dest);
1193         } else {
1194                 BUG_ON(node->refs < 2);
1195                 node->refs--;
1196         }
1197         return 0;
1198 }
1199
1200 /*
1201  * Returns:
1202  * < 0 - on error
1203  * 1   - if the root with id child_root_id is a child of root parent_root_id
1204  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1205  *       has other root(s) as parent(s)
1206  * 2   - if the root child_root_id doesn't have any parent roots
1207  */
1208 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1209                          u64 child_root_id)
1210 {
1211         struct btrfs_path path;
1212         struct btrfs_key key;
1213         struct extent_buffer *leaf;
1214         int has_parent = 0;
1215         int ret;
1216
1217         btrfs_init_path(&path);
1218
1219         key.objectid = parent_root_id;
1220         key.type = BTRFS_ROOT_REF_KEY;
1221         key.offset = child_root_id;
1222         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1223                                 0, 0);
1224         if (ret < 0)
1225                 return ret;
1226         btrfs_release_path(&path);
1227         if (!ret)
1228                 return 1;
1229
1230         key.objectid = child_root_id;
1231         key.type = BTRFS_ROOT_BACKREF_KEY;
1232         key.offset = 0;
1233         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1234                                 0, 0);
1235         if (ret < 0)
1236                 goto out;
1237
1238         while (1) {
1239                 leaf = path.nodes[0];
1240                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1241                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1242                         if (ret)
1243                                 break;
1244                         leaf = path.nodes[0];
1245                 }
1246
1247                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1248                 if (key.objectid != child_root_id ||
1249                     key.type != BTRFS_ROOT_BACKREF_KEY)
1250                         break;
1251
1252                 has_parent = 1;
1253
1254                 if (key.offset == parent_root_id) {
1255                         btrfs_release_path(&path);
1256                         return 1;
1257                 }
1258
1259                 path.slots[0]++;
1260         }
1261 out:
1262         btrfs_release_path(&path);
1263         if (ret < 0)
1264                 return ret;
1265         return has_parent ? 0 : 2;
1266 }
1267
1268 static int process_dir_item(struct extent_buffer *eb,
1269                             int slot, struct btrfs_key *key,
1270                             struct shared_node *active_node)
1271 {
1272         u32 total;
1273         u32 cur = 0;
1274         u32 len;
1275         u32 name_len;
1276         u32 data_len;
1277         int error;
1278         int nritems = 0;
1279         u8 filetype;
1280         struct btrfs_dir_item *di;
1281         struct inode_record *rec;
1282         struct cache_tree *root_cache;
1283         struct cache_tree *inode_cache;
1284         struct btrfs_key location;
1285         char namebuf[BTRFS_NAME_LEN];
1286
1287         root_cache = &active_node->root_cache;
1288         inode_cache = &active_node->inode_cache;
1289         rec = active_node->current;
1290         rec->found_dir_item = 1;
1291
1292         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1293         total = btrfs_item_size_nr(eb, slot);
1294         while (cur < total) {
1295                 nritems++;
1296                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1297                 name_len = btrfs_dir_name_len(eb, di);
1298                 data_len = btrfs_dir_data_len(eb, di);
1299                 filetype = btrfs_dir_type(eb, di);
1300
1301                 rec->found_size += name_len;
1302                 if (cur + sizeof(*di) + name_len > total ||
1303                     name_len > BTRFS_NAME_LEN) {
1304                         error = REF_ERR_NAME_TOO_LONG;
1305
1306                         if (cur + sizeof(*di) > total)
1307                                 break;
1308                         len = min_t(u32, total - cur - sizeof(*di),
1309                                     BTRFS_NAME_LEN);
1310                 } else {
1311                         len = name_len;
1312                         error = 0;
1313                 }
1314
1315                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1316
1317                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1318                     key->offset != btrfs_name_hash(namebuf, len)) {
1319                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1320                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1321                         key->objectid, key->offset, namebuf, len, filetype,
1322                         key->offset, btrfs_name_hash(namebuf, len));
1323                 }
1324
1325                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1326                         add_inode_backref(inode_cache, location.objectid,
1327                                           key->objectid, key->offset, namebuf,
1328                                           len, filetype, key->type, error);
1329                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1330                         add_inode_backref(root_cache, location.objectid,
1331                                           key->objectid, key->offset,
1332                                           namebuf, len, filetype,
1333                                           key->type, error);
1334                 } else {
1335                         fprintf(stderr,
1336                                 "unknown location type %d in DIR_ITEM[%llu %llu]\n",
1337                                 location.type, key->objectid, key->offset);
1338                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1339                                           key->objectid, key->offset, namebuf,
1340                                           len, filetype, key->type, error);
1341                 }
1342
1343                 len = sizeof(*di) + name_len + data_len;
1344                 di = (struct btrfs_dir_item *)((char *)di + len);
1345                 cur += len;
1346         }
1347         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1348                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1349
1350         return 0;
1351 }
1352
1353 static int process_inode_ref(struct extent_buffer *eb,
1354                              int slot, struct btrfs_key *key,
1355                              struct shared_node *active_node)
1356 {
1357         u32 total;
1358         u32 cur = 0;
1359         u32 len;
1360         u32 name_len;
1361         u64 index;
1362         int error;
1363         struct cache_tree *inode_cache;
1364         struct btrfs_inode_ref *ref;
1365         char namebuf[BTRFS_NAME_LEN];
1366
1367         inode_cache = &active_node->inode_cache;
1368
1369         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1370         total = btrfs_item_size_nr(eb, slot);
1371         while (cur < total) {
1372                 name_len = btrfs_inode_ref_name_len(eb, ref);
1373                 index = btrfs_inode_ref_index(eb, ref);
1374
1375                 /* inode_ref + namelen should not cross item boundary */
1376                 if (cur + sizeof(*ref) + name_len > total ||
1377                     name_len > BTRFS_NAME_LEN) {
1378                         if (total < cur + sizeof(*ref))
1379                                 break;
1380
1381                         /* Still try to read out the remaining part */
1382                         len = min_t(u32, total - cur - sizeof(*ref),
1383                                     BTRFS_NAME_LEN);
1384                         error = REF_ERR_NAME_TOO_LONG;
1385                 } else {
1386                         len = name_len;
1387                         error = 0;
1388                 }
1389
1390                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1391                 add_inode_backref(inode_cache, key->objectid, key->offset,
1392                                   index, namebuf, len, 0, key->type, error);
1393
1394                 len = sizeof(*ref) + name_len;
1395                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1396                 cur += len;
1397         }
1398         return 0;
1399 }
1400
1401 static int process_inode_extref(struct extent_buffer *eb,
1402                                 int slot, struct btrfs_key *key,
1403                                 struct shared_node *active_node)
1404 {
1405         u32 total;
1406         u32 cur = 0;
1407         u32 len;
1408         u32 name_len;
1409         u64 index;
1410         u64 parent;
1411         int error;
1412         struct cache_tree *inode_cache;
1413         struct btrfs_inode_extref *extref;
1414         char namebuf[BTRFS_NAME_LEN];
1415
1416         inode_cache = &active_node->inode_cache;
1417
1418         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1419         total = btrfs_item_size_nr(eb, slot);
1420         while (cur < total) {
1421                 name_len = btrfs_inode_extref_name_len(eb, extref);
1422                 index = btrfs_inode_extref_index(eb, extref);
1423                 parent = btrfs_inode_extref_parent(eb, extref);
1424                 if (name_len <= BTRFS_NAME_LEN) {
1425                         len = name_len;
1426                         error = 0;
1427                 } else {
1428                         len = BTRFS_NAME_LEN;
1429                         error = REF_ERR_NAME_TOO_LONG;
1430                 }
1431                 read_extent_buffer(eb, namebuf,
1432                                    (unsigned long)(extref + 1), len);
1433                 add_inode_backref(inode_cache, key->objectid, parent,
1434                                   index, namebuf, len, 0, key->type, error);
1435
1436                 len = sizeof(*extref) + name_len;
1437                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1438                 cur += len;
1439         }
1440         return 0;
1441
1442 }
1443
1444 static int count_csum_range(struct btrfs_root *root, u64 start,
1445                             u64 len, u64 *found)
1446 {
1447         struct btrfs_key key;
1448         struct btrfs_path path;
1449         struct extent_buffer *leaf;
1450         int ret;
1451         size_t size;
1452         *found = 0;
1453         u64 csum_end;
1454         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1455
1456         btrfs_init_path(&path);
1457
1458         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1459         key.offset = start;
1460         key.type = BTRFS_EXTENT_CSUM_KEY;
1461
1462         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1463                                 &key, &path, 0, 0);
1464         if (ret < 0)
1465                 goto out;
1466         if (ret > 0 && path.slots[0] > 0) {
1467                 leaf = path.nodes[0];
1468                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1469                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1470                     key.type == BTRFS_EXTENT_CSUM_KEY)
1471                         path.slots[0]--;
1472         }
1473
1474         while (len > 0) {
1475                 leaf = path.nodes[0];
1476                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1477                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1478                         if (ret > 0)
1479                                 break;
1480                         else if (ret < 0)
1481                                 goto out;
1482                         leaf = path.nodes[0];
1483                 }
1484
1485                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1486                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1487                     key.type != BTRFS_EXTENT_CSUM_KEY)
1488                         break;
1489
1490                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1491                 if (key.offset >= start + len)
1492                         break;
1493
1494                 if (key.offset > start)
1495                         start = key.offset;
1496
1497                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1498                 csum_end = key.offset + (size / csum_size) *
1499                            root->fs_info->sectorsize;
1500                 if (csum_end > start) {
1501                         size = min(csum_end - start, len);
1502                         len -= size;
1503                         start += size;
1504                         *found += size;
1505                 }
1506
1507                 path.slots[0]++;
1508         }
1509 out:
1510         btrfs_release_path(&path);
1511         if (ret < 0)
1512                 return ret;
1513         return 0;
1514 }
1515
1516 static int process_file_extent(struct btrfs_root *root,
1517                                 struct extent_buffer *eb,
1518                                 int slot, struct btrfs_key *key,
1519                                 struct shared_node *active_node)
1520 {
1521         struct inode_record *rec;
1522         struct btrfs_file_extent_item *fi;
1523         u64 num_bytes = 0;
1524         u64 disk_bytenr = 0;
1525         u64 extent_offset = 0;
1526         u64 mask = root->fs_info->sectorsize - 1;
1527         int extent_type;
1528         int ret;
1529
1530         rec = active_node->current;
1531         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1532         rec->found_file_extent = 1;
1533
1534         if (rec->extent_start == (u64)-1) {
1535                 rec->extent_start = key->offset;
1536                 rec->extent_end = key->offset;
1537         }
1538
1539         if (rec->extent_end > key->offset)
1540                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1541         else if (rec->extent_end < key->offset) {
1542                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1543                                            key->offset - rec->extent_end);
1544                 if (ret < 0)
1545                         return ret;
1546         }
1547
1548         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1549         extent_type = btrfs_file_extent_type(eb, fi);
1550
1551         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1552                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1553                 if (num_bytes == 0)
1554                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1555                 rec->found_size += num_bytes;
1556                 num_bytes = (num_bytes + mask) & ~mask;
1557         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1558                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1559                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1560                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1561                 extent_offset = btrfs_file_extent_offset(eb, fi);
1562                 if (num_bytes == 0 || (num_bytes & mask))
1563                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1564                 if (num_bytes + extent_offset >
1565                     btrfs_file_extent_ram_bytes(eb, fi))
1566                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1567                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1568                     (btrfs_file_extent_compression(eb, fi) ||
1569                      btrfs_file_extent_encryption(eb, fi) ||
1570                      btrfs_file_extent_other_encoding(eb, fi)))
1571                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1572                 if (disk_bytenr > 0)
1573                         rec->found_size += num_bytes;
1574         } else {
1575                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1576         }
1577         rec->extent_end = key->offset + num_bytes;
1578
1579         /*
1580          * The data reloc tree will copy full extents into its inode and then
1581          * copy the corresponding csums.  Because the extent it copied could be
1582          * a preallocated extent that hasn't been written to yet there may be no
1583          * csums to copy, ergo we won't have csums for our file extent.  This is
1584          * ok so just don't bother checking csums if the inode belongs to the
1585          * data reloc tree.
1586          */
1587         if (disk_bytenr > 0 &&
1588             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1589                 u64 found;
1590                 if (btrfs_file_extent_compression(eb, fi))
1591                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1592                 else
1593                         disk_bytenr += extent_offset;
1594
1595                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1596                 if (ret < 0)
1597                         return ret;
1598                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1599                         if (found > 0)
1600                                 rec->found_csum_item = 1;
1601                         if (found < num_bytes)
1602                                 rec->some_csum_missing = 1;
1603                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1604                         if (found > 0)
1605                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1606                 }
1607         }
1608         return 0;
1609 }
1610
1611 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1612                             struct walk_control *wc)
1613 {
1614         struct btrfs_key key;
1615         u32 nritems;
1616         int i;
1617         int ret = 0;
1618         struct cache_tree *inode_cache;
1619         struct shared_node *active_node;
1620
1621         if (wc->root_level == wc->active_node &&
1622             btrfs_root_refs(&root->root_item) == 0)
1623                 return 0;
1624
1625         active_node = wc->nodes[wc->active_node];
1626         inode_cache = &active_node->inode_cache;
1627         nritems = btrfs_header_nritems(eb);
1628         for (i = 0; i < nritems; i++) {
1629                 btrfs_item_key_to_cpu(eb, &key, i);
1630
1631                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1632                         continue;
1633                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1634                         continue;
1635
1636                 if (active_node->current == NULL ||
1637                     active_node->current->ino < key.objectid) {
1638                         if (active_node->current) {
1639                                 active_node->current->checked = 1;
1640                                 maybe_free_inode_rec(inode_cache,
1641                                                      active_node->current);
1642                         }
1643                         active_node->current = get_inode_rec(inode_cache,
1644                                                              key.objectid, 1);
1645                         BUG_ON(IS_ERR(active_node->current));
1646                 }
1647                 switch (key.type) {
1648                 case BTRFS_DIR_ITEM_KEY:
1649                 case BTRFS_DIR_INDEX_KEY:
1650                         ret = process_dir_item(eb, i, &key, active_node);
1651                         break;
1652                 case BTRFS_INODE_REF_KEY:
1653                         ret = process_inode_ref(eb, i, &key, active_node);
1654                         break;
1655                 case BTRFS_INODE_EXTREF_KEY:
1656                         ret = process_inode_extref(eb, i, &key, active_node);
1657                         break;
1658                 case BTRFS_INODE_ITEM_KEY:
1659                         ret = process_inode_item(eb, i, &key, active_node);
1660                         break;
1661                 case BTRFS_EXTENT_DATA_KEY:
1662                         ret = process_file_extent(root, eb, i, &key,
1663                                                   active_node);
1664                         break;
1665                 default:
1666                         break;
1667                 };
1668         }
1669         return ret;
1670 }
1671
1672 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1673                              struct extent_buffer *eb, struct node_refs *nrefs,
1674                              u64 level, int check_all);
1675 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1676                             unsigned int ext_ref);
1677
1678 /*
1679  * Returns >0  Found error, not fatal, should continue
1680  * Returns <0  Fatal error, must exit the whole check
1681  * Returns 0   No errors found
1682  */
1683 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1684                                struct node_refs *nrefs, int *level, int ext_ref)
1685 {
1686         struct extent_buffer *cur = path->nodes[0];
1687         struct btrfs_key key;
1688         u64 cur_bytenr;
1689         u32 nritems;
1690         u64 first_ino = 0;
1691         int root_level = btrfs_header_level(root->node);
1692         int i;
1693         int ret = 0; /* Final return value */
1694         int err = 0; /* Positive error bitmap */
1695
1696         cur_bytenr = cur->start;
1697
1698         /* skip to first inode item or the first inode number change */
1699         nritems = btrfs_header_nritems(cur);
1700         for (i = 0; i < nritems; i++) {
1701                 btrfs_item_key_to_cpu(cur, &key, i);
1702                 if (i == 0)
1703                         first_ino = key.objectid;
1704                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1705                     (first_ino && first_ino != key.objectid))
1706                         break;
1707         }
1708         if (i == nritems) {
1709                 path->slots[0] = nritems;
1710                 return 0;
1711         }
1712         path->slots[0] = i;
1713
1714 again:
1715         err |= check_inode_item(root, path, ext_ref);
1716
1717         /* modify cur since check_inode_item may change path */
1718         cur = path->nodes[0];
1719
1720         if (err & LAST_ITEM)
1721                 goto out;
1722
1723         /* still have inode items in thie leaf */
1724         if (cur->start == cur_bytenr)
1725                 goto again;
1726
1727         /*
1728          * we have switched to another leaf, above nodes may
1729          * have changed, here walk down the path, if a node
1730          * or leaf is shared, check whether we can skip this
1731          * node or leaf.
1732          */
1733         for (i = root_level; i >= 0; i--) {
1734                 if (path->nodes[i]->start == nrefs->bytenr[i])
1735                         continue;
1736
1737                 ret = update_nodes_refs(root, path->nodes[i]->start,
1738                                 path->nodes[i], nrefs, i, 0);
1739                 if (ret)
1740                         goto out;
1741
1742                 if (!nrefs->need_check[i]) {
1743                         *level += 1;
1744                         break;
1745                 }
1746         }
1747
1748         for (i = 0; i < *level; i++) {
1749                 free_extent_buffer(path->nodes[i]);
1750                 path->nodes[i] = NULL;
1751         }
1752 out:
1753         err &= ~LAST_ITEM;
1754         if (err && !ret)
1755                 ret = err;
1756         return ret;
1757 }
1758
1759 static void reada_walk_down(struct btrfs_root *root,
1760                             struct extent_buffer *node, int slot)
1761 {
1762         struct btrfs_fs_info *fs_info = root->fs_info;
1763         u64 bytenr;
1764         u64 ptr_gen;
1765         u32 nritems;
1766         int i;
1767         int level;
1768
1769         level = btrfs_header_level(node);
1770         if (level != 1)
1771                 return;
1772
1773         nritems = btrfs_header_nritems(node);
1774         for (i = slot; i < nritems; i++) {
1775                 bytenr = btrfs_node_blockptr(node, i);
1776                 ptr_gen = btrfs_node_ptr_generation(node, i);
1777                 readahead_tree_block(fs_info, bytenr, ptr_gen);
1778         }
1779 }
1780
1781 /*
1782  * Check the child node/leaf by the following condition:
1783  * 1. the first item key of the node/leaf should be the same with the one
1784  *    in parent.
1785  * 2. block in parent node should match the child node/leaf.
1786  * 3. generation of parent node and child's header should be consistent.
1787  *
1788  * Or the child node/leaf pointed by the key in parent is not valid.
1789  *
1790  * We hope to check leaf owner too, but since subvol may share leaves,
1791  * which makes leaf owner check not so strong, key check should be
1792  * sufficient enough for that case.
1793  */
1794 static int check_child_node(struct extent_buffer *parent, int slot,
1795                             struct extent_buffer *child)
1796 {
1797         struct btrfs_key parent_key;
1798         struct btrfs_key child_key;
1799         int ret = 0;
1800
1801         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1802         if (btrfs_header_level(child) == 0)
1803                 btrfs_item_key_to_cpu(child, &child_key, 0);
1804         else
1805                 btrfs_node_key_to_cpu(child, &child_key, 0);
1806
1807         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1808                 ret = -EINVAL;
1809                 fprintf(stderr,
1810                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1811                         parent_key.objectid, parent_key.type, parent_key.offset,
1812                         child_key.objectid, child_key.type, child_key.offset);
1813         }
1814         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1815                 ret = -EINVAL;
1816                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1817                         btrfs_node_blockptr(parent, slot),
1818                         btrfs_header_bytenr(child));
1819         }
1820         if (btrfs_node_ptr_generation(parent, slot) !=
1821             btrfs_header_generation(child)) {
1822                 ret = -EINVAL;
1823                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1824                         btrfs_header_generation(child),
1825                         btrfs_node_ptr_generation(parent, slot));
1826         }
1827         return ret;
1828 }
1829
1830 /*
1831  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
1832  * in every fs or file tree check. Here we find its all root ids, and only check
1833  * it in the fs or file tree which has the smallest root id.
1834  */
1835 static int need_check(struct btrfs_root *root, struct ulist *roots)
1836 {
1837         struct rb_node *node;
1838         struct ulist_node *u;
1839
1840         /*
1841          * @roots can be empty if it belongs to tree reloc tree
1842          * In that case, we should always check the leaf, as we can't use
1843          * the tree owner to ensure some other root will check it.
1844          */
1845         if (roots->nnodes == 1 || roots->nnodes == 0)
1846                 return 1;
1847
1848         node = rb_first(&roots->root);
1849         u = rb_entry(node, struct ulist_node, rb_node);
1850         /*
1851          * current root id is not smallest, we skip it and let it be checked
1852          * in the fs or file tree who hash the smallest root id.
1853          */
1854         if (root->objectid != u->val)
1855                 return 0;
1856
1857         return 1;
1858 }
1859
1860 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
1861                                u64 *flags_ret)
1862 {
1863         struct btrfs_root *extent_root = root->fs_info->extent_root;
1864         struct btrfs_root_item *ri = &root->root_item;
1865         struct btrfs_extent_inline_ref *iref;
1866         struct btrfs_extent_item *ei;
1867         struct btrfs_key key;
1868         struct btrfs_path *path = NULL;
1869         unsigned long ptr;
1870         unsigned long end;
1871         u64 flags;
1872         u64 owner = 0;
1873         u64 offset;
1874         int slot;
1875         int type;
1876         int ret = 0;
1877
1878         /*
1879          * Except file/reloc tree, we can not have FULL BACKREF MODE
1880          */
1881         if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
1882                 goto normal;
1883
1884         /* root node */
1885         if (eb->start == btrfs_root_bytenr(ri))
1886                 goto normal;
1887
1888         if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
1889                 goto full_backref;
1890
1891         owner = btrfs_header_owner(eb);
1892         if (owner == root->objectid)
1893                 goto normal;
1894
1895         path = btrfs_alloc_path();
1896         if (!path)
1897                 return -ENOMEM;
1898
1899         key.objectid = btrfs_header_bytenr(eb);
1900         key.type = (u8)-1;
1901         key.offset = (u64)-1;
1902
1903         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
1904         if (ret <= 0) {
1905                 ret = -EIO;
1906                 goto out;
1907         }
1908
1909         if (ret > 0) {
1910                 ret = btrfs_previous_extent_item(extent_root, path,
1911                                                  key.objectid);
1912                 if (ret)
1913                         goto full_backref;
1914
1915         }
1916         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1917
1918         eb = path->nodes[0];
1919         slot = path->slots[0];
1920         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
1921
1922         flags = btrfs_extent_flags(eb, ei);
1923         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
1924                 goto full_backref;
1925
1926         ptr = (unsigned long)(ei + 1);
1927         end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
1928
1929         if (key.type == BTRFS_EXTENT_ITEM_KEY)
1930                 ptr += sizeof(struct btrfs_tree_block_info);
1931
1932 next:
1933         /* Reached extent item ends normally */
1934         if (ptr == end)
1935                 goto full_backref;
1936
1937         /* Beyond extent item end, wrong item size */
1938         if (ptr > end) {
1939                 error("extent item at bytenr %llu slot %d has wrong size",
1940                         eb->start, slot);
1941                 goto full_backref;
1942         }
1943
1944         iref = (struct btrfs_extent_inline_ref *)ptr;
1945         offset = btrfs_extent_inline_ref_offset(eb, iref);
1946         type = btrfs_extent_inline_ref_type(eb, iref);
1947
1948         if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
1949                 goto normal;
1950         ptr += btrfs_extent_inline_ref_size(type);
1951         goto next;
1952
1953 normal:
1954         *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
1955         goto out;
1956
1957 full_backref:
1958         *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
1959 out:
1960         btrfs_free_path(path);
1961         return ret;
1962 }
1963
1964 /*
1965  * for a tree node or leaf, we record its reference count, so later if we still
1966  * process this node or leaf, don't need to compute its reference count again.
1967  *
1968  * @bytenr  if @bytenr == (u64)-1, only update nrefs->full_backref[level]
1969  */
1970 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1971                              struct extent_buffer *eb, struct node_refs *nrefs,
1972                              u64 level, int check_all)
1973 {
1974         struct ulist *roots;
1975         u64 refs = 0;
1976         u64 flags = 0;
1977         int root_level = btrfs_header_level(root->node);
1978         int check;
1979         int ret;
1980
1981         if (nrefs->bytenr[level] == bytenr)
1982                 return 0;
1983
1984         if (bytenr != (u64)-1) {
1985                 /* the return value of this function seems a mistake */
1986                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1987                                        level, 1, &refs, &flags);
1988                 /* temporary fix */
1989                 if (ret < 0 && !check_all)
1990                         return ret;
1991
1992                 nrefs->bytenr[level] = bytenr;
1993                 nrefs->refs[level] = refs;
1994                 nrefs->full_backref[level] = 0;
1995                 nrefs->checked[level] = 0;
1996
1997                 if (refs > 1) {
1998                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
1999                                                    0, &roots);
2000                         if (ret)
2001                                 return -EIO;
2002
2003                         check = need_check(root, roots);
2004                         ulist_free(roots);
2005                         nrefs->need_check[level] = check;
2006                 } else {
2007                         if (!check_all) {
2008                                 nrefs->need_check[level] = 1;
2009                         } else {
2010                                 if (level == root_level) {
2011                                         nrefs->need_check[level] = 1;
2012                                 } else {
2013                                         /*
2014                                          * The node refs may have not been
2015                                          * updated if upper needs checking (the
2016                                          * lowest root_objectid) the node can
2017                                          * be checked.
2018                                          */
2019                                         nrefs->need_check[level] =
2020                                                 nrefs->need_check[level + 1];
2021                                 }
2022                         }
2023                 }
2024         }
2025
2026         if (check_all && eb) {
2027                 calc_extent_flag_v2(root, eb, &flags);
2028                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2029                         nrefs->full_backref[level] = 1;
2030         }
2031
2032         return 0;
2033 }
2034
2035 /*
2036  * @level           if @level == -1 means extent data item
2037  *                  else normal treeblocl.
2038  */
2039 static int should_check_extent_strictly(struct btrfs_root *root,
2040                                         struct node_refs *nrefs, int level)
2041 {
2042         int root_level = btrfs_header_level(root->node);
2043
2044         if (level > root_level || level < -1)
2045                 return 1;
2046         if (level == root_level)
2047                 return 1;
2048         /*
2049          * if the upper node is marked full backref, it should contain shared
2050          * backref of the parent (except owner == root->objectid).
2051          */
2052         while (++level <= root_level)
2053                 if (nrefs->refs[level] > 1)
2054                         return 0;
2055
2056         return 1;
2057 }
2058
2059 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2060                           struct walk_control *wc, int *level,
2061                           struct node_refs *nrefs)
2062 {
2063         enum btrfs_tree_block_status status;
2064         u64 bytenr;
2065         u64 ptr_gen;
2066         struct btrfs_fs_info *fs_info = root->fs_info;
2067         struct extent_buffer *next;
2068         struct extent_buffer *cur;
2069         int ret, err = 0;
2070         u64 refs;
2071
2072         WARN_ON(*level < 0);
2073         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2074
2075         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2076                 refs = nrefs->refs[*level];
2077                 ret = 0;
2078         } else {
2079                 ret = btrfs_lookup_extent_info(NULL, root,
2080                                        path->nodes[*level]->start,
2081                                        *level, 1, &refs, NULL);
2082                 if (ret < 0) {
2083                         err = ret;
2084                         goto out;
2085                 }
2086                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2087                 nrefs->refs[*level] = refs;
2088         }
2089
2090         if (refs > 1) {
2091                 ret = enter_shared_node(root, path->nodes[*level]->start,
2092                                         refs, wc, *level);
2093                 if (ret > 0) {
2094                         err = ret;
2095                         goto out;
2096                 }
2097         }
2098
2099         while (*level >= 0) {
2100                 WARN_ON(*level < 0);
2101                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2102                 cur = path->nodes[*level];
2103
2104                 if (btrfs_header_level(cur) != *level)
2105                         WARN_ON(1);
2106
2107                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2108                         break;
2109                 if (*level == 0) {
2110                         ret = process_one_leaf(root, cur, wc);
2111                         if (ret < 0)
2112                                 err = ret;
2113                         break;
2114                 }
2115                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2116                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2117
2118                 if (bytenr == nrefs->bytenr[*level - 1]) {
2119                         refs = nrefs->refs[*level - 1];
2120                 } else {
2121                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2122                                         *level - 1, 1, &refs, NULL);
2123                         if (ret < 0) {
2124                                 refs = 0;
2125                         } else {
2126                                 nrefs->bytenr[*level - 1] = bytenr;
2127                                 nrefs->refs[*level - 1] = refs;
2128                         }
2129                 }
2130
2131                 if (refs > 1) {
2132                         ret = enter_shared_node(root, bytenr, refs,
2133                                                 wc, *level - 1);
2134                         if (ret > 0) {
2135                                 path->slots[*level]++;
2136                                 continue;
2137                         }
2138                 }
2139
2140                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2141                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2142                         free_extent_buffer(next);
2143                         reada_walk_down(root, cur, path->slots[*level]);
2144                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2145                         if (!extent_buffer_uptodate(next)) {
2146                                 struct btrfs_key node_key;
2147
2148                                 btrfs_node_key_to_cpu(path->nodes[*level],
2149                                                       &node_key,
2150                                                       path->slots[*level]);
2151                                 btrfs_add_corrupt_extent_record(root->fs_info,
2152                                                 &node_key,
2153                                                 path->nodes[*level]->start,
2154                                                 root->fs_info->nodesize,
2155                                                 *level);
2156                                 err = -EIO;
2157                                 goto out;
2158                         }
2159                 }
2160
2161                 ret = check_child_node(cur, path->slots[*level], next);
2162                 if (ret) {
2163                         free_extent_buffer(next);
2164                         err = ret;
2165                         goto out;
2166                 }
2167
2168                 if (btrfs_is_leaf(next))
2169                         status = btrfs_check_leaf(root, NULL, next);
2170                 else
2171                         status = btrfs_check_node(root, NULL, next);
2172                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2173                         free_extent_buffer(next);
2174                         err = -EIO;
2175                         goto out;
2176                 }
2177
2178                 *level = *level - 1;
2179                 free_extent_buffer(path->nodes[*level]);
2180                 path->nodes[*level] = next;
2181                 path->slots[*level] = 0;
2182         }
2183 out:
2184         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2185         return err;
2186 }
2187
2188 static int fs_root_objectid(u64 objectid);
2189
2190 /*
2191  * Update global fs information.
2192  */
2193 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2194                          int level)
2195 {
2196         u32 free_nrs;
2197         struct extent_buffer *eb = path->nodes[level];
2198
2199         total_btree_bytes += eb->len;
2200         if (fs_root_objectid(root->objectid))
2201                 total_fs_tree_bytes += eb->len;
2202         if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2203                 total_extent_tree_bytes += eb->len;
2204
2205         if (level == 0) {
2206                 btree_space_waste += btrfs_leaf_free_space(root, eb);
2207         } else {
2208                 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root->fs_info) -
2209                             btrfs_header_nritems(eb));
2210                 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2211         }
2212 }
2213
2214 /*
2215  * This function only handles BACKREF_MISSING,
2216  * If corresponding extent item exists, increase the ref, else insert an extent
2217  * item and backref.
2218  *
2219  * Returns error bits after repair.
2220  */
2221 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2222                                  struct btrfs_root *root,
2223                                  struct extent_buffer *node,
2224                                  struct node_refs *nrefs, int level, int err)
2225 {
2226         struct btrfs_fs_info *fs_info = root->fs_info;
2227         struct btrfs_root *extent_root = fs_info->extent_root;
2228         struct btrfs_path path;
2229         struct btrfs_extent_item *ei;
2230         struct btrfs_tree_block_info *bi;
2231         struct btrfs_key key;
2232         struct extent_buffer *eb;
2233         u32 size = sizeof(*ei);
2234         u32 node_size = root->fs_info->nodesize;
2235         int insert_extent = 0;
2236         int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2237         int root_level = btrfs_header_level(root->node);
2238         int generation;
2239         int ret;
2240         u64 owner;
2241         u64 bytenr;
2242         u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
2243         u64 parent = 0;
2244
2245         if ((err & BACKREF_MISSING) == 0)
2246                 return err;
2247
2248         WARN_ON(level > BTRFS_MAX_LEVEL);
2249         WARN_ON(level < 0);
2250
2251         btrfs_init_path(&path);
2252         bytenr = btrfs_header_bytenr(node);
2253         owner = btrfs_header_owner(node);
2254         generation = btrfs_header_generation(node);
2255
2256         key.objectid = bytenr;
2257         key.type = (u8)-1;
2258         key.offset = (u64)-1;
2259
2260         /* Search for the extent item */
2261         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2262         if (ret <= 0) {
2263                 ret = -EIO;
2264                 goto out;
2265         }
2266
2267         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2268         if (ret)
2269                 insert_extent = 1;
2270
2271         /* calculate if the extent item flag is full backref or not */
2272         if (nrefs->full_backref[level] != 0)
2273                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2274
2275         /* insert an extent item */
2276         if (insert_extent) {
2277                 struct btrfs_disk_key copy_key;
2278
2279                 generation = btrfs_header_generation(node);
2280
2281                 if (level < root_level && nrefs->full_backref[level + 1] &&
2282                     owner != root->objectid) {
2283                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2284                 }
2285
2286                 key.objectid = bytenr;
2287                 if (!skinny_metadata) {
2288                         key.type = BTRFS_EXTENT_ITEM_KEY;
2289                         key.offset = node_size;
2290                         size += sizeof(*bi);
2291                 } else {
2292                         key.type = BTRFS_METADATA_ITEM_KEY;
2293                         key.offset = level;
2294                 }
2295
2296                 btrfs_release_path(&path);
2297                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2298                                               size);
2299                 if (ret)
2300                         goto out;
2301
2302                 eb = path.nodes[0];
2303                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
2304
2305                 btrfs_set_extent_refs(eb, ei, 0);
2306                 btrfs_set_extent_generation(eb, ei, generation);
2307                 btrfs_set_extent_flags(eb, ei, flags);
2308
2309                 if (!skinny_metadata) {
2310                         bi = (struct btrfs_tree_block_info *)(ei + 1);
2311                         memset_extent_buffer(eb, 0, (unsigned long)bi,
2312                                              sizeof(*bi));
2313                         btrfs_set_disk_key_objectid(&copy_key, root->objectid);
2314                         btrfs_set_disk_key_type(&copy_key, 0);
2315                         btrfs_set_disk_key_offset(&copy_key, 0);
2316
2317                         btrfs_set_tree_block_level(eb, bi, level);
2318                         btrfs_set_tree_block_key(eb, bi, &copy_key);
2319                 }
2320                 btrfs_mark_buffer_dirty(eb);
2321                 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2322                 btrfs_update_block_group(extent_root, bytenr, node_size, 1, 0);
2323
2324                 nrefs->refs[level] = 0;
2325                 nrefs->full_backref[level] =
2326                         flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2327                 btrfs_release_path(&path);
2328         }
2329
2330         if (level < root_level && nrefs->full_backref[level + 1] &&
2331             owner != root->objectid)
2332                 parent = nrefs->bytenr[level + 1];
2333
2334         /* increase the ref */
2335         ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2336                         parent, root->objectid, level, 0);
2337
2338         nrefs->refs[level]++;
2339 out:
2340         btrfs_release_path(&path);
2341         if (ret) {
2342                 error(
2343         "failed to repair tree block ref start %llu root %llu due to %s",
2344                       bytenr, root->objectid, strerror(-ret));
2345         } else {
2346                 printf("Added one tree block ref start %llu %s %llu\n",
2347                        bytenr, parent ? "parent" : "root",
2348                        parent ? parent : root->objectid);
2349                 err &= ~BACKREF_MISSING;
2350         }
2351
2352         return err;
2353 }
2354
2355 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2356                             unsigned int ext_ref);
2357 static int check_tree_block_ref(struct btrfs_root *root,
2358                                 struct extent_buffer *eb, u64 bytenr,
2359                                 int level, u64 owner, struct node_refs *nrefs);
2360 static int check_leaf_items(struct btrfs_trans_handle *trans,
2361                             struct btrfs_root *root, struct btrfs_path *path,
2362                             struct node_refs *nrefs, int account_bytes);
2363
2364 /*
2365  * @trans      just for lowmem repair mode
2366  * @check all  if not 0 then check all tree block backrefs and items
2367  *             0 then just check relationship of items in fs tree(s)
2368  *
2369  * Returns >0  Found error, should continue
2370  * Returns <0  Fatal error, must exit the whole check
2371  * Returns 0   No errors found
2372  */
2373 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2374                              struct btrfs_root *root, struct btrfs_path *path,
2375                              int *level, struct node_refs *nrefs, int ext_ref,
2376                              int check_all)
2377
2378 {
2379         enum btrfs_tree_block_status status;
2380         u64 bytenr;
2381         u64 ptr_gen;
2382         struct btrfs_fs_info *fs_info = root->fs_info;
2383         struct extent_buffer *next;
2384         struct extent_buffer *cur;
2385         int ret;
2386         int err = 0;
2387         int check;
2388         int account_file_data = 0;
2389
2390         WARN_ON(*level < 0);
2391         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2392
2393         ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2394                                 path->nodes[*level], nrefs, *level, check_all);
2395         if (ret < 0)
2396                 return ret;
2397
2398         while (*level >= 0) {
2399                 WARN_ON(*level < 0);
2400                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2401                 cur = path->nodes[*level];
2402                 bytenr = btrfs_header_bytenr(cur);
2403                 check = nrefs->need_check[*level];
2404
2405                 if (btrfs_header_level(cur) != *level)
2406                         WARN_ON(1);
2407                /*
2408                 * Update bytes accounting and check tree block ref
2409                 * NOTE: Doing accounting and check before checking nritems
2410                 * is necessary because of empty node/leaf.
2411                 */
2412                 if ((check_all && !nrefs->checked[*level]) ||
2413                     (!check_all && nrefs->need_check[*level])) {
2414                         ret = check_tree_block_ref(root, cur,
2415                            btrfs_header_bytenr(cur), btrfs_header_level(cur),
2416                            btrfs_header_owner(cur), nrefs);
2417
2418                         if (repair && ret)
2419                                 ret = repair_tree_block_ref(trans, root,
2420                                     path->nodes[*level], nrefs, *level, ret);
2421                         err |= ret;
2422
2423                         if (check_all && nrefs->need_check[*level] &&
2424                                 nrefs->refs[*level]) {
2425                                 account_bytes(root, path, *level);
2426                                 account_file_data = 1;
2427                         }
2428                         nrefs->checked[*level] = 1;
2429                 }
2430
2431                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2432                         break;
2433
2434                 /* Don't forgot to check leaf/node validation */
2435                 if (*level == 0) {
2436                         /* skip duplicate check */
2437                         if (check || !check_all) {
2438                                 ret = btrfs_check_leaf(root, NULL, cur);
2439                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2440                                         err |= -EIO;
2441                                         break;
2442                                 }
2443                         }
2444
2445                         ret = 0;
2446                         if (!check_all)
2447                                 ret = process_one_leaf_v2(root, path, nrefs,
2448                                                           level, ext_ref);
2449                         else
2450                                 ret = check_leaf_items(trans, root, path,
2451                                                nrefs, account_file_data);
2452                         err |= ret;
2453                         break;
2454                 } else {
2455                         if (check || !check_all) {
2456                                 ret = btrfs_check_node(root, NULL, cur);
2457                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2458                                         err |= -EIO;
2459                                         break;
2460                                 }
2461                         }
2462                 }
2463
2464                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2465                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2466
2467                 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2468                                         check_all);
2469                 if (ret < 0)
2470                         break;
2471                 /*
2472                  * check all trees in check_chunks_and_extent_v2
2473                  * check shared node once in check_fs_roots
2474                  */
2475                 if (!check_all && !nrefs->need_check[*level - 1]) {
2476                         path->slots[*level]++;
2477                         continue;
2478                 }
2479
2480                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2481                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2482                         free_extent_buffer(next);
2483                         reada_walk_down(root, cur, path->slots[*level]);
2484                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2485                         if (!extent_buffer_uptodate(next)) {
2486                                 struct btrfs_key node_key;
2487
2488                                 btrfs_node_key_to_cpu(path->nodes[*level],
2489                                                       &node_key,
2490                                                       path->slots[*level]);
2491                                 btrfs_add_corrupt_extent_record(fs_info,
2492                                         &node_key, path->nodes[*level]->start,
2493                                         fs_info->nodesize, *level);
2494                                 err |= -EIO;
2495                                 break;
2496                         }
2497                 }
2498
2499                 ret = check_child_node(cur, path->slots[*level], next);
2500                 err |= ret;
2501                 if (ret < 0) 
2502                         break;
2503
2504                 if (btrfs_is_leaf(next))
2505                         status = btrfs_check_leaf(root, NULL, next);
2506                 else
2507                         status = btrfs_check_node(root, NULL, next);
2508                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2509                         free_extent_buffer(next);
2510                         err |= -EIO;
2511                         break;
2512                 }
2513
2514                 *level = *level - 1;
2515                 free_extent_buffer(path->nodes[*level]);
2516                 path->nodes[*level] = next;
2517                 path->slots[*level] = 0;
2518                 account_file_data = 0;
2519
2520                 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
2521         }
2522         return err;
2523 }
2524
2525 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2526                         struct walk_control *wc, int *level)
2527 {
2528         int i;
2529         struct extent_buffer *leaf;
2530
2531         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2532                 leaf = path->nodes[i];
2533                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2534                         path->slots[i]++;
2535                         *level = i;
2536                         return 0;
2537                 } else {
2538                         free_extent_buffer(path->nodes[*level]);
2539                         path->nodes[*level] = NULL;
2540                         BUG_ON(*level > wc->active_node);
2541                         if (*level == wc->active_node)
2542                                 leave_shared_node(root, wc, *level);
2543                         *level = i + 1;
2544                 }
2545         }
2546         return 1;
2547 }
2548
2549 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2550                            int *level)
2551 {
2552         int i;
2553         struct extent_buffer *leaf;
2554
2555         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2556                 leaf = path->nodes[i];
2557                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2558                         path->slots[i]++;
2559                         *level = i;
2560                         return 0;
2561                 } else {
2562                         free_extent_buffer(path->nodes[*level]);
2563                         path->nodes[*level] = NULL;
2564                         *level = i + 1;
2565                 }
2566         }
2567         return 1;
2568 }
2569
2570 static int check_root_dir(struct inode_record *rec)
2571 {
2572         struct inode_backref *backref;
2573         int ret = -1;
2574
2575         if (!rec->found_inode_item || rec->errors)
2576                 goto out;
2577         if (rec->nlink != 1 || rec->found_link != 0)
2578                 goto out;
2579         if (list_empty(&rec->backrefs))
2580                 goto out;
2581         backref = to_inode_backref(rec->backrefs.next);
2582         if (!backref->found_inode_ref)
2583                 goto out;
2584         if (backref->index != 0 || backref->namelen != 2 ||
2585             memcmp(backref->name, "..", 2))
2586                 goto out;
2587         if (backref->found_dir_index || backref->found_dir_item)
2588                 goto out;
2589         ret = 0;
2590 out:
2591         return ret;
2592 }
2593
2594 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2595                               struct btrfs_root *root, struct btrfs_path *path,
2596                               struct inode_record *rec)
2597 {
2598         struct btrfs_inode_item *ei;
2599         struct btrfs_key key;
2600         int ret;
2601
2602         key.objectid = rec->ino;
2603         key.type = BTRFS_INODE_ITEM_KEY;
2604         key.offset = (u64)-1;
2605
2606         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2607         if (ret < 0)
2608                 goto out;
2609         if (ret) {
2610                 if (!path->slots[0]) {
2611                         ret = -ENOENT;
2612                         goto out;
2613                 }
2614                 path->slots[0]--;
2615                 ret = 0;
2616         }
2617         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2618         if (key.objectid != rec->ino) {
2619                 ret = -ENOENT;
2620                 goto out;
2621         }
2622
2623         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2624                             struct btrfs_inode_item);
2625         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2626         btrfs_mark_buffer_dirty(path->nodes[0]);
2627         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2628         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2629                root->root_key.objectid);
2630 out:
2631         btrfs_release_path(path);
2632         return ret;
2633 }
2634
2635 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2636                                     struct btrfs_root *root,
2637                                     struct btrfs_path *path,
2638                                     struct inode_record *rec)
2639 {
2640         int ret;
2641
2642         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2643         btrfs_release_path(path);
2644         if (!ret)
2645                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2646         return ret;
2647 }
2648
2649 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2650                                struct btrfs_root *root,
2651                                struct btrfs_path *path,
2652                                struct inode_record *rec)
2653 {
2654         struct btrfs_inode_item *ei;
2655         struct btrfs_key key;
2656         int ret = 0;
2657
2658         key.objectid = rec->ino;
2659         key.type = BTRFS_INODE_ITEM_KEY;
2660         key.offset = 0;
2661
2662         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2663         if (ret) {
2664                 if (ret > 0)
2665                         ret = -ENOENT;
2666                 goto out;
2667         }
2668
2669         /* Since ret == 0, no need to check anything */
2670         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2671                             struct btrfs_inode_item);
2672         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2673         btrfs_mark_buffer_dirty(path->nodes[0]);
2674         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2675         printf("reset nbytes for ino %llu root %llu\n",
2676                rec->ino, root->root_key.objectid);
2677 out:
2678         btrfs_release_path(path);
2679         return ret;
2680 }
2681
2682 static int add_missing_dir_index(struct btrfs_root *root,
2683                                  struct cache_tree *inode_cache,
2684                                  struct inode_record *rec,
2685                                  struct inode_backref *backref)
2686 {
2687         struct btrfs_path path;
2688         struct btrfs_trans_handle *trans;
2689         struct btrfs_dir_item *dir_item;
2690         struct extent_buffer *leaf;
2691         struct btrfs_key key;
2692         struct btrfs_disk_key disk_key;
2693         struct inode_record *dir_rec;
2694         unsigned long name_ptr;
2695         u32 data_size = sizeof(*dir_item) + backref->namelen;
2696         int ret;
2697
2698         trans = btrfs_start_transaction(root, 1);
2699         if (IS_ERR(trans))
2700                 return PTR_ERR(trans);
2701
2702         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2703                 (unsigned long long)rec->ino);
2704
2705         btrfs_init_path(&path);
2706         key.objectid = backref->dir;
2707         key.type = BTRFS_DIR_INDEX_KEY;
2708         key.offset = backref->index;
2709         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2710         BUG_ON(ret);
2711
2712         leaf = path.nodes[0];
2713         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2714
2715         disk_key.objectid = cpu_to_le64(rec->ino);
2716         disk_key.type = BTRFS_INODE_ITEM_KEY;
2717         disk_key.offset = 0;
2718
2719         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2720         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2721         btrfs_set_dir_data_len(leaf, dir_item, 0);
2722         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2723         name_ptr = (unsigned long)(dir_item + 1);
2724         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2725         btrfs_mark_buffer_dirty(leaf);
2726         btrfs_release_path(&path);
2727         btrfs_commit_transaction(trans, root);
2728
2729         backref->found_dir_index = 1;
2730         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2731         BUG_ON(IS_ERR(dir_rec));
2732         if (!dir_rec)
2733                 return 0;
2734         dir_rec->found_size += backref->namelen;
2735         if (dir_rec->found_size == dir_rec->isize &&
2736             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2737                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2738         if (dir_rec->found_size != dir_rec->isize)
2739                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2740
2741         return 0;
2742 }
2743
2744 static int delete_dir_index(struct btrfs_root *root,
2745                             struct inode_backref *backref)
2746 {
2747         struct btrfs_trans_handle *trans;
2748         struct btrfs_dir_item *di;
2749         struct btrfs_path path;
2750         int ret = 0;
2751
2752         trans = btrfs_start_transaction(root, 1);
2753         if (IS_ERR(trans))
2754                 return PTR_ERR(trans);
2755
2756         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2757                 (unsigned long long)backref->dir,
2758                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2759                 (unsigned long long)root->objectid);
2760
2761         btrfs_init_path(&path);
2762         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2763                                     backref->name, backref->namelen,
2764                                     backref->index, -1);
2765         if (IS_ERR(di)) {
2766                 ret = PTR_ERR(di);
2767                 btrfs_release_path(&path);
2768                 btrfs_commit_transaction(trans, root);
2769                 if (ret == -ENOENT)
2770                         return 0;
2771                 return ret;
2772         }
2773
2774         if (!di)
2775                 ret = btrfs_del_item(trans, root, &path);
2776         else
2777                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2778         BUG_ON(ret);
2779         btrfs_release_path(&path);
2780         btrfs_commit_transaction(trans, root);
2781         return ret;
2782 }
2783
2784 static int __create_inode_item(struct btrfs_trans_handle *trans,
2785                                struct btrfs_root *root, u64 ino, u64 size,
2786                                u64 nbytes, u64 nlink, u32 mode)
2787 {
2788         struct btrfs_inode_item ii;
2789         time_t now = time(NULL);
2790         int ret;
2791
2792         btrfs_set_stack_inode_size(&ii, size);
2793         btrfs_set_stack_inode_nbytes(&ii, nbytes);
2794         btrfs_set_stack_inode_nlink(&ii, nlink);
2795         btrfs_set_stack_inode_mode(&ii, mode);
2796         btrfs_set_stack_inode_generation(&ii, trans->transid);
2797         btrfs_set_stack_timespec_nsec(&ii.atime, 0);
2798         btrfs_set_stack_timespec_sec(&ii.ctime, now);
2799         btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
2800         btrfs_set_stack_timespec_sec(&ii.mtime, now);
2801         btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
2802         btrfs_set_stack_timespec_sec(&ii.otime, 0);
2803         btrfs_set_stack_timespec_nsec(&ii.otime, 0);
2804
2805         ret = btrfs_insert_inode(trans, root, ino, &ii);
2806         ASSERT(!ret);
2807
2808         warning("root %llu inode %llu recreating inode item, this may "
2809                 "be incomplete, please check permissions and content after "
2810                 "the fsck completes.\n", (unsigned long long)root->objectid,
2811                 (unsigned long long)ino);
2812
2813         return 0;
2814 }
2815
2816 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2817                                     struct btrfs_root *root, u64 ino,
2818                                     u8 filetype)
2819 {
2820         u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2821
2822         return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
2823 }
2824
2825 static int create_inode_item(struct btrfs_root *root,
2826                              struct inode_record *rec, int root_dir)
2827 {
2828         struct btrfs_trans_handle *trans;
2829         u64 nlink = 0;
2830         u32 mode = 0;
2831         u64 size = 0;
2832         int ret;
2833
2834         trans = btrfs_start_transaction(root, 1);
2835         if (IS_ERR(trans)) {
2836                 ret = PTR_ERR(trans);
2837                 return ret;
2838         }
2839
2840         nlink = root_dir ? 1 : rec->found_link;
2841         if (rec->found_dir_item) {
2842                 if (rec->found_file_extent)
2843                         fprintf(stderr, "root %llu inode %llu has both a dir "
2844                                 "item and extents, unsure if it is a dir or a "
2845                                 "regular file so setting it as a directory\n",
2846                                 (unsigned long long)root->objectid,
2847                                 (unsigned long long)rec->ino);
2848                 mode = S_IFDIR | 0755;
2849                 size = rec->found_size;
2850         } else if (!rec->found_dir_item) {
2851                 size = rec->extent_end;
2852                 mode =  S_IFREG | 0755;
2853         }
2854
2855         ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
2856                                   nlink, mode);
2857         btrfs_commit_transaction(trans, root);
2858         return 0;
2859 }
2860
2861 static int repair_inode_backrefs(struct btrfs_root *root,
2862                                  struct inode_record *rec,
2863                                  struct cache_tree *inode_cache,
2864                                  int delete)
2865 {
2866         struct inode_backref *tmp, *backref;
2867         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2868         int ret = 0;
2869         int repaired = 0;
2870
2871         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2872                 if (!delete && rec->ino == root_dirid) {
2873                         if (!rec->found_inode_item) {
2874                                 ret = create_inode_item(root, rec, 1);
2875                                 if (ret)
2876                                         break;
2877                                 repaired++;
2878                         }
2879                 }
2880
2881                 /* Index 0 for root dir's are special, don't mess with it */
2882                 if (rec->ino == root_dirid && backref->index == 0)
2883                         continue;
2884
2885                 if (delete &&
2886                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2887                      (backref->found_dir_index && backref->found_inode_ref &&
2888                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2889                         ret = delete_dir_index(root, backref);
2890                         if (ret)
2891                                 break;
2892                         repaired++;
2893                         list_del(&backref->list);
2894                         free(backref);
2895                         continue;
2896                 }
2897
2898                 if (!delete && !backref->found_dir_index &&
2899                     backref->found_dir_item && backref->found_inode_ref) {
2900                         ret = add_missing_dir_index(root, inode_cache, rec,
2901                                                     backref);
2902                         if (ret)
2903                                 break;
2904                         repaired++;
2905                         if (backref->found_dir_item &&
2906                             backref->found_dir_index) {
2907                                 if (!backref->errors &&
2908                                     backref->found_inode_ref) {
2909                                         list_del(&backref->list);
2910                                         free(backref);
2911                                         continue;
2912                                 }
2913                         }
2914                 }
2915
2916                 if (!delete && (!backref->found_dir_index &&
2917                                 !backref->found_dir_item &&
2918                                 backref->found_inode_ref)) {
2919                         struct btrfs_trans_handle *trans;
2920                         struct btrfs_key location;
2921
2922                         ret = check_dir_conflict(root, backref->name,
2923                                                  backref->namelen,
2924                                                  backref->dir,
2925                                                  backref->index);
2926                         if (ret) {
2927                                 /*
2928                                  * let nlink fixing routine to handle it,
2929                                  * which can do it better.
2930                                  */
2931                                 ret = 0;
2932                                 break;
2933                         }
2934                         location.objectid = rec->ino;
2935                         location.type = BTRFS_INODE_ITEM_KEY;
2936                         location.offset = 0;
2937
2938                         trans = btrfs_start_transaction(root, 1);
2939                         if (IS_ERR(trans)) {
2940                                 ret = PTR_ERR(trans);
2941                                 break;
2942                         }
2943                         fprintf(stderr, "adding missing dir index/item pair "
2944                                 "for inode %llu\n",
2945                                 (unsigned long long)rec->ino);
2946                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2947                                                     backref->namelen,
2948                                                     backref->dir, &location,
2949                                                     imode_to_type(rec->imode),
2950                                                     backref->index);
2951                         BUG_ON(ret);
2952                         btrfs_commit_transaction(trans, root);
2953                         repaired++;
2954                 }
2955
2956                 if (!delete && (backref->found_inode_ref &&
2957                                 backref->found_dir_index &&
2958                                 backref->found_dir_item &&
2959                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2960                                 !rec->found_inode_item)) {
2961                         ret = create_inode_item(root, rec, 0);
2962                         if (ret)
2963                                 break;
2964                         repaired++;
2965                 }
2966
2967         }
2968         return ret ? ret : repaired;
2969 }
2970
2971 /*
2972  * To determine the file type for nlink/inode_item repair
2973  *
2974  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2975  * Return -ENOENT if file type is not found.
2976  */
2977 static int find_file_type(struct inode_record *rec, u8 *type)
2978 {
2979         struct inode_backref *backref;
2980
2981         /* For inode item recovered case */
2982         if (rec->found_inode_item) {
2983                 *type = imode_to_type(rec->imode);
2984                 return 0;
2985         }
2986
2987         list_for_each_entry(backref, &rec->backrefs, list) {
2988                 if (backref->found_dir_index || backref->found_dir_item) {
2989                         *type = backref->filetype;
2990                         return 0;
2991                 }
2992         }
2993         return -ENOENT;
2994 }
2995
2996 /*
2997  * To determine the file name for nlink repair
2998  *
2999  * Return 0 if file name is found, set name and namelen.
3000  * Return -ENOENT if file name is not found.
3001  */
3002 static int find_file_name(struct inode_record *rec,
3003                           char *name, int *namelen)
3004 {
3005         struct inode_backref *backref;
3006
3007         list_for_each_entry(backref, &rec->backrefs, list) {
3008                 if (backref->found_dir_index || backref->found_dir_item ||
3009                     backref->found_inode_ref) {
3010                         memcpy(name, backref->name, backref->namelen);
3011                         *namelen = backref->namelen;
3012                         return 0;
3013                 }
3014         }
3015         return -ENOENT;
3016 }
3017
3018 /* Reset the nlink of the inode to the correct one */
3019 static int reset_nlink(struct btrfs_trans_handle *trans,
3020                        struct btrfs_root *root,
3021                        struct btrfs_path *path,
3022                        struct inode_record *rec)
3023 {
3024         struct inode_backref *backref;
3025         struct inode_backref *tmp;
3026         struct btrfs_key key;
3027         struct btrfs_inode_item *inode_item;
3028         int ret = 0;
3029
3030         /* We don't believe this either, reset it and iterate backref */
3031         rec->found_link = 0;
3032
3033         /* Remove all backref including the valid ones */
3034         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3035                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
3036                                    backref->index, backref->name,
3037                                    backref->namelen, 0);
3038                 if (ret < 0)
3039                         goto out;
3040
3041                 /* remove invalid backref, so it won't be added back */
3042                 if (!(backref->found_dir_index &&
3043                       backref->found_dir_item &&
3044                       backref->found_inode_ref)) {
3045                         list_del(&backref->list);
3046                         free(backref);
3047                 } else {
3048                         rec->found_link++;
3049                 }
3050         }
3051
3052         /* Set nlink to 0 */
3053         key.objectid = rec->ino;
3054         key.type = BTRFS_INODE_ITEM_KEY;
3055         key.offset = 0;
3056         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3057         if (ret < 0)
3058                 goto out;
3059         if (ret > 0) {
3060                 ret = -ENOENT;
3061                 goto out;
3062         }
3063         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3064                                     struct btrfs_inode_item);
3065         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
3066         btrfs_mark_buffer_dirty(path->nodes[0]);
3067         btrfs_release_path(path);
3068
3069         /*
3070          * Add back valid inode_ref/dir_item/dir_index,
3071          * add_link() will handle the nlink inc, so new nlink must be correct
3072          */
3073         list_for_each_entry(backref, &rec->backrefs, list) {
3074                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
3075                                      backref->name, backref->namelen,
3076                                      backref->filetype, &backref->index, 1, 0);
3077                 if (ret < 0)
3078                         goto out;
3079         }
3080 out:
3081         btrfs_release_path(path);
3082         return ret;
3083 }
3084
3085 static int get_highest_inode(struct btrfs_trans_handle *trans,
3086                                 struct btrfs_root *root,
3087                                 struct btrfs_path *path,
3088                                 u64 *highest_ino)
3089 {
3090         struct btrfs_key key, found_key;
3091         int ret;
3092
3093         btrfs_init_path(path);
3094         key.objectid = BTRFS_LAST_FREE_OBJECTID;
3095         key.offset = -1;
3096         key.type = BTRFS_INODE_ITEM_KEY;
3097         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3098         if (ret == 1) {
3099                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3100                                 path->slots[0] - 1);
3101                 *highest_ino = found_key.objectid;
3102                 ret = 0;
3103         }
3104         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3105                 ret = -EOVERFLOW;
3106         btrfs_release_path(path);
3107         return ret;
3108 }
3109
3110 /*
3111  * Link inode to dir 'lost+found'. Increase @ref_count.
3112  *
3113  * Returns 0 means success.
3114  * Returns <0 means failure.
3115  */
3116 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3117                                    struct btrfs_root *root,
3118                                    struct btrfs_path *path,
3119                                    u64 ino, char *namebuf, u32 name_len,
3120                                    u8 filetype, u64 *ref_count)
3121 {
3122         char *dir_name = "lost+found";
3123         u64 lost_found_ino;
3124         int ret;
3125         u32 mode = 0700;
3126
3127         btrfs_release_path(path);
3128         ret = get_highest_inode(trans, root, path, &lost_found_ino);
3129         if (ret < 0)
3130                 goto out;
3131         lost_found_ino++;
3132
3133         ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3134                           BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3135                           mode);
3136         if (ret < 0) {
3137                 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3138                 goto out;
3139         }
3140         ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3141                              namebuf, name_len, filetype, NULL, 1, 0);
3142         /*
3143          * Add ".INO" suffix several times to handle case where
3144          * "FILENAME.INO" is already taken by another file.
3145          */
3146         while (ret == -EEXIST) {
3147                 /*
3148                  * Conflicting file name, add ".INO" as suffix * +1 for '.'
3149                  */
3150                 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3151                         ret = -EFBIG;
3152                         goto out;
3153                 }
3154                 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3155                          ".%llu", ino);
3156                 name_len += count_digits(ino) + 1;
3157                 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3158                                      name_len, filetype, NULL, 1, 0);
3159         }
3160         if (ret < 0) {
3161                 error("failed to link the inode %llu to %s dir: %s",
3162                       ino, dir_name, strerror(-ret));
3163                 goto out;
3164         }
3165
3166         ++*ref_count;
3167         printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3168                name_len, namebuf, dir_name);
3169 out:
3170         btrfs_release_path(path);
3171         if (ret)
3172                 error("failed to move file '%.*s' to '%s' dir", name_len,
3173                                 namebuf, dir_name);
3174         return ret;
3175 }
3176
3177 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3178                                struct btrfs_root *root,
3179                                struct btrfs_path *path,
3180                                struct inode_record *rec)
3181 {
3182         char namebuf[BTRFS_NAME_LEN] = {0};
3183         u8 type = 0;
3184         int namelen = 0;
3185         int name_recovered = 0;
3186         int type_recovered = 0;
3187         int ret = 0;
3188
3189         /*
3190          * Get file name and type first before these invalid inode ref
3191          * are deleted by remove_all_invalid_backref()
3192          */
3193         name_recovered = !find_file_name(rec, namebuf, &namelen);
3194         type_recovered = !find_file_type(rec, &type);
3195
3196         if (!name_recovered) {
3197                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3198                        rec->ino, rec->ino);
3199                 namelen = count_digits(rec->ino);
3200                 sprintf(namebuf, "%llu", rec->ino);
3201                 name_recovered = 1;
3202         }
3203         if (!type_recovered) {
3204                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3205                        rec->ino);
3206                 type = BTRFS_FT_REG_FILE;
3207                 type_recovered = 1;
3208         }
3209
3210         ret = reset_nlink(trans, root, path, rec);
3211         if (ret < 0) {
3212                 fprintf(stderr,
3213                         "Failed to reset nlink for inode %llu: %s\n",
3214                         rec->ino, strerror(-ret));
3215                 goto out;
3216         }
3217
3218         if (rec->found_link == 0) {
3219                 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3220                                               namebuf, namelen, type,
3221                                               (u64 *)&rec->found_link);
3222                 if (ret)
3223                         goto out;
3224         }
3225         printf("Fixed the nlink of inode %llu\n", rec->ino);
3226 out:
3227         /*
3228          * Clear the flag anyway, or we will loop forever for the same inode
3229          * as it will not be removed from the bad inode list and the dead loop
3230          * happens.
3231          */
3232         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3233         btrfs_release_path(path);
3234         return ret;
3235 }
3236
3237 /*
3238  * Check if there is any normal(reg or prealloc) file extent for given
3239  * ino.
3240  * This is used to determine the file type when neither its dir_index/item or
3241  * inode_item exists.
3242  *
3243  * This will *NOT* report error, if any error happens, just consider it does
3244  * not have any normal file extent.
3245  */
3246 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3247 {
3248         struct btrfs_path path;
3249         struct btrfs_key key;
3250         struct btrfs_key found_key;
3251         struct btrfs_file_extent_item *fi;
3252         u8 type;
3253         int ret = 0;
3254
3255         btrfs_init_path(&path);
3256         key.objectid = ino;
3257         key.type = BTRFS_EXTENT_DATA_KEY;
3258         key.offset = 0;
3259
3260         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3261         if (ret < 0) {
3262                 ret = 0;
3263                 goto out;
3264         }
3265         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3266                 ret = btrfs_next_leaf(root, &path);
3267                 if (ret) {
3268                         ret = 0;
3269                         goto out;
3270                 }
3271         }
3272         while (1) {
3273                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3274                                       path.slots[0]);
3275                 if (found_key.objectid != ino ||
3276                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3277                         break;
3278                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3279                                     struct btrfs_file_extent_item);
3280                 type = btrfs_file_extent_type(path.nodes[0], fi);
3281                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3282                         ret = 1;
3283                         goto out;
3284                 }
3285         }
3286 out:
3287         btrfs_release_path(&path);
3288         return ret;
3289 }
3290
3291 static u32 btrfs_type_to_imode(u8 type)
3292 {
3293         static u32 imode_by_btrfs_type[] = {
3294                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3295                 [BTRFS_FT_DIR]          = S_IFDIR,
3296                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3297                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3298                 [BTRFS_FT_FIFO]         = S_IFIFO,
3299                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3300                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3301         };
3302
3303         return imode_by_btrfs_type[(type)];
3304 }
3305
3306 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3307                                 struct btrfs_root *root,
3308                                 struct btrfs_path *path,
3309                                 struct inode_record *rec)
3310 {
3311         u8 filetype;
3312         u32 mode = 0700;
3313         int type_recovered = 0;
3314         int ret = 0;
3315
3316         printf("Trying to rebuild inode:%llu\n", rec->ino);
3317
3318         type_recovered = !find_file_type(rec, &filetype);
3319
3320         /*
3321          * Try to determine inode type if type not found.
3322          *
3323          * For found regular file extent, it must be FILE.
3324          * For found dir_item/index, it must be DIR.
3325          *
3326          * For undetermined one, use FILE as fallback.
3327          *
3328          * TODO:
3329          * 1. If found backref(inode_index/item is already handled) to it,
3330          *    it must be DIR.
3331          *    Need new inode-inode ref structure to allow search for that.
3332          */
3333         if (!type_recovered) {
3334                 if (rec->found_file_extent &&
3335                     find_normal_file_extent(root, rec->ino)) {
3336                         type_recovered = 1;
3337                         filetype = BTRFS_FT_REG_FILE;
3338                 } else if (rec->found_dir_item) {
3339                         type_recovered = 1;
3340                         filetype = BTRFS_FT_DIR;
3341                 } else if (!list_empty(&rec->orphan_extents)) {
3342                         type_recovered = 1;
3343                         filetype = BTRFS_FT_REG_FILE;
3344                 } else{
3345                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3346                                rec->ino);
3347                         type_recovered = 1;
3348                         filetype = BTRFS_FT_REG_FILE;
3349                 }
3350         }
3351
3352         ret = btrfs_new_inode(trans, root, rec->ino,
3353                               mode | btrfs_type_to_imode(filetype));
3354         if (ret < 0)
3355                 goto out;
3356
3357         /*
3358          * Here inode rebuild is done, we only rebuild the inode item,
3359          * don't repair the nlink(like move to lost+found).
3360          * That is the job of nlink repair.
3361          *
3362          * We just fill the record and return
3363          */
3364         rec->found_dir_item = 1;
3365         rec->imode = mode | btrfs_type_to_imode(filetype);
3366         rec->nlink = 0;
3367         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3368         /* Ensure the inode_nlinks repair function will be called */
3369         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3370 out:
3371         return ret;
3372 }
3373
3374 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3375                                       struct btrfs_root *root,
3376                                       struct btrfs_path *path,
3377                                       struct inode_record *rec)
3378 {
3379         struct orphan_data_extent *orphan;
3380         struct orphan_data_extent *tmp;
3381         int ret = 0;
3382
3383         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3384                 /*
3385                  * Check for conflicting file extents
3386                  *
3387                  * Here we don't know whether the extents is compressed or not,
3388                  * so we can only assume it not compressed nor data offset,
3389                  * and use its disk_len as extent length.
3390                  */
3391                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3392                                        orphan->offset, orphan->disk_len, 0);
3393                 btrfs_release_path(path);
3394                 if (ret < 0)
3395                         goto out;
3396                 if (!ret) {
3397                         fprintf(stderr,
3398                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3399                                 orphan->disk_bytenr, orphan->disk_len);
3400                         ret = btrfs_free_extent(trans,
3401                                         root->fs_info->extent_root,
3402                                         orphan->disk_bytenr, orphan->disk_len,
3403                                         0, root->objectid, orphan->objectid,
3404                                         orphan->offset);
3405                         if (ret < 0)
3406                                 goto out;
3407                 }
3408                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3409                                 orphan->offset, orphan->disk_bytenr,
3410                                 orphan->disk_len, orphan->disk_len);
3411                 if (ret < 0)
3412                         goto out;
3413
3414                 /* Update file size info */
3415                 rec->found_size += orphan->disk_len;
3416                 if (rec->found_size == rec->nbytes)
3417                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3418
3419                 /* Update the file extent hole info too */
3420                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3421                                            orphan->disk_len);
3422                 if (ret < 0)
3423                         goto out;
3424                 if (RB_EMPTY_ROOT(&rec->holes))
3425                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3426
3427                 list_del(&orphan->list);
3428                 free(orphan);
3429         }
3430         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3431 out:
3432         return ret;
3433 }
3434
3435 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3436                                         struct btrfs_root *root,
3437                                         struct btrfs_path *path,
3438                                         struct inode_record *rec)
3439 {
3440         struct rb_node *node;
3441         struct file_extent_hole *hole;
3442         int found = 0;
3443         int ret = 0;
3444
3445         node = rb_first(&rec->holes);
3446
3447         while (node) {
3448                 found = 1;
3449                 hole = rb_entry(node, struct file_extent_hole, node);
3450                 ret = btrfs_punch_hole(trans, root, rec->ino,
3451                                        hole->start, hole->len);
3452                 if (ret < 0)
3453                         goto out;
3454                 ret = del_file_extent_hole(&rec->holes, hole->start,
3455                                            hole->len);
3456                 if (ret < 0)
3457                         goto out;
3458                 if (RB_EMPTY_ROOT(&rec->holes))
3459                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3460                 node = rb_first(&rec->holes);
3461         }
3462         /* special case for a file losing all its file extent */
3463         if (!found) {
3464                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3465                                        round_up(rec->isize,
3466                                                 root->fs_info->sectorsize));
3467                 if (ret < 0)
3468                         goto out;
3469         }
3470         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3471                rec->ino, root->objectid);
3472 out:
3473         return ret;
3474 }
3475
3476 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3477 {
3478         struct btrfs_trans_handle *trans;
3479         struct btrfs_path path;
3480         int ret = 0;
3481
3482         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3483                              I_ERR_NO_ORPHAN_ITEM |
3484                              I_ERR_LINK_COUNT_WRONG |
3485                              I_ERR_NO_INODE_ITEM |
3486                              I_ERR_FILE_EXTENT_ORPHAN |
3487                              I_ERR_FILE_EXTENT_DISCOUNT|
3488                              I_ERR_FILE_NBYTES_WRONG)))
3489                 return rec->errors;
3490
3491         /*
3492          * For nlink repair, it may create a dir and add link, so
3493          * 2 for parent(256)'s dir_index and dir_item
3494          * 2 for lost+found dir's inode_item and inode_ref
3495          * 1 for the new inode_ref of the file
3496          * 2 for lost+found dir's dir_index and dir_item for the file
3497          */
3498         trans = btrfs_start_transaction(root, 7);
3499         if (IS_ERR(trans))
3500                 return PTR_ERR(trans);
3501
3502         btrfs_init_path(&path);
3503         if (rec->errors & I_ERR_NO_INODE_ITEM)
3504                 ret = repair_inode_no_item(trans, root, &path, rec);
3505         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3506                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3507         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3508                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3509         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3510                 ret = repair_inode_isize(trans, root, &path, rec);
3511         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3512                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3513         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3514                 ret = repair_inode_nlinks(trans, root, &path, rec);
3515         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3516                 ret = repair_inode_nbytes(trans, root, &path, rec);
3517         btrfs_commit_transaction(trans, root);
3518         btrfs_release_path(&path);
3519         return ret;
3520 }
3521
3522 static int check_inode_recs(struct btrfs_root *root,
3523                             struct cache_tree *inode_cache)
3524 {
3525         struct cache_extent *cache;
3526         struct ptr_node *node;
3527         struct inode_record *rec;
3528         struct inode_backref *backref;
3529         int stage = 0;
3530         int ret = 0;
3531         int err = 0;
3532         u64 error = 0;
3533         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3534
3535         if (btrfs_root_refs(&root->root_item) == 0) {
3536                 if (!cache_tree_empty(inode_cache))
3537                         fprintf(stderr, "warning line %d\n", __LINE__);
3538                 return 0;
3539         }
3540
3541         /*
3542          * We need to repair backrefs first because we could change some of the
3543          * errors in the inode recs.
3544          *
3545          * We also need to go through and delete invalid backrefs first and then
3546          * add the correct ones second.  We do this because we may get EEXIST
3547          * when adding back the correct index because we hadn't yet deleted the
3548          * invalid index.
3549          *
3550          * For example, if we were missing a dir index then the directories
3551          * isize would be wrong, so if we fixed the isize to what we thought it
3552          * would be and then fixed the backref we'd still have a invalid fs, so
3553          * we need to add back the dir index and then check to see if the isize
3554          * is still wrong.
3555          */
3556         while (stage < 3) {
3557                 stage++;
3558                 if (stage == 3 && !err)
3559                         break;
3560
3561                 cache = search_cache_extent(inode_cache, 0);
3562                 while (repair && cache) {
3563                         node = container_of(cache, struct ptr_node, cache);
3564                         rec = node->data;
3565                         cache = next_cache_extent(cache);
3566
3567                         /* Need to free everything up and rescan */
3568                         if (stage == 3) {
3569                                 remove_cache_extent(inode_cache, &node->cache);
3570                                 free(node);
3571                                 free_inode_rec(rec);
3572                                 continue;
3573                         }
3574
3575                         if (list_empty(&rec->backrefs))
3576                                 continue;
3577
3578                         ret = repair_inode_backrefs(root, rec, inode_cache,
3579                                                     stage == 1);
3580                         if (ret < 0) {
3581                                 err = ret;
3582                                 stage = 2;
3583                                 break;
3584                         } if (ret > 0) {
3585                                 err = -EAGAIN;
3586                         }
3587                 }
3588         }
3589         if (err)
3590                 return err;
3591
3592         rec = get_inode_rec(inode_cache, root_dirid, 0);
3593         BUG_ON(IS_ERR(rec));
3594         if (rec) {
3595                 ret = check_root_dir(rec);
3596                 if (ret) {
3597                         fprintf(stderr, "root %llu root dir %llu error\n",
3598                                 (unsigned long long)root->root_key.objectid,
3599                                 (unsigned long long)root_dirid);
3600                         print_inode_error(root, rec);
3601                         error++;
3602                 }
3603         } else {
3604                 if (repair) {
3605                         struct btrfs_trans_handle *trans;
3606
3607                         trans = btrfs_start_transaction(root, 1);
3608                         if (IS_ERR(trans)) {
3609                                 err = PTR_ERR(trans);
3610                                 return err;
3611                         }
3612
3613                         fprintf(stderr,
3614                                 "root %llu missing its root dir, recreating\n",
3615                                 (unsigned long long)root->objectid);
3616
3617                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3618                         BUG_ON(ret);
3619
3620                         btrfs_commit_transaction(trans, root);
3621                         return -EAGAIN;
3622                 }
3623
3624                 fprintf(stderr, "root %llu root dir %llu not found\n",
3625                         (unsigned long long)root->root_key.objectid,
3626                         (unsigned long long)root_dirid);
3627         }
3628
3629         while (1) {
3630                 cache = search_cache_extent(inode_cache, 0);
3631                 if (!cache)
3632                         break;
3633                 node = container_of(cache, struct ptr_node, cache);
3634                 rec = node->data;
3635                 remove_cache_extent(inode_cache, &node->cache);
3636                 free(node);
3637                 if (rec->ino == root_dirid ||
3638                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3639                         free_inode_rec(rec);
3640                         continue;
3641                 }
3642
3643                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3644                         ret = check_orphan_item(root, rec->ino);
3645                         if (ret == 0)
3646                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3647                         if (can_free_inode_rec(rec)) {
3648                                 free_inode_rec(rec);
3649                                 continue;
3650                         }
3651                 }
3652
3653                 if (!rec->found_inode_item)
3654                         rec->errors |= I_ERR_NO_INODE_ITEM;
3655                 if (rec->found_link != rec->nlink)
3656                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3657                 if (repair) {
3658                         ret = try_repair_inode(root, rec);
3659                         if (ret == 0 && can_free_inode_rec(rec)) {
3660                                 free_inode_rec(rec);
3661                                 continue;
3662                         }
3663                         ret = 0;
3664                 }
3665
3666                 if (!(repair && ret == 0))
3667                         error++;
3668                 print_inode_error(root, rec);
3669                 list_for_each_entry(backref, &rec->backrefs, list) {
3670                         if (!backref->found_dir_item)
3671                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3672                         if (!backref->found_dir_index)
3673                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3674                         if (!backref->found_inode_ref)
3675                                 backref->errors |= REF_ERR_NO_INODE_REF;
3676                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3677                                 " namelen %u name %s filetype %d errors %x",
3678                                 (unsigned long long)backref->dir,
3679                                 (unsigned long long)backref->index,
3680                                 backref->namelen, backref->name,
3681                                 backref->filetype, backref->errors);
3682                         print_ref_error(backref->errors);
3683                 }
3684                 free_inode_rec(rec);
3685         }
3686         return (error > 0) ? -1 : 0;
3687 }
3688
3689 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3690                                         u64 objectid)
3691 {
3692         struct cache_extent *cache;
3693         struct root_record *rec = NULL;
3694         int ret;
3695
3696         cache = lookup_cache_extent(root_cache, objectid, 1);
3697         if (cache) {
3698                 rec = container_of(cache, struct root_record, cache);
3699         } else {
3700                 rec = calloc(1, sizeof(*rec));
3701                 if (!rec)
3702                         return ERR_PTR(-ENOMEM);
3703                 rec->objectid = objectid;
3704                 INIT_LIST_HEAD(&rec->backrefs);
3705                 rec->cache.start = objectid;
3706                 rec->cache.size = 1;
3707
3708                 ret = insert_cache_extent(root_cache, &rec->cache);
3709                 if (ret)
3710                         return ERR_PTR(-EEXIST);
3711         }
3712         return rec;
3713 }
3714
3715 static struct root_backref *get_root_backref(struct root_record *rec,
3716                                              u64 ref_root, u64 dir, u64 index,
3717                                              const char *name, int namelen)
3718 {
3719         struct root_backref *backref;
3720
3721         list_for_each_entry(backref, &rec->backrefs, list) {
3722                 if (backref->ref_root != ref_root || backref->dir != dir ||
3723                     backref->namelen != namelen)
3724                         continue;
3725                 if (memcmp(name, backref->name, namelen))
3726                         continue;
3727                 return backref;
3728         }
3729
3730         backref = calloc(1, sizeof(*backref) + namelen + 1);
3731         if (!backref)
3732                 return NULL;
3733         backref->ref_root = ref_root;
3734         backref->dir = dir;
3735         backref->index = index;
3736         backref->namelen = namelen;
3737         memcpy(backref->name, name, namelen);
3738         backref->name[namelen] = '\0';
3739         list_add_tail(&backref->list, &rec->backrefs);
3740         return backref;
3741 }
3742
3743 static void free_root_record(struct cache_extent *cache)
3744 {
3745         struct root_record *rec;
3746         struct root_backref *backref;
3747
3748         rec = container_of(cache, struct root_record, cache);
3749         while (!list_empty(&rec->backrefs)) {
3750                 backref = to_root_backref(rec->backrefs.next);
3751                 list_del(&backref->list);
3752                 free(backref);
3753         }
3754
3755         free(rec);
3756 }
3757
3758 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3759
3760 static int add_root_backref(struct cache_tree *root_cache,
3761                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3762                             const char *name, int namelen,
3763                             int item_type, int errors)
3764 {
3765         struct root_record *rec;
3766         struct root_backref *backref;
3767
3768         rec = get_root_rec(root_cache, root_id);
3769         BUG_ON(IS_ERR(rec));
3770         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3771         BUG_ON(!backref);
3772
3773         backref->errors |= errors;
3774
3775         if (item_type != BTRFS_DIR_ITEM_KEY) {
3776                 if (backref->found_dir_index || backref->found_back_ref ||
3777                     backref->found_forward_ref) {
3778                         if (backref->index != index)
3779                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3780                 } else {
3781                         backref->index = index;
3782                 }
3783         }
3784
3785         if (item_type == BTRFS_DIR_ITEM_KEY) {
3786                 if (backref->found_forward_ref)
3787                         rec->found_ref++;
3788                 backref->found_dir_item = 1;
3789         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3790                 backref->found_dir_index = 1;
3791         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3792                 if (backref->found_forward_ref)
3793                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3794                 else if (backref->found_dir_item)
3795                         rec->found_ref++;
3796                 backref->found_forward_ref = 1;
3797         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3798                 if (backref->found_back_ref)
3799                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3800                 backref->found_back_ref = 1;
3801         } else {
3802                 BUG_ON(1);
3803         }
3804
3805         if (backref->found_forward_ref && backref->found_dir_item)
3806                 backref->reachable = 1;
3807         return 0;
3808 }
3809
3810 static int merge_root_recs(struct btrfs_root *root,
3811                            struct cache_tree *src_cache,
3812                            struct cache_tree *dst_cache)
3813 {
3814         struct cache_extent *cache;
3815         struct ptr_node *node;
3816         struct inode_record *rec;
3817         struct inode_backref *backref;
3818         int ret = 0;
3819
3820         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3821                 free_inode_recs_tree(src_cache);
3822                 return 0;
3823         }
3824
3825         while (1) {
3826                 cache = search_cache_extent(src_cache, 0);
3827                 if (!cache)
3828                         break;
3829                 node = container_of(cache, struct ptr_node, cache);
3830                 rec = node->data;
3831                 remove_cache_extent(src_cache, &node->cache);
3832                 free(node);
3833
3834                 ret = is_child_root(root, root->objectid, rec->ino);
3835                 if (ret < 0)
3836                         break;
3837                 else if (ret == 0)
3838                         goto skip;
3839
3840                 list_for_each_entry(backref, &rec->backrefs, list) {
3841                         BUG_ON(backref->found_inode_ref);
3842                         if (backref->found_dir_item)
3843                                 add_root_backref(dst_cache, rec->ino,
3844                                         root->root_key.objectid, backref->dir,
3845                                         backref->index, backref->name,
3846                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3847                                         backref->errors);
3848                         if (backref->found_dir_index)
3849                                 add_root_backref(dst_cache, rec->ino,
3850                                         root->root_key.objectid, backref->dir,
3851                                         backref->index, backref->name,
3852                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3853                                         backref->errors);
3854                 }
3855 skip:
3856                 free_inode_rec(rec);
3857         }
3858         if (ret < 0)
3859                 return ret;
3860         return 0;
3861 }
3862
3863 static int check_root_refs(struct btrfs_root *root,
3864                            struct cache_tree *root_cache)
3865 {
3866         struct root_record *rec;
3867         struct root_record *ref_root;
3868         struct root_backref *backref;
3869         struct cache_extent *cache;
3870         int loop = 1;
3871         int ret;
3872         int error;
3873         int errors = 0;
3874
3875         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3876         BUG_ON(IS_ERR(rec));
3877         rec->found_ref = 1;
3878
3879         /* fixme: this can not detect circular references */
3880         while (loop) {
3881                 loop = 0;
3882                 cache = search_cache_extent(root_cache, 0);
3883                 while (1) {
3884                         if (!cache)
3885                                 break;
3886                         rec = container_of(cache, struct root_record, cache);
3887                         cache = next_cache_extent(cache);
3888
3889                         if (rec->found_ref == 0)
3890                                 continue;
3891
3892                         list_for_each_entry(backref, &rec->backrefs, list) {
3893                                 if (!backref->reachable)
3894                                         continue;
3895
3896                                 ref_root = get_root_rec(root_cache,
3897                                                         backref->ref_root);
3898                                 BUG_ON(IS_ERR(ref_root));
3899                                 if (ref_root->found_ref > 0)
3900                                         continue;
3901
3902                                 backref->reachable = 0;
3903                                 rec->found_ref--;
3904                                 if (rec->found_ref == 0)
3905                                         loop = 1;
3906                         }
3907                 }
3908         }
3909
3910         cache = search_cache_extent(root_cache, 0);
3911         while (1) {
3912                 if (!cache)
3913                         break;
3914                 rec = container_of(cache, struct root_record, cache);
3915                 cache = next_cache_extent(cache);
3916
3917                 if (rec->found_ref == 0 &&
3918                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3919                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3920                         ret = check_orphan_item(root->fs_info->tree_root,
3921                                                 rec->objectid);
3922                         if (ret == 0)
3923                                 continue;
3924
3925                         /*
3926                          * If we don't have a root item then we likely just have
3927                          * a dir item in a snapshot for this root but no actual
3928                          * ref key or anything so it's meaningless.
3929                          */
3930                         if (!rec->found_root_item)
3931                                 continue;
3932                         errors++;
3933                         fprintf(stderr, "fs tree %llu not referenced\n",
3934                                 (unsigned long long)rec->objectid);
3935                 }
3936
3937                 error = 0;
3938                 if (rec->found_ref > 0 && !rec->found_root_item)
3939                         error = 1;
3940                 list_for_each_entry(backref, &rec->backrefs, list) {
3941                         if (!backref->found_dir_item)
3942                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3943                         if (!backref->found_dir_index)
3944                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3945                         if (!backref->found_back_ref)
3946                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3947                         if (!backref->found_forward_ref)
3948                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3949                         if (backref->reachable && backref->errors)
3950                                 error = 1;
3951                 }
3952                 if (!error)
3953                         continue;
3954
3955                 errors++;
3956                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3957                         (unsigned long long)rec->objectid, rec->found_ref,
3958                          rec->found_root_item ? "" : "not found");
3959
3960                 list_for_each_entry(backref, &rec->backrefs, list) {
3961                         if (!backref->reachable)
3962                                 continue;
3963                         if (!backref->errors && rec->found_root_item)
3964                                 continue;
3965                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3966                                 " index %llu namelen %u name %s errors %x\n",
3967                                 (unsigned long long)backref->ref_root,
3968                                 (unsigned long long)backref->dir,
3969                                 (unsigned long long)backref->index,
3970                                 backref->namelen, backref->name,
3971                                 backref->errors);
3972                         print_ref_error(backref->errors);
3973                 }
3974         }
3975         return errors > 0 ? 1 : 0;
3976 }
3977
3978 static int process_root_ref(struct extent_buffer *eb, int slot,
3979                             struct btrfs_key *key,
3980                             struct cache_tree *root_cache)
3981 {
3982         u64 dirid;
3983         u64 index;
3984         u32 len;
3985         u32 name_len;
3986         struct btrfs_root_ref *ref;
3987         char namebuf[BTRFS_NAME_LEN];
3988         int error;
3989
3990         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3991
3992         dirid = btrfs_root_ref_dirid(eb, ref);
3993         index = btrfs_root_ref_sequence(eb, ref);
3994         name_len = btrfs_root_ref_name_len(eb, ref);
3995
3996         if (name_len <= BTRFS_NAME_LEN) {
3997                 len = name_len;
3998                 error = 0;
3999         } else {
4000                 len = BTRFS_NAME_LEN;
4001                 error = REF_ERR_NAME_TOO_LONG;
4002         }
4003         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
4004
4005         if (key->type == BTRFS_ROOT_REF_KEY) {
4006                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
4007                                  index, namebuf, len, key->type, error);
4008         } else {
4009                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
4010                                  index, namebuf, len, key->type, error);
4011         }
4012         return 0;
4013 }
4014
4015 static void free_corrupt_block(struct cache_extent *cache)
4016 {
4017         struct btrfs_corrupt_block *corrupt;
4018
4019         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
4020         free(corrupt);
4021 }
4022
4023 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
4024
4025 /*
4026  * Repair the btree of the given root.
4027  *
4028  * The fix is to remove the node key in corrupt_blocks cache_tree.
4029  * and rebalance the tree.
4030  * After the fix, the btree should be writeable.
4031  */
4032 static int repair_btree(struct btrfs_root *root,
4033                         struct cache_tree *corrupt_blocks)
4034 {
4035         struct btrfs_trans_handle *trans;
4036         struct btrfs_path path;
4037         struct btrfs_corrupt_block *corrupt;
4038         struct cache_extent *cache;
4039         struct btrfs_key key;
4040         u64 offset;
4041         int level;
4042         int ret = 0;
4043
4044         if (cache_tree_empty(corrupt_blocks))
4045                 return 0;
4046
4047         trans = btrfs_start_transaction(root, 1);
4048         if (IS_ERR(trans)) {
4049                 ret = PTR_ERR(trans);
4050                 fprintf(stderr, "Error starting transaction: %s\n",
4051                         strerror(-ret));
4052                 return ret;
4053         }
4054         btrfs_init_path(&path);
4055         cache = first_cache_extent(corrupt_blocks);
4056         while (cache) {
4057                 corrupt = container_of(cache, struct btrfs_corrupt_block,
4058                                        cache);
4059                 level = corrupt->level;
4060                 path.lowest_level = level;
4061                 key.objectid = corrupt->key.objectid;
4062                 key.type = corrupt->key.type;
4063                 key.offset = corrupt->key.offset;
4064
4065                 /*
4066                  * Here we don't want to do any tree balance, since it may
4067                  * cause a balance with corrupted brother leaf/node,
4068                  * so ins_len set to 0 here.
4069                  * Balance will be done after all corrupt node/leaf is deleted.
4070                  */
4071                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
4072                 if (ret < 0)
4073                         goto out;
4074                 offset = btrfs_node_blockptr(path.nodes[level],
4075                                              path.slots[level]);
4076
4077                 /* Remove the ptr */
4078                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
4079                 if (ret < 0)
4080                         goto out;
4081                 /*
4082                  * Remove the corresponding extent
4083                  * return value is not concerned.
4084                  */
4085                 btrfs_release_path(&path);
4086                 ret = btrfs_free_extent(trans, root, offset,
4087                                 root->fs_info->nodesize, 0,
4088                                 root->root_key.objectid, level - 1, 0);
4089                 cache = next_cache_extent(cache);
4090         }
4091
4092         /* Balance the btree using btrfs_search_slot() */
4093         cache = first_cache_extent(corrupt_blocks);
4094         while (cache) {
4095                 corrupt = container_of(cache, struct btrfs_corrupt_block,
4096                                        cache);
4097                 memcpy(&key, &corrupt->key, sizeof(key));
4098                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4099                 if (ret < 0)
4100                         goto out;
4101                 /* return will always >0 since it won't find the item */
4102                 ret = 0;
4103                 btrfs_release_path(&path);
4104                 cache = next_cache_extent(cache);
4105         }
4106 out:
4107         btrfs_commit_transaction(trans, root);
4108         btrfs_release_path(&path);
4109         return ret;
4110 }
4111
4112 static int check_fs_root(struct btrfs_root *root,
4113                          struct cache_tree *root_cache,
4114                          struct walk_control *wc)
4115 {
4116         int ret = 0;
4117         int err = 0;
4118         int wret;
4119         int level;
4120         struct btrfs_path path;
4121         struct shared_node root_node;
4122         struct root_record *rec;
4123         struct btrfs_root_item *root_item = &root->root_item;
4124         struct cache_tree corrupt_blocks;
4125         struct orphan_data_extent *orphan;
4126         struct orphan_data_extent *tmp;
4127         enum btrfs_tree_block_status status;
4128         struct node_refs nrefs;
4129
4130         /*
4131          * Reuse the corrupt_block cache tree to record corrupted tree block
4132          *
4133          * Unlike the usage in extent tree check, here we do it in a per
4134          * fs/subvol tree base.
4135          */
4136         cache_tree_init(&corrupt_blocks);
4137         root->fs_info->corrupt_blocks = &corrupt_blocks;
4138
4139         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4140                 rec = get_root_rec(root_cache, root->root_key.objectid);
4141                 BUG_ON(IS_ERR(rec));
4142                 if (btrfs_root_refs(root_item) > 0)
4143                         rec->found_root_item = 1;
4144         }
4145
4146         btrfs_init_path(&path);
4147         memset(&root_node, 0, sizeof(root_node));
4148         cache_tree_init(&root_node.root_cache);
4149         cache_tree_init(&root_node.inode_cache);
4150         memset(&nrefs, 0, sizeof(nrefs));
4151
4152         /* Move the orphan extent record to corresponding inode_record */
4153         list_for_each_entry_safe(orphan, tmp,
4154                                  &root->orphan_data_extents, list) {
4155                 struct inode_record *inode;
4156
4157                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4158                                       1);
4159                 BUG_ON(IS_ERR(inode));
4160                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4161                 list_move(&orphan->list, &inode->orphan_extents);
4162         }
4163
4164         level = btrfs_header_level(root->node);
4165         memset(wc->nodes, 0, sizeof(wc->nodes));
4166         wc->nodes[level] = &root_node;
4167         wc->active_node = level;
4168         wc->root_level = level;
4169
4170         /* We may not have checked the root block, lets do that now */
4171         if (btrfs_is_leaf(root->node))
4172                 status = btrfs_check_leaf(root, NULL, root->node);
4173         else
4174                 status = btrfs_check_node(root, NULL, root->node);
4175         if (status != BTRFS_TREE_BLOCK_CLEAN)
4176                 return -EIO;
4177
4178         if (btrfs_root_refs(root_item) > 0 ||
4179             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4180                 path.nodes[level] = root->node;
4181                 extent_buffer_get(root->node);
4182                 path.slots[level] = 0;
4183         } else {
4184                 struct btrfs_key key;
4185                 struct btrfs_disk_key found_key;
4186
4187                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4188                 level = root_item->drop_level;
4189                 path.lowest_level = level;
4190                 if (level > btrfs_header_level(root->node) ||
4191                     level >= BTRFS_MAX_LEVEL) {
4192                         error("ignoring invalid drop level: %u", level);
4193                         goto skip_walking;
4194                 }
4195                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4196                 if (wret < 0)
4197                         goto skip_walking;
4198                 btrfs_node_key(path.nodes[level], &found_key,
4199                                 path.slots[level]);
4200                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4201                                         sizeof(found_key)));
4202         }
4203
4204         while (1) {
4205                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4206                 if (wret < 0)
4207                         ret = wret;
4208                 if (wret != 0)
4209                         break;
4210
4211                 wret = walk_up_tree(root, &path, wc, &level);
4212                 if (wret < 0)
4213                         ret = wret;
4214                 if (wret != 0)
4215                         break;
4216         }
4217 skip_walking:
4218         btrfs_release_path(&path);
4219
4220         if (!cache_tree_empty(&corrupt_blocks)) {
4221                 struct cache_extent *cache;
4222                 struct btrfs_corrupt_block *corrupt;
4223
4224                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4225                        root->root_key.objectid);
4226                 cache = first_cache_extent(&corrupt_blocks);
4227                 while (cache) {
4228                         corrupt = container_of(cache,
4229                                                struct btrfs_corrupt_block,
4230                                                cache);
4231                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4232                                cache->start, corrupt->level,
4233                                corrupt->key.objectid, corrupt->key.type,
4234                                corrupt->key.offset);
4235                         cache = next_cache_extent(cache);
4236                 }
4237                 if (repair) {
4238                         printf("Try to repair the btree for root %llu\n",
4239                                root->root_key.objectid);
4240                         ret = repair_btree(root, &corrupt_blocks);
4241                         if (ret < 0)
4242                                 fprintf(stderr, "Failed to repair btree: %s\n",
4243                                         strerror(-ret));
4244                         if (!ret)
4245                                 printf("Btree for root %llu is fixed\n",
4246                                        root->root_key.objectid);
4247                 }
4248         }
4249
4250         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4251         if (err < 0)
4252                 ret = err;
4253
4254         if (root_node.current) {
4255                 root_node.current->checked = 1;
4256                 maybe_free_inode_rec(&root_node.inode_cache,
4257                                 root_node.current);
4258         }
4259
4260         err = check_inode_recs(root, &root_node.inode_cache);
4261         if (!ret)
4262                 ret = err;
4263
4264         free_corrupt_blocks_tree(&corrupt_blocks);
4265         root->fs_info->corrupt_blocks = NULL;
4266         free_orphan_data_extents(&root->orphan_data_extents);
4267         return ret;
4268 }
4269
4270 static int fs_root_objectid(u64 objectid)
4271 {
4272         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4273             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4274                 return 1;
4275         return is_fstree(objectid);
4276 }
4277
4278 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4279                           struct cache_tree *root_cache)
4280 {
4281         struct btrfs_path path;
4282         struct btrfs_key key;
4283         struct walk_control wc;
4284         struct extent_buffer *leaf, *tree_node;
4285         struct btrfs_root *tmp_root;
4286         struct btrfs_root *tree_root = fs_info->tree_root;
4287         int ret;
4288         int err = 0;
4289
4290         if (ctx.progress_enabled) {
4291                 ctx.tp = TASK_FS_ROOTS;
4292                 task_start(ctx.info);
4293         }
4294
4295         /*
4296          * Just in case we made any changes to the extent tree that weren't
4297          * reflected into the free space cache yet.
4298          */
4299         if (repair)
4300                 reset_cached_block_groups(fs_info);
4301         memset(&wc, 0, sizeof(wc));
4302         cache_tree_init(&wc.shared);
4303         btrfs_init_path(&path);
4304
4305 again:
4306         key.offset = 0;
4307         key.objectid = 0;
4308         key.type = BTRFS_ROOT_ITEM_KEY;
4309         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4310         if (ret < 0) {
4311                 err = 1;
4312                 goto out;
4313         }
4314         tree_node = tree_root->node;
4315         while (1) {
4316                 if (tree_node != tree_root->node) {
4317                         free_root_recs_tree(root_cache);
4318                         btrfs_release_path(&path);
4319                         goto again;
4320                 }
4321                 leaf = path.nodes[0];
4322                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4323                         ret = btrfs_next_leaf(tree_root, &path);
4324                         if (ret) {
4325                                 if (ret < 0)
4326                                         err = 1;
4327                                 break;
4328                         }
4329                         leaf = path.nodes[0];
4330                 }
4331                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4332                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4333                     fs_root_objectid(key.objectid)) {
4334                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4335                                 tmp_root = btrfs_read_fs_root_no_cache(
4336                                                 fs_info, &key);
4337                         } else {
4338                                 key.offset = (u64)-1;
4339                                 tmp_root = btrfs_read_fs_root(
4340                                                 fs_info, &key);
4341                         }
4342                         if (IS_ERR(tmp_root)) {
4343                                 err = 1;
4344                                 goto next;
4345                         }
4346                         ret = check_fs_root(tmp_root, root_cache, &wc);
4347                         if (ret == -EAGAIN) {
4348                                 free_root_recs_tree(root_cache);
4349                                 btrfs_release_path(&path);
4350                                 goto again;
4351                         }
4352                         if (ret)
4353                                 err = 1;
4354                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4355                                 btrfs_free_fs_root(tmp_root);
4356                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4357                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4358                         process_root_ref(leaf, path.slots[0], &key,
4359                                          root_cache);
4360                 }
4361 next:
4362                 path.slots[0]++;
4363         }
4364 out:
4365         btrfs_release_path(&path);
4366         if (err)
4367                 free_extent_cache_tree(&wc.shared);
4368         if (!cache_tree_empty(&wc.shared))
4369                 fprintf(stderr, "warning line %d\n", __LINE__);
4370
4371         task_stop(ctx.info);
4372
4373         return err;
4374 }
4375
4376 /*
4377  * Find the @index according by @ino and name.
4378  * Notice:time efficiency is O(N)
4379  *
4380  * @root:       the root of the fs/file tree
4381  * @index_ret:  the index as return value
4382  * @namebuf:    the name to match
4383  * @name_len:   the length of name to match
4384  * @file_type:  the file_type of INODE_ITEM to match
4385  *
4386  * Returns 0 if found and *@index_ret will be modified with right value
4387  * Returns< 0 not found and *@index_ret will be (u64)-1
4388  */
4389 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4390                           u64 *index_ret, char *namebuf, u32 name_len,
4391                           u8 file_type)
4392 {
4393         struct btrfs_path path;
4394         struct extent_buffer *node;
4395         struct btrfs_dir_item *di;
4396         struct btrfs_key key;
4397         struct btrfs_key location;
4398         char name[BTRFS_NAME_LEN] = {0};
4399
4400         u32 total;
4401         u32 cur = 0;
4402         u32 len;
4403         u32 data_len;
4404         u8 filetype;
4405         int slot;
4406         int ret;
4407
4408         ASSERT(index_ret);
4409
4410         /* search from the last index */
4411         key.objectid = dirid;
4412         key.offset = (u64)-1;
4413         key.type = BTRFS_DIR_INDEX_KEY;
4414
4415         btrfs_init_path(&path);
4416         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4417         if (ret < 0)
4418                 return ret;
4419
4420 loop:
4421         ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4422         if (ret) {
4423                 ret = -ENOENT;
4424                 *index_ret = (64)-1;
4425                 goto out;
4426         }
4427         /* Check whether inode_id/filetype/name match */
4428         node = path.nodes[0];
4429         slot = path.slots[0];
4430         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4431         total = btrfs_item_size_nr(node, slot);
4432         while (cur < total) {
4433                 ret = -ENOENT;
4434                 len = btrfs_dir_name_len(node, di);
4435                 data_len = btrfs_dir_data_len(node, di);
4436
4437                 btrfs_dir_item_key_to_cpu(node, di, &location);
4438                 if (location.objectid != location_id ||
4439                     location.type != BTRFS_INODE_ITEM_KEY ||
4440                     location.offset != 0)
4441                         goto next;
4442
4443                 filetype = btrfs_dir_type(node, di);
4444                 if (file_type != filetype)
4445                         goto next;
4446
4447                 if (len > BTRFS_NAME_LEN)
4448                         len = BTRFS_NAME_LEN;
4449
4450                 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4451                 if (len != name_len || strncmp(namebuf, name, len))
4452                         goto next;
4453
4454                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4455                 *index_ret = key.offset;
4456                 ret = 0;
4457                 goto out;
4458 next:
4459                 len += sizeof(*di) + data_len;
4460                 di = (struct btrfs_dir_item *)((char *)di + len);
4461                 cur += len;
4462         }
4463         goto loop;
4464
4465 out:
4466         btrfs_release_path(&path);
4467         return ret;
4468 }
4469
4470 /*
4471  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4472  * INODE_REF/INODE_EXTREF match.
4473  *
4474  * @root:       the root of the fs/file tree
4475  * @key:        the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4476  *              value while find index
4477  * @location_key: location key of the struct btrfs_dir_item to match
4478  * @name:       the name to match
4479  * @namelen:    the length of name
4480  * @file_type:  the type of file to math
4481  *
4482  * Return 0 if no error occurred.
4483  * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4484  * DIR_ITEM/DIR_INDEX
4485  * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4486  * and DIR_ITEM/DIR_INDEX mismatch
4487  */
4488 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4489                          struct btrfs_key *location_key, char *name,
4490                          u32 namelen, u8 file_type)
4491 {
4492         struct btrfs_path path;
4493         struct extent_buffer *node;
4494         struct btrfs_dir_item *di;
4495         struct btrfs_key location;
4496         char namebuf[BTRFS_NAME_LEN] = {0};
4497         u32 total;
4498         u32 cur = 0;
4499         u32 len;
4500         u32 data_len;
4501         u8 filetype;
4502         int slot;
4503         int ret;
4504
4505         /* get the index by traversing all index */
4506         if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4507                 ret = find_dir_index(root, key->objectid,
4508                                      location_key->objectid, &key->offset,
4509                                      name, namelen, file_type);
4510                 if (ret)
4511                         ret = DIR_INDEX_MISSING;
4512                 return ret;
4513         }
4514
4515         btrfs_init_path(&path);
4516         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4517         if (ret) {
4518                 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4519                         DIR_INDEX_MISSING;
4520                 goto out;
4521         }
4522
4523         /* Check whether inode_id/filetype/name match */
4524         node = path.nodes[0];
4525         slot = path.slots[0];
4526         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4527         total = btrfs_item_size_nr(node, slot);
4528         while (cur < total) {
4529                 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4530                         DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4531
4532                 len = btrfs_dir_name_len(node, di);
4533                 data_len = btrfs_dir_data_len(node, di);
4534
4535                 btrfs_dir_item_key_to_cpu(node, di, &location);
4536                 if (location.objectid != location_key->objectid ||
4537                     location.type != location_key->type ||
4538                     location.offset != location_key->offset)
4539                         goto next;
4540
4541                 filetype = btrfs_dir_type(node, di);
4542                 if (file_type != filetype)
4543                         goto next;
4544
4545                 if (len > BTRFS_NAME_LEN) {
4546                         len = BTRFS_NAME_LEN;
4547                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4548                         root->objectid,
4549                         key->type == BTRFS_DIR_ITEM_KEY ?
4550                         "DIR_ITEM" : "DIR_INDEX",
4551                         key->objectid, key->offset, len);
4552                 }
4553                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4554                                    len);
4555                 if (len != namelen || strncmp(namebuf, name, len))
4556                         goto next;
4557
4558                 ret = 0;
4559                 goto out;
4560 next:
4561                 len += sizeof(*di) + data_len;
4562                 di = (struct btrfs_dir_item *)((char *)di + len);
4563                 cur += len;
4564         }
4565
4566 out:
4567         btrfs_release_path(&path);
4568         return ret;
4569 }
4570
4571 /*
4572  * Prints inode ref error message
4573  */
4574 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4575                                 u64 index, const char *namebuf, int name_len,
4576                                 u8 filetype, int err)
4577 {
4578         if (!err)
4579                 return;
4580
4581         /* root dir error */
4582         if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4583                 error(
4584         "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4585                       root->objectid, key->objectid, key->offset, namebuf);
4586                 return;
4587         }
4588
4589         /* normal error */
4590         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4591                 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4592                       root->objectid, key->offset,
4593                       btrfs_name_hash(namebuf, name_len),
4594                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4595                       namebuf, filetype);
4596         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4597                 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4598                       root->objectid, key->offset, index,
4599                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4600                       namebuf, filetype);
4601 }
4602
4603 /*
4604  * Insert the missing inode item.
4605  *
4606  * Returns 0 means success.
4607  * Returns <0 means error.
4608  */
4609 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4610                                      u8 filetype)
4611 {
4612         struct btrfs_key key;
4613         struct btrfs_trans_handle *trans;
4614         struct btrfs_path path;
4615         int ret;
4616
4617         key.objectid = ino;
4618         key.type = BTRFS_INODE_ITEM_KEY;
4619         key.offset = 0;
4620
4621         btrfs_init_path(&path);
4622         trans = btrfs_start_transaction(root, 1);
4623         if (IS_ERR(trans)) {
4624                 ret = -EIO;
4625                 goto out;
4626         }
4627
4628         ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4629         if (ret < 0 || !ret)
4630                 goto fail;
4631
4632         /* insert inode item */
4633         create_inode_item_lowmem(trans, root, ino, filetype);
4634         ret = 0;
4635 fail:
4636         btrfs_commit_transaction(trans, root);
4637 out:
4638         if (ret)
4639                 error("failed to repair root %llu INODE ITEM[%llu] missing",
4640                       root->objectid, ino);
4641         btrfs_release_path(&path);
4642         return ret;
4643 }
4644
4645 /*
4646  * The ternary means dir item, dir index and relative inode ref.
4647  * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4648  * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4649  * strategy:
4650  * If two of three is missing or mismatched, delete the existing one.
4651  * If one of three is missing or mismatched, add the missing one.
4652  *
4653  * returns 0 means success.
4654  * returns not 0 means on error;
4655  */
4656 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4657                           u64 index, char *name, int name_len, u8 filetype,
4658                           int err)
4659 {
4660         struct btrfs_trans_handle *trans;
4661         int stage = 0;
4662         int ret = 0;
4663
4664         /*
4665          * stage shall be one of following valild values:
4666          *      0: Fine, nothing to do.
4667          *      1: One of three is wrong, so add missing one.
4668          *      2: Two of three is wrong, so delete existed one.
4669          */
4670         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4671                 stage++;
4672         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4673                 stage++;
4674         if (err & (INODE_REF_MISSING))
4675                 stage++;
4676
4677         /* stage must be smllarer than 3 */
4678         ASSERT(stage < 3);
4679
4680         trans = btrfs_start_transaction(root, 1);
4681         if (stage == 2) {
4682                 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4683                                    name_len, 0);
4684                 goto out;
4685         }
4686         if (stage == 1) {
4687                 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4688                                filetype, &index, 1, 1);
4689                 goto out;
4690         }
4691 out:
4692         btrfs_commit_transaction(trans, root);
4693
4694         if (ret)
4695                 error("fail to repair inode %llu name %s filetype %u",
4696                       ino, name, filetype);
4697         else
4698                 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4699                        stage == 2 ? "Delete" : "Add",
4700                        ino, name, filetype);
4701
4702         return ret;
4703 }
4704
4705 /*
4706  * Traverse the given INODE_REF and call find_dir_item() to find related
4707  * DIR_ITEM/DIR_INDEX.
4708  *
4709  * @root:       the root of the fs/file tree
4710  * @ref_key:    the key of the INODE_REF
4711  * @path        the path provides node and slot
4712  * @refs:       the count of INODE_REF
4713  * @mode:       the st_mode of INODE_ITEM
4714  * @name_ret:   returns with the first ref's name
4715  * @name_len_ret:    len of the name_ret
4716  *
4717  * Return 0 if no error occurred.
4718  */
4719 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4720                            struct btrfs_path *path, char *name_ret,
4721                            u32 *namelen_ret, u64 *refs_ret, int mode)
4722 {
4723         struct btrfs_key key;
4724         struct btrfs_key location;
4725         struct btrfs_inode_ref *ref;
4726         struct extent_buffer *node;
4727         char namebuf[BTRFS_NAME_LEN] = {0};
4728         u32 total;
4729         u32 cur = 0;
4730         u32 len;
4731         u32 name_len;
4732         u64 index;
4733         int ret;
4734         int err = 0;
4735         int tmp_err;
4736         int slot;
4737         int need_research = 0;
4738         u64 refs;
4739
4740 begin:
4741         err = 0;
4742         cur = 0;
4743         refs = *refs_ret;
4744
4745         /* since after repair, path and the dir item may be changed */
4746         if (need_research) {
4747                 need_research = 0;
4748                 btrfs_release_path(path);
4749                 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4750                 /* the item was deleted, let path point to the last checked item */
4751                 if (ret > 0) {
4752                         if (path->slots[0] == 0)
4753                                 btrfs_prev_leaf(root, path);
4754                         else
4755                                 path->slots[0]--;
4756                 }
4757                 if (ret)
4758                         goto out;
4759         }
4760
4761         location.objectid = ref_key->objectid;
4762         location.type = BTRFS_INODE_ITEM_KEY;
4763         location.offset = 0;
4764         node = path->nodes[0];
4765         slot = path->slots[0];
4766
4767         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4768         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4769         total = btrfs_item_size_nr(node, slot);
4770
4771 next:
4772         /* Update inode ref count */
4773         refs++;
4774         tmp_err = 0;
4775         index = btrfs_inode_ref_index(node, ref);
4776         name_len = btrfs_inode_ref_name_len(node, ref);
4777
4778         if (name_len <= BTRFS_NAME_LEN) {
4779                 len = name_len;
4780         } else {
4781                 len = BTRFS_NAME_LEN;
4782                 warning("root %llu INODE_REF[%llu %llu] name too long",
4783                         root->objectid, ref_key->objectid, ref_key->offset);
4784         }
4785
4786         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4787
4788         /* copy the first name found to name_ret */
4789         if (refs == 1 && name_ret) {
4790                 memcpy(name_ret, namebuf, len);
4791                 *namelen_ret = len;
4792         }
4793
4794         /* Check root dir ref */
4795         if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4796                 if (index != 0 || len != strlen("..") ||
4797                     strncmp("..", namebuf, len) ||
4798                     ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4799                         /* set err bits then repair will delete the ref */
4800                         err |= DIR_INDEX_MISSING;
4801                         err |= DIR_ITEM_MISSING;
4802                 }
4803                 goto end;
4804         }
4805
4806         /* Find related DIR_INDEX */
4807         key.objectid = ref_key->offset;
4808         key.type = BTRFS_DIR_INDEX_KEY;
4809         key.offset = index;
4810         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4811                             imode_to_type(mode));
4812
4813         /* Find related dir_item */
4814         key.objectid = ref_key->offset;
4815         key.type = BTRFS_DIR_ITEM_KEY;
4816         key.offset = btrfs_name_hash(namebuf, len);
4817         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4818                             imode_to_type(mode));
4819 end:
4820         if (tmp_err && repair) {
4821                 ret = repair_ternary_lowmem(root, ref_key->offset,
4822                                             ref_key->objectid, index, namebuf,
4823                                             name_len, imode_to_type(mode),
4824                                             tmp_err);
4825                 if (!ret) {
4826                         need_research = 1;
4827                         goto begin;
4828                 }
4829         }
4830         print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4831                             imode_to_type(mode), tmp_err);
4832         err |= tmp_err;
4833         len = sizeof(*ref) + name_len;
4834         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4835         cur += len;
4836         if (cur < total)
4837                 goto next;
4838
4839 out:
4840         *refs_ret = refs;
4841         return err;
4842 }
4843
4844 /*
4845  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4846  * DIR_ITEM/DIR_INDEX.
4847  *
4848  * @root:       the root of the fs/file tree
4849  * @ref_key:    the key of the INODE_EXTREF
4850  * @refs:       the count of INODE_EXTREF
4851  * @mode:       the st_mode of INODE_ITEM
4852  *
4853  * Return 0 if no error occurred.
4854  */
4855 static int check_inode_extref(struct btrfs_root *root,
4856                               struct btrfs_key *ref_key,
4857                               struct extent_buffer *node, int slot, u64 *refs,
4858                               int mode)
4859 {
4860         struct btrfs_key key;
4861         struct btrfs_key location;
4862         struct btrfs_inode_extref *extref;
4863         char namebuf[BTRFS_NAME_LEN] = {0};
4864         u32 total;
4865         u32 cur = 0;
4866         u32 len;
4867         u32 name_len;
4868         u64 index;
4869         u64 parent;
4870         int ret;
4871         int err = 0;
4872
4873         location.objectid = ref_key->objectid;
4874         location.type = BTRFS_INODE_ITEM_KEY;
4875         location.offset = 0;
4876
4877         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4878         total = btrfs_item_size_nr(node, slot);
4879
4880 next:
4881         /* update inode ref count */
4882         (*refs)++;
4883         name_len = btrfs_inode_extref_name_len(node, extref);
4884         index = btrfs_inode_extref_index(node, extref);
4885         parent = btrfs_inode_extref_parent(node, extref);
4886         if (name_len <= BTRFS_NAME_LEN) {
4887                 len = name_len;
4888         } else {
4889                 len = BTRFS_NAME_LEN;
4890                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4891                         root->objectid, ref_key->objectid, ref_key->offset);
4892         }
4893         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4894
4895         /* Check root dir ref name */
4896         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4897                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4898                       root->objectid, ref_key->objectid, ref_key->offset,
4899                       namebuf);
4900                 err |= ROOT_DIR_ERROR;
4901         }
4902
4903         /* find related dir_index */
4904         key.objectid = parent;
4905         key.type = BTRFS_DIR_INDEX_KEY;
4906         key.offset = index;
4907         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4908         err |= ret;
4909
4910         /* find related dir_item */
4911         key.objectid = parent;
4912         key.type = BTRFS_DIR_ITEM_KEY;
4913         key.offset = btrfs_name_hash(namebuf, len);
4914         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4915         err |= ret;
4916
4917         len = sizeof(*extref) + name_len;
4918         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4919         cur += len;
4920
4921         if (cur < total)
4922                 goto next;
4923
4924         return err;
4925 }
4926
4927 /*
4928  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4929  * DIR_ITEM/DIR_INDEX match.
4930  * Return with @index_ret.
4931  *
4932  * @root:       the root of the fs/file tree
4933  * @key:        the key of the INODE_REF/INODE_EXTREF
4934  * @name:       the name in the INODE_REF/INODE_EXTREF
4935  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4936  * @index_ret:  the index in the INODE_REF/INODE_EXTREF,
4937  *              value (64)-1 means do not check index
4938  * @ext_ref:    the EXTENDED_IREF feature
4939  *
4940  * Return 0 if no error occurred.
4941  * Return >0 for error bitmap
4942  */
4943 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4944                           char *name, int namelen, u64 *index_ret,
4945                           unsigned int ext_ref)
4946 {
4947         struct btrfs_path path;
4948         struct btrfs_inode_ref *ref;
4949         struct btrfs_inode_extref *extref;
4950         struct extent_buffer *node;
4951         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4952         u32 total;
4953         u32 cur = 0;
4954         u32 len;
4955         u32 ref_namelen;
4956         u64 ref_index;
4957         u64 parent;
4958         u64 dir_id;
4959         int slot;
4960         int ret;
4961
4962         ASSERT(index_ret);
4963
4964         btrfs_init_path(&path);
4965         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4966         if (ret) {
4967                 ret = INODE_REF_MISSING;
4968                 goto extref;
4969         }
4970
4971         node = path.nodes[0];
4972         slot = path.slots[0];
4973
4974         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4975         total = btrfs_item_size_nr(node, slot);
4976
4977         /* Iterate all entry of INODE_REF */
4978         while (cur < total) {
4979                 ret = INODE_REF_MISSING;
4980
4981                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4982                 ref_index = btrfs_inode_ref_index(node, ref);
4983                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4984                         goto next_ref;
4985
4986                 if (cur + sizeof(*ref) + ref_namelen > total ||
4987                     ref_namelen > BTRFS_NAME_LEN) {
4988                         warning("root %llu INODE %s[%llu %llu] name too long",
4989                                 root->objectid,
4990                                 key->type == BTRFS_INODE_REF_KEY ?
4991                                         "REF" : "EXTREF",
4992                                 key->objectid, key->offset);
4993
4994                         if (cur + sizeof(*ref) > total)
4995                                 break;
4996                         len = min_t(u32, total - cur - sizeof(*ref),
4997                                     BTRFS_NAME_LEN);
4998                 } else {
4999                         len = ref_namelen;
5000                 }
5001
5002                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
5003                                    len);
5004
5005                 if (len != namelen || strncmp(ref_namebuf, name, len))
5006                         goto next_ref;
5007
5008                 *index_ret = ref_index;
5009                 ret = 0;
5010                 goto out;
5011 next_ref:
5012                 len = sizeof(*ref) + ref_namelen;
5013                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5014                 cur += len;
5015         }
5016
5017 extref:
5018         /* Skip if not support EXTENDED_IREF feature */
5019         if (!ext_ref)
5020                 goto out;
5021
5022         btrfs_release_path(&path);
5023         btrfs_init_path(&path);
5024
5025         dir_id = key->offset;
5026         key->type = BTRFS_INODE_EXTREF_KEY;
5027         key->offset = btrfs_extref_hash(dir_id, name, namelen);
5028
5029         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5030         if (ret) {
5031                 ret = INODE_REF_MISSING;
5032                 goto out;
5033         }
5034
5035         node = path.nodes[0];
5036         slot = path.slots[0];
5037
5038         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5039         cur = 0;
5040         total = btrfs_item_size_nr(node, slot);
5041
5042         /* Iterate all entry of INODE_EXTREF */
5043         while (cur < total) {
5044                 ret = INODE_REF_MISSING;
5045
5046                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
5047                 ref_index = btrfs_inode_extref_index(node, extref);
5048                 parent = btrfs_inode_extref_parent(node, extref);
5049                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5050                         goto next_extref;
5051
5052                 if (parent != dir_id)
5053                         goto next_extref;
5054
5055                 if (ref_namelen <= BTRFS_NAME_LEN) {
5056                         len = ref_namelen;
5057                 } else {
5058                         len = BTRFS_NAME_LEN;
5059                         warning("root %llu INODE %s[%llu %llu] name too long",
5060                                 root->objectid,
5061                                 key->type == BTRFS_INODE_REF_KEY ?
5062                                         "REF" : "EXTREF",
5063                                 key->objectid, key->offset);
5064                 }
5065                 read_extent_buffer(node, ref_namebuf,
5066                                    (unsigned long)(extref + 1), len);
5067
5068                 if (len != namelen || strncmp(ref_namebuf, name, len))
5069                         goto next_extref;
5070
5071                 *index_ret = ref_index;
5072                 ret = 0;
5073                 goto out;
5074
5075 next_extref:
5076                 len = sizeof(*extref) + ref_namelen;
5077                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5078                 cur += len;
5079
5080         }
5081 out:
5082         btrfs_release_path(&path);
5083         return ret;
5084 }
5085
5086 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
5087                                u64 ino, u64 index, const char *namebuf,
5088                                int name_len, u8 filetype, int err)
5089 {
5090         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5091                 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5092                       root->objectid, key->objectid, key->offset, namebuf,
5093                       filetype,
5094                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5095         }
5096
5097         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5098                 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5099                       root->objectid, key->objectid, index, namebuf, filetype,
5100                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5101         }
5102
5103         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5104                 error(
5105                 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5106                       root->objectid, ino, index, namebuf, filetype,
5107                       err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
5108         }
5109
5110         if (err & INODE_REF_MISSING)
5111                 error(
5112                 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5113                       root->objectid, ino, key->objectid, namebuf, filetype);
5114
5115 }
5116
5117 /*
5118  * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5119  *
5120  * Returns error after repair
5121  */
5122 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5123                            u64 index, u8 filetype, char *namebuf, u32 name_len,
5124                            int err)
5125 {
5126         int ret;
5127
5128         if (err & INODE_ITEM_MISSING) {
5129                 ret = repair_inode_item_missing(root, ino, filetype);
5130                 if (!ret)
5131                         err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
5132         }
5133
5134         if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5135                 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5136                                             name_len, filetype, err);
5137                 if (!ret) {
5138                         err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5139                         err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5140                         err &= ~(INODE_REF_MISSING);
5141                 }
5142         }
5143         return err;
5144 }
5145
5146 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5147                 u64 *size_ret)
5148 {
5149         struct btrfs_key key;
5150         struct btrfs_path path;
5151         u32 len;
5152         struct btrfs_dir_item *di;
5153         int ret;
5154         int cur = 0;
5155         int total = 0;
5156
5157         ASSERT(size_ret);
5158         *size_ret = 0;
5159
5160         key.objectid = ino;
5161         key.type = type;
5162         key.offset = (u64)-1;
5163
5164         btrfs_init_path(&path);
5165         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5166         if (ret < 0) {
5167                 ret = -EIO;
5168                 goto out;
5169         }
5170         /* if found, go to spacial case */
5171         if (ret == 0)
5172                 goto special_case;
5173
5174 loop:
5175         ret = btrfs_previous_item(root, &path, ino, type);
5176
5177         if (ret) {
5178                 ret = 0;
5179                 goto out;
5180         }
5181
5182 special_case:
5183         di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5184         cur = 0;
5185         total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5186
5187         while (cur < total) {
5188                 len = btrfs_dir_name_len(path.nodes[0], di);
5189                 if (len > BTRFS_NAME_LEN)
5190                         len = BTRFS_NAME_LEN;
5191                 *size_ret += len;
5192
5193                 len += btrfs_dir_data_len(path.nodes[0], di);
5194                 len += sizeof(*di);
5195                 di = (struct btrfs_dir_item *)((char *)di + len);
5196                 cur += len;
5197         }
5198         goto loop;
5199
5200 out:
5201         btrfs_release_path(&path);
5202         return ret;
5203 }
5204
5205 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5206 {
5207         u64 item_size;
5208         u64 index_size;
5209         int ret;
5210
5211         ASSERT(size);
5212         ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5213         if (ret)
5214                 goto out;
5215
5216         ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5217         if (ret)
5218                 goto out;
5219
5220         *size = item_size + index_size;
5221
5222 out:
5223         if (ret)
5224                 error("failed to count root %llu INODE[%llu] root size",
5225                       root->objectid, ino);
5226         return ret;
5227 }
5228
5229 /*
5230  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5231  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5232  *
5233  * @root:       the root of the fs/file tree
5234  * @key:        the key of the INODE_REF/INODE_EXTREF
5235  * @path:       the path
5236  * @size:       the st_size of the INODE_ITEM
5237  * @ext_ref:    the EXTENDED_IREF feature
5238  *
5239  * Return 0 if no error occurred.
5240  * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
5241  */
5242 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5243                           struct btrfs_path *path, u64 *size,
5244                           unsigned int ext_ref)
5245 {
5246         struct btrfs_dir_item *di;
5247         struct btrfs_inode_item *ii;
5248         struct btrfs_key key;
5249         struct btrfs_key location;
5250         struct extent_buffer *node;
5251         int slot;
5252         char namebuf[BTRFS_NAME_LEN] = {0};
5253         u32 total;
5254         u32 cur = 0;
5255         u32 len;
5256         u32 name_len;
5257         u32 data_len;
5258         u8 filetype;
5259         u32 mode = 0;
5260         u64 index;
5261         int ret;
5262         int err;
5263         int tmp_err;
5264         int need_research = 0;
5265
5266         /*
5267          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5268          * ignore index check.
5269          */
5270         if (di_key->type == BTRFS_DIR_INDEX_KEY)
5271                 index = di_key->offset;
5272         else
5273                 index = (u64)-1;
5274 begin:
5275         err = 0;
5276         cur = 0;
5277
5278         /* since after repair, path and the dir item may be changed */
5279         if (need_research) {
5280                 need_research = 0;
5281                 err |= DIR_COUNT_AGAIN;
5282                 btrfs_release_path(path);
5283                 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5284                 /* the item was deleted, let path point the last checked item */
5285                 if (ret > 0) {
5286                         if (path->slots[0] == 0)
5287                                 btrfs_prev_leaf(root, path);
5288                         else
5289                                 path->slots[0]--;
5290                 }
5291                 if (ret)
5292                         goto out;
5293         }
5294
5295         node = path->nodes[0];
5296         slot = path->slots[0];
5297
5298         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5299         total = btrfs_item_size_nr(node, slot);
5300         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5301
5302         while (cur < total) {
5303                 data_len = btrfs_dir_data_len(node, di);
5304                 tmp_err = 0;
5305                 if (data_len)
5306                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5307                               root->objectid,
5308               di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5309                               di_key->objectid, di_key->offset, data_len);
5310
5311                 name_len = btrfs_dir_name_len(node, di);
5312                 if (name_len <= BTRFS_NAME_LEN) {
5313                         len = name_len;
5314                 } else {
5315                         len = BTRFS_NAME_LEN;
5316                         warning("root %llu %s[%llu %llu] name too long",
5317                                 root->objectid,
5318                 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5319                                 di_key->objectid, di_key->offset);
5320                 }
5321                 (*size) += name_len;
5322                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5323                                    len);
5324                 filetype = btrfs_dir_type(node, di);
5325
5326                 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5327                     di_key->offset != btrfs_name_hash(namebuf, len)) {
5328                         err |= -EIO;
5329                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5330                         root->objectid, di_key->objectid, di_key->offset,
5331                         namebuf, len, filetype, di_key->offset,
5332                         btrfs_name_hash(namebuf, len));
5333                 }
5334
5335                 btrfs_dir_item_key_to_cpu(node, di, &location);
5336                 /* Ignore related ROOT_ITEM check */
5337                 if (location.type == BTRFS_ROOT_ITEM_KEY)
5338                         goto next;
5339
5340                 btrfs_release_path(path);
5341                 /* Check relative INODE_ITEM(existence/filetype) */
5342                 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5343                 if (ret) {
5344                         tmp_err |= INODE_ITEM_MISSING;
5345                         goto next;
5346                 }
5347
5348                 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5349                                     struct btrfs_inode_item);
5350                 mode = btrfs_inode_mode(path->nodes[0], ii);
5351                 if (imode_to_type(mode) != filetype) {
5352                         tmp_err |= INODE_ITEM_MISMATCH;
5353                         goto next;
5354                 }
5355
5356                 /* Check relative INODE_REF/INODE_EXTREF */
5357                 key.objectid = location.objectid;
5358                 key.type = BTRFS_INODE_REF_KEY;
5359                 key.offset = di_key->objectid;
5360                 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5361                                           &index, ext_ref);
5362
5363                 /* check relative INDEX/ITEM */
5364                 key.objectid = di_key->objectid;
5365                 if (key.type == BTRFS_DIR_ITEM_KEY) {
5366                         key.type = BTRFS_DIR_INDEX_KEY;
5367                         key.offset = index;
5368                 } else {
5369                         key.type = BTRFS_DIR_ITEM_KEY;
5370                         key.offset = btrfs_name_hash(namebuf, name_len);
5371                 }
5372
5373                 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5374                                          name_len, filetype);
5375                 /* find_dir_item may find index */
5376                 if (key.type == BTRFS_DIR_INDEX_KEY)
5377                         index = key.offset;
5378 next:
5379
5380                 if (tmp_err && repair) {
5381                         ret = repair_dir_item(root, di_key->objectid,
5382                                               location.objectid, index,
5383                                               imode_to_type(mode), namebuf,
5384                                               name_len, tmp_err);
5385                         if (ret != tmp_err) {
5386                                 need_research = 1;
5387                                 goto begin;
5388                         }
5389                 }
5390                 btrfs_release_path(path);
5391                 print_dir_item_err(root, di_key, location.objectid, index,
5392                                    namebuf, name_len, filetype, tmp_err);
5393                 err |= tmp_err;
5394                 len = sizeof(*di) + name_len + data_len;
5395                 di = (struct btrfs_dir_item *)((char *)di + len);
5396                 cur += len;
5397
5398                 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5399                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5400                               root->objectid, di_key->objectid,
5401                               di_key->offset);
5402                         break;
5403                 }
5404         }
5405 out:
5406         /* research path */
5407         btrfs_release_path(path);
5408         ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5409         if (ret)
5410                 err |= ret > 0 ? -ENOENT : ret;
5411         return err;
5412 }
5413
5414 /*
5415  * Wrapper function of btrfs_punch_hole.
5416  *
5417  * Returns 0 means success.
5418  * Returns not 0 means error.
5419  */
5420 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5421                              u64 len)
5422 {
5423         struct btrfs_trans_handle *trans;
5424         int ret = 0;
5425
5426         trans = btrfs_start_transaction(root, 1);
5427         if (IS_ERR(trans))
5428                 return PTR_ERR(trans);
5429
5430         ret = btrfs_punch_hole(trans, root, ino, start, len);
5431         if (ret)
5432                 error("failed to add hole [%llu, %llu] in inode [%llu]",
5433                       start, len, ino);
5434         else
5435                 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5436                        ino);
5437
5438         btrfs_commit_transaction(trans, root);
5439         return ret;
5440 }
5441
5442 /*
5443  * Check file extent datasum/hole, update the size of the file extents,
5444  * check and update the last offset of the file extent.
5445  *
5446  * @root:       the root of fs/file tree.
5447  * @fkey:       the key of the file extent.
5448  * @nodatasum:  INODE_NODATASUM feature.
5449  * @size:       the sum of all EXTENT_DATA items size for this inode.
5450  * @end:        the offset of the last extent.
5451  *
5452  * Return 0 if no error occurred.
5453  */
5454 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5455                              struct extent_buffer *node, int slot,
5456                              unsigned int nodatasum, u64 *size, u64 *end)
5457 {
5458         struct btrfs_file_extent_item *fi;
5459         u64 disk_bytenr;
5460         u64 disk_num_bytes;
5461         u64 extent_num_bytes;
5462         u64 extent_offset;
5463         u64 csum_found;         /* In byte size, sectorsize aligned */
5464         u64 search_start;       /* Logical range start we search for csum */
5465         u64 search_len;         /* Logical range len we search for csum */
5466         unsigned int extent_type;
5467         unsigned int is_hole;
5468         int compressed = 0;
5469         int ret;
5470         int err = 0;
5471
5472         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5473
5474         /* Check inline extent */
5475         extent_type = btrfs_file_extent_type(node, fi);
5476         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5477                 struct btrfs_item *e = btrfs_item_nr(slot);
5478                 u32 item_inline_len;
5479
5480                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5481                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5482                 compressed = btrfs_file_extent_compression(node, fi);
5483                 if (extent_num_bytes == 0) {
5484                         error(
5485                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5486                                 root->objectid, fkey->objectid, fkey->offset);
5487                         err |= FILE_EXTENT_ERROR;
5488                 }
5489                 if (!compressed && extent_num_bytes != item_inline_len) {
5490                         error(
5491                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5492                                 root->objectid, fkey->objectid, fkey->offset,
5493                                 extent_num_bytes, item_inline_len);
5494                         err |= FILE_EXTENT_ERROR;
5495                 }
5496                 *end += extent_num_bytes;
5497                 *size += extent_num_bytes;
5498                 return err;
5499         }
5500
5501         /* Check extent type */
5502         if (extent_type != BTRFS_FILE_EXTENT_REG &&
5503                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5504                 err |= FILE_EXTENT_ERROR;
5505                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5506                       root->objectid, fkey->objectid, fkey->offset);
5507                 return err;
5508         }
5509
5510         /* Check REG_EXTENT/PREALLOC_EXTENT */
5511         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5512         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5513         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5514         extent_offset = btrfs_file_extent_offset(node, fi);
5515         compressed = btrfs_file_extent_compression(node, fi);
5516         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5517
5518         /*
5519          * Check EXTENT_DATA csum
5520          *
5521          * For plain (uncompressed) extent, we should only check the range
5522          * we're referring to, as it's possible that part of prealloc extent
5523          * has been written, and has csum:
5524          *
5525          * |<--- Original large preallocated extent A ---->|
5526          * |<- Prealloc File Extent ->|<- Regular Extent ->|
5527          *      No csum                         Has csum
5528          *
5529          * For compressed extent, we should check the whole range.
5530          */
5531         if (!compressed) {
5532                 search_start = disk_bytenr + extent_offset;
5533                 search_len = extent_num_bytes;
5534         } else {
5535                 search_start = disk_bytenr;
5536                 search_len = disk_num_bytes;
5537         }
5538         ret = count_csum_range(root, search_start, search_len, &csum_found);
5539         if (csum_found > 0 && nodatasum) {
5540                 err |= ODD_CSUM_ITEM;
5541                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5542                       root->objectid, fkey->objectid, fkey->offset);
5543         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5544                    !is_hole && (ret < 0 || csum_found < search_len)) {
5545                 err |= CSUM_ITEM_MISSING;
5546                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5547                       root->objectid, fkey->objectid, fkey->offset,
5548                       csum_found, search_len);
5549         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5550                 err |= ODD_CSUM_ITEM;
5551                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5552                       root->objectid, fkey->objectid, fkey->offset, csum_found);
5553         }
5554
5555         /* Check EXTENT_DATA hole */
5556         if (!no_holes && *end != fkey->offset) {
5557                 if (repair)
5558                         ret = punch_extent_hole(root, fkey->objectid,
5559                                                 *end, fkey->offset - *end);
5560                 if (!repair || ret) {
5561                         err |= FILE_EXTENT_ERROR;
5562                         error(
5563 "root %llu EXTENT_DATA[%llu %llu] gap exists, expected: EXTENT_DATA[%llu %llu]",
5564                                 root->objectid, fkey->objectid, fkey->offset,
5565                                 fkey->objectid, *end);
5566                 }
5567         }
5568
5569         *end += extent_num_bytes;
5570         if (!is_hole)
5571                 *size += extent_num_bytes;
5572
5573         return err;
5574 }
5575
5576 /*
5577  * Set inode item nbytes to @nbytes
5578  *
5579  * Returns  0     on success
5580  * Returns  != 0  on error
5581  */
5582 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5583                                       struct btrfs_path *path,
5584                                       u64 ino, u64 nbytes)
5585 {
5586         struct btrfs_trans_handle *trans;
5587         struct btrfs_inode_item *ii;
5588         struct btrfs_key key;
5589         struct btrfs_key research_key;
5590         int err = 0;
5591         int ret;
5592
5593         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5594
5595         key.objectid = ino;
5596         key.type = BTRFS_INODE_ITEM_KEY;
5597         key.offset = 0;
5598
5599         trans = btrfs_start_transaction(root, 1);
5600         if (IS_ERR(trans)) {
5601                 ret = PTR_ERR(trans);
5602                 err |= ret;
5603                 goto out;
5604         }
5605
5606         btrfs_release_path(path);
5607         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5608         if (ret > 0)
5609                 ret = -ENOENT;
5610         if (ret) {
5611                 err |= ret;
5612                 goto fail;
5613         }
5614
5615         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5616                             struct btrfs_inode_item);
5617         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5618         btrfs_mark_buffer_dirty(path->nodes[0]);
5619 fail:
5620         btrfs_commit_transaction(trans, root);
5621 out:
5622         if (ret)
5623                 error("failed to set nbytes in inode %llu root %llu",
5624                       ino, root->root_key.objectid);
5625         else
5626                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5627                        root->root_key.objectid, nbytes);
5628
5629         /* research path */
5630         btrfs_release_path(path);
5631         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5632         err |= ret;
5633
5634         return err;
5635 }
5636
5637 /*
5638  * Set directory inode isize to @isize.
5639  *
5640  * Returns 0     on success.
5641  * Returns != 0  on error.
5642  */
5643 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5644                                    struct btrfs_path *path,
5645                                    u64 ino, u64 isize)
5646 {
5647         struct btrfs_trans_handle *trans;
5648         struct btrfs_inode_item *ii;
5649         struct btrfs_key key;
5650         struct btrfs_key research_key;
5651         int ret;
5652         int err = 0;
5653
5654         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5655
5656         key.objectid = ino;
5657         key.type = BTRFS_INODE_ITEM_KEY;
5658         key.offset = 0;
5659
5660         trans = btrfs_start_transaction(root, 1);
5661         if (IS_ERR(trans)) {
5662                 ret = PTR_ERR(trans);
5663                 err |= ret;
5664                 goto out;
5665         }
5666
5667         btrfs_release_path(path);
5668         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5669         if (ret > 0)
5670                 ret = -ENOENT;
5671         if (ret) {
5672                 err |= ret;
5673                 goto fail;
5674         }
5675
5676         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5677                             struct btrfs_inode_item);
5678         btrfs_set_inode_size(path->nodes[0], ii, isize);
5679         btrfs_mark_buffer_dirty(path->nodes[0]);
5680 fail:
5681         btrfs_commit_transaction(trans, root);
5682 out:
5683         if (ret)
5684                 error("failed to set isize in inode %llu root %llu",
5685                       ino, root->root_key.objectid);
5686         else
5687                 printf("Set isize in inode %llu root %llu to %llu\n",
5688                        ino, root->root_key.objectid, isize);
5689
5690         btrfs_release_path(path);
5691         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5692         err |= ret;
5693
5694         return err;
5695 }
5696
5697 /*
5698  * Wrapper function for btrfs_add_orphan_item().
5699  *
5700  * Returns 0     on success.
5701  * Returns != 0  on error.
5702  */
5703 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5704                                            struct btrfs_path *path, u64 ino)
5705 {
5706         struct btrfs_trans_handle *trans;
5707         struct btrfs_key research_key;
5708         int ret;
5709         int err = 0;
5710
5711         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5712
5713         trans = btrfs_start_transaction(root, 1);
5714         if (IS_ERR(trans)) {
5715                 ret = PTR_ERR(trans);
5716                 err |= ret;
5717                 goto out;
5718         }
5719
5720         btrfs_release_path(path);
5721         ret = btrfs_add_orphan_item(trans, root, path, ino);
5722         err |= ret;
5723         btrfs_commit_transaction(trans, root);
5724 out:
5725         if (ret)
5726                 error("failed to add inode %llu as orphan item root %llu",
5727                       ino, root->root_key.objectid);
5728         else
5729                 printf("Added inode %llu as orphan item root %llu\n",
5730                        ino, root->root_key.objectid);
5731
5732         btrfs_release_path(path);
5733         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5734         err |= ret;
5735
5736         return err;
5737 }
5738
5739 /* Set inode_item nlink to @ref_count.
5740  * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
5741  *
5742  * Returns 0 on success
5743  */
5744 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
5745                                       struct btrfs_path *path, u64 ino,
5746                                       const char *name, u32 namelen,
5747                                       u64 ref_count, u8 filetype, u64 *nlink)
5748 {
5749         struct btrfs_trans_handle *trans;
5750         struct btrfs_inode_item *ii;
5751         struct btrfs_key key;
5752         struct btrfs_key old_key;
5753         char namebuf[BTRFS_NAME_LEN] = {0};
5754         int name_len;
5755         int ret;
5756         int ret2;
5757
5758         /* save the key */
5759         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
5760
5761         if (name && namelen) {
5762                 ASSERT(namelen <= BTRFS_NAME_LEN);
5763                 memcpy(namebuf, name, namelen);
5764                 name_len = namelen;
5765         } else {
5766                 sprintf(namebuf, "%llu", ino);
5767                 name_len = count_digits(ino);
5768                 printf("Can't find file name for inode %llu, use %s instead\n",
5769                        ino, namebuf);
5770         }
5771
5772         trans = btrfs_start_transaction(root, 1);
5773         if (IS_ERR(trans)) {
5774                 ret = PTR_ERR(trans);
5775                 goto out;
5776         }
5777
5778         btrfs_release_path(path);
5779         /* if refs is 0, put it into lostfound */
5780         if (ref_count == 0) {
5781                 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
5782                                               name_len, filetype, &ref_count);
5783                 if (ret)
5784                         goto fail;
5785         }
5786
5787         /* reset inode_item's nlink to ref_count */
5788         key.objectid = ino;
5789         key.type = BTRFS_INODE_ITEM_KEY;
5790         key.offset = 0;
5791
5792         btrfs_release_path(path);
5793         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5794         if (ret > 0)
5795                 ret = -ENOENT;
5796         if (ret)
5797                 goto fail;
5798
5799         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5800                             struct btrfs_inode_item);
5801         btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
5802         btrfs_mark_buffer_dirty(path->nodes[0]);
5803
5804         if (nlink)
5805                 *nlink = ref_count;
5806 fail:
5807         btrfs_commit_transaction(trans, root);
5808 out:
5809         if (ret)
5810                 error(
5811         "fail to repair nlink of inode %llu root %llu name %s filetype %u",
5812                        root->objectid, ino, namebuf, filetype);
5813         else
5814                 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
5815                        root->objectid, ino, namebuf, filetype);
5816
5817         /* research */
5818         btrfs_release_path(path);
5819         ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
5820         if (ret2 < 0)
5821                 return ret |= ret2;
5822         return ret;
5823 }
5824
5825 /*
5826  * Check INODE_ITEM and related ITEMs (the same inode number)
5827  * 1. check link count
5828  * 2. check inode ref/extref
5829  * 3. check dir item/index
5830  *
5831  * @ext_ref:    the EXTENDED_IREF feature
5832  *
5833  * Return 0 if no error occurred.
5834  * Return >0 for error or hit the traversal is done(by error bitmap)
5835  */
5836 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5837                             unsigned int ext_ref)
5838 {
5839         struct extent_buffer *node;
5840         struct btrfs_inode_item *ii;
5841         struct btrfs_key key;
5842         struct btrfs_key last_key;
5843         u64 inode_id;
5844         u32 mode;
5845         u64 nlink;
5846         u64 nbytes;
5847         u64 isize;
5848         u64 size = 0;
5849         u64 refs = 0;
5850         u64 extent_end = 0;
5851         u64 extent_size = 0;
5852         unsigned int dir;
5853         unsigned int nodatasum;
5854         int slot;
5855         int ret;
5856         int err = 0;
5857         char namebuf[BTRFS_NAME_LEN] = {0};
5858         u32 name_len = 0;
5859
5860         node = path->nodes[0];
5861         slot = path->slots[0];
5862
5863         btrfs_item_key_to_cpu(node, &key, slot);
5864         inode_id = key.objectid;
5865
5866         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5867                 ret = btrfs_next_item(root, path);
5868                 if (ret > 0)
5869                         err |= LAST_ITEM;
5870                 return err;
5871         }
5872
5873         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5874         isize = btrfs_inode_size(node, ii);
5875         nbytes = btrfs_inode_nbytes(node, ii);
5876         mode = btrfs_inode_mode(node, ii);
5877         dir = imode_to_type(mode) == BTRFS_FT_DIR;
5878         nlink = btrfs_inode_nlink(node, ii);
5879         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5880
5881         while (1) {
5882                 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
5883                 ret = btrfs_next_item(root, path);
5884                 if (ret < 0) {
5885                         /* out will fill 'err' rusing current statistics */
5886                         goto out;
5887                 } else if (ret > 0) {
5888                         err |= LAST_ITEM;
5889                         goto out;
5890                 }
5891
5892                 node = path->nodes[0];
5893                 slot = path->slots[0];
5894                 btrfs_item_key_to_cpu(node, &key, slot);
5895                 if (key.objectid != inode_id)
5896                         goto out;
5897
5898                 switch (key.type) {
5899                 case BTRFS_INODE_REF_KEY:
5900                         ret = check_inode_ref(root, &key, path, namebuf,
5901                                               &name_len, &refs, mode);
5902                         err |= ret;
5903                         break;
5904                 case BTRFS_INODE_EXTREF_KEY:
5905                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5906                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
5907                                         root->objectid, key.objectid,
5908                                         key.offset);
5909                         ret = check_inode_extref(root, &key, node, slot, &refs,
5910                                                  mode);
5911                         err |= ret;
5912                         break;
5913                 case BTRFS_DIR_ITEM_KEY:
5914                 case BTRFS_DIR_INDEX_KEY:
5915                         if (!dir) {
5916                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5917                                         root->objectid, inode_id,
5918                                         imode_to_type(mode), key.objectid,
5919                                         key.offset);
5920                         }
5921                         ret = check_dir_item(root, &key, path, &size, ext_ref);
5922                         err |= ret;
5923                         break;
5924                 case BTRFS_EXTENT_DATA_KEY:
5925                         if (dir) {
5926                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5927                                         root->objectid, inode_id, key.objectid,
5928                                         key.offset);
5929                         }
5930                         ret = check_file_extent(root, &key, node, slot,
5931                                                 nodatasum, &extent_size,
5932                                                 &extent_end);
5933                         err |= ret;
5934                         break;
5935                 case BTRFS_XATTR_ITEM_KEY:
5936                         break;
5937                 default:
5938                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5939                               key.objectid, key.type, key.offset);
5940                 }
5941         }
5942
5943 out:
5944         if (err & LAST_ITEM) {
5945                 btrfs_release_path(path);
5946                 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
5947                 if (ret)
5948                         return err;
5949         }
5950
5951         /* verify INODE_ITEM nlink/isize/nbytes */
5952         if (dir) {
5953                 if (repair && (err & DIR_COUNT_AGAIN)) {
5954                         err &= ~DIR_COUNT_AGAIN;
5955                         count_dir_isize(root, inode_id, &size);
5956                 }
5957
5958                 if ((nlink != 1 || refs != 1) && repair) {
5959                         ret = repair_inode_nlinks_lowmem(root, path, inode_id,
5960                                 namebuf, name_len, refs, imode_to_type(mode),
5961                                 &nlink);
5962                 }
5963
5964                 if (nlink != 1) {
5965                         err |= LINK_COUNT_ERROR;
5966                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5967                               root->objectid, inode_id, nlink);
5968                 }
5969
5970                 /*
5971                  * Just a warning, as dir inode nbytes is just an
5972                  * instructive value.
5973                  */
5974                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5975                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5976                                 root->objectid, inode_id,
5977                                 root->fs_info->nodesize);
5978                 }
5979
5980                 if (isize != size) {
5981                         if (repair)
5982                                 ret = repair_dir_isize_lowmem(root, path,
5983                                                               inode_id, size);
5984                         if (!repair || ret) {
5985                                 err |= ISIZE_ERROR;
5986                                 error(
5987                 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5988                                       root->objectid, inode_id, isize, size);
5989                         }
5990                 }
5991         } else {
5992                 if (nlink != refs) {
5993                         if (repair)
5994                                 ret = repair_inode_nlinks_lowmem(root, path,
5995                                          inode_id, namebuf, name_len, refs,
5996                                          imode_to_type(mode), &nlink);
5997                         if (!repair || ret) {
5998                                 err |= LINK_COUNT_ERROR;
5999                                 error(
6000                 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
6001                                       root->objectid, inode_id, nlink, refs);
6002                         }
6003                 } else if (!nlink) {
6004                         if (repair)
6005                                 ret = repair_inode_orphan_item_lowmem(root,
6006                                                               path, inode_id);
6007                         if (!repair || ret) {
6008                                 err |= ORPHAN_ITEM;
6009                                 error("root %llu INODE[%llu] is orphan item",
6010                                       root->objectid, inode_id);
6011                         }
6012                 }
6013
6014                 if (!nbytes && !no_holes && extent_end < isize) {
6015                         if (repair)
6016                                 ret = punch_extent_hole(root, inode_id,
6017                                                 extent_end, isize - extent_end);
6018                         if (!repair || ret) {
6019                                 err |= NBYTES_ERROR;
6020                                 error(
6021         "root %llu INODE[%llu] size %llu should have a file extent hole",
6022                                       root->objectid, inode_id, isize);
6023                         }
6024                 }
6025
6026                 if (nbytes != extent_size) {
6027                         if (repair)
6028                                 ret = repair_inode_nbytes_lowmem(root, path,
6029                                                          inode_id, extent_size);
6030                         if (!repair || ret) {
6031                                 err |= NBYTES_ERROR;
6032                                 error(
6033         "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
6034                                       root->objectid, inode_id, nbytes,
6035                                       extent_size);
6036                         }
6037                 }
6038         }
6039
6040         if (err & LAST_ITEM)
6041                 btrfs_next_item(root, path);
6042         return err;
6043 }
6044
6045 /*
6046  * Insert the missing inode item and inode ref.
6047  *
6048  * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
6049  * Root dir should be handled specially because root dir is the root of fs.
6050  *
6051  * returns err (>0 or 0) after repair
6052  */
6053 static int repair_fs_first_inode(struct btrfs_root *root, int err)
6054 {
6055         struct btrfs_trans_handle *trans;
6056         struct btrfs_key key;
6057         struct btrfs_path path;
6058         int filetype = BTRFS_FT_DIR;
6059         int ret = 0;
6060
6061         btrfs_init_path(&path);
6062
6063         if (err & INODE_REF_MISSING) {
6064                 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6065                 key.type = BTRFS_INODE_REF_KEY;
6066                 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6067
6068                 trans = btrfs_start_transaction(root, 1);
6069                 if (IS_ERR(trans)) {
6070                         ret = PTR_ERR(trans);
6071                         goto out;
6072                 }
6073
6074                 btrfs_release_path(&path);
6075                 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
6076                 if (ret)
6077                         goto trans_fail;
6078
6079                 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
6080                                              BTRFS_FIRST_FREE_OBJECTID,
6081                                              BTRFS_FIRST_FREE_OBJECTID, 0);
6082                 if (ret)
6083                         goto trans_fail;
6084
6085                 printf("Add INODE_REF[%llu %llu] name %s\n",
6086                        BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
6087                        "..");
6088                 err &= ~INODE_REF_MISSING;
6089 trans_fail:
6090                 if (ret)
6091                         error("fail to insert first inode's ref");
6092                 btrfs_commit_transaction(trans, root);
6093         }
6094
6095         if (err & INODE_ITEM_MISSING) {
6096                 ret = repair_inode_item_missing(root,
6097                                         BTRFS_FIRST_FREE_OBJECTID, filetype);
6098                 if (ret)
6099                         goto out;
6100                 err &= ~INODE_ITEM_MISSING;
6101         }
6102 out:
6103         if (ret)
6104                 error("fail to repair first inode");
6105         btrfs_release_path(&path);
6106         return err;
6107 }
6108
6109 /*
6110  * check first root dir's inode_item and inode_ref
6111  *
6112  * returns 0 means no error
6113  * returns >0 means error
6114  * returns <0 means fatal error
6115  */
6116 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6117 {
6118         struct btrfs_path path;
6119         struct btrfs_key key;
6120         struct btrfs_inode_item *ii;
6121         u64 index;
6122         u32 mode;
6123         int err = 0;
6124         int ret;
6125
6126         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6127         key.type = BTRFS_INODE_ITEM_KEY;
6128         key.offset = 0;
6129
6130         /* For root being dropped, we don't need to check first inode */
6131         if (btrfs_root_refs(&root->root_item) == 0 &&
6132             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6133             BTRFS_FIRST_FREE_OBJECTID)
6134                 return 0;
6135
6136         btrfs_init_path(&path);
6137         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6138         if (ret < 0)
6139                 goto out;
6140         if (ret > 0) {
6141                 ret = 0;
6142                 err |= INODE_ITEM_MISSING;
6143         } else {
6144                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6145                                     struct btrfs_inode_item);
6146                 mode = btrfs_inode_mode(path.nodes[0], ii);
6147                 if (imode_to_type(mode) != BTRFS_FT_DIR)
6148                         err |= INODE_ITEM_MISMATCH;
6149         }
6150
6151         /* lookup first inode ref */
6152         key.offset = BTRFS_FIRST_FREE_OBJECTID;
6153         key.type = BTRFS_INODE_REF_KEY;
6154         /* special index value */
6155         index = 0;
6156
6157         ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6158         if (ret < 0)
6159                 goto out;
6160         err |= ret;
6161
6162 out:
6163         btrfs_release_path(&path);
6164
6165         if (err && repair)
6166                 err = repair_fs_first_inode(root, err);
6167
6168         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6169                 error("root dir INODE_ITEM is %s",
6170                       err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6171         if (err & INODE_REF_MISSING)
6172                 error("root dir INODE_REF is missing");
6173
6174         return ret < 0 ? ret : err;
6175 }
6176
6177 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6178                                                 u64 parent, u64 root)
6179 {
6180         struct rb_node *node;
6181         struct tree_backref *back = NULL;
6182         struct tree_backref match = {
6183                 .node = {
6184                         .is_data = 0,
6185                 },
6186         };
6187
6188         if (parent) {
6189                 match.parent = parent;
6190                 match.node.full_backref = 1;
6191         } else {
6192                 match.root = root;
6193         }
6194
6195         node = rb_search(&rec->backref_tree, &match.node.node,
6196                          (rb_compare_keys)compare_extent_backref, NULL);
6197         if (node)
6198                 back = to_tree_backref(rb_node_to_extent_backref(node));
6199
6200         return back;
6201 }
6202
6203 static struct data_backref *find_data_backref(struct extent_record *rec,
6204                                                 u64 parent, u64 root,
6205                                                 u64 owner, u64 offset,
6206                                                 int found_ref,
6207                                                 u64 disk_bytenr, u64 bytes)
6208 {
6209         struct rb_node *node;
6210         struct data_backref *back = NULL;
6211         struct data_backref match = {
6212                 .node = {
6213                         .is_data = 1,
6214                 },
6215                 .owner = owner,
6216                 .offset = offset,
6217                 .bytes = bytes,
6218                 .found_ref = found_ref,
6219                 .disk_bytenr = disk_bytenr,
6220         };
6221
6222         if (parent) {
6223                 match.parent = parent;
6224                 match.node.full_backref = 1;
6225         } else {
6226                 match.root = root;
6227         }
6228
6229         node = rb_search(&rec->backref_tree, &match.node.node,
6230                          (rb_compare_keys)compare_extent_backref, NULL);
6231         if (node)
6232                 back = to_data_backref(rb_node_to_extent_backref(node));
6233
6234         return back;
6235 }
6236 /*
6237  * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6238  * blocks and integrity of fs tree items.
6239  *
6240  * @root:         the root of the tree to be checked.
6241  * @ext_ref       feature EXTENDED_IREF is enable or not.
6242  * @account       if NOT 0 means check the tree (including tree)'s treeblocks.
6243  *                otherwise means check fs tree(s) items relationship and
6244  *                @root MUST be a fs tree root.
6245  * Returns 0      represents OK.
6246  * Returns not 0  represents error.
6247  */
6248 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6249                             struct btrfs_root *root, unsigned int ext_ref,
6250                             int check_all)
6251
6252 {
6253         struct btrfs_path path;
6254         struct node_refs nrefs;
6255         struct btrfs_root_item *root_item = &root->root_item;
6256         int ret;
6257         int level;
6258         int err = 0;
6259
6260         memset(&nrefs, 0, sizeof(nrefs));
6261         if (!check_all) {
6262                 /*
6263                  * We need to manually check the first inode item (256)
6264                  * As the following traversal function will only start from
6265                  * the first inode item in the leaf, if inode item (256) is
6266                  * missing we will skip it forever.
6267                  */
6268                 ret = check_fs_first_inode(root, ext_ref);
6269                 if (ret < 0)
6270                         return ret;
6271         }
6272
6273
6274         level = btrfs_header_level(root->node);
6275         btrfs_init_path(&path);
6276
6277         if (btrfs_root_refs(root_item) > 0 ||
6278             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6279                 path.nodes[level] = root->node;
6280                 path.slots[level] = 0;
6281                 extent_buffer_get(root->node);
6282         } else {
6283                 struct btrfs_key key;
6284
6285                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6286                 level = root_item->drop_level;
6287                 path.lowest_level = level;
6288                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6289                 if (ret < 0)
6290                         goto out;
6291                 ret = 0;
6292         }
6293
6294         while (1) {
6295                 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6296                                         ext_ref, check_all);
6297
6298                 err |= !!ret;
6299
6300                 /* if ret is negative, walk shall stop */
6301                 if (ret < 0) {
6302                         ret = err;
6303                         break;
6304                 }
6305
6306                 ret = walk_up_tree_v2(root, &path, &level);
6307                 if (ret != 0) {
6308                         /* Normal exit, reset ret to err */
6309                         ret = err;
6310                         break;
6311                 }
6312         }
6313
6314 out:
6315         btrfs_release_path(&path);
6316         return ret;
6317 }
6318
6319 /*
6320  * Iterate all items in the tree and call check_inode_item() to check.
6321  *
6322  * @root:       the root of the tree to be checked.
6323  * @ext_ref:    the EXTENDED_IREF feature
6324  *
6325  * Return 0 if no error found.
6326  * Return <0 for error.
6327  */
6328 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6329 {
6330         reset_cached_block_groups(root->fs_info);
6331         return check_btrfs_root(NULL, root, ext_ref, 0);
6332 }
6333
6334 /*
6335  * Find the relative ref for root_ref and root_backref.
6336  *
6337  * @root:       the root of the root tree.
6338  * @ref_key:    the key of the root ref.
6339  *
6340  * Return 0 if no error occurred.
6341  */
6342 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6343                           struct extent_buffer *node, int slot)
6344 {
6345         struct btrfs_path path;
6346         struct btrfs_key key;
6347         struct btrfs_root_ref *ref;
6348         struct btrfs_root_ref *backref;
6349         char ref_name[BTRFS_NAME_LEN] = {0};
6350         char backref_name[BTRFS_NAME_LEN] = {0};
6351         u64 ref_dirid;
6352         u64 ref_seq;
6353         u32 ref_namelen;
6354         u64 backref_dirid;
6355         u64 backref_seq;
6356         u32 backref_namelen;
6357         u32 len;
6358         int ret;
6359         int err = 0;
6360
6361         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6362         ref_dirid = btrfs_root_ref_dirid(node, ref);
6363         ref_seq = btrfs_root_ref_sequence(node, ref);
6364         ref_namelen = btrfs_root_ref_name_len(node, ref);
6365
6366         if (ref_namelen <= BTRFS_NAME_LEN) {
6367                 len = ref_namelen;
6368         } else {
6369                 len = BTRFS_NAME_LEN;
6370                 warning("%s[%llu %llu] ref_name too long",
6371                         ref_key->type == BTRFS_ROOT_REF_KEY ?
6372                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
6373                         ref_key->offset);
6374         }
6375         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6376
6377         /* Find relative root_ref */
6378         key.objectid = ref_key->offset;
6379         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6380         key.offset = ref_key->objectid;
6381
6382         btrfs_init_path(&path);
6383         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6384         if (ret) {
6385                 err |= ROOT_REF_MISSING;
6386                 error("%s[%llu %llu] couldn't find relative ref",
6387                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6388                       "ROOT_REF" : "ROOT_BACKREF",
6389                       ref_key->objectid, ref_key->offset);
6390                 goto out;
6391         }
6392
6393         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6394                                  struct btrfs_root_ref);
6395         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6396         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6397         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6398
6399         if (backref_namelen <= BTRFS_NAME_LEN) {
6400                 len = backref_namelen;
6401         } else {
6402                 len = BTRFS_NAME_LEN;
6403                 warning("%s[%llu %llu] ref_name too long",
6404                         key.type == BTRFS_ROOT_REF_KEY ?
6405                         "ROOT_REF" : "ROOT_BACKREF",
6406                         key.objectid, key.offset);
6407         }
6408         read_extent_buffer(path.nodes[0], backref_name,
6409                            (unsigned long)(backref + 1), len);
6410
6411         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6412             ref_namelen != backref_namelen ||
6413             strncmp(ref_name, backref_name, len)) {
6414                 err |= ROOT_REF_MISMATCH;
6415                 error("%s[%llu %llu] mismatch relative ref",
6416                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6417                       "ROOT_REF" : "ROOT_BACKREF",
6418                       ref_key->objectid, ref_key->offset);
6419         }
6420 out:
6421         btrfs_release_path(&path);
6422         return err;
6423 }
6424
6425 /*
6426  * Check all fs/file tree in low_memory mode.
6427  *
6428  * 1. for fs tree root item, call check_fs_root_v2()
6429  * 2. for fs tree root ref/backref, call check_root_ref()
6430  *
6431  * Return 0 if no error occurred.
6432  */
6433 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6434 {
6435         struct btrfs_root *tree_root = fs_info->tree_root;
6436         struct btrfs_root *cur_root = NULL;
6437         struct btrfs_path path;
6438         struct btrfs_key key;
6439         struct extent_buffer *node;
6440         unsigned int ext_ref;
6441         int slot;
6442         int ret;
6443         int err = 0;
6444
6445         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6446
6447         btrfs_init_path(&path);
6448         key.objectid = BTRFS_FS_TREE_OBJECTID;
6449         key.offset = 0;
6450         key.type = BTRFS_ROOT_ITEM_KEY;
6451
6452         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6453         if (ret < 0) {
6454                 err = ret;
6455                 goto out;
6456         } else if (ret > 0) {
6457                 err = -ENOENT;
6458                 goto out;
6459         }
6460
6461         while (1) {
6462                 node = path.nodes[0];
6463                 slot = path.slots[0];
6464                 btrfs_item_key_to_cpu(node, &key, slot);
6465                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6466                         goto out;
6467                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6468                     fs_root_objectid(key.objectid)) {
6469                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6470                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6471                                                                        &key);
6472                         } else {
6473                                 key.offset = (u64)-1;
6474                                 cur_root = btrfs_read_fs_root(fs_info, &key);
6475                         }
6476
6477                         if (IS_ERR(cur_root)) {
6478                                 error("Fail to read fs/subvol tree: %lld",
6479                                       key.objectid);
6480                                 err = -EIO;
6481                                 goto next;
6482                         }
6483
6484                         ret = check_fs_root_v2(cur_root, ext_ref);
6485                         err |= ret;
6486
6487                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6488                                 btrfs_free_fs_root(cur_root);
6489                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6490                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
6491                         ret = check_root_ref(tree_root, &key, node, slot);
6492                         err |= ret;
6493                 }
6494 next:
6495                 ret = btrfs_next_item(tree_root, &path);
6496                 if (ret > 0)
6497                         goto out;
6498                 if (ret < 0) {
6499                         err = ret;
6500                         goto out;
6501                 }
6502         }
6503
6504 out:
6505         btrfs_release_path(&path);
6506         return err;
6507 }
6508
6509 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6510                           struct cache_tree *root_cache)
6511 {
6512         int ret;
6513
6514         if (!ctx.progress_enabled)
6515                 fprintf(stderr, "checking fs roots\n");
6516         if (check_mode == CHECK_MODE_LOWMEM)
6517                 ret = check_fs_roots_v2(fs_info);
6518         else
6519                 ret = check_fs_roots(fs_info, root_cache);
6520
6521         return ret;
6522 }
6523
/*
 * Verify that every backref recorded for @rec is consistent.
 *
 * @rec:         extent record whose backref tree is examined.
 * @print_errs:  when 0, return at the first inconsistency without printing;
 *               otherwise report every inconsistency on stderr.
 *
 * Returns 0 when all backrefs are consistent and the summed reference count
 * equals rec->refs, 1 otherwise.
 */
static int all_backpointers_checked(struct extent_record *rec, int print_errs)
{
	struct extent_backref *back, *tmp;
	struct tree_backref *tback;
	struct data_backref *dback;
	u64 found = 0;
	int err = 0;

	rbtree_postorder_for_each_entry_safe(back, tmp,
					     &rec->backref_tree, node) {
		/* The backref has no counterpart in the extent tree */
		if (!back->found_extent_tree) {
			err = 1;
			if (!print_errs)
				goto out;
			if (back->is_data) {
				dback = to_data_backref(back);
				fprintf(stderr, "Data backref %llu %s %llu"
					" owner %llu offset %llu num_refs %lu"
					" not found in extent tree\n",
					(unsigned long long)rec->start,
					back->full_backref ?
					"parent" : "root",
					back->full_backref ?
					(unsigned long long)dback->parent:
					(unsigned long long)dback->root,
					(unsigned long long)dback->owner,
					(unsigned long long)dback->offset,
					(unsigned long)dback->num_refs);
			} else {
				tback = to_tree_backref(back);
				fprintf(stderr, "Tree backref %llu parent %llu"
					" root %llu not found in extent tree\n",
					(unsigned long long)rec->start,
					(unsigned long long)tback->parent,
					(unsigned long long)tback->root);
			}
		}
		/* A tree backref whose found_ref was never set */
		if (!back->is_data && !back->found_ref) {
			err = 1;
			if (!print_errs)
				goto out;
			tback = to_tree_backref(back);
			fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
				(unsigned long long)rec->start,
				back->full_backref ? "parent" : "root",
				back->full_backref ?
				(unsigned long long)tback->parent :
				(unsigned long long)tback->root, back);
		}
		if (back->is_data) {
			dback = to_data_backref(back);
			/* Per-backref count: found references vs. num_refs */
			if (dback->found_ref != dback->num_refs) {
				err = 1;
				if (!print_errs)
					goto out;
				fprintf(stderr, "Incorrect local backref count"
					" on %llu %s %llu owner %llu"
					" offset %llu found %u wanted %u back %p\n",
					(unsigned long long)rec->start,
					back->full_backref ?
					"parent" : "root",
					back->full_backref ?
					(unsigned long long)dback->parent:
					(unsigned long long)dback->root,
					(unsigned long long)dback->owner,
					(unsigned long long)dback->offset,
					dback->found_ref, dback->num_refs, back);
			}
			/* The data backref must start where the record starts */
			if (dback->disk_bytenr != rec->start) {
				err = 1;
				if (!print_errs)
					goto out;
				fprintf(stderr, "Backref disk bytenr does not"
					" match extent record, bytenr=%llu, "
					"ref bytenr=%llu\n",
					(unsigned long long)rec->start,
					(unsigned long long)dback->disk_bytenr);
			}

			/* ... and must have the same length as the record */
			if (dback->bytes != rec->nr) {
				err = 1;
				if (!print_errs)
					goto out;
				fprintf(stderr, "Backref bytes do not match "
					"extent backref, bytenr=%llu, ref "
					"bytes=%llu, backref bytes=%llu\n",
					(unsigned long long)rec->start,
					(unsigned long long)rec->nr,
					(unsigned long long)dback->bytes);
			}
		}
		/*
		 * Accumulate the global count: a tree backref counts once, a
		 * data backref counts as many times as it was found.
		 */
		if (!back->is_data) {
			found += 1;
		} else {
			dback = to_data_backref(back);
			found += dback->found_ref;
		}
	}
	/* Summed count over all backrefs vs. the record's reference count */
	if (found != rec->refs) {
		err = 1;
		if (!print_errs)
			goto out;
		fprintf(stderr, "Incorrect global backref count "
			"on %llu found %llu wanted %llu\n",
			(unsigned long long)rec->start,
			(unsigned long long)found,
			(unsigned long long)rec->refs);
	}
out:
	return err;
}
6635
/*
 * rb_free_nodes() callback: free the extent_backref that embeds @node.
 */
static void __free_one_backref(struct rb_node *node)
{
	free(rb_node_to_extent_backref(node));
}
6642
6643 static void free_all_extent_backrefs(struct extent_record *rec)
6644 {
6645         rb_free_nodes(&rec->backref_tree, __free_one_backref);
6646 }
6647
6648 static void free_extent_record_cache(struct cache_tree *extent_cache)
6649 {
6650         struct cache_extent *cache;
6651         struct extent_record *rec;
6652
6653         while (1) {
6654                 cache = first_cache_extent(extent_cache);
6655                 if (!cache)
6656                         break;
6657                 rec = container_of(cache, struct extent_record, cache);
6658                 remove_cache_extent(extent_cache, cache);
6659                 free_all_extent_backrefs(rec);
6660                 free(rec);
6661         }
6662 }
6663
6664 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6665                                  struct extent_record *rec)
6666 {
6667         if (rec->content_checked && rec->owner_ref_checked &&
6668             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6669             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6670             !rec->bad_full_backref && !rec->crossing_stripes &&
6671             !rec->wrong_chunk_type) {
6672                 remove_cache_extent(extent_cache, &rec->cache);
6673                 free_all_extent_backrefs(rec);
6674                 list_del_init(&rec->list);
6675                 free(rec);
6676         }
6677         return 0;
6678 }
6679
6680 static int check_owner_ref(struct btrfs_root *root,
6681                             struct extent_record *rec,
6682                             struct extent_buffer *buf)
6683 {
6684         struct extent_backref *node, *tmp;
6685         struct tree_backref *back;
6686         struct btrfs_root *ref_root;
6687         struct btrfs_key key;
6688         struct btrfs_path path;
6689         struct extent_buffer *parent;
6690         int level;
6691         int found = 0;
6692         int ret;
6693
6694         rbtree_postorder_for_each_entry_safe(node, tmp,
6695                                              &rec->backref_tree, node) {
6696                 if (node->is_data)
6697                         continue;
6698                 if (!node->found_ref)
6699                         continue;
6700                 if (node->full_backref)
6701                         continue;
6702                 back = to_tree_backref(node);
6703                 if (btrfs_header_owner(buf) == back->root)
6704                         return 0;
6705         }
6706         BUG_ON(rec->is_root);
6707
6708         /* try to find the block by search corresponding fs tree */
6709         key.objectid = btrfs_header_owner(buf);
6710         key.type = BTRFS_ROOT_ITEM_KEY;
6711         key.offset = (u64)-1;
6712
6713         ref_root = btrfs_read_fs_root(root->fs_info, &key);
6714         if (IS_ERR(ref_root))
6715                 return 1;
6716
6717         level = btrfs_header_level(buf);
6718         if (level == 0)
6719                 btrfs_item_key_to_cpu(buf, &key, 0);
6720         else
6721                 btrfs_node_key_to_cpu(buf, &key, 0);
6722
6723         btrfs_init_path(&path);
6724         path.lowest_level = level + 1;
6725         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6726         if (ret < 0)
6727                 return 0;
6728
6729         parent = path.nodes[level + 1];
6730         if (parent && buf->start == btrfs_node_blockptr(parent,
6731                                                         path.slots[level + 1]))
6732                 found = 1;
6733
6734         btrfs_release_path(&path);
6735         return found ? 0 : 1;
6736 }
6737
6738 static int is_extent_tree_record(struct extent_record *rec)
6739 {
6740         struct extent_backref *node, *tmp;
6741         struct tree_backref *back;
6742         int is_extent = 0;
6743
6744         rbtree_postorder_for_each_entry_safe(node, tmp,
6745                                              &rec->backref_tree, node) {
6746                 if (node->is_data)
6747                         return 0;
6748                 back = to_tree_backref(node);
6749                 if (node->full_backref)
6750                         return 0;
6751                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
6752                         is_extent = 1;
6753         }
6754         return is_extent;
6755 }
6756
6757
6758 static int record_bad_block_io(struct btrfs_fs_info *info,
6759                                struct cache_tree *extent_cache,
6760                                u64 start, u64 len)
6761 {
6762         struct extent_record *rec;
6763         struct cache_extent *cache;
6764         struct btrfs_key key;
6765
6766         cache = lookup_cache_extent(extent_cache, start, len);
6767         if (!cache)
6768                 return 0;
6769
6770         rec = container_of(cache, struct extent_record, cache);
6771         if (!is_extent_tree_record(rec))
6772                 return 0;
6773
6774         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6775         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
6776 }
6777
/*
 * Swap the entries at @slot and @slot + 1 of tree block @buf.
 *
 * @root:  root the block belongs to, passed on to the key fixup helpers.
 * @path:  path containing @buf; path->slots[0] is overwritten in the leaf
 *         case.
 * @buf:   the node or leaf to modify.
 * @slot:  index of the first of the two adjacent entries.
 *
 * For a node (non-zero header level) the two key pointers are exchanged and,
 * if @slot is 0, the keys of the upper levels are fixed up.  For a leaf the
 * item data, offsets, sizes and keys are exchanged.
 *
 * Returns 0 on success or -ENOMEM if a temporary buffer cannot be allocated.
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
		       struct extent_buffer *buf, int slot)
{
	if (btrfs_header_level(buf)) {
		struct btrfs_key_ptr ptr1, ptr2;

		/* Read both key pointers, then write them back crosswise */
		read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
				   sizeof(struct btrfs_key_ptr));
		read_extent_buffer(buf, &ptr2,
				   btrfs_node_key_ptr_offset(slot + 1),
				   sizeof(struct btrfs_key_ptr));
		write_extent_buffer(buf, &ptr1,
				    btrfs_node_key_ptr_offset(slot + 1),
				    sizeof(struct btrfs_key_ptr));
		write_extent_buffer(buf, &ptr2,
				    btrfs_node_key_ptr_offset(slot),
				    sizeof(struct btrfs_key_ptr));
		if (slot == 0) {
			struct btrfs_disk_key key;
			/* First key of the node changed: update the parents */
			btrfs_node_key(buf, &key, 0);
			btrfs_fixup_low_keys(root, path, &key,
					     btrfs_header_level(buf) + 1);
		}
	} else {
		struct btrfs_item *item1, *item2;
		struct btrfs_key k1, k2;
		char *item1_data, *item2_data;
		u32 item1_offset, item2_offset, item1_size, item2_size;

		item1 = btrfs_item_nr(slot);
		item2 = btrfs_item_nr(slot + 1);
		btrfs_item_key_to_cpu(buf, &k1, slot);
		btrfs_item_key_to_cpu(buf, &k2, slot + 1);
		item1_offset = btrfs_item_offset(buf, item1);
		item2_offset = btrfs_item_offset(buf, item2);
		item1_size = btrfs_item_size(buf, item1);
		item2_size = btrfs_item_size(buf, item2);

		item1_data = malloc(item1_size);
		if (!item1_data)
			return -ENOMEM;
		item2_data = malloc(item2_size);
		if (!item2_data) {
			free(item1_data);
			return -ENOMEM;
		}

		read_extent_buffer(buf, item1_data, item1_offset, item1_size);
		read_extent_buffer(buf, item2_data, item2_offset, item2_size);

		/*
		 * NOTE(review): each write below uses the OTHER item's size
		 * as its length, so item2_size bytes are copied out of
		 * item1_data (allocated with item1_size) and vice versa.
		 * This looks like it reads past the smaller buffer when the
		 * two sizes differ - confirm.
		 */
		write_extent_buffer(buf, item1_data, item2_offset, item2_size);
		write_extent_buffer(buf, item2_data, item1_offset, item1_size);
		free(item1_data);
		free(item2_data);

		/* Exchange offset and size in the two item headers */
		btrfs_set_item_offset(buf, item1, item2_offset);
		btrfs_set_item_offset(buf, item2, item1_offset);
		btrfs_set_item_size(buf, item1, item2_size);
		btrfs_set_item_size(buf, item2, item1_size);

		/* Exchange the keys; the helper works on path->slots[0] */
		path->slots[0] = slot;
		btrfs_set_item_key_unsafe(root, path, &k2);
		path->slots[0] = slot + 1;
		btrfs_set_item_key_unsafe(root, path, &k1);
	}
	return 0;
}
6845
6846 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6847 {
6848         struct extent_buffer *buf;
6849         struct btrfs_key k1, k2;
6850         int i;
6851         int level = path->lowest_level;
6852         int ret = -EIO;
6853
6854         buf = path->nodes[level];
6855         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6856                 if (level) {
6857                         btrfs_node_key_to_cpu(buf, &k1, i);
6858                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
6859                 } else {
6860                         btrfs_item_key_to_cpu(buf, &k1, i);
6861                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
6862                 }
6863                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6864                         continue;
6865                 ret = swap_values(root, path, buf, i);
6866                 if (ret)
6867                         break;
6868                 btrfs_mark_buffer_dirty(buf);
6869                 i = 0;
6870         }
6871         return ret;
6872 }
6873
6874 static int delete_bogus_item(struct btrfs_root *root,
6875                              struct btrfs_path *path,
6876                              struct extent_buffer *buf, int slot)
6877 {
6878         struct btrfs_key key;
6879         int nritems = btrfs_header_nritems(buf);
6880
6881         btrfs_item_key_to_cpu(buf, &key, slot);
6882
6883         /* These are all the keys we can deal with missing. */
6884         if (key.type != BTRFS_DIR_INDEX_KEY &&
6885             key.type != BTRFS_EXTENT_ITEM_KEY &&
6886             key.type != BTRFS_METADATA_ITEM_KEY &&
6887             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6888             key.type != BTRFS_EXTENT_DATA_REF_KEY)
6889                 return -1;
6890
6891         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6892                (unsigned long long)key.objectid, key.type,
6893                (unsigned long long)key.offset, slot, buf->start);
6894         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6895                               btrfs_item_nr_offset(slot + 1),
6896                               sizeof(struct btrfs_item) *
6897                               (nritems - slot - 1));
6898         btrfs_set_header_nritems(buf, nritems - 1);
6899         if (slot == 0) {
6900                 struct btrfs_disk_key disk_key;
6901
6902                 btrfs_item_key(buf, &disk_key, 0);
6903                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6904         }
6905         btrfs_mark_buffer_dirty(buf);
6906         return 0;
6907 }
6908
/*
 * Repair the item data offsets of the leaf at path->nodes[0].
 *
 * @root:  root the leaf belongs to; its fs_info supplies the leaf data size.
 * @path:  path whose leaf is repaired; path->lowest_level must be 0.
 *
 * Item 0 is expected to end exactly at BTRFS_LEAF_DATA_SIZE and every later
 * item exactly where the previous item starts.  An item that ends short of
 * that is shifted up to close the gap.  An item that ends beyond it is
 * deleted if delete_bogus_item() accepts its key type, after which the scan
 * restarts; otherwise the leaf cannot be fixed.
 *
 * Returns 0 on success.  The unfixable case sets -EIO but then hits
 * BUG_ON(), so no error is actually returned to the caller.
 */
static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
{
	struct extent_buffer *buf;
	int i;
	int ret = 0;

	/* We should only get this for leaves */
	BUG_ON(path->lowest_level);
	buf = path->nodes[0];
again:
	for (i = 0; i < btrfs_header_nritems(buf); i++) {
		unsigned int shift = 0, offset;

		if (i == 0 && btrfs_item_end_nr(buf, i) !=
		    BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
			/* First item runs past the end of the leaf data area */
			if (btrfs_item_end_nr(buf, i) >
			    BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
				ret = delete_bogus_item(root, path, buf, i);
				if (!ret)
					goto again;
				fprintf(stderr, "item is off the end of the "
					"leaf, can't fix\n");
				ret = -EIO;
				break;
			}
			/* Gap between the first item and the end of the leaf */
			shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
				btrfs_item_end_nr(buf, i);
		} else if (i > 0 && btrfs_item_end_nr(buf, i) !=
			   btrfs_item_offset_nr(buf, i - 1)) {
			/* This item overlaps the data of the previous item */
			if (btrfs_item_end_nr(buf, i) >
			    btrfs_item_offset_nr(buf, i - 1)) {
				ret = delete_bogus_item(root, path, buf, i);
				if (!ret)
					goto again;
				fprintf(stderr, "items overlap, can't fix\n");
				ret = -EIO;
				break;
			}
			/* Gap between this item and the previous item */
			shift = btrfs_item_offset_nr(buf, i - 1) -
				btrfs_item_end_nr(buf, i);
		}
		if (!shift)
			continue;

		/* Move the item data up by the gap and record the new offset */
		printf("Shifting item nr %d by %u bytes in block %llu\n",
		       i, shift, (unsigned long long)buf->start);
		offset = btrfs_item_offset_nr(buf, i);
		memmove_extent_buffer(buf,
				      btrfs_leaf_data(buf) + offset + shift,
				      btrfs_leaf_data(buf) + offset,
				      btrfs_item_size_nr(buf, i));
		btrfs_set_item_offset(buf, btrfs_item_nr(i),
				      offset + shift);
		btrfs_mark_buffer_dirty(buf);
	}

	/*
	 * We may have moved things, in which case we want to exit so we don't
	 * write those changes out.  Once we have proper abort functionality in
	 * progs this can be changed to something nicer.
	 */
	BUG_ON(ret);
	return ret;
}
6973
6974 /*
6975  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
6976  * then just return -EIO.
6977  */
6978 static int try_to_fix_bad_block(struct btrfs_root *root,
6979                                 struct extent_buffer *buf,
6980                                 enum btrfs_tree_block_status status)
6981 {
6982         struct btrfs_trans_handle *trans;
6983         struct ulist *roots;
6984         struct ulist_node *node;
6985         struct btrfs_root *search_root;
6986         struct btrfs_path path;
6987         struct ulist_iterator iter;
6988         struct btrfs_key root_key, key;
6989         int ret;
6990
6991         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6992             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6993                 return -EIO;
6994
6995         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6996         if (ret)
6997                 return -EIO;
6998
6999         btrfs_init_path(&path);
7000         ULIST_ITER_INIT(&iter);
7001         while ((node = ulist_next(roots, &iter))) {
7002                 root_key.objectid = node->val;
7003                 root_key.type = BTRFS_ROOT_ITEM_KEY;
7004                 root_key.offset = (u64)-1;
7005
7006                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
7007                 if (IS_ERR(root)) {
7008                         ret = -EIO;
7009                         break;
7010                 }
7011
7012
7013                 trans = btrfs_start_transaction(search_root, 0);
7014                 if (IS_ERR(trans)) {
7015                         ret = PTR_ERR(trans);
7016                         break;
7017                 }
7018
7019                 path.lowest_level = btrfs_header_level(buf);
7020                 path.skip_check_block = 1;
7021                 if (path.lowest_level)
7022                         btrfs_node_key_to_cpu(buf, &key, 0);
7023                 else
7024                         btrfs_item_key_to_cpu(buf, &key, 0);
7025                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
7026                 if (ret) {
7027                         ret = -EIO;
7028                         btrfs_commit_transaction(trans, search_root);
7029                         break;
7030                 }
7031                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
7032                         ret = fix_key_order(search_root, &path);
7033                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7034                         ret = fix_item_offset(search_root, &path);
7035                 if (ret) {
7036                         btrfs_commit_transaction(trans, search_root);
7037                         break;
7038                 }
7039                 btrfs_release_path(&path);
7040                 btrfs_commit_transaction(trans, search_root);
7041         }
7042         ulist_free(roots);
7043         btrfs_release_path(&path);
7044         return ret;
7045 }
7046
7047 static int check_block(struct btrfs_root *root,
7048                        struct cache_tree *extent_cache,
7049                        struct extent_buffer *buf, u64 flags)
7050 {
7051         struct extent_record *rec;
7052         struct cache_extent *cache;
7053         struct btrfs_key key;
7054         enum btrfs_tree_block_status status;
7055         int ret = 0;
7056         int level;
7057
7058         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
7059         if (!cache)
7060                 return 1;
7061         rec = container_of(cache, struct extent_record, cache);
7062         rec->generation = btrfs_header_generation(buf);
7063
7064         level = btrfs_header_level(buf);
7065         if (btrfs_header_nritems(buf) > 0) {
7066
7067                 if (level == 0)
7068                         btrfs_item_key_to_cpu(buf, &key, 0);
7069                 else
7070                         btrfs_node_key_to_cpu(buf, &key, 0);
7071
7072                 rec->info_objectid = key.objectid;
7073         }
7074         rec->info_level = level;
7075
7076         if (btrfs_is_leaf(buf))
7077                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
7078         else
7079                 status = btrfs_check_node(root, &rec->parent_key, buf);
7080
7081         if (status != BTRFS_TREE_BLOCK_CLEAN) {
7082                 if (repair)
7083                         status = try_to_fix_bad_block(root, buf, status);
7084                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7085                         ret = -EIO;
7086                         fprintf(stderr, "bad block %llu\n",
7087                                 (unsigned long long)buf->start);
7088                 } else {
7089                         /*
7090                          * Signal to callers we need to start the scan over
7091                          * again since we'll have cowed blocks.
7092                          */
7093                         ret = -EAGAIN;
7094                 }
7095         } else {
7096                 rec->content_checked = 1;
7097                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7098                         rec->owner_ref_checked = 1;
7099                 else {
7100                         ret = check_owner_ref(root, rec, buf);
7101                         if (!ret)
7102                                 rec->owner_ref_checked = 1;
7103                 }
7104         }
7105         if (!ret)
7106                 maybe_free_extent_rec(extent_cache, rec);
7107         return ret;
7108 }
7109
#if 0
/*
 * Compiled out: list based lookup of a tree backref of @rec.
 *
 * With a non-zero @parent only full backrefs are considered and matched on
 * the parent; otherwise only non-full backrefs are considered and matched on
 * @root.  Returns the matching backref or NULL.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
						u64 parent, u64 root)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs;
	     pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct tree_backref *back;

		if (node->is_data)
			continue;

		back = to_tree_backref(node);
		if (parent > 0) {
			if (node->full_backref && back->parent == parent)
				return back;
		} else if (!node->full_backref && back->root == root) {
			return back;
		}
	}
	return NULL;
}
#endif
7139
7140 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7141                                                 u64 parent, u64 root)
7142 {
7143         struct tree_backref *ref = malloc(sizeof(*ref));
7144
7145         if (!ref)
7146                 return NULL;
7147         memset(&ref->node, 0, sizeof(ref->node));
7148         if (parent > 0) {
7149                 ref->parent = parent;
7150                 ref->node.full_backref = 1;
7151         } else {
7152                 ref->root = root;
7153                 ref->node.full_backref = 0;
7154         }
7155
7156         return ref;
7157 }
7158
#if 0
/*
 * Compiled out: list based lookup of a data backref of @rec.
 *
 * With a non-zero @parent only full backrefs are considered and matched on
 * the parent.  Otherwise non-full backrefs are matched on root, owner and
 * offset; when @found_ref is set and the candidate was already found with a
 * different size or disk bytenr, the candidate is skipped.
 *
 * Returns the matching backref or NULL.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
						u64 parent, u64 root,
						u64 owner, u64 offset,
						int found_ref,
						u64 disk_bytenr, u64 bytes)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs;
	     pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct data_backref *back;

		if (!node->is_data)
			continue;

		back = to_data_backref(node);
		if (parent > 0) {
			if (node->full_backref && back->parent == parent)
				return back;
			continue;
		}

		if (node->full_backref)
			continue;
		if (back->root != root || back->owner != owner ||
		    back->offset != offset)
			continue;

		/* Same key but different extent geometry, keep looking. */
		if (found_ref && node->found_ref &&
		    (back->bytes != bytes ||
		     back->disk_bytenr != disk_bytenr))
			continue;

		return back;
	}
	return NULL;
}
#endif
7197
7198 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7199                                                 u64 parent, u64 root,
7200                                                 u64 owner, u64 offset,
7201                                                 u64 max_size)
7202 {
7203         struct data_backref *ref = malloc(sizeof(*ref));
7204
7205         if (!ref)
7206                 return NULL;
7207         memset(&ref->node, 0, sizeof(ref->node));
7208         ref->node.is_data = 1;
7209
7210         if (parent > 0) {
7211                 ref->parent = parent;
7212                 ref->owner = 0;
7213                 ref->offset = 0;
7214                 ref->node.full_backref = 1;
7215         } else {
7216                 ref->root = root;
7217                 ref->owner = owner;
7218                 ref->offset = offset;
7219                 ref->node.full_backref = 0;
7220         }
7221         ref->bytes = max_size;
7222         ref->found_ref = 0;
7223         ref->num_refs = 0;
7224         if (max_size > rec->max_size)
7225                 rec->max_size = max_size;
7226         return ref;
7227 }
7228
7229 /* Check if the type of extent matches with its chunk */
7230 static void check_extent_type(struct extent_record *rec)
7231 {
7232         struct btrfs_block_group_cache *bg_cache;
7233
7234         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7235         if (!bg_cache)
7236                 return;
7237
7238         /* data extent, check chunk directly*/
7239         if (!rec->metadata) {
7240                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7241                         rec->wrong_chunk_type = 1;
7242                 return;
7243         }
7244
7245         /* metadata extent, check the obvious case first */
7246         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7247                                  BTRFS_BLOCK_GROUP_METADATA))) {
7248                 rec->wrong_chunk_type = 1;
7249                 return;
7250         }
7251
7252         /*
7253          * Check SYSTEM extent, as it's also marked as metadata, we can only
7254          * make sure it's a SYSTEM extent by its backref
7255          */
7256         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7257                 struct extent_backref *node;
7258                 struct tree_backref *tback;
7259                 u64 bg_type;
7260
7261                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7262                 if (node->is_data) {
7263                         /* tree block shouldn't have data backref */
7264                         rec->wrong_chunk_type = 1;
7265                         return;
7266                 }
7267                 tback = container_of(node, struct tree_backref, node);
7268
7269                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7270                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7271                 else
7272                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
7273                 if (!(bg_cache->flags & bg_type))
7274                         rec->wrong_chunk_type = 1;
7275         }
7276 }
7277
7278 /*
7279  * Allocate a new extent record, fill default values from @tmpl and insert int
7280  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7281  * the cache, otherwise it fails.
7282  */
7283 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7284                 struct extent_record *tmpl)
7285 {
7286         struct extent_record *rec;
7287         int ret = 0;
7288
7289         BUG_ON(tmpl->max_size == 0);
7290         rec = malloc(sizeof(*rec));
7291         if (!rec)
7292                 return -ENOMEM;
7293         rec->start = tmpl->start;
7294         rec->max_size = tmpl->max_size;
7295         rec->nr = max(tmpl->nr, tmpl->max_size);
7296         rec->found_rec = tmpl->found_rec;
7297         rec->content_checked = tmpl->content_checked;
7298         rec->owner_ref_checked = tmpl->owner_ref_checked;
7299         rec->num_duplicates = 0;
7300         rec->metadata = tmpl->metadata;
7301         rec->flag_block_full_backref = FLAG_UNSET;
7302         rec->bad_full_backref = 0;
7303         rec->crossing_stripes = 0;
7304         rec->wrong_chunk_type = 0;
7305         rec->is_root = tmpl->is_root;
7306         rec->refs = tmpl->refs;
7307         rec->extent_item_refs = tmpl->extent_item_refs;
7308         rec->parent_generation = tmpl->parent_generation;
7309         INIT_LIST_HEAD(&rec->backrefs);
7310         INIT_LIST_HEAD(&rec->dups);
7311         INIT_LIST_HEAD(&rec->list);
7312         rec->backref_tree = RB_ROOT;
7313         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7314         rec->cache.start = tmpl->start;
7315         rec->cache.size = tmpl->nr;
7316         ret = insert_cache_extent(extent_cache, &rec->cache);
7317         if (ret) {
7318                 free(rec);
7319                 return ret;
7320         }
7321         bytes_used += rec->nr;
7322
7323         if (tmpl->metadata)
7324                 rec->crossing_stripes = check_crossing_stripes(global_info,
7325                                 rec->start, global_info->nodesize);
7326         check_extent_type(rec);
7327         return ret;
7328 }
7329
7330 /*
7331  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7332  * some are hints:
7333  * - refs              - if found, increase refs
7334  * - is_root           - if found, set
7335  * - content_checked   - if found, set
7336  * - owner_ref_checked - if found, set
7337  *
7338  * If not found, create a new one, initialize and insert.
7339  */
7340 static int add_extent_rec(struct cache_tree *extent_cache,
7341                 struct extent_record *tmpl)
7342 {
7343         struct extent_record *rec;
7344         struct cache_extent *cache;
7345         int ret = 0;
7346         int dup = 0;
7347
7348         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7349         if (cache) {
7350                 rec = container_of(cache, struct extent_record, cache);
7351                 if (tmpl->refs)
7352                         rec->refs++;
7353                 if (rec->nr == 1)
7354                         rec->nr = max(tmpl->nr, tmpl->max_size);
7355
7356                 /*
7357                  * We need to make sure to reset nr to whatever the extent
7358                  * record says was the real size, this way we can compare it to
7359                  * the backrefs.
7360                  */
7361                 if (tmpl->found_rec) {
7362                         if (tmpl->start != rec->start || rec->found_rec) {
7363                                 struct extent_record *tmp;
7364
7365                                 dup = 1;
7366                                 if (list_empty(&rec->list))
7367                                         list_add_tail(&rec->list,
7368                                                       &duplicate_extents);
7369
7370                                 /*
7371                                  * We have to do this song and dance in case we
7372                                  * find an extent record that falls inside of
7373                                  * our current extent record but does not have
7374                                  * the same objectid.
7375                                  */
7376                                 tmp = malloc(sizeof(*tmp));
7377                                 if (!tmp)
7378                                         return -ENOMEM;
7379                                 tmp->start = tmpl->start;
7380                                 tmp->max_size = tmpl->max_size;
7381                                 tmp->nr = tmpl->nr;
7382                                 tmp->found_rec = 1;
7383                                 tmp->metadata = tmpl->metadata;
7384                                 tmp->extent_item_refs = tmpl->extent_item_refs;
7385                                 INIT_LIST_HEAD(&tmp->list);
7386                                 list_add_tail(&tmp->list, &rec->dups);
7387                                 rec->num_duplicates++;
7388                         } else {
7389                                 rec->nr = tmpl->nr;
7390                                 rec->found_rec = 1;
7391                         }
7392                 }
7393
7394                 if (tmpl->extent_item_refs && !dup) {
7395                         if (rec->extent_item_refs) {
7396                                 fprintf(stderr, "block %llu rec "
7397                                         "extent_item_refs %llu, passed %llu\n",
7398                                         (unsigned long long)tmpl->start,
7399                                         (unsigned long long)
7400                                                         rec->extent_item_refs,
7401                                         (unsigned long long)tmpl->extent_item_refs);
7402                         }
7403                         rec->extent_item_refs = tmpl->extent_item_refs;
7404                 }
7405                 if (tmpl->is_root)
7406                         rec->is_root = 1;
7407                 if (tmpl->content_checked)
7408                         rec->content_checked = 1;
7409                 if (tmpl->owner_ref_checked)
7410                         rec->owner_ref_checked = 1;
7411                 memcpy(&rec->parent_key, &tmpl->parent_key,
7412                                 sizeof(tmpl->parent_key));
7413                 if (tmpl->parent_generation)
7414                         rec->parent_generation = tmpl->parent_generation;
7415                 if (rec->max_size < tmpl->max_size)
7416                         rec->max_size = tmpl->max_size;
7417
7418                 /*
7419                  * A metadata extent can't cross stripe_len boundary, otherwise
7420                  * kernel scrub won't be able to handle it.
7421                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7422                  * it.
7423                  */
7424                 if (tmpl->metadata)
7425                         rec->crossing_stripes = check_crossing_stripes(
7426                                         global_info, rec->start,
7427                                         global_info->nodesize);
7428                 check_extent_type(rec);
7429                 maybe_free_extent_rec(extent_cache, rec);
7430                 return ret;
7431         }
7432
7433         ret = add_extent_rec_nolookup(extent_cache, tmpl);
7434
7435         return ret;
7436 }
7437
7438 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7439                             u64 parent, u64 root, int found_ref)
7440 {
7441         struct extent_record *rec;
7442         struct tree_backref *back;
7443         struct cache_extent *cache;
7444         int ret;
7445         bool insert = false;
7446
7447         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7448         if (!cache) {
7449                 struct extent_record tmpl;
7450
7451                 memset(&tmpl, 0, sizeof(tmpl));
7452                 tmpl.start = bytenr;
7453                 tmpl.nr = 1;
7454                 tmpl.metadata = 1;
7455                 tmpl.max_size = 1;
7456
7457                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7458                 if (ret)
7459                         return ret;
7460
7461                 /* really a bug in cache_extent implement now */
7462                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7463                 if (!cache)
7464                         return -ENOENT;
7465         }
7466
7467         rec = container_of(cache, struct extent_record, cache);
7468         if (rec->start != bytenr) {
7469                 /*
7470                  * Several cause, from unaligned bytenr to over lapping extents
7471                  */
7472                 return -EEXIST;
7473         }
7474
7475         back = find_tree_backref(rec, parent, root);
7476         if (!back) {
7477                 back = alloc_tree_backref(rec, parent, root);
7478                 if (!back)
7479                         return -ENOMEM;
7480                 insert = true;
7481         }
7482
7483         if (found_ref) {
7484                 if (back->node.found_ref) {
7485                         fprintf(stderr, "Extent back ref already exists "
7486                                 "for %llu parent %llu root %llu \n",
7487                                 (unsigned long long)bytenr,
7488                                 (unsigned long long)parent,
7489                                 (unsigned long long)root);
7490                 }
7491                 back->node.found_ref = 1;
7492         } else {
7493                 if (back->node.found_extent_tree) {
7494                         fprintf(stderr, "Extent back ref already exists "
7495                                 "for %llu parent %llu root %llu \n",
7496                                 (unsigned long long)bytenr,
7497                                 (unsigned long long)parent,
7498                                 (unsigned long long)root);
7499                 }
7500                 back->node.found_extent_tree = 1;
7501         }
7502         if (insert)
7503                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7504                         compare_extent_backref));
7505         check_extent_type(rec);
7506         maybe_free_extent_rec(extent_cache, rec);
7507         return 0;
7508 }
7509
7510 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7511                             u64 parent, u64 root, u64 owner, u64 offset,
7512                             u32 num_refs, int found_ref, u64 max_size)
7513 {
7514         struct extent_record *rec;
7515         struct data_backref *back;
7516         struct cache_extent *cache;
7517         int ret;
7518         bool insert = false;
7519
7520         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7521         if (!cache) {
7522                 struct extent_record tmpl;
7523
7524                 memset(&tmpl, 0, sizeof(tmpl));
7525                 tmpl.start = bytenr;
7526                 tmpl.nr = 1;
7527                 tmpl.max_size = max_size;
7528
7529                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7530                 if (ret)
7531                         return ret;
7532
7533                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7534                 if (!cache)
7535                         abort();
7536         }
7537
7538         rec = container_of(cache, struct extent_record, cache);
7539         if (rec->max_size < max_size)
7540                 rec->max_size = max_size;
7541
7542         /*
7543          * If found_ref is set then max_size is the real size and must match the
7544          * existing refs.  So if we have already found a ref then we need to
7545          * make sure that this ref matches the existing one, otherwise we need
7546          * to add a new backref so we can notice that the backrefs don't match
7547          * and we need to figure out who is telling the truth.  This is to
7548          * account for that awful fsync bug I introduced where we'd end up with
7549          * a btrfs_file_extent_item that would have its length include multiple
7550          * prealloc extents or point inside of a prealloc extent.
7551          */
7552         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7553                                  bytenr, max_size);
7554         if (!back) {
7555                 back = alloc_data_backref(rec, parent, root, owner, offset,
7556                                           max_size);
7557                 BUG_ON(!back);
7558                 insert = true;
7559         }
7560
7561         if (found_ref) {
7562                 BUG_ON(num_refs != 1);
7563                 if (back->node.found_ref)
7564                         BUG_ON(back->bytes != max_size);
7565                 back->node.found_ref = 1;
7566                 back->found_ref += 1;
7567                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7568                         back->bytes = max_size;
7569                         back->disk_bytenr = bytenr;
7570
7571                         /* Need to reinsert if not already in the tree */
7572                         if (!insert) {
7573                                 rb_erase(&back->node.node, &rec->backref_tree);
7574                                 insert = true;
7575                         }
7576                 }
7577                 rec->refs += 1;
7578                 rec->content_checked = 1;
7579                 rec->owner_ref_checked = 1;
7580         } else {
7581                 if (back->node.found_extent_tree) {
7582                         fprintf(stderr, "Extent back ref already exists "
7583                                 "for %llu parent %llu root %llu "
7584                                 "owner %llu offset %llu num_refs %lu\n",
7585                                 (unsigned long long)bytenr,
7586                                 (unsigned long long)parent,
7587                                 (unsigned long long)root,
7588                                 (unsigned long long)owner,
7589                                 (unsigned long long)offset,
7590                                 (unsigned long)num_refs);
7591                 }
7592                 back->num_refs = num_refs;
7593                 back->node.found_extent_tree = 1;
7594         }
7595         if (insert)
7596                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7597                         compare_extent_backref));
7598
7599         maybe_free_extent_rec(extent_cache, rec);
7600         return 0;
7601 }
7602
7603 static int add_pending(struct cache_tree *pending,
7604                        struct cache_tree *seen, u64 bytenr, u32 size)
7605 {
7606         int ret;
7607         ret = add_cache_extent(seen, bytenr, size);
7608         if (ret)
7609                 return ret;
7610         add_cache_extent(pending, bytenr, size);
7611         return 0;
7612 }
7613
7614 static int pick_next_pending(struct cache_tree *pending,
7615                         struct cache_tree *reada,
7616                         struct cache_tree *nodes,
7617                         u64 last, struct block_info *bits, int bits_nr,
7618                         int *reada_bits)
7619 {
7620         unsigned long node_start = last;
7621         struct cache_extent *cache;
7622         int ret;
7623
7624         cache = search_cache_extent(reada, 0);
7625         if (cache) {
7626                 bits[0].start = cache->start;
7627                 bits[0].size = cache->size;
7628                 *reada_bits = 1;
7629                 return 1;
7630         }
7631         *reada_bits = 0;
7632         if (node_start > 32768)
7633                 node_start -= 32768;
7634
7635         cache = search_cache_extent(nodes, node_start);
7636         if (!cache)
7637                 cache = search_cache_extent(nodes, 0);
7638
7639         if (!cache) {
7640                  cache = search_cache_extent(pending, 0);
7641                  if (!cache)
7642                          return 0;
7643                  ret = 0;
7644                  do {
7645                          bits[ret].start = cache->start;
7646                          bits[ret].size = cache->size;
7647                          cache = next_cache_extent(cache);
7648                          ret++;
7649                  } while (cache && ret < bits_nr);
7650                  return ret;
7651         }
7652
7653         ret = 0;
7654         do {
7655                 bits[ret].start = cache->start;
7656                 bits[ret].size = cache->size;
7657                 cache = next_cache_extent(cache);
7658                 ret++;
7659         } while (cache && ret < bits_nr);
7660
7661         if (bits_nr - ret > 8) {
7662                 u64 lookup = bits[0].start + bits[0].size;
7663                 struct cache_extent *next;
7664                 next = search_cache_extent(pending, lookup);
7665                 while(next) {
7666                         if (next->start - lookup > 32768)
7667                                 break;
7668                         bits[ret].start = next->start;
7669                         bits[ret].size = next->size;
7670                         lookup = next->start + next->size;
7671                         ret++;
7672                         if (ret == bits_nr)
7673                                 break;
7674                         next = next_cache_extent(next);
7675                         if (!next)
7676                                 break;
7677                 }
7678         }
7679         return ret;
7680 }
7681
7682 static void free_chunk_record(struct cache_extent *cache)
7683 {
7684         struct chunk_record *rec;
7685
7686         rec = container_of(cache, struct chunk_record, cache);
7687         list_del_init(&rec->list);
7688         list_del_init(&rec->dextents);
7689         free(rec);
7690 }
7691
7692 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7693 {
7694         cache_tree_free_extents(chunk_cache, free_chunk_record);
7695 }
7696
7697 static void free_device_record(struct rb_node *node)
7698 {
7699         struct device_record *rec;
7700
7701         rec = container_of(node, struct device_record, node);
7702         free(rec);
7703 }
7704
7705 FREE_RB_BASED_TREE(device_cache, free_device_record);
7706
7707 int insert_block_group_record(struct block_group_tree *tree,
7708                               struct block_group_record *bg_rec)
7709 {
7710         int ret;
7711
7712         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7713         if (ret)
7714                 return ret;
7715
7716         list_add_tail(&bg_rec->list, &tree->block_groups);
7717         return 0;
7718 }
7719
7720 static void free_block_group_record(struct cache_extent *cache)
7721 {
7722         struct block_group_record *rec;
7723
7724         rec = container_of(cache, struct block_group_record, cache);
7725         list_del_init(&rec->list);
7726         free(rec);
7727 }
7728
7729 void free_block_group_tree(struct block_group_tree *tree)
7730 {
7731         cache_tree_free_extents(&tree->tree, free_block_group_record);
7732 }
7733
7734 int insert_device_extent_record(struct device_extent_tree *tree,
7735                                 struct device_extent_record *de_rec)
7736 {
7737         int ret;
7738
7739         /*
7740          * Device extent is a bit different from the other extents, because
7741          * the extents which belong to the different devices may have the
7742          * same start and size, so we need use the special extent cache
7743          * search/insert functions.
7744          */
7745         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7746         if (ret)
7747                 return ret;
7748
7749         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7750         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
7751         return 0;
7752 }
7753
7754 static void free_device_extent_record(struct cache_extent *cache)
7755 {
7756         struct device_extent_record *rec;
7757
7758         rec = container_of(cache, struct device_extent_record, cache);
7759         if (!list_empty(&rec->chunk_list))
7760                 list_del_init(&rec->chunk_list);
7761         if (!list_empty(&rec->device_list))
7762                 list_del_init(&rec->device_list);
7763         free(rec);
7764 }
7765
7766 void free_device_extent_tree(struct device_extent_tree *tree)
7767 {
7768         cache_tree_free_extents(&tree->tree, free_device_extent_record);
7769 }
7770
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Record the backref described by a v0 extent ref item at @slot of @leaf.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref, anything else as a data backref carrying the v0 ref count.
 *
 * Returns the result of add_tree_backref() / add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset,
			0, 0);
}
#endif
7791
7792 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7793                                             struct btrfs_key *key,
7794                                             int slot)
7795 {
7796         struct btrfs_chunk *ptr;
7797         struct chunk_record *rec;
7798         int num_stripes, i;
7799
7800         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7801         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7802
7803         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7804         if (!rec) {
7805                 fprintf(stderr, "memory allocation failed\n");
7806                 exit(-1);
7807         }
7808
7809         INIT_LIST_HEAD(&rec->list);
7810         INIT_LIST_HEAD(&rec->dextents);
7811         rec->bg_rec = NULL;
7812
7813         rec->cache.start = key->offset;
7814         rec->cache.size = btrfs_chunk_length(leaf, ptr);
7815
7816         rec->generation = btrfs_header_generation(leaf);
7817
7818         rec->objectid = key->objectid;
7819         rec->type = key->type;
7820         rec->offset = key->offset;
7821
7822         rec->length = rec->cache.size;
7823         rec->owner = btrfs_chunk_owner(leaf, ptr);
7824         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7825         rec->type_flags = btrfs_chunk_type(leaf, ptr);
7826         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7827         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7828         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7829         rec->num_stripes = num_stripes;
7830         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
7831
7832         for (i = 0; i < rec->num_stripes; ++i) {
7833                 rec->stripes[i].devid =
7834                         btrfs_stripe_devid_nr(leaf, ptr, i);
7835                 rec->stripes[i].offset =
7836                         btrfs_stripe_offset_nr(leaf, ptr, i);
7837                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7838                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
7839                                 BTRFS_UUID_SIZE);
7840         }
7841
7842         return rec;
7843 }
7844
7845 static int process_chunk_item(struct cache_tree *chunk_cache,
7846                               struct btrfs_key *key, struct extent_buffer *eb,
7847                               int slot)
7848 {
7849         struct chunk_record *rec;
7850         struct btrfs_chunk *chunk;
7851         int ret = 0;
7852
7853         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7854         /*
7855          * Do extra check for this chunk item,
7856          *
7857          * It's still possible one can craft a leaf with CHUNK_ITEM, with
7858          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7859          * and owner<->key_type check.
7860          */
7861         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7862                                       key->offset);
7863         if (ret < 0) {
7864                 error("chunk(%llu, %llu) is not valid, ignore it",
7865                       key->offset, btrfs_chunk_length(eb, chunk));
7866                 return 0;
7867         }
7868         rec = btrfs_new_chunk_record(eb, key, slot);
7869         ret = insert_cache_extent(chunk_cache, &rec->cache);
7870         if (ret) {
7871                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7872                         rec->offset, rec->length);
7873                 free(rec);
7874         }
7875
7876         return ret;
7877 }
7878
7879 static int process_device_item(struct rb_root *dev_cache,
7880                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7881 {
7882         struct btrfs_dev_item *ptr;
7883         struct device_record *rec;
7884         int ret = 0;
7885
7886         ptr = btrfs_item_ptr(eb,
7887                 slot, struct btrfs_dev_item);
7888
7889         rec = malloc(sizeof(*rec));
7890         if (!rec) {
7891                 fprintf(stderr, "memory allocation failed\n");
7892                 return -ENOMEM;
7893         }
7894
7895         rec->devid = key->offset;
7896         rec->generation = btrfs_header_generation(eb);
7897
7898         rec->objectid = key->objectid;
7899         rec->type = key->type;
7900         rec->offset = key->offset;
7901
7902         rec->devid = btrfs_device_id(eb, ptr);
7903         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7904         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7905
7906         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7907         if (ret) {
7908                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
7909                 free(rec);
7910         }
7911
7912         return ret;
7913 }
7914
7915 struct block_group_record *
7916 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7917                              int slot)
7918 {
7919         struct btrfs_block_group_item *ptr;
7920         struct block_group_record *rec;
7921
7922         rec = calloc(1, sizeof(*rec));
7923         if (!rec) {
7924                 fprintf(stderr, "memory allocation failed\n");
7925                 exit(-1);
7926         }
7927
7928         rec->cache.start = key->objectid;
7929         rec->cache.size = key->offset;
7930
7931         rec->generation = btrfs_header_generation(leaf);
7932
7933         rec->objectid = key->objectid;
7934         rec->type = key->type;
7935         rec->offset = key->offset;
7936
7937         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7938         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7939
7940         INIT_LIST_HEAD(&rec->list);
7941
7942         return rec;
7943 }
7944
7945 static int process_block_group_item(struct block_group_tree *block_group_cache,
7946                                     struct btrfs_key *key,
7947                                     struct extent_buffer *eb, int slot)
7948 {
7949         struct block_group_record *rec;
7950         int ret = 0;
7951
7952         rec = btrfs_new_block_group_record(eb, key, slot);
7953         ret = insert_block_group_record(block_group_cache, rec);
7954         if (ret) {
7955                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7956                         rec->objectid, rec->offset);
7957                 free(rec);
7958         }
7959
7960         return ret;
7961 }
7962
7963 struct device_extent_record *
7964 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7965                                struct btrfs_key *key, int slot)
7966 {
7967         struct device_extent_record *rec;
7968         struct btrfs_dev_extent *ptr;
7969
7970         rec = calloc(1, sizeof(*rec));
7971         if (!rec) {
7972                 fprintf(stderr, "memory allocation failed\n");
7973                 exit(-1);
7974         }
7975
7976         rec->cache.objectid = key->objectid;
7977         rec->cache.start = key->offset;
7978
7979         rec->generation = btrfs_header_generation(leaf);
7980
7981         rec->objectid = key->objectid;
7982         rec->type = key->type;
7983         rec->offset = key->offset;
7984
7985         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7986         rec->chunk_objecteid =
7987                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7988         rec->chunk_offset =
7989                 btrfs_dev_extent_chunk_offset(leaf, ptr);
7990         rec->length = btrfs_dev_extent_length(leaf, ptr);
7991         rec->cache.size = rec->length;
7992
7993         INIT_LIST_HEAD(&rec->chunk_list);
7994         INIT_LIST_HEAD(&rec->device_list);
7995
7996         return rec;
7997 }
7998
7999 static int
8000 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
8001                            struct btrfs_key *key, struct extent_buffer *eb,
8002                            int slot)
8003 {
8004         struct device_extent_record *rec;
8005         int ret;
8006
8007         rec = btrfs_new_device_extent_record(eb, key, slot);
8008         ret = insert_device_extent_record(dev_extent_cache, rec);
8009         if (ret) {
8010                 fprintf(stderr,
8011                         "Device extent[%llu, %llu, %llu] existed.\n",
8012                         rec->objectid, rec->offset, rec->length);
8013                 free(rec);
8014         }
8015
8016         return ret;
8017 }
8018
8019 static int process_extent_item(struct btrfs_root *root,
8020                                struct cache_tree *extent_cache,
8021                                struct extent_buffer *eb, int slot)
8022 {
8023         struct btrfs_extent_item *ei;
8024         struct btrfs_extent_inline_ref *iref;
8025         struct btrfs_extent_data_ref *dref;
8026         struct btrfs_shared_data_ref *sref;
8027         struct btrfs_key key;
8028         struct extent_record tmpl;
8029         unsigned long end;
8030         unsigned long ptr;
8031         int ret;
8032         int type;
8033         u32 item_size = btrfs_item_size_nr(eb, slot);
8034         u64 refs = 0;
8035         u64 offset;
8036         u64 num_bytes;
8037         int metadata = 0;
8038
8039         btrfs_item_key_to_cpu(eb, &key, slot);
8040
8041         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8042                 metadata = 1;
8043                 num_bytes = root->fs_info->nodesize;
8044         } else {
8045                 num_bytes = key.offset;
8046         }
8047
8048         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
8049                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
8050                       key.objectid, root->fs_info->sectorsize);
8051                 return -EIO;
8052         }
8053         if (item_size < sizeof(*ei)) {
8054 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8055                 struct btrfs_extent_item_v0 *ei0;
8056                 if (item_size != sizeof(*ei0)) {
8057                         error(
8058         "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
8059                                 key.objectid, key.type, key.offset,
8060                                 btrfs_header_bytenr(eb), slot);
8061                         BUG();
8062                 }
8063                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
8064                 refs = btrfs_extent_refs_v0(eb, ei0);
8065 #else
8066                 BUG();
8067 #endif
8068                 memset(&tmpl, 0, sizeof(tmpl));
8069                 tmpl.start = key.objectid;
8070                 tmpl.nr = num_bytes;
8071                 tmpl.extent_item_refs = refs;
8072                 tmpl.metadata = metadata;
8073                 tmpl.found_rec = 1;
8074                 tmpl.max_size = num_bytes;
8075
8076                 return add_extent_rec(extent_cache, &tmpl);
8077         }
8078
8079         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
8080         refs = btrfs_extent_refs(eb, ei);
8081         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
8082                 metadata = 1;
8083         else
8084                 metadata = 0;
8085         if (metadata && num_bytes != root->fs_info->nodesize) {
8086                 error("ignore invalid metadata extent, length %llu does not equal to %u",
8087                       num_bytes, root->fs_info->nodesize);
8088                 return -EIO;
8089         }
8090         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
8091                 error("ignore invalid data extent, length %llu is not aligned to %u",
8092                       num_bytes, root->fs_info->sectorsize);
8093                 return -EIO;
8094         }
8095
8096         memset(&tmpl, 0, sizeof(tmpl));
8097         tmpl.start = key.objectid;
8098         tmpl.nr = num_bytes;
8099         tmpl.extent_item_refs = refs;
8100         tmpl.metadata = metadata;
8101         tmpl.found_rec = 1;
8102         tmpl.max_size = num_bytes;
8103         add_extent_rec(extent_cache, &tmpl);
8104
8105         ptr = (unsigned long)(ei + 1);
8106         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
8107             key.type == BTRFS_EXTENT_ITEM_KEY)
8108                 ptr += sizeof(struct btrfs_tree_block_info);
8109
8110         end = (unsigned long)ei + item_size;
8111         while (ptr < end) {
8112                 iref = (struct btrfs_extent_inline_ref *)ptr;
8113                 type = btrfs_extent_inline_ref_type(eb, iref);
8114                 offset = btrfs_extent_inline_ref_offset(eb, iref);
8115                 switch (type) {
8116                 case BTRFS_TREE_BLOCK_REF_KEY:
8117                         ret = add_tree_backref(extent_cache, key.objectid,
8118                                         0, offset, 0);
8119                         if (ret < 0)
8120                                 error(
8121                         "add_tree_backref failed (extent items tree block): %s",
8122                                       strerror(-ret));
8123                         break;
8124                 case BTRFS_SHARED_BLOCK_REF_KEY:
8125                         ret = add_tree_backref(extent_cache, key.objectid,
8126                                         offset, 0, 0);
8127                         if (ret < 0)
8128                                 error(
8129                         "add_tree_backref failed (extent items shared block): %s",
8130                                       strerror(-ret));
8131                         break;
8132                 case BTRFS_EXTENT_DATA_REF_KEY:
8133                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8134                         add_data_backref(extent_cache, key.objectid, 0,
8135                                         btrfs_extent_data_ref_root(eb, dref),
8136                                         btrfs_extent_data_ref_objectid(eb,
8137                                                                        dref),
8138                                         btrfs_extent_data_ref_offset(eb, dref),
8139                                         btrfs_extent_data_ref_count(eb, dref),
8140                                         0, num_bytes);
8141                         break;
8142                 case BTRFS_SHARED_DATA_REF_KEY:
8143                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
8144                         add_data_backref(extent_cache, key.objectid, offset,
8145                                         0, 0, 0,
8146                                         btrfs_shared_data_ref_count(eb, sref),
8147                                         0, num_bytes);
8148                         break;
8149                 default:
8150                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
8151                                 key.objectid, key.type, num_bytes);
8152                         goto out;
8153                 }
8154                 ptr += btrfs_extent_inline_ref_size(type);
8155         }
8156         WARN_ON(ptr > end);
8157 out:
8158         return 0;
8159 }
8160
8161 static int check_cache_range(struct btrfs_root *root,
8162                              struct btrfs_block_group_cache *cache,
8163                              u64 offset, u64 bytes)
8164 {
8165         struct btrfs_free_space *entry;
8166         u64 *logical;
8167         u64 bytenr;
8168         int stripe_len;
8169         int i, nr, ret;
8170
8171         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8172                 bytenr = btrfs_sb_offset(i);
8173                 ret = btrfs_rmap_block(root->fs_info,
8174                                        cache->key.objectid, bytenr, 0,
8175                                        &logical, &nr, &stripe_len);
8176                 if (ret)
8177                         return ret;
8178
8179                 while (nr--) {
8180                         if (logical[nr] + stripe_len <= offset)
8181                                 continue;
8182                         if (offset + bytes <= logical[nr])
8183                                 continue;
8184                         if (logical[nr] == offset) {
8185                                 if (stripe_len >= bytes) {
8186                                         free(logical);
8187                                         return 0;
8188                                 }
8189                                 bytes -= stripe_len;
8190                                 offset += stripe_len;
8191                         } else if (logical[nr] < offset) {
8192                                 if (logical[nr] + stripe_len >=
8193                                     offset + bytes) {
8194                                         free(logical);
8195                                         return 0;
8196                                 }
8197                                 bytes = (offset + bytes) -
8198                                         (logical[nr] + stripe_len);
8199                                 offset = logical[nr] + stripe_len;
8200                         } else {
8201                                 /*
8202                                  * Could be tricky, the super may land in the
8203                                  * middle of the area we're checking.  First
8204                                  * check the easiest case, it's at the end.
8205                                  */
8206                                 if (logical[nr] + stripe_len >=
8207                                     bytes + offset) {
8208                                         bytes = logical[nr] - offset;
8209                                         continue;
8210                                 }
8211
8212                                 /* Check the left side */
8213                                 ret = check_cache_range(root, cache,
8214                                                         offset,
8215                                                         logical[nr] - offset);
8216                                 if (ret) {
8217                                         free(logical);
8218                                         return ret;
8219                                 }
8220
8221                                 /* Now we continue with the right side */
8222                                 bytes = (offset + bytes) -
8223                                         (logical[nr] + stripe_len);
8224                                 offset = logical[nr] + stripe_len;
8225                         }
8226                 }
8227
8228                 free(logical);
8229         }
8230
8231         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8232         if (!entry) {
8233                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8234                         offset, offset+bytes);
8235                 return -EINVAL;
8236         }
8237
8238         if (entry->offset != offset) {
8239                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8240                         entry->offset);
8241                 return -EINVAL;
8242         }
8243
8244         if (entry->bytes != bytes) {
8245                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8246                         bytes, entry->bytes, offset);
8247                 return -EINVAL;
8248         }
8249
8250         unlink_free_space(cache->free_space_ctl, entry);
8251         free(entry);
8252         return 0;
8253 }
8254
8255 static int verify_space_cache(struct btrfs_root *root,
8256                               struct btrfs_block_group_cache *cache)
8257 {
8258         struct btrfs_path path;
8259         struct extent_buffer *leaf;
8260         struct btrfs_key key;
8261         u64 last;
8262         int ret = 0;
8263
8264         root = root->fs_info->extent_root;
8265
8266         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8267
8268         btrfs_init_path(&path);
8269         key.objectid = last;
8270         key.offset = 0;
8271         key.type = BTRFS_EXTENT_ITEM_KEY;
8272         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8273         if (ret < 0)
8274                 goto out;
8275         ret = 0;
8276         while (1) {
8277                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8278                         ret = btrfs_next_leaf(root, &path);
8279                         if (ret < 0)
8280                                 goto out;
8281                         if (ret > 0) {
8282                                 ret = 0;
8283                                 break;
8284                         }
8285                 }
8286                 leaf = path.nodes[0];
8287                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8288                 if (key.objectid >= cache->key.offset + cache->key.objectid)
8289                         break;
8290                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8291                     key.type != BTRFS_METADATA_ITEM_KEY) {
8292                         path.slots[0]++;
8293                         continue;
8294                 }
8295
8296                 if (last == key.objectid) {
8297                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
8298                                 last = key.objectid + key.offset;
8299                         else
8300                                 last = key.objectid + root->fs_info->nodesize;
8301                         path.slots[0]++;
8302                         continue;
8303                 }
8304
8305                 ret = check_cache_range(root, cache, last,
8306                                         key.objectid - last);
8307                 if (ret)
8308                         break;
8309                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8310                         last = key.objectid + key.offset;
8311                 else
8312                         last = key.objectid + root->fs_info->nodesize;
8313                 path.slots[0]++;
8314         }
8315
8316         if (last < cache->key.objectid + cache->key.offset)
8317                 ret = check_cache_range(root, cache, last,
8318                                         cache->key.objectid +
8319                                         cache->key.offset - last);
8320
8321 out:
8322         btrfs_release_path(&path);
8323
8324         if (!ret &&
8325             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8326                 fprintf(stderr, "There are still entries left in the space "
8327                         "cache\n");
8328                 ret = -EINVAL;
8329         }
8330
8331         return ret;
8332 }
8333
8334 static int check_space_cache(struct btrfs_root *root)
8335 {
8336         struct btrfs_block_group_cache *cache;
8337         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8338         int ret;
8339         int error = 0;
8340
8341         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8342             btrfs_super_generation(root->fs_info->super_copy) !=
8343             btrfs_super_cache_generation(root->fs_info->super_copy)) {
8344                 printf("cache and super generation don't match, space cache "
8345                        "will be invalidated\n");
8346                 return 0;
8347         }
8348
8349         if (ctx.progress_enabled) {
8350                 ctx.tp = TASK_FREE_SPACE;
8351                 task_start(ctx.info);
8352         }
8353
8354         while (1) {
8355                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
8356                 if (!cache)
8357                         break;
8358
8359                 start = cache->key.objectid + cache->key.offset;
8360                 if (!cache->free_space_ctl) {
8361                         if (btrfs_init_free_space_ctl(cache,
8362                                                 root->fs_info->sectorsize)) {
8363                                 ret = -ENOMEM;
8364                                 break;
8365                         }
8366                 } else {
8367                         btrfs_remove_free_space_cache(cache);
8368                 }
8369
8370                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8371                         ret = exclude_super_stripes(root, cache);
8372                         if (ret) {
8373                                 fprintf(stderr, "could not exclude super stripes: %s\n",
8374                                         strerror(-ret));
8375                                 error++;
8376                                 continue;
8377                         }
8378                         ret = load_free_space_tree(root->fs_info, cache);
8379                         free_excluded_extents(root, cache);
8380                         if (ret < 0) {
8381                                 fprintf(stderr, "could not load free space tree: %s\n",
8382                                         strerror(-ret));
8383                                 error++;
8384                                 continue;
8385                         }
8386                         error += ret;
8387                 } else {
8388                         ret = load_free_space_cache(root->fs_info, cache);
8389                         if (!ret)
8390                                 continue;
8391                 }
8392
8393                 ret = verify_space_cache(root, cache);
8394                 if (ret) {
8395                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
8396                                 cache->key.objectid);
8397                         error++;
8398                 }
8399         }
8400
8401         task_stop(ctx.info);
8402
8403         return error ? -EINVAL : 0;
8404 }
8405
8406 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8407                         u64 num_bytes, unsigned long leaf_offset,
8408                         struct extent_buffer *eb) {
8409
8410         struct btrfs_fs_info *fs_info = root->fs_info;
8411         u64 offset = 0;
8412         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8413         char *data;
8414         unsigned long csum_offset;
8415         u32 csum;
8416         u32 csum_expected;
8417         u64 read_len;
8418         u64 data_checked = 0;
8419         u64 tmp;
8420         int ret = 0;
8421         int mirror;
8422         int num_copies;
8423
8424         if (num_bytes % fs_info->sectorsize)
8425                 return -EINVAL;
8426
8427         data = malloc(num_bytes);
8428         if (!data)
8429                 return -ENOMEM;
8430
8431         while (offset < num_bytes) {
8432                 mirror = 0;
8433 again:
8434                 read_len = num_bytes - offset;
8435                 /* read as much space once a time */
8436                 ret = read_extent_data(fs_info, data + offset,
8437                                 bytenr + offset, &read_len, mirror);
8438                 if (ret)
8439                         goto out;
8440                 data_checked = 0;
8441                 /* verify every 4k data's checksum */
8442                 while (data_checked < read_len) {
8443                         csum = ~(u32)0;
8444                         tmp = offset + data_checked;
8445
8446                         csum = btrfs_csum_data((char *)data + tmp,
8447                                                csum, fs_info->sectorsize);
8448                         btrfs_csum_final(csum, (u8 *)&csum);
8449
8450                         csum_offset = leaf_offset +
8451                                  tmp / fs_info->sectorsize * csum_size;
8452                         read_extent_buffer(eb, (char *)&csum_expected,
8453                                            csum_offset, csum_size);
8454                         /* try another mirror */
8455                         if (csum != csum_expected) {
8456                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8457                                                 mirror, bytenr + tmp,
8458                                                 csum, csum_expected);
8459                                 num_copies = btrfs_num_copies(root->fs_info,
8460                                                 bytenr, num_bytes);
8461                                 if (mirror < num_copies - 1) {
8462                                         mirror += 1;
8463                                         goto again;
8464                                 }
8465                         }
8466                         data_checked += fs_info->sectorsize;
8467                 }
8468                 offset += read_len;
8469         }
8470 out:
8471         free(data);
8472         return ret;
8473 }
8474
8475 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8476                                u64 num_bytes)
8477 {
8478         struct btrfs_path path;
8479         struct extent_buffer *leaf;
8480         struct btrfs_key key;
8481         int ret;
8482
8483         btrfs_init_path(&path);
8484         key.objectid = bytenr;
8485         key.type = BTRFS_EXTENT_ITEM_KEY;
8486         key.offset = (u64)-1;
8487
8488 again:
8489         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8490                                 0, 0);
8491         if (ret < 0) {
8492                 fprintf(stderr, "Error looking up extent record %d\n", ret);
8493                 btrfs_release_path(&path);
8494                 return ret;
8495         } else if (ret) {
8496                 if (path.slots[0] > 0) {
8497                         path.slots[0]--;
8498                 } else {
8499                         ret = btrfs_prev_leaf(root, &path);
8500                         if (ret < 0) {
8501                                 goto out;
8502                         } else if (ret > 0) {
8503                                 ret = 0;
8504                                 goto out;
8505                         }
8506                 }
8507         }
8508
8509         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8510
8511         /*
8512          * Block group items come before extent items if they have the same
8513          * bytenr, so walk back one more just in case.  Dear future traveller,
8514          * first congrats on mastering time travel.  Now if it's not too much
8515          * trouble could you go back to 2006 and tell Chris to make the
8516          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8517          * EXTENT_ITEM_KEY please?
8518          */
8519         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8520                 if (path.slots[0] > 0) {
8521                         path.slots[0]--;
8522                 } else {
8523                         ret = btrfs_prev_leaf(root, &path);
8524                         if (ret < 0) {
8525                                 goto out;
8526                         } else if (ret > 0) {
8527                                 ret = 0;
8528                                 goto out;
8529                         }
8530                 }
8531                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8532         }
8533
8534         while (num_bytes) {
8535                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8536                         ret = btrfs_next_leaf(root, &path);
8537                         if (ret < 0) {
8538                                 fprintf(stderr, "Error going to next leaf "
8539                                         "%d\n", ret);
8540                                 btrfs_release_path(&path);
8541                                 return ret;
8542                         } else if (ret) {
8543                                 break;
8544                         }
8545                 }
8546                 leaf = path.nodes[0];
8547                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8548                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8549                         path.slots[0]++;
8550                         continue;
8551                 }
8552                 if (key.objectid + key.offset < bytenr) {
8553                         path.slots[0]++;
8554                         continue;
8555                 }
8556                 if (key.objectid > bytenr + num_bytes)
8557                         break;
8558
8559                 if (key.objectid == bytenr) {
8560                         if (key.offset >= num_bytes) {
8561                                 num_bytes = 0;
8562                                 break;
8563                         }
8564                         num_bytes -= key.offset;
8565                         bytenr += key.offset;
8566                 } else if (key.objectid < bytenr) {
8567                         if (key.objectid + key.offset >= bytenr + num_bytes) {
8568                                 num_bytes = 0;
8569                                 break;
8570                         }
8571                         num_bytes = (bytenr + num_bytes) -
8572                                 (key.objectid + key.offset);
8573                         bytenr = key.objectid + key.offset;
8574                 } else {
8575                         if (key.objectid + key.offset < bytenr + num_bytes) {
8576                                 u64 new_start = key.objectid + key.offset;
8577                                 u64 new_bytes = bytenr + num_bytes - new_start;
8578
8579                                 /*
8580                                  * Weird case, the extent is in the middle of
8581                                  * our range, we'll have to search one side
8582                                  * and then the other.  Not sure if this happens
8583                                  * in real life, but no harm in coding it up
8584                                  * anyway just in case.
8585                                  */
8586                                 btrfs_release_path(&path);
8587                                 ret = check_extent_exists(root, new_start,
8588                                                           new_bytes);
8589                                 if (ret) {
8590                                         fprintf(stderr, "Right section didn't "
8591                                                 "have a record\n");
8592                                         break;
8593                                 }
8594                                 num_bytes = key.objectid - bytenr;
8595                                 goto again;
8596                         }
8597                         num_bytes = key.objectid - bytenr;
8598                 }
8599                 path.slots[0]++;
8600         }
8601         ret = 0;
8602
8603 out:
8604         if (num_bytes && !ret) {
8605                 fprintf(stderr, "There are no extents for csum range "
8606                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8607                 ret = 1;
8608         }
8609
8610         btrfs_release_path(&path);
8611         return ret;
8612 }
8613
8614 static int check_csums(struct btrfs_root *root)
8615 {
8616         struct btrfs_path path;
8617         struct extent_buffer *leaf;
8618         struct btrfs_key key;
8619         u64 offset = 0, num_bytes = 0;
8620         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8621         int errors = 0;
8622         int ret;
8623         u64 data_len;
8624         unsigned long leaf_offset;
8625
8626         root = root->fs_info->csum_root;
8627         if (!extent_buffer_uptodate(root->node)) {
8628                 fprintf(stderr, "No valid csum tree found\n");
8629                 return -ENOENT;
8630         }
8631
8632         btrfs_init_path(&path);
8633         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8634         key.type = BTRFS_EXTENT_CSUM_KEY;
8635         key.offset = 0;
8636         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8637         if (ret < 0) {
8638                 fprintf(stderr, "Error searching csum tree %d\n", ret);
8639                 btrfs_release_path(&path);
8640                 return ret;
8641         }
8642
8643         if (ret > 0 && path.slots[0])
8644                 path.slots[0]--;
8645         ret = 0;
8646
8647         while (1) {
8648                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8649                         ret = btrfs_next_leaf(root, &path);
8650                         if (ret < 0) {
8651                                 fprintf(stderr, "Error going to next leaf "
8652                                         "%d\n", ret);
8653                                 break;
8654                         }
8655                         if (ret)
8656                                 break;
8657                 }
8658                 leaf = path.nodes[0];
8659
8660                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8661                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
8662                         path.slots[0]++;
8663                         continue;
8664                 }
8665
8666                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8667                               csum_size) * root->fs_info->sectorsize;
8668                 if (!check_data_csum)
8669                         goto skip_csum_check;
8670                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8671                 ret = check_extent_csums(root, key.offset, data_len,
8672                                          leaf_offset, leaf);
8673                 if (ret)
8674                         break;
8675 skip_csum_check:
8676                 if (!num_bytes) {
8677                         offset = key.offset;
8678                 } else if (key.offset != offset + num_bytes) {
8679                         ret = check_extent_exists(root, offset, num_bytes);
8680                         if (ret) {
8681                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8682                                         "there is no extent record\n",
8683                                         offset, offset+num_bytes);
8684                                 errors++;
8685                         }
8686                         offset = key.offset;
8687                         num_bytes = 0;
8688                 }
8689                 num_bytes += data_len;
8690                 path.slots[0]++;
8691         }
8692
8693         btrfs_release_path(&path);
8694         return errors;
8695 }
8696
8697 static int is_dropped_key(struct btrfs_key *key,
8698                           struct btrfs_key *drop_key) {
8699         if (key->objectid < drop_key->objectid)
8700                 return 1;
8701         else if (key->objectid == drop_key->objectid) {
8702                 if (key->type < drop_key->type)
8703                         return 1;
8704                 else if (key->type == drop_key->type) {
8705                         if (key->offset < drop_key->offset)
8706                                 return 1;
8707                 }
8708         }
8709         return 0;
8710 }
8711
8712 /*
8713  * Here are the rules for FULL_BACKREF.
8714  *
8715  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8716  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8717  *      FULL_BACKREF set.
8718  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
8719  *    if it happened after the relocation occurred since we'll have dropped the
8720  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8721  *    have no real way to know for sure.
8722  *
8723  * We process the blocks one root at a time, and we start from the lowest root
8724  * objectid and go to the highest.  So we can just lookup the owner backref for
8725  * the record and if we don't find it then we know it doesn't exist and we have
8726  * a FULL BACKREF.
8727  *
8728  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
8729  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8730  * be set or not and then we can check later once we've gathered all the refs.
8731  */
8732 static int calc_extent_flag(struct cache_tree *extent_cache,
8733                            struct extent_buffer *buf,
8734                            struct root_item_record *ri,
8735                            u64 *flags)
8736 {
8737         struct extent_record *rec;
8738         struct cache_extent *cache;
8739         struct tree_backref *tback;
8740         u64 owner = 0;
8741
8742         cache = lookup_cache_extent(extent_cache, buf->start, 1);
8743         /* we have added this extent before */
8744         if (!cache)
8745                 return -ENOENT;
8746
8747         rec = container_of(cache, struct extent_record, cache);
8748
8749         /*
8750          * Except file/reloc tree, we can not have
8751          * FULL BACKREF MODE
8752          */
8753         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8754                 goto normal;
8755         /*
8756          * root node
8757          */
8758         if (buf->start == ri->bytenr)
8759                 goto normal;
8760
8761         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8762                 goto full_backref;
8763
8764         owner = btrfs_header_owner(buf);
8765         if (owner == ri->objectid)
8766                 goto normal;
8767
8768         tback = find_tree_backref(rec, 0, owner);
8769         if (!tback)
8770                 goto full_backref;
8771 normal:
8772         *flags = 0;
8773         if (rec->flag_block_full_backref != FLAG_UNSET &&
8774             rec->flag_block_full_backref != 0)
8775                 rec->bad_full_backref = 1;
8776         return 0;
8777 full_backref:
8778         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8779         if (rec->flag_block_full_backref != FLAG_UNSET &&
8780             rec->flag_block_full_backref != 1)
8781                 rec->bad_full_backref = 1;
8782         return 0;
8783 }
8784
8785 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8786 {
8787         fprintf(stderr, "Invalid key type(");
8788         print_key_type(stderr, 0, key_type);
8789         fprintf(stderr, ") found in root(");
8790         print_objectid(stderr, rootid, 0);
8791         fprintf(stderr, ")\n");
8792 }
8793
8794 /*
8795  * Check if the key is valid with its extent buffer.
8796  *
8797  * This is a early check in case invalid key exists in a extent buffer
8798  * This is not comprehensive yet, but should prevent wrong key/item passed
8799  * further
8800  */
8801 static int check_type_with_root(u64 rootid, u8 key_type)
8802 {
8803         switch (key_type) {
8804         /* Only valid in chunk tree */
8805         case BTRFS_DEV_ITEM_KEY:
8806         case BTRFS_CHUNK_ITEM_KEY:
8807                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8808                         goto err;
8809                 break;
8810         /* valid in csum and log tree */
8811         case BTRFS_CSUM_TREE_OBJECTID:
8812                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8813                       is_fstree(rootid)))
8814                         goto err;
8815                 break;
8816         case BTRFS_EXTENT_ITEM_KEY:
8817         case BTRFS_METADATA_ITEM_KEY:
8818         case BTRFS_BLOCK_GROUP_ITEM_KEY:
8819                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8820                         goto err;
8821                 break;
8822         case BTRFS_ROOT_ITEM_KEY:
8823                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8824                         goto err;
8825                 break;
8826         case BTRFS_DEV_EXTENT_KEY:
8827                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8828                         goto err;
8829                 break;
8830         }
8831         return 0;
8832 err:
8833         report_mismatch_key_root(key_type, rootid);
8834         return -EINVAL;
8835 }
8836
8837 static int run_next_block(struct btrfs_root *root,
8838                           struct block_info *bits,
8839                           int bits_nr,
8840                           u64 *last,
8841                           struct cache_tree *pending,
8842                           struct cache_tree *seen,
8843                           struct cache_tree *reada,
8844                           struct cache_tree *nodes,
8845                           struct cache_tree *extent_cache,
8846                           struct cache_tree *chunk_cache,
8847                           struct rb_root *dev_cache,
8848                           struct block_group_tree *block_group_cache,
8849                           struct device_extent_tree *dev_extent_cache,
8850                           struct root_item_record *ri)
8851 {
8852         struct btrfs_fs_info *fs_info = root->fs_info;
8853         struct extent_buffer *buf;
8854         struct extent_record *rec = NULL;
8855         u64 bytenr;
8856         u32 size;
8857         u64 parent;
8858         u64 owner;
8859         u64 flags;
8860         u64 ptr;
8861         u64 gen = 0;
8862         int ret = 0;
8863         int i;
8864         int nritems;
8865         struct btrfs_key key;
8866         struct cache_extent *cache;
8867         int reada_bits;
8868
8869         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8870                                     bits_nr, &reada_bits);
8871         if (nritems == 0)
8872                 return 1;
8873
8874         if (!reada_bits) {
8875                 for(i = 0; i < nritems; i++) {
8876                         ret = add_cache_extent(reada, bits[i].start,
8877                                                bits[i].size);
8878                         if (ret == -EEXIST)
8879                                 continue;
8880
8881                         /* fixme, get the parent transid */
8882                         readahead_tree_block(fs_info, bits[i].start, 0);
8883                 }
8884         }
8885         *last = bits[0].start;
8886         bytenr = bits[0].start;
8887         size = bits[0].size;
8888
8889         cache = lookup_cache_extent(pending, bytenr, size);
8890         if (cache) {
8891                 remove_cache_extent(pending, cache);
8892                 free(cache);
8893         }
8894         cache = lookup_cache_extent(reada, bytenr, size);
8895         if (cache) {
8896                 remove_cache_extent(reada, cache);
8897                 free(cache);
8898         }
8899         cache = lookup_cache_extent(nodes, bytenr, size);
8900         if (cache) {
8901                 remove_cache_extent(nodes, cache);
8902                 free(cache);
8903         }
8904         cache = lookup_cache_extent(extent_cache, bytenr, size);
8905         if (cache) {
8906                 rec = container_of(cache, struct extent_record, cache);
8907                 gen = rec->parent_generation;
8908         }
8909
8910         /* fixme, get the real parent transid */
8911         buf = read_tree_block(root->fs_info, bytenr, gen);
8912         if (!extent_buffer_uptodate(buf)) {
8913                 record_bad_block_io(root->fs_info,
8914                                     extent_cache, bytenr, size);
8915                 goto out;
8916         }
8917
8918         nritems = btrfs_header_nritems(buf);
8919
8920         flags = 0;
8921         if (!init_extent_tree) {
8922                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8923                                        btrfs_header_level(buf), 1, NULL,
8924                                        &flags);
8925                 if (ret < 0) {
8926                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8927                         if (ret < 0) {
8928                                 fprintf(stderr, "Couldn't calc extent flags\n");
8929                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8930                         }
8931                 }
8932         } else {
8933                 flags = 0;
8934                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8935                 if (ret < 0) {
8936                         fprintf(stderr, "Couldn't calc extent flags\n");
8937                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8938                 }
8939         }
8940
8941         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8942                 if (ri != NULL &&
8943                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8944                     ri->objectid == btrfs_header_owner(buf)) {
8945                         /*
8946                          * Ok we got to this block from it's original owner and
8947                          * we have FULL_BACKREF set.  Relocation can leave
8948                          * converted blocks over so this is altogether possible,
8949                          * however it's not possible if the generation > the
8950                          * last snapshot, so check for this case.
8951                          */
8952                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8953                             btrfs_header_generation(buf) > ri->last_snapshot) {
8954                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8955                                 rec->bad_full_backref = 1;
8956                         }
8957                 }
8958         } else {
8959                 if (ri != NULL &&
8960                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8961                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8962                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8963                         rec->bad_full_backref = 1;
8964                 }
8965         }
8966
8967         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8968                 rec->flag_block_full_backref = 1;
8969                 parent = bytenr;
8970                 owner = 0;
8971         } else {
8972                 rec->flag_block_full_backref = 0;
8973                 parent = 0;
8974                 owner = btrfs_header_owner(buf);
8975         }
8976
8977         ret = check_block(root, extent_cache, buf, flags);
8978         if (ret)
8979                 goto out;
8980
8981         if (btrfs_is_leaf(buf)) {
8982                 btree_space_waste += btrfs_leaf_free_space(root, buf);
8983                 for (i = 0; i < nritems; i++) {
8984                         struct btrfs_file_extent_item *fi;
8985                         btrfs_item_key_to_cpu(buf, &key, i);
8986                         /*
8987                          * Check key type against the leaf owner.
8988                          * Could filter quite a lot of early error if
8989                          * owner is correct
8990                          */
8991                         if (check_type_with_root(btrfs_header_owner(buf),
8992                                                  key.type)) {
8993                                 fprintf(stderr, "ignoring invalid key\n");
8994                                 continue;
8995                         }
8996                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8997                                 process_extent_item(root, extent_cache, buf,
8998                                                     i);
8999                                 continue;
9000                         }
9001                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9002                                 process_extent_item(root, extent_cache, buf,
9003                                                     i);
9004                                 continue;
9005                         }
9006                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
9007                                 total_csum_bytes +=
9008                                         btrfs_item_size_nr(buf, i);
9009                                 continue;
9010                         }
9011                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
9012                                 process_chunk_item(chunk_cache, &key, buf, i);
9013                                 continue;
9014                         }
9015                         if (key.type == BTRFS_DEV_ITEM_KEY) {
9016                                 process_device_item(dev_cache, &key, buf, i);
9017                                 continue;
9018                         }
9019                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9020                                 process_block_group_item(block_group_cache,
9021                                         &key, buf, i);
9022                                 continue;
9023                         }
9024                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
9025                                 process_device_extent_item(dev_extent_cache,
9026                                         &key, buf, i);
9027                                 continue;
9028
9029                         }
9030                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
9031 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
9032                                 process_extent_ref_v0(extent_cache, buf, i);
9033 #else
9034                                 BUG();
9035 #endif
9036                                 continue;
9037                         }
9038
9039                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
9040                                 ret = add_tree_backref(extent_cache,
9041                                                 key.objectid, 0, key.offset, 0);
9042                                 if (ret < 0)
9043                                         error(
9044                                 "add_tree_backref failed (leaf tree block): %s",
9045                                               strerror(-ret));
9046                                 continue;
9047                         }
9048                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
9049                                 ret = add_tree_backref(extent_cache,
9050                                                 key.objectid, key.offset, 0, 0);
9051                                 if (ret < 0)
9052                                         error(
9053                                 "add_tree_backref failed (leaf shared block): %s",
9054                                               strerror(-ret));
9055                                 continue;
9056                         }
9057                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
9058                                 struct btrfs_extent_data_ref *ref;
9059                                 ref = btrfs_item_ptr(buf, i,
9060                                                 struct btrfs_extent_data_ref);
9061                                 add_data_backref(extent_cache,
9062                                         key.objectid, 0,
9063                                         btrfs_extent_data_ref_root(buf, ref),
9064                                         btrfs_extent_data_ref_objectid(buf,
9065                                                                        ref),
9066                                         btrfs_extent_data_ref_offset(buf, ref),
9067                                         btrfs_extent_data_ref_count(buf, ref),
9068                                         0, root->fs_info->sectorsize);
9069                                 continue;
9070                         }
9071                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
9072                                 struct btrfs_shared_data_ref *ref;
9073                                 ref = btrfs_item_ptr(buf, i,
9074                                                 struct btrfs_shared_data_ref);
9075                                 add_data_backref(extent_cache,
9076                                         key.objectid, key.offset, 0, 0, 0,
9077                                         btrfs_shared_data_ref_count(buf, ref),
9078                                         0, root->fs_info->sectorsize);
9079                                 continue;
9080                         }
9081                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
9082                                 struct bad_item *bad;
9083
9084                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
9085                                         continue;
9086                                 if (!owner)
9087                                         continue;
9088                                 bad = malloc(sizeof(struct bad_item));
9089                                 if (!bad)
9090                                         continue;
9091                                 INIT_LIST_HEAD(&bad->list);
9092                                 memcpy(&bad->key, &key,
9093                                        sizeof(struct btrfs_key));
9094                                 bad->root_id = owner;
9095                                 list_add_tail(&bad->list, &delete_items);
9096                                 continue;
9097                         }
9098                         if (key.type != BTRFS_EXTENT_DATA_KEY)
9099                                 continue;
9100                         fi = btrfs_item_ptr(buf, i,
9101                                             struct btrfs_file_extent_item);
9102                         if (btrfs_file_extent_type(buf, fi) ==
9103                             BTRFS_FILE_EXTENT_INLINE)
9104                                 continue;
9105                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
9106                                 continue;
9107
9108                         data_bytes_allocated +=
9109                                 btrfs_file_extent_disk_num_bytes(buf, fi);
9110                         if (data_bytes_allocated < root->fs_info->sectorsize) {
9111                                 abort();
9112                         }
9113                         data_bytes_referenced +=
9114                                 btrfs_file_extent_num_bytes(buf, fi);
9115                         add_data_backref(extent_cache,
9116                                 btrfs_file_extent_disk_bytenr(buf, fi),
9117                                 parent, owner, key.objectid, key.offset -
9118                                 btrfs_file_extent_offset(buf, fi), 1, 1,
9119                                 btrfs_file_extent_disk_num_bytes(buf, fi));
9120                 }
9121         } else {
9122                 int level;
9123                 struct btrfs_key first_key;
9124
9125                 first_key.objectid = 0;
9126
9127                 if (nritems > 0)
9128                         btrfs_item_key_to_cpu(buf, &first_key, 0);
9129                 level = btrfs_header_level(buf);
9130                 for (i = 0; i < nritems; i++) {
9131                         struct extent_record tmpl;
9132
9133                         ptr = btrfs_node_blockptr(buf, i);
9134                         size = root->fs_info->nodesize;
9135                         btrfs_node_key_to_cpu(buf, &key, i);
9136                         if (ri != NULL) {
9137                                 if ((level == ri->drop_level)
9138                                     && is_dropped_key(&key, &ri->drop_key)) {
9139                                         continue;
9140                                 }
9141                         }
9142
9143                         memset(&tmpl, 0, sizeof(tmpl));
9144                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9145                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9146                         tmpl.start = ptr;
9147                         tmpl.nr = size;
9148                         tmpl.refs = 1;
9149                         tmpl.metadata = 1;
9150                         tmpl.max_size = size;
9151                         ret = add_extent_rec(extent_cache, &tmpl);
9152                         if (ret < 0)
9153                                 goto out;
9154
9155                         ret = add_tree_backref(extent_cache, ptr, parent,
9156                                         owner, 1);
9157                         if (ret < 0) {
9158                                 error(
9159                                 "add_tree_backref failed (non-leaf block): %s",
9160                                       strerror(-ret));
9161                                 continue;
9162                         }
9163
9164                         if (level > 1) {
9165                                 add_pending(nodes, seen, ptr, size);
9166                         } else {
9167                                 add_pending(pending, seen, ptr, size);
9168                         }
9169                 }
9170                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
9171                                       nritems) * sizeof(struct btrfs_key_ptr);
9172         }
9173         total_btree_bytes += buf->len;
9174         if (fs_root_objectid(btrfs_header_owner(buf)))
9175                 total_fs_tree_bytes += buf->len;
9176         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9177                 total_extent_tree_bytes += buf->len;
9178 out:
9179         free_extent_buffer(buf);
9180         return ret;
9181 }
9182
9183 static int add_root_to_pending(struct extent_buffer *buf,
9184                                struct cache_tree *extent_cache,
9185                                struct cache_tree *pending,
9186                                struct cache_tree *seen,
9187                                struct cache_tree *nodes,
9188                                u64 objectid)
9189 {
9190         struct extent_record tmpl;
9191         int ret;
9192
9193         if (btrfs_header_level(buf) > 0)
9194                 add_pending(nodes, seen, buf->start, buf->len);
9195         else
9196                 add_pending(pending, seen, buf->start, buf->len);
9197
9198         memset(&tmpl, 0, sizeof(tmpl));
9199         tmpl.start = buf->start;
9200         tmpl.nr = buf->len;
9201         tmpl.is_root = 1;
9202         tmpl.refs = 1;
9203         tmpl.metadata = 1;
9204         tmpl.max_size = buf->len;
9205         add_extent_rec(extent_cache, &tmpl);
9206
9207         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9208             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9209                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9210                                 0, 1);
9211         else
9212                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9213                                 1);
9214         return ret;
9215 }
9216
9217 /* as we fix the tree, we might be deleting blocks that
9218  * we're tracking for repair.  This hook makes sure we
9219  * remove any backrefs for blocks as we are fixing them.
9220  */
9221 static int free_extent_hook(struct btrfs_trans_handle *trans,
9222                             struct btrfs_root *root,
9223                             u64 bytenr, u64 num_bytes, u64 parent,
9224                             u64 root_objectid, u64 owner, u64 offset,
9225                             int refs_to_drop)
9226 {
9227         struct extent_record *rec;
9228         struct cache_extent *cache;
9229         int is_data;
9230         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9231
9232         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9233         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9234         if (!cache)
9235                 return 0;
9236
9237         rec = container_of(cache, struct extent_record, cache);
9238         if (is_data) {
9239                 struct data_backref *back;
9240                 back = find_data_backref(rec, parent, root_objectid, owner,
9241                                          offset, 1, bytenr, num_bytes);
9242                 if (!back)
9243                         goto out;
9244                 if (back->node.found_ref) {
9245                         back->found_ref -= refs_to_drop;
9246                         if (rec->refs)
9247                                 rec->refs -= refs_to_drop;
9248                 }
9249                 if (back->node.found_extent_tree) {
9250                         back->num_refs -= refs_to_drop;
9251                         if (rec->extent_item_refs)
9252                                 rec->extent_item_refs -= refs_to_drop;
9253                 }
9254                 if (back->found_ref == 0)
9255                         back->node.found_ref = 0;
9256                 if (back->num_refs == 0)
9257                         back->node.found_extent_tree = 0;
9258
9259                 if (!back->node.found_extent_tree && back->node.found_ref) {
9260                         rb_erase(&back->node.node, &rec->backref_tree);
9261                         free(back);
9262                 }
9263         } else {
9264                 struct tree_backref *back;
9265                 back = find_tree_backref(rec, parent, root_objectid);
9266                 if (!back)
9267                         goto out;
9268                 if (back->node.found_ref) {
9269                         if (rec->refs)
9270                                 rec->refs--;
9271                         back->node.found_ref = 0;
9272                 }
9273                 if (back->node.found_extent_tree) {
9274                         if (rec->extent_item_refs)
9275                                 rec->extent_item_refs--;
9276                         back->node.found_extent_tree = 0;
9277                 }
9278                 if (!back->node.found_extent_tree && back->node.found_ref) {
9279                         rb_erase(&back->node.node, &rec->backref_tree);
9280                         free(back);
9281                 }
9282         }
9283         maybe_free_extent_rec(extent_cache, rec);
9284 out:
9285         return 0;
9286 }
9287
9288 static int delete_extent_records(struct btrfs_trans_handle *trans,
9289                                  struct btrfs_root *root,
9290                                  struct btrfs_path *path,
9291                                  u64 bytenr)
9292 {
9293         struct btrfs_key key;
9294         struct btrfs_key found_key;
9295         struct extent_buffer *leaf;
9296         int ret;
9297         int slot;
9298
9299
9300         key.objectid = bytenr;
9301         key.type = (u8)-1;
9302         key.offset = (u64)-1;
9303
9304         while(1) {
9305                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9306                                         &key, path, 0, 1);
9307                 if (ret < 0)
9308                         break;
9309
9310                 if (ret > 0) {
9311                         ret = 0;
9312                         if (path->slots[0] == 0)
9313                                 break;
9314                         path->slots[0]--;
9315                 }
9316                 ret = 0;
9317
9318                 leaf = path->nodes[0];
9319                 slot = path->slots[0];
9320
9321                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9322                 if (found_key.objectid != bytenr)
9323                         break;
9324
9325                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9326                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
9327                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9328                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9329                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9330                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9331                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9332                         btrfs_release_path(path);
9333                         if (found_key.type == 0) {
9334                                 if (found_key.offset == 0)
9335                                         break;
9336                                 key.offset = found_key.offset - 1;
9337                                 key.type = found_key.type;
9338                         }
9339                         key.type = found_key.type - 1;
9340                         key.offset = (u64)-1;
9341                         continue;
9342                 }
9343
9344                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9345                         found_key.objectid, found_key.type, found_key.offset);
9346
9347                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9348                 if (ret)
9349                         break;
9350                 btrfs_release_path(path);
9351
9352                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9353                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
9354                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9355                                 found_key.offset : root->fs_info->nodesize;
9356
9357                         ret = btrfs_update_block_group(root, bytenr,
9358                                                        bytes, 0, 0);
9359                         if (ret)
9360                                 break;
9361                 }
9362         }
9363
9364         btrfs_release_path(path);
9365         return ret;
9366 }
9367
9368 /*
9369  * for a single backref, this will allocate a new extent
9370  * and add the backref to it.
9371  */
9372 static int record_extent(struct btrfs_trans_handle *trans,
9373                          struct btrfs_fs_info *info,
9374                          struct btrfs_path *path,
9375                          struct extent_record *rec,
9376                          struct extent_backref *back,
9377                          int allocated, u64 flags)
9378 {
9379         int ret = 0;
9380         struct btrfs_root *extent_root = info->extent_root;
9381         struct extent_buffer *leaf;
9382         struct btrfs_key ins_key;
9383         struct btrfs_extent_item *ei;
9384         struct data_backref *dback;
9385         struct btrfs_tree_block_info *bi;
9386
9387         if (!back->is_data)
9388                 rec->max_size = max_t(u64, rec->max_size,
9389                                     info->nodesize);
9390
9391         if (!allocated) {
9392                 u32 item_size = sizeof(*ei);
9393
9394                 if (!back->is_data)
9395                         item_size += sizeof(*bi);
9396
9397                 ins_key.objectid = rec->start;
9398                 ins_key.offset = rec->max_size;
9399                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9400
9401                 ret = btrfs_insert_empty_item(trans, extent_root, path,
9402                                         &ins_key, item_size);
9403                 if (ret)
9404                         goto fail;
9405
9406                 leaf = path->nodes[0];
9407                 ei = btrfs_item_ptr(leaf, path->slots[0],
9408                                     struct btrfs_extent_item);
9409
9410                 btrfs_set_extent_refs(leaf, ei, 0);
9411                 btrfs_set_extent_generation(leaf, ei, rec->generation);
9412
9413                 if (back->is_data) {
9414                         btrfs_set_extent_flags(leaf, ei,
9415                                                BTRFS_EXTENT_FLAG_DATA);
9416                 } else {
9417                         struct btrfs_disk_key copy_key;;
9418
9419                         bi = (struct btrfs_tree_block_info *)(ei + 1);
9420                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
9421                                              sizeof(*bi));
9422
9423                         btrfs_set_disk_key_objectid(&copy_key,
9424                                                     rec->info_objectid);
9425                         btrfs_set_disk_key_type(&copy_key, 0);
9426                         btrfs_set_disk_key_offset(&copy_key, 0);
9427
9428                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9429                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
9430
9431                         btrfs_set_extent_flags(leaf, ei,
9432                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9433                 }
9434
9435                 btrfs_mark_buffer_dirty(leaf);
9436                 ret = btrfs_update_block_group(extent_root, rec->start,
9437                                                rec->max_size, 1, 0);
9438                 if (ret)
9439                         goto fail;
9440                 btrfs_release_path(path);
9441         }
9442
9443         if (back->is_data) {
9444                 u64 parent;
9445                 int i;
9446
9447                 dback = to_data_backref(back);
9448                 if (back->full_backref)
9449                         parent = dback->parent;
9450                 else
9451                         parent = 0;
9452
9453                 for (i = 0; i < dback->found_ref; i++) {
9454                         /* if parent != 0, we're doing a full backref
9455                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9456                          * just makes the backref allocator create a data
9457                          * backref
9458                          */
9459                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
9460                                                    rec->start, rec->max_size,
9461                                                    parent,
9462                                                    dback->root,
9463                                                    parent ?
9464                                                    BTRFS_FIRST_FREE_OBJECTID :
9465                                                    dback->owner,
9466                                                    dback->offset);
9467                         if (ret)
9468                                 break;
9469                 }
9470                 fprintf(stderr, "adding new data backref"
9471                                 " on %llu %s %llu owner %llu"
9472                                 " offset %llu found %d\n",
9473                                 (unsigned long long)rec->start,
9474                                 back->full_backref ?
9475                                 "parent" : "root",
9476                                 back->full_backref ?
9477                                 (unsigned long long)parent :
9478                                 (unsigned long long)dback->root,
9479                                 (unsigned long long)dback->owner,
9480                                 (unsigned long long)dback->offset,
9481                                 dback->found_ref);
9482         } else {
9483                 u64 parent;
9484                 struct tree_backref *tback;
9485
9486                 tback = to_tree_backref(back);
9487                 if (back->full_backref)
9488                         parent = tback->parent;
9489                 else
9490                         parent = 0;
9491
9492                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9493                                            rec->start, rec->max_size,
9494                                            parent, tback->root, 0, 0);
9495                 fprintf(stderr, "adding new tree backref on "
9496                         "start %llu len %llu parent %llu root %llu\n",
9497                         rec->start, rec->max_size, parent, tback->root);
9498         }
9499 fail:
9500         btrfs_release_path(path);
9501         return ret;
9502 }
9503
9504 static struct extent_entry *find_entry(struct list_head *entries,
9505                                        u64 bytenr, u64 bytes)
9506 {
9507         struct extent_entry *entry = NULL;
9508
9509         list_for_each_entry(entry, entries, list) {
9510                 if (entry->bytenr == bytenr && entry->bytes == bytes)
9511                         return entry;
9512         }
9513
9514         return NULL;
9515 }
9516
9517 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9518 {
9519         struct extent_entry *entry, *best = NULL, *prev = NULL;
9520
9521         list_for_each_entry(entry, entries, list) {
9522                 /*
9523                  * If there are as many broken entries as entries then we know
9524                  * not to trust this particular entry.
9525                  */
9526                 if (entry->broken == entry->count)
9527                         continue;
9528
9529                 /*
9530                  * Special case, when there are only two entries and 'best' is
9531                  * the first one
9532                  */
9533                 if (!prev) {
9534                         best = entry;
9535                         prev = entry;
9536                         continue;
9537                 }
9538
9539                 /*
9540                  * If our current entry == best then we can't be sure our best
9541                  * is really the best, so we need to keep searching.
9542                  */
9543                 if (best && best->count == entry->count) {
9544                         prev = entry;
9545                         best = NULL;
9546                         continue;
9547                 }
9548
9549                 /* Prev == entry, not good enough, have to keep searching */
9550                 if (!prev->broken && prev->count == entry->count)
9551                         continue;
9552
9553                 if (!best)
9554                         best = (prev->count > entry->count) ? prev : entry;
9555                 else if (best->count < entry->count)
9556                         best = entry;
9557                 prev = entry;
9558         }
9559
9560         return best;
9561 }
9562
9563 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9564                       struct data_backref *dback, struct extent_entry *entry)
9565 {
9566         struct btrfs_trans_handle *trans;
9567         struct btrfs_root *root;
9568         struct btrfs_file_extent_item *fi;
9569         struct extent_buffer *leaf;
9570         struct btrfs_key key;
9571         u64 bytenr, bytes;
9572         int ret, err;
9573
9574         key.objectid = dback->root;
9575         key.type = BTRFS_ROOT_ITEM_KEY;
9576         key.offset = (u64)-1;
9577         root = btrfs_read_fs_root(info, &key);
9578         if (IS_ERR(root)) {
9579                 fprintf(stderr, "Couldn't find root for our ref\n");
9580                 return -EINVAL;
9581         }
9582
9583         /*
9584          * The backref points to the original offset of the extent if it was
9585          * split, so we need to search down to the offset we have and then walk
9586          * forward until we find the backref we're looking for.
9587          */
9588         key.objectid = dback->owner;
9589         key.type = BTRFS_EXTENT_DATA_KEY;
9590         key.offset = dback->offset;
9591         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9592         if (ret < 0) {
9593                 fprintf(stderr, "Error looking up ref %d\n", ret);
9594                 return ret;
9595         }
9596
9597         while (1) {
9598                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9599                         ret = btrfs_next_leaf(root, path);
9600                         if (ret) {
9601                                 fprintf(stderr, "Couldn't find our ref, next\n");
9602                                 return -EINVAL;
9603                         }
9604                 }
9605                 leaf = path->nodes[0];
9606                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9607                 if (key.objectid != dback->owner ||
9608                     key.type != BTRFS_EXTENT_DATA_KEY) {
9609                         fprintf(stderr, "Couldn't find our ref, search\n");
9610                         return -EINVAL;
9611                 }
9612                 fi = btrfs_item_ptr(leaf, path->slots[0],
9613                                     struct btrfs_file_extent_item);
9614                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9615                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9616
9617                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9618                         break;
9619                 path->slots[0]++;
9620         }
9621
9622         btrfs_release_path(path);
9623
9624         trans = btrfs_start_transaction(root, 1);
9625         if (IS_ERR(trans))
9626                 return PTR_ERR(trans);
9627
9628         /*
9629          * Ok we have the key of the file extent we want to fix, now we can cow
9630          * down to the thing and fix it.
9631          */
9632         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9633         if (ret < 0) {
9634                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9635                         key.objectid, key.type, key.offset, ret);
9636                 goto out;
9637         }
9638         if (ret > 0) {
9639                 fprintf(stderr, "Well that's odd, we just found this key "
9640                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9641                         key.offset);
9642                 ret = -EINVAL;
9643                 goto out;
9644         }
9645         leaf = path->nodes[0];
9646         fi = btrfs_item_ptr(leaf, path->slots[0],
9647                             struct btrfs_file_extent_item);
9648
9649         if (btrfs_file_extent_compression(leaf, fi) &&
9650             dback->disk_bytenr != entry->bytenr) {
9651                 fprintf(stderr, "Ref doesn't match the record start and is "
9652                         "compressed, please take a btrfs-image of this file "
9653                         "system and send it to a btrfs developer so they can "
9654                         "complete this functionality for bytenr %Lu\n",
9655                         dback->disk_bytenr);
9656                 ret = -EINVAL;
9657                 goto out;
9658         }
9659
9660         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9661                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9662         } else if (dback->disk_bytenr > entry->bytenr) {
9663                 u64 off_diff, offset;
9664
9665                 off_diff = dback->disk_bytenr - entry->bytenr;
9666                 offset = btrfs_file_extent_offset(leaf, fi);
9667                 if (dback->disk_bytenr + offset +
9668                     btrfs_file_extent_num_bytes(leaf, fi) >
9669                     entry->bytenr + entry->bytes) {
9670                         fprintf(stderr, "Ref is past the entry end, please "
9671                                 "take a btrfs-image of this file system and "
9672                                 "send it to a btrfs developer, ref %Lu\n",
9673                                 dback->disk_bytenr);
9674                         ret = -EINVAL;
9675                         goto out;
9676                 }
9677                 offset += off_diff;
9678                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9679                 btrfs_set_file_extent_offset(leaf, fi, offset);
9680         } else if (dback->disk_bytenr < entry->bytenr) {
9681                 u64 offset;
9682
9683                 offset = btrfs_file_extent_offset(leaf, fi);
9684                 if (dback->disk_bytenr + offset < entry->bytenr) {
9685                         fprintf(stderr, "Ref is before the entry start, please"
9686                                 " take a btrfs-image of this file system and "
9687                                 "send it to a btrfs developer, ref %Lu\n",
9688                                 dback->disk_bytenr);
9689                         ret = -EINVAL;
9690                         goto out;
9691                 }
9692
9693                 offset += dback->disk_bytenr;
9694                 offset -= entry->bytenr;
9695                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9696                 btrfs_set_file_extent_offset(leaf, fi, offset);
9697         }
9698
9699         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9700
9701         /*
9702          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9703          * only do this if we aren't using compression, otherwise it's a
9704          * trickier case.
9705          */
9706         if (!btrfs_file_extent_compression(leaf, fi))
9707                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9708         else
9709                 printf("ram bytes may be wrong?\n");
9710         btrfs_mark_buffer_dirty(leaf);
9711 out:
9712         err = btrfs_commit_transaction(trans, root);
9713         btrfs_release_path(path);
9714         return ret ? ret : err;
9715 }
9716
9717 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9718                            struct extent_record *rec)
9719 {
9720         struct extent_backref *back, *tmp;
9721         struct data_backref *dback;
9722         struct extent_entry *entry, *best = NULL;
9723         LIST_HEAD(entries);
9724         int nr_entries = 0;
9725         int broken_entries = 0;
9726         int ret = 0;
9727         short mismatch = 0;
9728
9729         /*
9730          * Metadata is easy and the backrefs should always agree on bytenr and
9731          * size, if not we've got bigger issues.
9732          */
9733         if (rec->metadata)
9734                 return 0;
9735
9736         rbtree_postorder_for_each_entry_safe(back, tmp,
9737                                              &rec->backref_tree, node) {
9738                 if (back->full_backref || !back->is_data)
9739                         continue;
9740
9741                 dback = to_data_backref(back);
9742
9743                 /*
9744                  * We only pay attention to backrefs that we found a real
9745                  * backref for.
9746                  */
9747                 if (dback->found_ref == 0)
9748                         continue;
9749
9750                 /*
9751                  * For now we only catch when the bytes don't match, not the
9752                  * bytenr.  We can easily do this at the same time, but I want
9753                  * to have a fs image to test on before we just add repair
9754                  * functionality willy-nilly so we know we won't screw up the
9755                  * repair.
9756                  */
9757
9758                 entry = find_entry(&entries, dback->disk_bytenr,
9759                                    dback->bytes);
9760                 if (!entry) {
9761                         entry = malloc(sizeof(struct extent_entry));
9762                         if (!entry) {
9763                                 ret = -ENOMEM;
9764                                 goto out;
9765                         }
9766                         memset(entry, 0, sizeof(*entry));
9767                         entry->bytenr = dback->disk_bytenr;
9768                         entry->bytes = dback->bytes;
9769                         list_add_tail(&entry->list, &entries);
9770                         nr_entries++;
9771                 }
9772
9773                 /*
9774                  * If we only have on entry we may think the entries agree when
9775                  * in reality they don't so we have to do some extra checking.
9776                  */
9777                 if (dback->disk_bytenr != rec->start ||
9778                     dback->bytes != rec->nr || back->broken)
9779                         mismatch = 1;
9780
9781                 if (back->broken) {
9782                         entry->broken++;
9783                         broken_entries++;
9784                 }
9785
9786                 entry->count++;
9787         }
9788
9789         /* Yay all the backrefs agree, carry on good sir */
9790         if (nr_entries <= 1 && !mismatch)
9791                 goto out;
9792
9793         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9794                 "%Lu\n", rec->start);
9795
9796         /*
9797          * First we want to see if the backrefs can agree amongst themselves who
9798          * is right, so figure out which one of the entries has the highest
9799          * count.
9800          */
9801         best = find_most_right_entry(&entries);
9802
9803         /*
9804          * Ok so we may have an even split between what the backrefs think, so
9805          * this is where we use the extent ref to see what it thinks.
9806          */
9807         if (!best) {
9808                 entry = find_entry(&entries, rec->start, rec->nr);
9809                 if (!entry && (!broken_entries || !rec->found_rec)) {
9810                         fprintf(stderr, "Backrefs don't agree with each other "
9811                                 "and extent record doesn't agree with anybody,"
9812                                 " so we can't fix bytenr %Lu bytes %Lu\n",
9813                                 rec->start, rec->nr);
9814                         ret = -EINVAL;
9815                         goto out;
9816                 } else if (!entry) {
9817                         /*
9818                          * Ok our backrefs were broken, we'll assume this is the
9819                          * correct value and add an entry for this range.
9820                          */
9821                         entry = malloc(sizeof(struct extent_entry));
9822                         if (!entry) {
9823                                 ret = -ENOMEM;
9824                                 goto out;
9825                         }
9826                         memset(entry, 0, sizeof(*entry));
9827                         entry->bytenr = rec->start;
9828                         entry->bytes = rec->nr;
9829                         list_add_tail(&entry->list, &entries);
9830                         nr_entries++;
9831                 }
9832                 entry->count++;
9833                 best = find_most_right_entry(&entries);
9834                 if (!best) {
9835                         fprintf(stderr, "Backrefs and extent record evenly "
9836                                 "split on who is right, this is going to "
9837                                 "require user input to fix bytenr %Lu bytes "
9838                                 "%Lu\n", rec->start, rec->nr);
9839                         ret = -EINVAL;
9840                         goto out;
9841                 }
9842         }
9843
9844         /*
9845          * I don't think this can happen currently as we'll abort() if we catch
9846          * this case higher up, but in case somebody removes that we still can't
9847          * deal with it properly here yet, so just bail out of that's the case.
9848          */
9849         if (best->bytenr != rec->start) {
9850                 fprintf(stderr, "Extent start and backref starts don't match, "
9851                         "please use btrfs-image on this file system and send "
9852                         "it to a btrfs developer so they can make fsck fix "
9853                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
9854                         rec->start, rec->nr);
9855                 ret = -EINVAL;
9856                 goto out;
9857         }
9858
9859         /*
9860          * Ok great we all agreed on an extent record, let's go find the real
9861          * references and fix up the ones that don't match.
9862          */
9863         rbtree_postorder_for_each_entry_safe(back, tmp,
9864                                              &rec->backref_tree, node) {
9865                 if (back->full_backref || !back->is_data)
9866                         continue;
9867
9868                 dback = to_data_backref(back);
9869
9870                 /*
9871                  * Still ignoring backrefs that don't have a real ref attached
9872                  * to them.
9873                  */
9874                 if (dback->found_ref == 0)
9875                         continue;
9876
9877                 if (dback->bytes == best->bytes &&
9878                     dback->disk_bytenr == best->bytenr)
9879                         continue;
9880
9881                 ret = repair_ref(info, path, dback, best);
9882                 if (ret)
9883                         goto out;
9884         }
9885
9886         /*
9887          * Ok we messed with the actual refs, which means we need to drop our
9888          * entire cache and go back and rescan.  I know this is a huge pain and
9889          * adds a lot of extra work, but it's the only way to be safe.  Once all
9890          * the backrefs agree we may not need to do anything to the extent
9891          * record itself.
9892          */
9893         ret = -EAGAIN;
9894 out:
9895         while (!list_empty(&entries)) {
9896                 entry = list_entry(entries.next, struct extent_entry, list);
9897                 list_del_init(&entry->list);
9898                 free(entry);
9899         }
9900         return ret;
9901 }
9902
9903 static int process_duplicates(struct cache_tree *extent_cache,
9904                               struct extent_record *rec)
9905 {
9906         struct extent_record *good, *tmp;
9907         struct cache_extent *cache;
9908         int ret;
9909
9910         /*
9911          * If we found a extent record for this extent then return, or if we
9912          * have more than one duplicate we are likely going to need to delete
9913          * something.
9914          */
9915         if (rec->found_rec || rec->num_duplicates > 1)
9916                 return 0;
9917
9918         /* Shouldn't happen but just in case */
9919         BUG_ON(!rec->num_duplicates);
9920
9921         /*
9922          * So this happens if we end up with a backref that doesn't match the
9923          * actual extent entry.  So either the backref is bad or the extent
9924          * entry is bad.  Either way we want to have the extent_record actually
9925          * reflect what we found in the extent_tree, so we need to take the
9926          * duplicate out and use that as the extent_record since the only way we
9927          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9928          */
9929         remove_cache_extent(extent_cache, &rec->cache);
9930
9931         good = to_extent_record(rec->dups.next);
9932         list_del_init(&good->list);
9933         INIT_LIST_HEAD(&good->backrefs);
9934         INIT_LIST_HEAD(&good->dups);
9935         good->cache.start = good->start;
9936         good->cache.size = good->nr;
9937         good->content_checked = 0;
9938         good->owner_ref_checked = 0;
9939         good->num_duplicates = 0;
9940         good->refs = rec->refs;
9941         list_splice_init(&rec->backrefs, &good->backrefs);
9942         while (1) {
9943                 cache = lookup_cache_extent(extent_cache, good->start,
9944                                             good->nr);
9945                 if (!cache)
9946                         break;
9947                 tmp = container_of(cache, struct extent_record, cache);
9948
9949                 /*
9950                  * If we find another overlapping extent and it's found_rec is
9951                  * set then it's a duplicate and we need to try and delete
9952                  * something.
9953                  */
9954                 if (tmp->found_rec || tmp->num_duplicates > 0) {
9955                         if (list_empty(&good->list))
9956                                 list_add_tail(&good->list,
9957                                               &duplicate_extents);
9958                         good->num_duplicates += tmp->num_duplicates + 1;
9959                         list_splice_init(&tmp->dups, &good->dups);
9960                         list_del_init(&tmp->list);
9961                         list_add_tail(&tmp->list, &good->dups);
9962                         remove_cache_extent(extent_cache, &tmp->cache);
9963                         continue;
9964                 }
9965
9966                 /*
9967                  * Ok we have another non extent item backed extent rec, so lets
9968                  * just add it to this extent and carry on like we did above.
9969                  */
9970                 good->refs += tmp->refs;
9971                 list_splice_init(&tmp->backrefs, &good->backrefs);
9972                 remove_cache_extent(extent_cache, &tmp->cache);
9973                 free(tmp);
9974         }
9975         ret = insert_cache_extent(extent_cache, &good->cache);
9976         BUG_ON(ret);
9977         free(rec);
9978         return good->num_duplicates ? 0 : 1;
9979 }
9980
9981 static int delete_duplicate_records(struct btrfs_root *root,
9982                                     struct extent_record *rec)
9983 {
9984         struct btrfs_trans_handle *trans;
9985         LIST_HEAD(delete_list);
9986         struct btrfs_path path;
9987         struct extent_record *tmp, *good, *n;
9988         int nr_del = 0;
9989         int ret = 0, err;
9990         struct btrfs_key key;
9991
9992         btrfs_init_path(&path);
9993
9994         good = rec;
9995         /* Find the record that covers all of the duplicates. */
9996         list_for_each_entry(tmp, &rec->dups, list) {
9997                 if (good->start < tmp->start)
9998                         continue;
9999                 if (good->nr > tmp->nr)
10000                         continue;
10001
10002                 if (tmp->start + tmp->nr < good->start + good->nr) {
10003                         fprintf(stderr, "Ok we have overlapping extents that "
10004                                 "aren't completely covered by each other, this "
10005                                 "is going to require more careful thought.  "
10006                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
10007                                 tmp->start, tmp->nr, good->start, good->nr);
10008                         abort();
10009                 }
10010                 good = tmp;
10011         }
10012
10013         if (good != rec)
10014                 list_add_tail(&rec->list, &delete_list);
10015
10016         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
10017                 if (tmp == good)
10018                         continue;
10019                 list_move_tail(&tmp->list, &delete_list);
10020         }
10021
10022         root = root->fs_info->extent_root;
10023         trans = btrfs_start_transaction(root, 1);
10024         if (IS_ERR(trans)) {
10025                 ret = PTR_ERR(trans);
10026                 goto out;
10027         }
10028
10029         list_for_each_entry(tmp, &delete_list, list) {
10030                 if (tmp->found_rec == 0)
10031                         continue;
10032                 key.objectid = tmp->start;
10033                 key.type = BTRFS_EXTENT_ITEM_KEY;
10034                 key.offset = tmp->nr;
10035
10036                 /* Shouldn't happen but just in case */
10037                 if (tmp->metadata) {
10038                         fprintf(stderr, "Well this shouldn't happen, extent "
10039                                 "record overlaps but is metadata? "
10040                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
10041                         abort();
10042                 }
10043
10044                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10045                 if (ret) {
10046                         if (ret > 0)
10047                                 ret = -EINVAL;
10048                         break;
10049                 }
10050                 ret = btrfs_del_item(trans, root, &path);
10051                 if (ret)
10052                         break;
10053                 btrfs_release_path(&path);
10054                 nr_del++;
10055         }
10056         err = btrfs_commit_transaction(trans, root);
10057         if (err && !ret)
10058                 ret = err;
10059 out:
10060         while (!list_empty(&delete_list)) {
10061                 tmp = to_extent_record(delete_list.next);
10062                 list_del_init(&tmp->list);
10063                 if (tmp == rec)
10064                         continue;
10065                 free(tmp);
10066         }
10067
10068         while (!list_empty(&rec->dups)) {
10069                 tmp = to_extent_record(rec->dups.next);
10070                 list_del_init(&tmp->list);
10071                 free(tmp);
10072         }
10073
10074         btrfs_release_path(&path);
10075
10076         if (!ret && !nr_del)
10077                 rec->num_duplicates = 0;
10078
10079         return ret ? ret : nr_del;
10080 }
10081
10082 static int find_possible_backrefs(struct btrfs_fs_info *info,
10083                                   struct btrfs_path *path,
10084                                   struct cache_tree *extent_cache,
10085                                   struct extent_record *rec)
10086 {
10087         struct btrfs_root *root;
10088         struct extent_backref *back, *tmp;
10089         struct data_backref *dback;
10090         struct cache_extent *cache;
10091         struct btrfs_file_extent_item *fi;
10092         struct btrfs_key key;
10093         u64 bytenr, bytes;
10094         int ret;
10095
10096         rbtree_postorder_for_each_entry_safe(back, tmp,
10097                                              &rec->backref_tree, node) {
10098                 /* Don't care about full backrefs (poor unloved backrefs) */
10099                 if (back->full_backref || !back->is_data)
10100                         continue;
10101
10102                 dback = to_data_backref(back);
10103
10104                 /* We found this one, we don't need to do a lookup */
10105                 if (dback->found_ref)
10106                         continue;
10107
10108                 key.objectid = dback->root;
10109                 key.type = BTRFS_ROOT_ITEM_KEY;
10110                 key.offset = (u64)-1;
10111
10112                 root = btrfs_read_fs_root(info, &key);
10113
10114                 /* No root, definitely a bad ref, skip */
10115                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
10116                         continue;
10117                 /* Other err, exit */
10118                 if (IS_ERR(root))
10119                         return PTR_ERR(root);
10120
10121                 key.objectid = dback->owner;
10122                 key.type = BTRFS_EXTENT_DATA_KEY;
10123                 key.offset = dback->offset;
10124                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
10125                 if (ret) {
10126                         btrfs_release_path(path);
10127                         if (ret < 0)
10128                                 return ret;
10129                         /* Didn't find it, we can carry on */
10130                         ret = 0;
10131                         continue;
10132                 }
10133
10134                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10135                                     struct btrfs_file_extent_item);
10136                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10137                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10138                 btrfs_release_path(path);
10139                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
10140                 if (cache) {
10141                         struct extent_record *tmp;
10142                         tmp = container_of(cache, struct extent_record, cache);
10143
10144                         /*
10145                          * If we found an extent record for the bytenr for this
10146                          * particular backref then we can't add it to our
10147                          * current extent record.  We only want to add backrefs
10148                          * that don't have a corresponding extent item in the
10149                          * extent tree since they likely belong to this record
10150                          * and we need to fix it if it doesn't match bytenrs.
10151                          */
10152                         if  (tmp->found_rec)
10153                                 continue;
10154                 }
10155
10156                 dback->found_ref += 1;
10157                 dback->disk_bytenr = bytenr;
10158                 dback->bytes = bytes;
10159
10160                 /*
10161                  * Set this so the verify backref code knows not to trust the
10162                  * values in this backref.
10163                  */
10164                 back->broken = 1;
10165         }
10166
10167         return 0;
10168 }
10169
10170 /*
10171  * Record orphan data ref into corresponding root.
10172  *
10173  * Return 0 if the extent item contains data ref and recorded.
10174  * Return 1 if the extent item contains no useful data ref
10175  *   On that case, it may contains only shared_dataref or metadata backref
10176  *   or the file extent exists(this should be handled by the extent bytenr
10177  *   recovery routine)
10178  * Return <0 if something goes wrong.
10179  */
10180 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10181                                       struct extent_record *rec)
10182 {
10183         struct btrfs_key key;
10184         struct btrfs_root *dest_root;
10185         struct extent_backref *back, *tmp;
10186         struct data_backref *dback;
10187         struct orphan_data_extent *orphan;
10188         struct btrfs_path path;
10189         int recorded_data_ref = 0;
10190         int ret = 0;
10191
10192         if (rec->metadata)
10193                 return 1;
10194         btrfs_init_path(&path);
10195         rbtree_postorder_for_each_entry_safe(back, tmp,
10196                                              &rec->backref_tree, node) {
10197                 if (back->full_backref || !back->is_data ||
10198                     !back->found_extent_tree)
10199                         continue;
10200                 dback = to_data_backref(back);
10201                 if (dback->found_ref)
10202                         continue;
10203                 key.objectid = dback->root;
10204                 key.type = BTRFS_ROOT_ITEM_KEY;
10205                 key.offset = (u64)-1;
10206
10207                 dest_root = btrfs_read_fs_root(fs_info, &key);
10208
10209                 /* For non-exist root we just skip it */
10210                 if (IS_ERR(dest_root) || !dest_root)
10211                         continue;
10212
10213                 key.objectid = dback->owner;
10214                 key.type = BTRFS_EXTENT_DATA_KEY;
10215                 key.offset = dback->offset;
10216
10217                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10218                 btrfs_release_path(&path);
10219                 /*
10220                  * For ret < 0, it's OK since the fs-tree may be corrupted,
10221                  * we need to record it for inode/file extent rebuild.
10222                  * For ret > 0, we record it only for file extent rebuild.
10223                  * For ret == 0, the file extent exists but only bytenr
10224                  * mismatch, let the original bytenr fix routine to handle,
10225                  * don't record it.
10226                  */
10227                 if (ret == 0)
10228                         continue;
10229                 ret = 0;
10230                 orphan = malloc(sizeof(*orphan));
10231                 if (!orphan) {
10232                         ret = -ENOMEM;
10233                         goto out;
10234                 }
10235                 INIT_LIST_HEAD(&orphan->list);
10236                 orphan->root = dback->root;
10237                 orphan->objectid = dback->owner;
10238                 orphan->offset = dback->offset;
10239                 orphan->disk_bytenr = rec->cache.start;
10240                 orphan->disk_len = rec->cache.size;
10241                 list_add(&dest_root->orphan_data_extents, &orphan->list);
10242                 recorded_data_ref = 1;
10243         }
10244 out:
10245         btrfs_release_path(&path);
10246         if (!ret)
10247                 return !recorded_data_ref;
10248         else
10249                 return ret;
10250 }
10251
10252 /*
10253  * when an incorrect extent item is found, this will delete
10254  * all of the existing entries for it and recreate them
10255  * based on what the tree scan found.
10256  */
10257 static int fixup_extent_refs(struct btrfs_fs_info *info,
10258                              struct cache_tree *extent_cache,
10259                              struct extent_record *rec)
10260 {
10261         struct btrfs_trans_handle *trans = NULL;
10262         int ret;
10263         struct btrfs_path path;
10264         struct cache_extent *cache;
10265         struct extent_backref *back, *tmp;
10266         int allocated = 0;
10267         u64 flags = 0;
10268
10269         if (rec->flag_block_full_backref)
10270                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10271
10272         btrfs_init_path(&path);
10273         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10274                 /*
10275                  * Sometimes the backrefs themselves are so broken they don't
10276                  * get attached to any meaningful rec, so first go back and
10277                  * check any of our backrefs that we couldn't find and throw
10278                  * them into the list if we find the backref so that
10279                  * verify_backrefs can figure out what to do.
10280                  */
10281                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10282                 if (ret < 0)
10283                         goto out;
10284         }
10285
10286         /* step one, make sure all of the backrefs agree */
10287         ret = verify_backrefs(info, &path, rec);
10288         if (ret < 0)
10289                 goto out;
10290
10291         trans = btrfs_start_transaction(info->extent_root, 1);
10292         if (IS_ERR(trans)) {
10293                 ret = PTR_ERR(trans);
10294                 goto out;
10295         }
10296
10297         /* step two, delete all the existing records */
10298         ret = delete_extent_records(trans, info->extent_root, &path,
10299                                     rec->start);
10300
10301         if (ret < 0)
10302                 goto out;
10303
10304         /* was this block corrupt?  If so, don't add references to it */
10305         cache = lookup_cache_extent(info->corrupt_blocks,
10306                                     rec->start, rec->max_size);
10307         if (cache) {
10308                 ret = 0;
10309                 goto out;
10310         }
10311
10312         /* step three, recreate all the refs we did find */
10313         rbtree_postorder_for_each_entry_safe(back, tmp,
10314                                              &rec->backref_tree, node) {
10315                 /*
10316                  * if we didn't find any references, don't create a
10317                  * new extent record
10318                  */
10319                 if (!back->found_ref)
10320                         continue;
10321
10322                 rec->bad_full_backref = 0;
10323                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10324                 allocated = 1;
10325
10326                 if (ret)
10327                         goto out;
10328         }
10329 out:
10330         if (trans) {
10331                 int err = btrfs_commit_transaction(trans, info->extent_root);
10332                 if (!ret)
10333                         ret = err;
10334         }
10335
10336         if (!ret)
10337                 fprintf(stderr, "Repaired extent references for %llu\n",
10338                                 (unsigned long long)rec->start);
10339
10340         btrfs_release_path(&path);
10341         return ret;
10342 }
10343
10344 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10345                               struct extent_record *rec)
10346 {
10347         struct btrfs_trans_handle *trans;
10348         struct btrfs_root *root = fs_info->extent_root;
10349         struct btrfs_path path;
10350         struct btrfs_extent_item *ei;
10351         struct btrfs_key key;
10352         u64 flags;
10353         int ret = 0;
10354
10355         key.objectid = rec->start;
10356         if (rec->metadata) {
10357                 key.type = BTRFS_METADATA_ITEM_KEY;
10358                 key.offset = rec->info_level;
10359         } else {
10360                 key.type = BTRFS_EXTENT_ITEM_KEY;
10361                 key.offset = rec->max_size;
10362         }
10363
10364         trans = btrfs_start_transaction(root, 0);
10365         if (IS_ERR(trans))
10366                 return PTR_ERR(trans);
10367
10368         btrfs_init_path(&path);
10369         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10370         if (ret < 0) {
10371                 btrfs_release_path(&path);
10372                 btrfs_commit_transaction(trans, root);
10373                 return ret;
10374         } else if (ret) {
10375                 fprintf(stderr, "Didn't find extent for %llu\n",
10376                         (unsigned long long)rec->start);
10377                 btrfs_release_path(&path);
10378                 btrfs_commit_transaction(trans, root);
10379                 return -ENOENT;
10380         }
10381
10382         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10383                             struct btrfs_extent_item);
10384         flags = btrfs_extent_flags(path.nodes[0], ei);
10385         if (rec->flag_block_full_backref) {
10386                 fprintf(stderr, "setting full backref on %llu\n",
10387                         (unsigned long long)key.objectid);
10388                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10389         } else {
10390                 fprintf(stderr, "clearing full backref on %llu\n",
10391                         (unsigned long long)key.objectid);
10392                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10393         }
10394         btrfs_set_extent_flags(path.nodes[0], ei, flags);
10395         btrfs_mark_buffer_dirty(path.nodes[0]);
10396         btrfs_release_path(&path);
10397         ret = btrfs_commit_transaction(trans, root);
10398         if (!ret)
10399                 fprintf(stderr, "Repaired extent flags for %llu\n",
10400                                 (unsigned long long)rec->start);
10401
10402         return ret;
10403 }
10404
10405 /* right now we only prune from the extent allocation tree */
10406 static int prune_one_block(struct btrfs_trans_handle *trans,
10407                            struct btrfs_fs_info *info,
10408                            struct btrfs_corrupt_block *corrupt)
10409 {
10410         int ret;
10411         struct btrfs_path path;
10412         struct extent_buffer *eb;
10413         u64 found;
10414         int slot;
10415         int nritems;
10416         int level = corrupt->level + 1;
10417
10418         btrfs_init_path(&path);
10419 again:
10420         /* we want to stop at the parent to our busted block */
10421         path.lowest_level = level;
10422
10423         ret = btrfs_search_slot(trans, info->extent_root,
10424                                 &corrupt->key, &path, -1, 1);
10425
10426         if (ret < 0)
10427                 goto out;
10428
10429         eb = path.nodes[level];
10430         if (!eb) {
10431                 ret = -ENOENT;
10432                 goto out;
10433         }
10434
10435         /*
10436          * hopefully the search gave us the block we want to prune,
10437          * lets try that first
10438          */
10439         slot = path.slots[level];
10440         found =  btrfs_node_blockptr(eb, slot);
10441         if (found == corrupt->cache.start)
10442                 goto del_ptr;
10443
10444         nritems = btrfs_header_nritems(eb);
10445
10446         /* the search failed, lets scan this node and hope we find it */
10447         for (slot = 0; slot < nritems; slot++) {
10448                 found =  btrfs_node_blockptr(eb, slot);
10449                 if (found == corrupt->cache.start)
10450                         goto del_ptr;
10451         }
10452         /*
10453          * we couldn't find the bad block.  TODO, search all the nodes for pointers
10454          * to this block
10455          */
10456         if (eb == info->extent_root->node) {
10457                 ret = -ENOENT;
10458                 goto out;
10459         } else {
10460                 level++;
10461                 btrfs_release_path(&path);
10462                 goto again;
10463         }
10464
10465 del_ptr:
10466         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10467         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10468
10469 out:
10470         btrfs_release_path(&path);
10471         return ret;
10472 }
10473
10474 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10475 {
10476         struct btrfs_trans_handle *trans = NULL;
10477         struct cache_extent *cache;
10478         struct btrfs_corrupt_block *corrupt;
10479
10480         while (1) {
10481                 cache = search_cache_extent(info->corrupt_blocks, 0);
10482                 if (!cache)
10483                         break;
10484                 if (!trans) {
10485                         trans = btrfs_start_transaction(info->extent_root, 1);
10486                         if (IS_ERR(trans))
10487                                 return PTR_ERR(trans);
10488                 }
10489                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10490                 prune_one_block(trans, info, corrupt);
10491                 remove_cache_extent(info->corrupt_blocks, cache);
10492         }
10493         if (trans)
10494                 return btrfs_commit_transaction(trans, info->extent_root);
10495         return 0;
10496 }
10497
10498 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10499 {
10500         struct btrfs_block_group_cache *cache;
10501         u64 start, end;
10502         int ret;
10503
10504         while (1) {
10505                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10506                                             &start, &end, EXTENT_DIRTY);
10507                 if (ret)
10508                         break;
10509                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10510         }
10511
10512         start = 0;
10513         while (1) {
10514                 cache = btrfs_lookup_first_block_group(fs_info, start);
10515                 if (!cache)
10516                         break;
10517                 if (cache->cached)
10518                         cache->cached = 0;
10519                 start = cache->key.objectid + cache->key.offset;
10520         }
10521 }
10522
10523 static int check_extent_refs(struct btrfs_root *root,
10524                              struct cache_tree *extent_cache)
10525 {
10526         struct extent_record *rec;
10527         struct cache_extent *cache;
10528         int ret = 0;
10529         int had_dups = 0;
10530         int err = 0;
10531
10532         if (repair) {
10533                 /*
10534                  * if we're doing a repair, we have to make sure
10535                  * we don't allocate from the problem extents.
10536                  * In the worst case, this will be all the
10537                  * extents in the FS
10538                  */
10539                 cache = search_cache_extent(extent_cache, 0);
10540                 while(cache) {
10541                         rec = container_of(cache, struct extent_record, cache);
10542                         set_extent_dirty(root->fs_info->excluded_extents,
10543                                          rec->start,
10544                                          rec->start + rec->max_size - 1);
10545                         cache = next_cache_extent(cache);
10546                 }
10547
10548                 /* pin down all the corrupted blocks too */
10549                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10550                 while(cache) {
10551                         set_extent_dirty(root->fs_info->excluded_extents,
10552                                          cache->start,
10553                                          cache->start + cache->size - 1);
10554                         cache = next_cache_extent(cache);
10555                 }
10556                 prune_corrupt_blocks(root->fs_info);
10557                 reset_cached_block_groups(root->fs_info);
10558         }
10559
10560         reset_cached_block_groups(root->fs_info);
10561
10562         /*
10563          * We need to delete any duplicate entries we find first otherwise we
10564          * could mess up the extent tree when we have backrefs that actually
10565          * belong to a different extent item and not the weird duplicate one.
10566          */
10567         while (repair && !list_empty(&duplicate_extents)) {
10568                 rec = to_extent_record(duplicate_extents.next);
10569                 list_del_init(&rec->list);
10570
10571                 /* Sometimes we can find a backref before we find an actual
10572                  * extent, so we need to process it a little bit to see if there
10573                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10574                  * if this is a backref screwup.  If we need to delete stuff
10575                  * process_duplicates() will return 0, otherwise it will return
10576                  * 1 and we move on to the next duplicate record.
10577                  */
10578                 if (process_duplicates(extent_cache, rec))
10579                         continue;
10580                 ret = delete_duplicate_records(root, rec);
10581                 if (ret < 0)
10582                         return ret;
10583                 /*
10584                  * delete_duplicate_records will return the number of entries
10585                  * deleted, so if it's greater than 0 then we know we actually
10586                  * did something and we need to remove.
10587                  */
10588                 if (ret)
10589                         had_dups = 1;
10590         }
10591
10592         if (had_dups)
10593                 return -EAGAIN;
10594
10595         while(1) {
10596                 int cur_err = 0;
10597                 int fix = 0;
10598
10599                 cache = search_cache_extent(extent_cache, 0);
10600                 if (!cache)
10601                         break;
10602                 rec = container_of(cache, struct extent_record, cache);
10603                 if (rec->num_duplicates) {
10604                         fprintf(stderr, "extent item %llu has multiple extent "
10605                                 "items\n", (unsigned long long)rec->start);
10606                         cur_err = 1;
10607                 }
10608
10609                 if (rec->refs != rec->extent_item_refs) {
10610                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
10611                                 (unsigned long long)rec->start,
10612                                 (unsigned long long)rec->nr);
10613                         fprintf(stderr, "extent item %llu, found %llu\n",
10614                                 (unsigned long long)rec->extent_item_refs,
10615                                 (unsigned long long)rec->refs);
10616                         ret = record_orphan_data_extents(root->fs_info, rec);
10617                         if (ret < 0)
10618                                 goto repair_abort;
10619                         fix = ret;
10620                         cur_err = 1;
10621                 }
10622                 if (all_backpointers_checked(rec, 1)) {
10623                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10624                                 (unsigned long long)rec->start,
10625                                 (unsigned long long)rec->nr);
10626                         fix = 1;
10627                         cur_err = 1;
10628                 }
10629                 if (!rec->owner_ref_checked) {
10630                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10631                                 (unsigned long long)rec->start,
10632                                 (unsigned long long)rec->nr);
10633                         fix = 1;
10634                         cur_err = 1;
10635                 }
10636
10637                 if (repair && fix) {
10638                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10639                         if (ret)
10640                                 goto repair_abort;
10641                 }
10642
10643
10644                 if (rec->bad_full_backref) {
10645                         fprintf(stderr, "bad full backref, on [%llu]\n",
10646                                 (unsigned long long)rec->start);
10647                         if (repair) {
10648                                 ret = fixup_extent_flags(root->fs_info, rec);
10649                                 if (ret)
10650                                         goto repair_abort;
10651                                 fix = 1;
10652                         }
10653                         cur_err = 1;
10654                 }
10655                 /*
10656                  * Although it's not a extent ref's problem, we reuse this
10657                  * routine for error reporting.
10658                  * No repair function yet.
10659                  */
10660                 if (rec->crossing_stripes) {
10661                         fprintf(stderr,
10662                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10663                                 rec->start, rec->start + rec->max_size);
10664                         cur_err = 1;
10665                 }
10666
10667                 if (rec->wrong_chunk_type) {
10668                         fprintf(stderr,
10669                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
10670                                 rec->start, rec->start + rec->max_size);
10671                         cur_err = 1;
10672                 }
10673
10674                 err = cur_err;
10675                 remove_cache_extent(extent_cache, cache);
10676                 free_all_extent_backrefs(rec);
10677                 if (!init_extent_tree && repair && (!cur_err || fix))
10678                         clear_extent_dirty(root->fs_info->excluded_extents,
10679                                            rec->start,
10680                                            rec->start + rec->max_size - 1);
10681                 free(rec);
10682         }
10683 repair_abort:
10684         if (repair) {
10685                 if (ret && ret != -EAGAIN) {
10686                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10687                         exit(1);
10688                 } else if (!ret) {
10689                         struct btrfs_trans_handle *trans;
10690
10691                         root = root->fs_info->extent_root;
10692                         trans = btrfs_start_transaction(root, 1);
10693                         if (IS_ERR(trans)) {
10694                                 ret = PTR_ERR(trans);
10695                                 goto repair_abort;
10696                         }
10697
10698                         ret = btrfs_fix_block_accounting(trans, root);
10699                         if (ret)
10700                                 goto repair_abort;
10701                         ret = btrfs_commit_transaction(trans, root);
10702                         if (ret)
10703                                 goto repair_abort;
10704                 }
10705                 return ret;
10706         }
10707
10708         if (err)
10709                 err = -EIO;
10710         return err;
10711 }
10712
10713 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10714 {
10715         u64 stripe_size;
10716
10717         if (type & BTRFS_BLOCK_GROUP_RAID0) {
10718                 stripe_size = length;
10719                 stripe_size /= num_stripes;
10720         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10721                 stripe_size = length * 2;
10722                 stripe_size /= num_stripes;
10723         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10724                 stripe_size = length;
10725                 stripe_size /= (num_stripes - 1);
10726         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10727                 stripe_size = length;
10728                 stripe_size /= (num_stripes - 2);
10729         } else {
10730                 stripe_size = length;
10731         }
10732         return stripe_size;
10733 }
10734
10735 /*
10736  * Check the chunk with its block group/dev list ref:
10737  * Return 0 if all refs seems valid.
10738  * Return 1 if part of refs seems valid, need later check for rebuild ref
10739  * like missing block group and needs to search extent tree to rebuild them.
10740  * Return -1 if essential refs are missing and unable to rebuild.
10741  */
10742 static int check_chunk_refs(struct chunk_record *chunk_rec,
10743                             struct block_group_tree *block_group_cache,
10744                             struct device_extent_tree *dev_extent_cache,
10745                             int silent)
10746 {
10747         struct cache_extent *block_group_item;
10748         struct block_group_record *block_group_rec;
10749         struct cache_extent *dev_extent_item;
10750         struct device_extent_record *dev_extent_rec;
10751         u64 devid;
10752         u64 offset;
10753         u64 length;
10754         int metadump_v2 = 0;
10755         int i;
10756         int ret = 0;
10757
10758         block_group_item = lookup_cache_extent(&block_group_cache->tree,
10759                                                chunk_rec->offset,
10760                                                chunk_rec->length);
10761         if (block_group_item) {
10762                 block_group_rec = container_of(block_group_item,
10763                                                struct block_group_record,
10764                                                cache);
10765                 if (chunk_rec->length != block_group_rec->offset ||
10766                     chunk_rec->offset != block_group_rec->objectid ||
10767                     (!metadump_v2 &&
10768                      chunk_rec->type_flags != block_group_rec->flags)) {
10769                         if (!silent)
10770                                 fprintf(stderr,
10771                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10772                                         chunk_rec->objectid,
10773                                         chunk_rec->type,
10774                                         chunk_rec->offset,
10775                                         chunk_rec->length,
10776                                         chunk_rec->offset,
10777                                         chunk_rec->type_flags,
10778                                         block_group_rec->objectid,
10779                                         block_group_rec->type,
10780                                         block_group_rec->offset,
10781                                         block_group_rec->offset,
10782                                         block_group_rec->objectid,
10783                                         block_group_rec->flags);
10784                         ret = -1;
10785                 } else {
10786                         list_del_init(&block_group_rec->list);
10787                         chunk_rec->bg_rec = block_group_rec;
10788                 }
10789         } else {
10790                 if (!silent)
10791                         fprintf(stderr,
10792                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10793                                 chunk_rec->objectid,
10794                                 chunk_rec->type,
10795                                 chunk_rec->offset,
10796                                 chunk_rec->length,
10797                                 chunk_rec->offset,
10798                                 chunk_rec->type_flags);
10799                 ret = 1;
10800         }
10801
10802         if (metadump_v2)
10803                 return ret;
10804
10805         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10806                                     chunk_rec->num_stripes);
10807         for (i = 0; i < chunk_rec->num_stripes; ++i) {
10808                 devid = chunk_rec->stripes[i].devid;
10809                 offset = chunk_rec->stripes[i].offset;
10810                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10811                                                        devid, offset, length);
10812                 if (dev_extent_item) {
10813                         dev_extent_rec = container_of(dev_extent_item,
10814                                                 struct device_extent_record,
10815                                                 cache);
10816                         if (dev_extent_rec->objectid != devid ||
10817                             dev_extent_rec->offset != offset ||
10818                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
10819                             dev_extent_rec->length != length) {
10820                                 if (!silent)
10821                                         fprintf(stderr,
10822                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10823                                                 chunk_rec->objectid,
10824                                                 chunk_rec->type,
10825                                                 chunk_rec->offset,
10826                                                 chunk_rec->stripes[i].devid,
10827                                                 chunk_rec->stripes[i].offset,
10828                                                 dev_extent_rec->objectid,
10829                                                 dev_extent_rec->offset,
10830                                                 dev_extent_rec->length);
10831                                 ret = -1;
10832                         } else {
10833                                 list_move(&dev_extent_rec->chunk_list,
10834                                           &chunk_rec->dextents);
10835                         }
10836                 } else {
10837                         if (!silent)
10838                                 fprintf(stderr,
10839                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10840                                         chunk_rec->objectid,
10841                                         chunk_rec->type,
10842                                         chunk_rec->offset,
10843                                         chunk_rec->stripes[i].devid,
10844                                         chunk_rec->stripes[i].offset);
10845                         ret = -1;
10846                 }
10847         }
10848         return ret;
10849 }
10850
10851 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
10852 int check_chunks(struct cache_tree *chunk_cache,
10853                  struct block_group_tree *block_group_cache,
10854                  struct device_extent_tree *dev_extent_cache,
10855                  struct list_head *good, struct list_head *bad,
10856                  struct list_head *rebuild, int silent)
10857 {
10858         struct cache_extent *chunk_item;
10859         struct chunk_record *chunk_rec;
10860         struct block_group_record *bg_rec;
10861         struct device_extent_record *dext_rec;
10862         int err;
10863         int ret = 0;
10864
10865         chunk_item = first_cache_extent(chunk_cache);
10866         while (chunk_item) {
10867                 chunk_rec = container_of(chunk_item, struct chunk_record,
10868                                          cache);
10869                 err = check_chunk_refs(chunk_rec, block_group_cache,
10870                                        dev_extent_cache, silent);
10871                 if (err < 0)
10872                         ret = err;
10873                 if (err == 0 && good)
10874                         list_add_tail(&chunk_rec->list, good);
10875                 if (err > 0 && rebuild)
10876                         list_add_tail(&chunk_rec->list, rebuild);
10877                 if (err < 0 && bad)
10878                         list_add_tail(&chunk_rec->list, bad);
10879                 chunk_item = next_cache_extent(chunk_item);
10880         }
10881
10882         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10883                 if (!silent)
10884                         fprintf(stderr,
10885                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10886                                 bg_rec->objectid,
10887                                 bg_rec->offset,
10888                                 bg_rec->flags);
10889                 if (!ret)
10890                         ret = 1;
10891         }
10892
10893         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10894                             chunk_list) {
10895                 if (!silent)
10896                         fprintf(stderr,
10897                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10898                                 dext_rec->objectid,
10899                                 dext_rec->offset,
10900                                 dext_rec->length);
10901                 if (!ret)
10902                         ret = 1;
10903         }
10904         return ret;
10905 }
10906
10907
10908 static int check_device_used(struct device_record *dev_rec,
10909                              struct device_extent_tree *dext_cache)
10910 {
10911         struct cache_extent *cache;
10912         struct device_extent_record *dev_extent_rec;
10913         u64 total_byte = 0;
10914
10915         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10916         while (cache) {
10917                 dev_extent_rec = container_of(cache,
10918                                               struct device_extent_record,
10919                                               cache);
10920                 if (dev_extent_rec->objectid != dev_rec->devid)
10921                         break;
10922
10923                 list_del_init(&dev_extent_rec->device_list);
10924                 total_byte += dev_extent_rec->length;
10925                 cache = next_cache_extent(cache);
10926         }
10927
10928         if (total_byte != dev_rec->byte_used) {
10929                 fprintf(stderr,
10930                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10931                         total_byte, dev_rec->byte_used, dev_rec->objectid,
10932                         dev_rec->type, dev_rec->offset);
10933                 return -1;
10934         } else {
10935                 return 0;
10936         }
10937 }
10938
10939 /*
10940  * Extra (optional) check for dev_item size to report possbile problem on a new
10941  * kernel.
10942  */
10943 static void check_dev_size_alignment(u64 devid, u64 total_bytes, u32 sectorsize)
10944 {
10945         if (!IS_ALIGNED(total_bytes, sectorsize)) {
10946                 warning(
10947 "unaligned total_bytes detected for devid %llu, have %llu should be aligned to %u",
10948                         devid, total_bytes, sectorsize);
10949                 warning(
10950 "this is OK for older kernel, but may cause kernel warning for newer kernels");
10951                 warning("this can be fixed by 'btrfs rescue fix-device-size'");
10952         }
10953 }
10954
10955 /*
10956  * Unlike device size alignment check above, some super total_bytes check
10957  * failure can lead to mount failure for newer kernel.
10958  *
10959  * So this function will return the error for a fatal super total_bytes problem.
10960  */
10961 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
10962 {
10963         struct btrfs_device *dev;
10964         struct list_head *dev_list = &fs_info->fs_devices->devices;
10965         u64 total_bytes = 0;
10966         u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
10967
10968         list_for_each_entry(dev, dev_list, dev_list)
10969                 total_bytes += dev->total_bytes;
10970
10971         /* Important check, which can cause unmountable fs */
10972         if (super_bytes < total_bytes) {
10973                 error("super total bytes %llu smaller than real device(s) size %llu",
10974                         super_bytes, total_bytes);
10975                 error("mounting this fs may fail for newer kernels");
10976                 error("this can be fixed by 'btrfs rescue fix-device-size'");
10977                 return false;
10978         }
10979
10980         /*
10981          * Optional check, just to make everything aligned and match with each
10982          * other.
10983          *
10984          * For a btrfs-image restored fs, we don't need to check it anyway.
10985          */
10986         if (btrfs_super_flags(fs_info->super_copy) &
10987             (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
10988                 return true;
10989         if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
10990             !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
10991             super_bytes != total_bytes) {
10992                 warning("minor unaligned/mismatch device size detected");
10993                 warning(
10994                 "recommended to use 'btrfs rescue fix-device-size' to fix it");
10995         }
10996         return true;
10997 }
10998
10999 /* check btrfs_dev_item -> btrfs_dev_extent */
11000 static int check_devices(struct rb_root *dev_cache,
11001                          struct device_extent_tree *dev_extent_cache)
11002 {
11003         struct rb_node *dev_node;
11004         struct device_record *dev_rec;
11005         struct device_extent_record *dext_rec;
11006         int err;
11007         int ret = 0;
11008
11009         dev_node = rb_first(dev_cache);
11010         while (dev_node) {
11011                 dev_rec = container_of(dev_node, struct device_record, node);
11012                 err = check_device_used(dev_rec, dev_extent_cache);
11013                 if (err)
11014                         ret = err;
11015
11016                 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
11017                                          global_info->sectorsize);
11018                 dev_node = rb_next(dev_node);
11019         }
11020         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
11021                             device_list) {
11022                 fprintf(stderr,
11023                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
11024                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
11025                 if (!ret)
11026                         ret = 1;
11027         }
11028         return ret;
11029 }
11030
11031 static int add_root_item_to_list(struct list_head *head,
11032                                   u64 objectid, u64 bytenr, u64 last_snapshot,
11033                                   u8 level, u8 drop_level,
11034                                   struct btrfs_key *drop_key)
11035 {
11036
11037         struct root_item_record *ri_rec;
11038         ri_rec = malloc(sizeof(*ri_rec));
11039         if (!ri_rec)
11040                 return -ENOMEM;
11041         ri_rec->bytenr = bytenr;
11042         ri_rec->objectid = objectid;
11043         ri_rec->level = level;
11044         ri_rec->drop_level = drop_level;
11045         ri_rec->last_snapshot = last_snapshot;
11046         if (drop_key)
11047                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
11048         list_add_tail(&ri_rec->list, head);
11049
11050         return 0;
11051 }
11052
11053 static void free_root_item_list(struct list_head *list)
11054 {
11055         struct root_item_record *ri_rec;
11056
11057         while (!list_empty(list)) {
11058                 ri_rec = list_first_entry(list, struct root_item_record,
11059                                           list);
11060                 list_del_init(&ri_rec->list);
11061                 free(ri_rec);
11062         }
11063 }
11064
11065 static int deal_root_from_list(struct list_head *list,
11066                                struct btrfs_root *root,
11067                                struct block_info *bits,
11068                                int bits_nr,
11069                                struct cache_tree *pending,
11070                                struct cache_tree *seen,
11071                                struct cache_tree *reada,
11072                                struct cache_tree *nodes,
11073                                struct cache_tree *extent_cache,
11074                                struct cache_tree *chunk_cache,
11075                                struct rb_root *dev_cache,
11076                                struct block_group_tree *block_group_cache,
11077                                struct device_extent_tree *dev_extent_cache)
11078 {
11079         int ret = 0;
11080         u64 last;
11081
11082         while (!list_empty(list)) {
11083                 struct root_item_record *rec;
11084                 struct extent_buffer *buf;
11085                 rec = list_entry(list->next,
11086                                  struct root_item_record, list);
11087                 last = 0;
11088                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
11089                 if (!extent_buffer_uptodate(buf)) {
11090                         free_extent_buffer(buf);
11091                         ret = -EIO;
11092                         break;
11093                 }
11094                 ret = add_root_to_pending(buf, extent_cache, pending,
11095                                     seen, nodes, rec->objectid);
11096                 if (ret < 0)
11097                         break;
11098                 /*
11099                  * To rebuild extent tree, we need deal with snapshot
11100                  * one by one, otherwise we deal with node firstly which
11101                  * can maximize readahead.
11102                  */
11103                 while (1) {
11104                         ret = run_next_block(root, bits, bits_nr, &last,
11105                                              pending, seen, reada, nodes,
11106                                              extent_cache, chunk_cache,
11107                                              dev_cache, block_group_cache,
11108                                              dev_extent_cache, rec);
11109                         if (ret != 0)
11110                                 break;
11111                 }
11112                 free_extent_buffer(buf);
11113                 list_del(&rec->list);
11114                 free(rec);
11115                 if (ret < 0)
11116                         break;
11117         }
11118         while (ret >= 0) {
11119                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
11120                                      reada, nodes, extent_cache, chunk_cache,
11121                                      dev_cache, block_group_cache,
11122                                      dev_extent_cache, NULL);
11123                 if (ret != 0) {
11124                         if (ret > 0)
11125                                 ret = 0;
11126                         break;
11127                 }
11128         }
11129         return ret;
11130 }
11131
11132 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11133 {
11134         struct rb_root dev_cache;
11135         struct cache_tree chunk_cache;
11136         struct block_group_tree block_group_cache;
11137         struct device_extent_tree dev_extent_cache;
11138         struct cache_tree extent_cache;
11139         struct cache_tree seen;
11140         struct cache_tree pending;
11141         struct cache_tree reada;
11142         struct cache_tree nodes;
11143         struct extent_io_tree excluded_extents;
11144         struct cache_tree corrupt_blocks;
11145         struct btrfs_path path;
11146         struct btrfs_key key;
11147         struct btrfs_key found_key;
11148         int ret, err = 0;
11149         struct block_info *bits;
11150         int bits_nr;
11151         struct extent_buffer *leaf;
11152         int slot;
11153         struct btrfs_root_item ri;
11154         struct list_head dropping_trees;
11155         struct list_head normal_trees;
11156         struct btrfs_root *root1;
11157         struct btrfs_root *root;
11158         u64 objectid;
11159         u8 level;
11160
11161         root = fs_info->fs_root;
11162         dev_cache = RB_ROOT;
11163         cache_tree_init(&chunk_cache);
11164         block_group_tree_init(&block_group_cache);
11165         device_extent_tree_init(&dev_extent_cache);
11166
11167         cache_tree_init(&extent_cache);
11168         cache_tree_init(&seen);
11169         cache_tree_init(&pending);
11170         cache_tree_init(&nodes);
11171         cache_tree_init(&reada);
11172         cache_tree_init(&corrupt_blocks);
11173         extent_io_tree_init(&excluded_extents);
11174         INIT_LIST_HEAD(&dropping_trees);
11175         INIT_LIST_HEAD(&normal_trees);
11176
11177         if (repair) {
11178                 fs_info->excluded_extents = &excluded_extents;
11179                 fs_info->fsck_extent_cache = &extent_cache;
11180                 fs_info->free_extent_hook = free_extent_hook;
11181                 fs_info->corrupt_blocks = &corrupt_blocks;
11182         }
11183
11184         bits_nr = 1024;
11185         bits = malloc(bits_nr * sizeof(struct block_info));
11186         if (!bits) {
11187                 perror("malloc");
11188                 exit(1);
11189         }
11190
11191         if (ctx.progress_enabled) {
11192                 ctx.tp = TASK_EXTENTS;
11193                 task_start(ctx.info);
11194         }
11195
11196 again:
11197         root1 = fs_info->tree_root;
11198         level = btrfs_header_level(root1->node);
11199         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11200                                     root1->node->start, 0, level, 0, NULL);
11201         if (ret < 0)
11202                 goto out;
11203         root1 = fs_info->chunk_root;
11204         level = btrfs_header_level(root1->node);
11205         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11206                                     root1->node->start, 0, level, 0, NULL);
11207         if (ret < 0)
11208                 goto out;
11209         btrfs_init_path(&path);
11210         key.offset = 0;
11211         key.objectid = 0;
11212         key.type = BTRFS_ROOT_ITEM_KEY;
11213         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11214         if (ret < 0)
11215                 goto out;
11216         while(1) {
11217                 leaf = path.nodes[0];
11218                 slot = path.slots[0];
11219                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
11220                         ret = btrfs_next_leaf(root, &path);
11221                         if (ret != 0)
11222                                 break;
11223                         leaf = path.nodes[0];
11224                         slot = path.slots[0];
11225                 }
11226                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11227                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11228                         unsigned long offset;
11229                         u64 last_snapshot;
11230
11231                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11232                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11233                         last_snapshot = btrfs_root_last_snapshot(&ri);
11234                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11235                                 level = btrfs_root_level(&ri);
11236                                 ret = add_root_item_to_list(&normal_trees,
11237                                                 found_key.objectid,
11238                                                 btrfs_root_bytenr(&ri),
11239                                                 last_snapshot, level,
11240                                                 0, NULL);
11241                                 if (ret < 0)
11242                                         goto out;
11243                         } else {
11244                                 level = btrfs_root_level(&ri);
11245                                 objectid = found_key.objectid;
11246                                 btrfs_disk_key_to_cpu(&found_key,
11247                                                       &ri.drop_progress);
11248                                 ret = add_root_item_to_list(&dropping_trees,
11249                                                 objectid,
11250                                                 btrfs_root_bytenr(&ri),
11251                                                 last_snapshot, level,
11252                                                 ri.drop_level, &found_key);
11253                                 if (ret < 0)
11254                                         goto out;
11255                         }
11256                 }
11257                 path.slots[0]++;
11258         }
11259         btrfs_release_path(&path);
11260
11261         /*
11262          * check_block can return -EAGAIN if it fixes something, please keep
11263          * this in mind when dealing with return values from these functions, if
11264          * we get -EAGAIN we want to fall through and restart the loop.
11265          */
11266         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11267                                   &seen, &reada, &nodes, &extent_cache,
11268                                   &chunk_cache, &dev_cache, &block_group_cache,
11269                                   &dev_extent_cache);
11270         if (ret < 0) {
11271                 if (ret == -EAGAIN)
11272                         goto loop;
11273                 goto out;
11274         }
11275         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11276                                   &pending, &seen, &reada, &nodes,
11277                                   &extent_cache, &chunk_cache, &dev_cache,
11278                                   &block_group_cache, &dev_extent_cache);
11279         if (ret < 0) {
11280                 if (ret == -EAGAIN)
11281                         goto loop;
11282                 goto out;
11283         }
11284
11285         ret = check_chunks(&chunk_cache, &block_group_cache,
11286                            &dev_extent_cache, NULL, NULL, NULL, 0);
11287         if (ret) {
11288                 if (ret == -EAGAIN)
11289                         goto loop;
11290                 err = ret;
11291         }
11292
11293         ret = check_extent_refs(root, &extent_cache);
11294         if (ret < 0) {
11295                 if (ret == -EAGAIN)
11296                         goto loop;
11297                 goto out;
11298         }
11299
11300         ret = check_devices(&dev_cache, &dev_extent_cache);
11301         if (ret && err)
11302                 ret = err;
11303
11304 out:
11305         task_stop(ctx.info);
11306         if (repair) {
11307                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11308                 extent_io_tree_cleanup(&excluded_extents);
11309                 fs_info->fsck_extent_cache = NULL;
11310                 fs_info->free_extent_hook = NULL;
11311                 fs_info->corrupt_blocks = NULL;
11312                 fs_info->excluded_extents = NULL;
11313         }
11314         free(bits);
11315         free_chunk_cache_tree(&chunk_cache);
11316         free_device_cache_tree(&dev_cache);
11317         free_block_group_tree(&block_group_cache);
11318         free_device_extent_tree(&dev_extent_cache);
11319         free_extent_cache_tree(&seen);
11320         free_extent_cache_tree(&pending);
11321         free_extent_cache_tree(&reada);
11322         free_extent_cache_tree(&nodes);
11323         free_root_item_list(&normal_trees);
11324         free_root_item_list(&dropping_trees);
11325         return ret;
11326 loop:
11327         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11328         free_extent_cache_tree(&seen);
11329         free_extent_cache_tree(&pending);
11330         free_extent_cache_tree(&reada);
11331         free_extent_cache_tree(&nodes);
11332         free_chunk_cache_tree(&chunk_cache);
11333         free_block_group_tree(&block_group_cache);
11334         free_device_cache_tree(&dev_cache);
11335         free_device_extent_tree(&dev_extent_cache);
11336         free_extent_record_cache(&extent_cache);
11337         free_root_item_list(&normal_trees);
11338         free_root_item_list(&dropping_trees);
11339         extent_io_tree_cleanup(&excluded_extents);
11340         goto again;
11341 }
11342
11343 static int check_extent_inline_ref(struct extent_buffer *eb,
11344                    struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11345 {
11346         int ret;
11347         u8 type = btrfs_extent_inline_ref_type(eb, iref);
11348
11349         switch (type) {
11350         case BTRFS_TREE_BLOCK_REF_KEY:
11351         case BTRFS_EXTENT_DATA_REF_KEY:
11352         case BTRFS_SHARED_BLOCK_REF_KEY:
11353         case BTRFS_SHARED_DATA_REF_KEY:
11354                 ret = 0;
11355                 break;
11356         default:
11357                 error("extent[%llu %u %llu] has unknown ref type: %d",
11358                       key->objectid, key->type, key->offset, type);
11359                 ret = UNKNOWN_TYPE;
11360                 break;
11361         }
11362
11363         return ret;
11364 }
11365
11366 /*
11367  * Check backrefs of a tree block given by @bytenr or @eb.
11368  *
11369  * @root:       the root containing the @bytenr or @eb
11370  * @eb:         tree block extent buffer, can be NULL
11371  * @bytenr:     bytenr of the tree block to search
11372  * @level:      tree level of the tree block
11373  * @owner:      owner of the tree block
11374  *
11375  * Return >0 for any error found and output error message
11376  * Return 0 for no error found
11377  */
11378 static int check_tree_block_ref(struct btrfs_root *root,
11379                                 struct extent_buffer *eb, u64 bytenr,
11380                                 int level, u64 owner, struct node_refs *nrefs)
11381 {
11382         struct btrfs_key key;
11383         struct btrfs_root *extent_root = root->fs_info->extent_root;
11384         struct btrfs_path path;
11385         struct btrfs_extent_item *ei;
11386         struct btrfs_extent_inline_ref *iref;
11387         struct extent_buffer *leaf;
11388         unsigned long end;
11389         unsigned long ptr;
11390         int slot;
11391         int skinny_level;
11392         int root_level = btrfs_header_level(root->node);
11393         int type;
11394         u32 nodesize = root->fs_info->nodesize;
11395         u32 item_size;
11396         u64 offset;
11397         int found_ref = 0;
11398         int err = 0;
11399         int ret;
11400         int strict = 1;
11401         int parent = 0;
11402
11403         btrfs_init_path(&path);
11404         key.objectid = bytenr;
11405         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11406                 key.type = BTRFS_METADATA_ITEM_KEY;
11407         else
11408                 key.type = BTRFS_EXTENT_ITEM_KEY;
11409         key.offset = (u64)-1;
11410
11411         /* Search for the backref in extent tree */
11412         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11413         if (ret < 0) {
11414                 err |= BACKREF_MISSING;
11415                 goto out;
11416         }
11417         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11418         if (ret) {
11419                 err |= BACKREF_MISSING;
11420                 goto out;
11421         }
11422
11423         leaf = path.nodes[0];
11424         slot = path.slots[0];
11425         btrfs_item_key_to_cpu(leaf, &key, slot);
11426
11427         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11428
11429         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11430                 skinny_level = (int)key.offset;
11431                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11432         } else {
11433                 struct btrfs_tree_block_info *info;
11434
11435                 info = (struct btrfs_tree_block_info *)(ei + 1);
11436                 skinny_level = btrfs_tree_block_level(leaf, info);
11437                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11438         }
11439
11440
11441         if (eb) {
11442                 u64 header_gen;
11443                 u64 extent_gen;
11444
11445                 /*
11446                  * Due to the feature of shared tree blocks, if the upper node
11447                  * is a fs root or shared node, the extent of checked node may
11448                  * not be updated until the next CoW.
11449                  */
11450                 if (nrefs)
11451                         strict = should_check_extent_strictly(root, nrefs,
11452                                         level);
11453                 if (!(btrfs_extent_flags(leaf, ei) &
11454                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11455                         error(
11456                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11457                                 key.objectid, nodesize,
11458                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11459                         err = BACKREF_MISMATCH;
11460                 }
11461                 header_gen = btrfs_header_generation(eb);
11462                 extent_gen = btrfs_extent_generation(leaf, ei);
11463                 if (header_gen != extent_gen) {
11464                         error(
11465         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11466                                 key.objectid, nodesize, header_gen,
11467                                 extent_gen);
11468                         err = BACKREF_MISMATCH;
11469                 }
11470                 if (level != skinny_level) {
11471                         error(
11472                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11473                                 key.objectid, nodesize, level, skinny_level);
11474                         err = BACKREF_MISMATCH;
11475                 }
11476                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11477                         error(
11478                         "extent[%llu %u] is referred by other roots than %llu",
11479                                 key.objectid, nodesize, root->objectid);
11480                         err = BACKREF_MISMATCH;
11481                 }
11482         }
11483
11484         /*
11485          * Iterate the extent/metadata item to find the exact backref
11486          */
11487         item_size = btrfs_item_size_nr(leaf, slot);
11488         ptr = (unsigned long)iref;
11489         end = (unsigned long)ei + item_size;
11490
11491         while (ptr < end) {
11492                 iref = (struct btrfs_extent_inline_ref *)ptr;
11493                 type = btrfs_extent_inline_ref_type(leaf, iref);
11494                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11495
11496                 ret = check_extent_inline_ref(leaf, &key, iref);
11497                 if (ret) {
11498                         err |= ret;
11499                         break;
11500                 }
11501                 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11502                         if (offset == root->objectid)
11503                                 found_ref = 1;
11504                         if (!strict && owner == offset)
11505                                 found_ref = 1;
11506                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11507                         /*
11508                          * Backref of tree reloc root points to itself, no need
11509                          * to check backref any more.
11510                          *
11511                          * This may be an error of loop backref, but extent tree
11512                          * checker should have already handled it.
11513                          * Here we only need to avoid infinite iteration.
11514                          */
11515                         if (offset == bytenr) {
11516                                 found_ref = 1;
11517                         } else {
11518                                 /*
11519                                  * Check if the backref points to valid
11520                                  * referencer
11521                                  */
11522                                 found_ref = !check_tree_block_ref( root, NULL,
11523                                                 offset, level + 1, owner,
11524                                                 NULL);
11525                         }
11526                 }
11527
11528                 if (found_ref)
11529                         break;
11530                 ptr += btrfs_extent_inline_ref_size(type);
11531         }
11532
11533         /*
11534          * Inlined extent item doesn't have what we need, check
11535          * TREE_BLOCK_REF_KEY
11536          */
11537         if (!found_ref) {
11538                 btrfs_release_path(&path);
11539                 key.objectid = bytenr;
11540                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11541                 key.offset = root->objectid;
11542
11543                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11544                 if (!ret)
11545                         found_ref = 1;
11546         }
11547         /*
11548          * Finally check SHARED BLOCK REF, any found will be good
11549          * Here we're not doing comprehensive extent backref checking,
11550          * only need to ensure there is some extent referring to this
11551          * tree block.
11552          */
11553         if (!found_ref) {
11554                 btrfs_release_path(&path);
11555                 key.objectid = bytenr;
11556                 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
11557                 key.offset = (u64)-1;
11558
11559                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11560                 if (ret < 0) {
11561                         err |= BACKREF_MISSING;
11562                         goto out;
11563                 }
11564                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11565                 if (ret) {
11566                         err |= BACKREF_MISSING;
11567                         goto out;
11568                 }
11569                 found_ref = 1;
11570         }
11571         if (!found_ref)
11572                 err |= BACKREF_MISSING;
11573 out:
11574         btrfs_release_path(&path);
11575         if (nrefs && strict &&
11576             level < root_level && nrefs->full_backref[level + 1])
11577                 parent = nrefs->bytenr[level + 1];
11578         if (eb && (err & BACKREF_MISSING))
11579                 error(
11580         "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11581                       bytenr, nodesize, owner, level,
11582                       parent ? "parent" : "root",
11583                       parent ? parent : root->objectid);
11584         return err;
11585 }
11586
11587 /*
11588  * If @err contains BACKREF_MISSING then add extent of the
11589  * file_extent_data_item.
11590  *
11591  * Returns error bits after reapir.
11592  */
11593 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11594                                    struct btrfs_root *root,
11595                                    struct btrfs_path *pathp,
11596                                    struct node_refs *nrefs,
11597                                    int err)
11598 {
11599         struct btrfs_file_extent_item *fi;
11600         struct btrfs_key fi_key;
11601         struct btrfs_key key;
11602         struct btrfs_extent_item *ei;
11603         struct btrfs_path path;
11604         struct btrfs_root *extent_root = root->fs_info->extent_root;
11605         struct extent_buffer *eb;
11606         u64 size;
11607         u64 disk_bytenr;
11608         u64 num_bytes;
11609         u64 parent;
11610         u64 offset;
11611         u64 extent_offset;
11612         u64 file_offset;
11613         int generation;
11614         int slot;
11615         int ret = 0;
11616
11617         eb = pathp->nodes[0];
11618         slot = pathp->slots[0];
11619         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11620         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11621
11622         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11623             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11624                 return err;
11625
11626         file_offset = fi_key.offset;
11627         generation = btrfs_file_extent_generation(eb, fi);
11628         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11629         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11630         extent_offset = btrfs_file_extent_offset(eb, fi);
11631         offset = file_offset - extent_offset;
11632
11633         /* now repair only adds backref */
11634         if ((err & BACKREF_MISSING) == 0)
11635                 return err;
11636
11637         /* search extent item */
11638         key.objectid = disk_bytenr;
11639         key.type = BTRFS_EXTENT_ITEM_KEY;
11640         key.offset = num_bytes;
11641
11642         btrfs_init_path(&path);
11643         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11644         if (ret < 0) {
11645                 ret = -EIO;
11646                 goto out;
11647         }
11648
11649         /* insert an extent item */
11650         if (ret > 0) {
11651                 key.objectid = disk_bytenr;
11652                 key.type = BTRFS_EXTENT_ITEM_KEY;
11653                 key.offset = num_bytes;
11654                 size = sizeof(*ei);
11655
11656                 btrfs_release_path(&path);
11657                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11658                                               size);
11659                 if (ret)
11660                         goto out;
11661                 eb = path.nodes[0];
11662                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
11663
11664                 btrfs_set_extent_refs(eb, ei, 0);
11665                 btrfs_set_extent_generation(eb, ei, generation);
11666                 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11667
11668                 btrfs_mark_buffer_dirty(eb);
11669                 ret = btrfs_update_block_group(extent_root, disk_bytenr,
11670                                                num_bytes, 1, 0);
11671                 btrfs_release_path(&path);
11672         }
11673
11674         if (nrefs->full_backref[0])
11675                 parent = btrfs_header_bytenr(eb);
11676         else
11677                 parent = 0;
11678
11679         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11680                                    root->objectid,
11681                    parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11682                                    offset);
11683         if (ret) {
11684                 error(
11685                 "failed to increase extent data backref[%llu %llu] root %llu",
11686                       disk_bytenr, num_bytes, root->objectid);
11687                 goto out;
11688         } else {
11689                 printf("Add one extent data backref [%llu %llu]\n",
11690                        disk_bytenr, num_bytes);
11691         }
11692
11693         err &= ~BACKREF_MISSING;
11694 out:
11695         if (ret)
11696                 error("can't repair root %llu extent data item[%llu %llu]",
11697                       root->objectid, disk_bytenr, num_bytes);
11698         return err;
11699 }
11700
11701 /*
11702  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11703  *
11704  * Return >0 any error found and output error message
11705  * Return 0 for no error found
11706  */
11707 static int check_extent_data_item(struct btrfs_root *root,
11708                                   struct btrfs_path *pathp,
11709                                   struct node_refs *nrefs,  int account_bytes)
11710 {
11711         struct btrfs_file_extent_item *fi;
11712         struct extent_buffer *eb = pathp->nodes[0];
11713         struct btrfs_path path;
11714         struct btrfs_root *extent_root = root->fs_info->extent_root;
11715         struct btrfs_key fi_key;
11716         struct btrfs_key dbref_key;
11717         struct extent_buffer *leaf;
11718         struct btrfs_extent_item *ei;
11719         struct btrfs_extent_inline_ref *iref;
11720         struct btrfs_extent_data_ref *dref;
11721         u64 owner;
11722         u64 disk_bytenr;
11723         u64 disk_num_bytes;
11724         u64 extent_num_bytes;
11725         u64 extent_flags;
11726         u64 offset;
11727         u32 item_size;
11728         unsigned long end;
11729         unsigned long ptr;
11730         int type;
11731         int found_dbackref = 0;
11732         int slot = pathp->slots[0];
11733         int err = 0;
11734         int ret;
11735         int strict;
11736
11737         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11738         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11739
11740         /* Nothing to check for hole and inline data extents */
11741         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11742             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11743                 return 0;
11744
11745         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11746         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11747         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11748         offset = btrfs_file_extent_offset(eb, fi);
11749
11750         /* Check unaligned disk_num_bytes and num_bytes */
11751         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11752                 error(
11753 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11754                         fi_key.objectid, fi_key.offset, disk_num_bytes,
11755                         root->fs_info->sectorsize);
11756                 err |= BYTES_UNALIGNED;
11757         } else if (account_bytes) {
11758                 data_bytes_allocated += disk_num_bytes;
11759         }
11760         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11761                 error(
11762 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11763                         fi_key.objectid, fi_key.offset, extent_num_bytes,
11764                         root->fs_info->sectorsize);
11765                 err |= BYTES_UNALIGNED;
11766         } else if (account_bytes) {
11767                 data_bytes_referenced += extent_num_bytes;
11768         }
11769         owner = btrfs_header_owner(eb);
11770
11771         /* Check the extent item of the file extent in extent tree */
11772         btrfs_init_path(&path);
11773         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11774         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11775         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11776
11777         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11778         if (ret)
11779                 goto out;
11780
11781         leaf = path.nodes[0];
11782         slot = path.slots[0];
11783         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11784
11785         extent_flags = btrfs_extent_flags(leaf, ei);
11786
11787         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11788                 error(
11789                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11790                     disk_bytenr, disk_num_bytes,
11791                     BTRFS_EXTENT_FLAG_DATA);
11792                 err |= BACKREF_MISMATCH;
11793         }
11794
11795         /* Check data backref inside that extent item */
11796         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11797         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11798         ptr = (unsigned long)iref;
11799         end = (unsigned long)ei + item_size;
11800         strict = should_check_extent_strictly(root, nrefs, -1);
11801
11802         while (ptr < end) {
11803                 u64 ref_root;
11804                 u64 ref_objectid;
11805                 u64 ref_offset;
11806                 bool match = false;
11807
11808                 iref = (struct btrfs_extent_inline_ref *)ptr;
11809                 type = btrfs_extent_inline_ref_type(leaf, iref);
11810                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11811
11812                 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
11813                 if (ret) {
11814                         err |= ret;
11815                         break;
11816                 }
11817                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11818                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
11819                         ref_objectid = btrfs_extent_data_ref_objectid(leaf, dref);
11820                         ref_offset = btrfs_extent_data_ref_offset(leaf, dref);
11821
11822                         if (ref_objectid == fi_key.objectid &&
11823                             ref_offset == fi_key.offset - offset)
11824                                 match = true;
11825                         if (ref_root == root->objectid && match)
11826                                 found_dbackref = 1;
11827                         else if (!strict && owner == ref_root && match)
11828                                 found_dbackref = 1;
11829                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
11830                         found_dbackref = !check_tree_block_ref(root, NULL,
11831                                 btrfs_extent_inline_ref_offset(leaf, iref),
11832                                 0, owner, NULL);
11833                 }
11834
11835                 if (found_dbackref)
11836                         break;
11837                 ptr += btrfs_extent_inline_ref_size(type);
11838         }
11839
11840         if (!found_dbackref) {
11841                 btrfs_release_path(&path);
11842
11843                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11844                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11845                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
11846                 dbref_key.offset = hash_extent_data_ref(root->objectid,
11847                                 fi_key.objectid, fi_key.offset - offset);
11848
11849                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11850                                         &dbref_key, &path, 0, 0);
11851                 if (!ret) {
11852                         found_dbackref = 1;
11853                         goto out;
11854                 }
11855
11856                 btrfs_release_path(&path);
11857
11858                 /*
11859                  * Neither inlined nor EXTENT_DATA_REF found, try
11860                  * SHARED_DATA_REF as last chance.
11861                  */
11862                 dbref_key.objectid = disk_bytenr;
11863                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11864                 dbref_key.offset = eb->start;
11865
11866                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11867                                         &dbref_key, &path, 0, 0);
11868                 if (!ret) {
11869                         found_dbackref = 1;
11870                         goto out;
11871                 }
11872         }
11873
11874 out:
11875         if (!found_dbackref)
11876                 err |= BACKREF_MISSING;
11877         btrfs_release_path(&path);
11878         if (err & BACKREF_MISSING) {
11879                 error("data extent[%llu %llu] backref lost",
11880                       disk_bytenr, disk_num_bytes);
11881         }
11882         return err;
11883 }
11884
11885 /*
11886  * Get real tree block level for the case like shared block
11887  * Return >= 0 as tree level
11888  * Return <0 for error
11889  */
11890 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11891 {
11892         struct extent_buffer *eb;
11893         struct btrfs_path path;
11894         struct btrfs_key key;
11895         struct btrfs_extent_item *ei;
11896         u64 flags;
11897         u64 transid;
11898         u8 backref_level;
11899         u8 header_level;
11900         int ret;
11901
11902         /* Search extent tree for extent generation and level */
11903         key.objectid = bytenr;
11904         key.type = BTRFS_METADATA_ITEM_KEY;
11905         key.offset = (u64)-1;
11906
11907         btrfs_init_path(&path);
11908         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11909         if (ret < 0)
11910                 goto release_out;
11911         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11912         if (ret < 0)
11913                 goto release_out;
11914         if (ret > 0) {
11915                 ret = -ENOENT;
11916                 goto release_out;
11917         }
11918
11919         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11920         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11921                             struct btrfs_extent_item);
11922         flags = btrfs_extent_flags(path.nodes[0], ei);
11923         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11924                 ret = -ENOENT;
11925                 goto release_out;
11926         }
11927
11928         /* Get transid for later read_tree_block() check */
11929         transid = btrfs_extent_generation(path.nodes[0], ei);
11930
11931         /* Get backref level as one source */
11932         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11933                 backref_level = key.offset;
11934         } else {
11935                 struct btrfs_tree_block_info *info;
11936
11937                 info = (struct btrfs_tree_block_info *)(ei + 1);
11938                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11939         }
11940         btrfs_release_path(&path);
11941
11942         /* Get level from tree block as an alternative source */
11943         eb = read_tree_block(fs_info, bytenr, transid);
11944         if (!extent_buffer_uptodate(eb)) {
11945                 free_extent_buffer(eb);
11946                 return -EIO;
11947         }
11948         header_level = btrfs_header_level(eb);
11949         free_extent_buffer(eb);
11950
11951         if (header_level != backref_level)
11952                 return -EIO;
11953         return header_level;
11954
11955 release_out:
11956         btrfs_release_path(&path);
11957         return ret;
11958 }
11959
11960 /*
11961  * Check if a tree block backref is valid (points to a valid tree block)
11962  * if level == -1, level will be resolved
11963  * Return >0 for any error found and print error message
11964  */
11965 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11966                                     u64 bytenr, int level)
11967 {
11968         struct btrfs_root *root;
11969         struct btrfs_key key;
11970         struct btrfs_path path;
11971         struct extent_buffer *eb;
11972         struct extent_buffer *node;
11973         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11974         int err = 0;
11975         int ret;
11976
11977         /* Query level for level == -1 special case */
11978         if (level == -1)
11979                 level = query_tree_block_level(fs_info, bytenr);
11980         if (level < 0) {
11981                 err |= REFERENCER_MISSING;
11982                 goto out;
11983         }
11984
11985         key.objectid = root_id;
11986         key.type = BTRFS_ROOT_ITEM_KEY;
11987         key.offset = (u64)-1;
11988
11989         root = btrfs_read_fs_root(fs_info, &key);
11990         if (IS_ERR(root)) {
11991                 err |= REFERENCER_MISSING;
11992                 goto out;
11993         }
11994
11995         /* Read out the tree block to get item/node key */
11996         eb = read_tree_block(fs_info, bytenr, 0);
11997         if (!extent_buffer_uptodate(eb)) {
11998                 err |= REFERENCER_MISSING;
11999                 free_extent_buffer(eb);
12000                 goto out;
12001         }
12002
12003         /* Empty tree, no need to check key */
12004         if (!btrfs_header_nritems(eb) && !level) {
12005                 free_extent_buffer(eb);
12006                 goto out;
12007         }
12008
12009         if (level)
12010                 btrfs_node_key_to_cpu(eb, &key, 0);
12011         else
12012                 btrfs_item_key_to_cpu(eb, &key, 0);
12013
12014         free_extent_buffer(eb);
12015
12016         btrfs_init_path(&path);
12017         path.lowest_level = level;
12018         /* Search with the first key, to ensure we can reach it */
12019         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12020         if (ret < 0) {
12021                 err |= REFERENCER_MISSING;
12022                 goto release_out;
12023         }
12024
12025         node = path.nodes[level];
12026         if (btrfs_header_bytenr(node) != bytenr) {
12027                 error(
12028         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
12029                         bytenr, nodesize, bytenr,
12030                         btrfs_header_bytenr(node));
12031                 err |= REFERENCER_MISMATCH;
12032         }
12033         if (btrfs_header_level(node) != level) {
12034                 error(
12035         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
12036                         bytenr, nodesize, level,
12037                         btrfs_header_level(node));
12038                 err |= REFERENCER_MISMATCH;
12039         }
12040
12041 release_out:
12042         btrfs_release_path(&path);
12043 out:
12044         if (err & REFERENCER_MISSING) {
12045                 if (level < 0)
12046                         error("extent [%llu %d] lost referencer (owner: %llu)",
12047                                 bytenr, nodesize, root_id);
12048                 else
12049                         error(
12050                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
12051                                 bytenr, nodesize, root_id, level);
12052         }
12053
12054         return err;
12055 }
12056
12057 /*
12058  * Check if tree block @eb is tree reloc root.
12059  * Return 0 if it's not or any problem happens
12060  * Return 1 if it's a tree reloc root
12061  */
12062 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
12063                                  struct extent_buffer *eb)
12064 {
12065         struct btrfs_root *tree_reloc_root;
12066         struct btrfs_key key;
12067         u64 bytenr = btrfs_header_bytenr(eb);
12068         u64 owner = btrfs_header_owner(eb);
12069         int ret = 0;
12070
12071         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12072         key.offset = owner;
12073         key.type = BTRFS_ROOT_ITEM_KEY;
12074
12075         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
12076         if (IS_ERR(tree_reloc_root))
12077                 return 0;
12078
12079         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
12080                 ret = 1;
12081         btrfs_free_fs_root(tree_reloc_root);
12082         return ret;
12083 }
12084
12085 /*
12086  * Check referencer for shared block backref
12087  * If level == -1, this function will resolve the level.
12088  */
12089 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
12090                                      u64 parent, u64 bytenr, int level)
12091 {
12092         struct extent_buffer *eb;
12093         u32 nr;
12094         int found_parent = 0;
12095         int i;
12096
12097         eb = read_tree_block(fs_info, parent, 0);
12098         if (!extent_buffer_uptodate(eb))
12099                 goto out;
12100
12101         if (level == -1)
12102                 level = query_tree_block_level(fs_info, bytenr);
12103         if (level < 0)
12104                 goto out;
12105
12106         /* It's possible it's a tree reloc root */
12107         if (parent == bytenr) {
12108                 if (is_tree_reloc_root(fs_info, eb))
12109                         found_parent = 1;
12110                 goto out;
12111         }
12112
12113         if (level + 1 != btrfs_header_level(eb))
12114                 goto out;
12115
12116         nr = btrfs_header_nritems(eb);
12117         for (i = 0; i < nr; i++) {
12118                 if (bytenr == btrfs_node_blockptr(eb, i)) {
12119                         found_parent = 1;
12120                         break;
12121                 }
12122         }
12123 out:
12124         free_extent_buffer(eb);
12125         if (!found_parent) {
12126                 error(
12127         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
12128                         bytenr, fs_info->nodesize, parent, level);
12129                 return REFERENCER_MISSING;
12130         }
12131         return 0;
12132 }
12133
12134 /*
12135  * Check referencer for normal (inlined) data ref
12136  * If len == 0, it will be resolved by searching in extent tree
12137  */
12138 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
12139                                      u64 root_id, u64 objectid, u64 offset,
12140                                      u64 bytenr, u64 len, u32 count)
12141 {
12142         struct btrfs_root *root;
12143         struct btrfs_root *extent_root = fs_info->extent_root;
12144         struct btrfs_key key;
12145         struct btrfs_path path;
12146         struct extent_buffer *leaf;
12147         struct btrfs_file_extent_item *fi;
12148         u32 found_count = 0;
12149         int slot;
12150         int ret = 0;
12151
12152         if (!len) {
12153                 key.objectid = bytenr;
12154                 key.type = BTRFS_EXTENT_ITEM_KEY;
12155                 key.offset = (u64)-1;
12156
12157                 btrfs_init_path(&path);
12158                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12159                 if (ret < 0)
12160                         goto out;
12161                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
12162                 if (ret)
12163                         goto out;
12164                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12165                 if (key.objectid != bytenr ||
12166                     key.type != BTRFS_EXTENT_ITEM_KEY)
12167                         goto out;
12168                 len = key.offset;
12169                 btrfs_release_path(&path);
12170         }
12171         key.objectid = root_id;
12172         key.type = BTRFS_ROOT_ITEM_KEY;
12173         key.offset = (u64)-1;
12174         btrfs_init_path(&path);
12175
12176         root = btrfs_read_fs_root(fs_info, &key);
12177         if (IS_ERR(root))
12178                 goto out;
12179
12180         key.objectid = objectid;
12181         key.type = BTRFS_EXTENT_DATA_KEY;
12182         /*
12183          * It can be nasty as data backref offset is
12184          * file offset - file extent offset, which is smaller or
12185          * equal to original backref offset.  The only special case is
12186          * overflow.  So we need to special check and do further search.
12187          */
12188         key.offset = offset & (1ULL << 63) ? 0 : offset;
12189
12190         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12191         if (ret < 0)
12192                 goto out;
12193
12194         /*
12195          * Search afterwards to get correct one
12196          * NOTE: As we must do a comprehensive check on the data backref to
12197          * make sure the dref count also matches, we must iterate all file
12198          * extents for that inode.
12199          */
12200         while (1) {
12201                 leaf = path.nodes[0];
12202                 slot = path.slots[0];
12203
12204                 if (slot >= btrfs_header_nritems(leaf) ||
12205                     btrfs_header_owner(leaf) != root_id)
12206                         goto next;
12207                 btrfs_item_key_to_cpu(leaf, &key, slot);
12208                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
12209                         break;
12210                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
12211                 /*
12212                  * Except normal disk bytenr and disk num bytes, we still
12213                  * need to do extra check on dbackref offset as
12214                  * dbackref offset = file_offset - file_extent_offset
12215                  *
12216                  * Also, we must check the leaf owner.
12217                  * In case of shared tree blocks (snapshots) we can inherit
12218                  * leaves from source snapshot.
12219                  * In that case, reference from source snapshot should not
12220                  * count.
12221                  */
12222                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
12223                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
12224                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
12225                     offset && btrfs_header_owner(leaf) == root_id)
12226                         found_count++;
12227
12228 next:
12229                 ret = btrfs_next_item(root, &path);
12230                 if (ret)
12231                         break;
12232         }
12233 out:
12234         btrfs_release_path(&path);
12235         if (found_count != count) {
12236                 error(
12237 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
12238                         bytenr, len, root_id, objectid, offset, count, found_count);
12239                 return REFERENCER_MISSING;
12240         }
12241         return 0;
12242 }
12243
12244 /*
12245  * Check if the referencer of a shared data backref exists
12246  */
12247 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12248                                      u64 parent, u64 bytenr)
12249 {
12250         struct extent_buffer *eb;
12251         struct btrfs_key key;
12252         struct btrfs_file_extent_item *fi;
12253         u32 nr;
12254         int found_parent = 0;
12255         int i;
12256
12257         eb = read_tree_block(fs_info, parent, 0);
12258         if (!extent_buffer_uptodate(eb))
12259                 goto out;
12260
12261         nr = btrfs_header_nritems(eb);
12262         for (i = 0; i < nr; i++) {
12263                 btrfs_item_key_to_cpu(eb, &key, i);
12264                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12265                         continue;
12266
12267                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
12268                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12269                         continue;
12270
12271                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12272                         found_parent = 1;
12273                         break;
12274                 }
12275         }
12276
12277 out:
12278         free_extent_buffer(eb);
12279         if (!found_parent) {
12280                 error("shared extent %llu referencer lost (parent: %llu)",
12281                         bytenr, parent);
12282                 return REFERENCER_MISSING;
12283         }
12284         return 0;
12285 }
12286
12287 /*
12288  * Only delete backref if REFERENCER_MISSING now
12289  *
12290  * Returns <0   the extent was deleted
12291  * Returns >0   the backref was deleted but extent still exists, returned value
12292  *               means error after repair
12293  * Returns  0   nothing happened
12294  */
12295 static int repair_extent_item(struct btrfs_trans_handle *trans,
12296                       struct btrfs_root *root, struct btrfs_path *path,
12297                       u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12298                       u64 owner, u64 offset, int err)
12299 {
12300         struct btrfs_key old_key;
12301         int freed = 0;
12302         int ret;
12303
12304         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
12305
12306         if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12307                 /* delete the backref */
12308                 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12309                           num_bytes, parent, root_objectid, owner, offset);
12310                 if (!ret) {
12311                         freed = 1;
12312                         err &= ~REFERENCER_MISSING;
12313                         printf("Delete backref in extent [%llu %llu]\n",
12314                                bytenr, num_bytes);
12315                 } else {
12316                         error("fail to delete backref in extent [%llu %llu]",
12317                                bytenr, num_bytes);
12318                 }
12319         }
12320
12321         /* btrfs_free_extent may delete the extent */
12322         btrfs_release_path(path);
12323         ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12324
12325         if (ret)
12326                 ret = -ENOENT;
12327         else if (freed)
12328                 ret = err;
12329         return ret;
12330 }
12331
12332 /*
12333  * This function will check a given extent item, including its backref and
12334  * itself (like crossing stripe boundary and type)
12335  *
12336  * Since we don't use extent_record anymore, introduce new error bit
12337  */
12338 static int check_extent_item(struct btrfs_trans_handle *trans,
12339                              struct btrfs_fs_info *fs_info,
12340                              struct btrfs_path *path)
12341 {
12342         struct btrfs_extent_item *ei;
12343         struct btrfs_extent_inline_ref *iref;
12344         struct btrfs_extent_data_ref *dref;
12345         struct extent_buffer *eb = path->nodes[0];
12346         unsigned long end;
12347         unsigned long ptr;
12348         int slot = path->slots[0];
12349         int type;
12350         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12351         u32 item_size = btrfs_item_size_nr(eb, slot);
12352         u64 flags;
12353         u64 offset;
12354         u64 parent;
12355         u64 num_bytes;
12356         u64 root_objectid;
12357         u64 owner;
12358         u64 owner_offset;
12359         int metadata = 0;
12360         int level;
12361         struct btrfs_key key;
12362         int ret;
12363         int err = 0;
12364
12365         btrfs_item_key_to_cpu(eb, &key, slot);
12366         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12367                 bytes_used += key.offset;
12368                 num_bytes = key.offset;
12369         } else {
12370                 bytes_used += nodesize;
12371                 num_bytes = nodesize;
12372         }
12373
12374         if (item_size < sizeof(*ei)) {
12375                 /*
12376                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12377                  * old thing when on disk format is still un-determined.
12378                  * No need to care about it anymore
12379                  */
12380                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12381                 return -ENOTTY;
12382         }
12383
12384         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12385         flags = btrfs_extent_flags(eb, ei);
12386
12387         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12388                 metadata = 1;
12389         if (metadata && check_crossing_stripes(global_info, key.objectid,
12390                                                eb->len)) {
12391                 error("bad metadata [%llu, %llu) crossing stripe boundary",
12392                       key.objectid, key.objectid + nodesize);
12393                 err |= CROSSING_STRIPE_BOUNDARY;
12394         }
12395
12396         ptr = (unsigned long)(ei + 1);
12397
12398         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12399                 /* Old EXTENT_ITEM metadata */
12400                 struct btrfs_tree_block_info *info;
12401
12402                 info = (struct btrfs_tree_block_info *)ptr;
12403                 level = btrfs_tree_block_level(eb, info);
12404                 ptr += sizeof(struct btrfs_tree_block_info);
12405         } else {
12406                 /* New METADATA_ITEM */
12407                 level = key.offset;
12408         }
12409         end = (unsigned long)ei + item_size;
12410
12411 next:
12412         /* Reached extent item end normally */
12413         if (ptr == end)
12414                 goto out;
12415
12416         /* Beyond extent item end, wrong item size */
12417         if (ptr > end) {
12418                 err |= ITEM_SIZE_MISMATCH;
12419                 error("extent item at bytenr %llu slot %d has wrong size",
12420                         eb->start, slot);
12421                 goto out;
12422         }
12423
12424         parent = 0;
12425         root_objectid = 0;
12426         owner = 0;
12427         owner_offset = 0;
12428         /* Now check every backref in this extent item */
12429         iref = (struct btrfs_extent_inline_ref *)ptr;
12430         type = btrfs_extent_inline_ref_type(eb, iref);
12431         offset = btrfs_extent_inline_ref_offset(eb, iref);
12432         switch (type) {
12433         case BTRFS_TREE_BLOCK_REF_KEY:
12434                 root_objectid = offset;
12435                 owner = level;
12436                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12437                                                level);
12438                 err |= ret;
12439                 break;
12440         case BTRFS_SHARED_BLOCK_REF_KEY:
12441                 parent = offset;
12442                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12443                                                  level);
12444                 err |= ret;
12445                 break;
12446         case BTRFS_EXTENT_DATA_REF_KEY:
12447                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12448                 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12449                 owner = btrfs_extent_data_ref_objectid(eb, dref);
12450                 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12451                 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12452                                         owner_offset, key.objectid, key.offset,
12453                                         btrfs_extent_data_ref_count(eb, dref));
12454                 err |= ret;
12455                 break;
12456         case BTRFS_SHARED_DATA_REF_KEY:
12457                 parent = offset;
12458                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12459                 err |= ret;
12460                 break;
12461         default:
12462                 error("extent[%llu %d %llu] has unknown ref type: %d",
12463                         key.objectid, key.type, key.offset, type);
12464                 ret = UNKNOWN_TYPE;
12465                 err |= ret;
12466                 goto out;
12467         }
12468
12469         if (err && repair) {
12470                 ret = repair_extent_item(trans, fs_info->extent_root, path,
12471                          key.objectid, num_bytes, parent, root_objectid,
12472                          owner, owner_offset, ret);
12473                 if (ret < 0)
12474                         goto out;
12475                 if (ret) {
12476                         goto next;
12477                         err = ret;
12478                 }
12479         }
12480
12481         ptr += btrfs_extent_inline_ref_size(type);
12482         goto next;
12483
12484 out:
12485         return err;
12486 }
12487
12488 /*
12489  * Check if a dev extent item is referred correctly by its chunk
12490  */
12491 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12492                                  struct extent_buffer *eb, int slot)
12493 {
12494         struct btrfs_root *chunk_root = fs_info->chunk_root;
12495         struct btrfs_dev_extent *ptr;
12496         struct btrfs_path path;
12497         struct btrfs_key chunk_key;
12498         struct btrfs_key devext_key;
12499         struct btrfs_chunk *chunk;
12500         struct extent_buffer *l;
12501         int num_stripes;
12502         u64 length;
12503         int i;
12504         int found_chunk = 0;
12505         int ret;
12506
12507         btrfs_item_key_to_cpu(eb, &devext_key, slot);
12508         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12509         length = btrfs_dev_extent_length(eb, ptr);
12510
12511         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12512         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12513         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12514
12515         btrfs_init_path(&path);
12516         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12517         if (ret)
12518                 goto out;
12519
12520         l = path.nodes[0];
12521         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12522         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12523                                       chunk_key.offset);
12524         if (ret < 0)
12525                 goto out;
12526
12527         if (btrfs_stripe_length(fs_info, l, chunk) != length)
12528                 goto out;
12529
12530         num_stripes = btrfs_chunk_num_stripes(l, chunk);
12531         for (i = 0; i < num_stripes; i++) {
12532                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12533                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12534
12535                 if (devid == devext_key.objectid &&
12536                     offset == devext_key.offset) {
12537                         found_chunk = 1;
12538                         break;
12539                 }
12540         }
12541 out:
12542         btrfs_release_path(&path);
12543         if (!found_chunk) {
12544                 error(
12545                 "device extent[%llu, %llu, %llu] did not find the related chunk",
12546                         devext_key.objectid, devext_key.offset, length);
12547                 return REFERENCER_MISSING;
12548         }
12549         return 0;
12550 }
12551
12552 /*
12553  * Check if the used space is correct with the dev item
12554  */
12555 static int check_dev_item(struct btrfs_fs_info *fs_info,
12556                           struct extent_buffer *eb, int slot)
12557 {
12558         struct btrfs_root *dev_root = fs_info->dev_root;
12559         struct btrfs_dev_item *dev_item;
12560         struct btrfs_path path;
12561         struct btrfs_key key;
12562         struct btrfs_dev_extent *ptr;
12563         u64 total_bytes;
12564         u64 dev_id;
12565         u64 used;
12566         u64 total = 0;
12567         int ret;
12568
12569         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12570         dev_id = btrfs_device_id(eb, dev_item);
12571         used = btrfs_device_bytes_used(eb, dev_item);
12572         total_bytes = btrfs_device_total_bytes(eb, dev_item);
12573
12574         key.objectid = dev_id;
12575         key.type = BTRFS_DEV_EXTENT_KEY;
12576         key.offset = 0;
12577
12578         btrfs_init_path(&path);
12579         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
12580         if (ret < 0) {
12581                 btrfs_item_key_to_cpu(eb, &key, slot);
12582                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12583                         key.objectid, key.type, key.offset);
12584                 btrfs_release_path(&path);
12585                 return REFERENCER_MISSING;
12586         }
12587
12588         /* Iterate dev_extents to calculate the used space of a device */
12589         while (1) {
12590                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12591                         goto next;
12592
12593                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12594                 if (key.objectid > dev_id)
12595                         break;
12596                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12597                         goto next;
12598
12599                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12600                                      struct btrfs_dev_extent);
12601                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12602 next:
12603                 ret = btrfs_next_item(dev_root, &path);
12604                 if (ret)
12605                         break;
12606         }
12607         btrfs_release_path(&path);
12608
12609         if (used != total) {
12610                 btrfs_item_key_to_cpu(eb, &key, slot);
12611                 error(
12612 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12613                         total, used, BTRFS_ROOT_TREE_OBJECTID,
12614                         BTRFS_DEV_EXTENT_KEY, dev_id);
12615                 return ACCOUNTING_MISMATCH;
12616         }
12617         check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize);
12618
12619         return 0;
12620 }
12621
12622 /*
12623  * Check a block group item with its referener (chunk) and its used space
12624  * with extent/metadata item
12625  */
12626 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12627                                   struct extent_buffer *eb, int slot)
12628 {
12629         struct btrfs_root *extent_root = fs_info->extent_root;
12630         struct btrfs_root *chunk_root = fs_info->chunk_root;
12631         struct btrfs_block_group_item *bi;
12632         struct btrfs_block_group_item bg_item;
12633         struct btrfs_path path;
12634         struct btrfs_key bg_key;
12635         struct btrfs_key chunk_key;
12636         struct btrfs_key extent_key;
12637         struct btrfs_chunk *chunk;
12638         struct extent_buffer *leaf;
12639         struct btrfs_extent_item *ei;
12640         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12641         u64 flags;
12642         u64 bg_flags;
12643         u64 used;
12644         u64 total = 0;
12645         int ret;
12646         int err = 0;
12647
12648         btrfs_item_key_to_cpu(eb, &bg_key, slot);
12649         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
12650         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12651         used = btrfs_block_group_used(&bg_item);
12652         bg_flags = btrfs_block_group_flags(&bg_item);
12653
12654         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12655         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12656         chunk_key.offset = bg_key.objectid;
12657
12658         btrfs_init_path(&path);
12659         /* Search for the referencer chunk */
12660         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12661         if (ret) {
12662                 error(
12663                 "block group[%llu %llu] did not find the related chunk item",
12664                         bg_key.objectid, bg_key.offset);
12665                 err |= REFERENCER_MISSING;
12666         } else {
12667                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12668                                         struct btrfs_chunk);
12669                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12670                                                 bg_key.offset) {
12671                         error(
12672         "block group[%llu %llu] related chunk item length does not match",
12673                                 bg_key.objectid, bg_key.offset);
12674                         err |= REFERENCER_MISMATCH;
12675                 }
12676         }
12677         btrfs_release_path(&path);
12678
12679         /* Search from the block group bytenr */
12680         extent_key.objectid = bg_key.objectid;
12681         extent_key.type = 0;
12682         extent_key.offset = 0;
12683
12684         btrfs_init_path(&path);
12685         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12686         if (ret < 0)
12687                 goto out;
12688
12689         /* Iterate extent tree to account used space */
12690         while (1) {
12691                 leaf = path.nodes[0];
12692
12693                 /* Search slot can point to the last item beyond leaf nritems */
12694                 if (path.slots[0] >= btrfs_header_nritems(leaf))
12695                         goto next;
12696
12697                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
12698                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12699                         break;
12700
12701                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12702                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12703                         goto next;
12704                 if (extent_key.objectid < bg_key.objectid)
12705                         goto next;
12706
12707                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12708                         total += nodesize;
12709                 else
12710                         total += extent_key.offset;
12711
12712                 ei = btrfs_item_ptr(leaf, path.slots[0],
12713                                     struct btrfs_extent_item);
12714                 flags = btrfs_extent_flags(leaf, ei);
12715                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12716                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12717                                 error(
12718                         "bad extent[%llu, %llu) type mismatch with chunk",
12719                                         extent_key.objectid,
12720                                         extent_key.objectid + extent_key.offset);
12721                                 err |= CHUNK_TYPE_MISMATCH;
12722                         }
12723                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12724                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12725                                     BTRFS_BLOCK_GROUP_METADATA))) {
12726                                 error(
12727                         "bad extent[%llu, %llu) type mismatch with chunk",
12728                                         extent_key.objectid,
12729                                         extent_key.objectid + nodesize);
12730                                 err |= CHUNK_TYPE_MISMATCH;
12731                         }
12732                 }
12733 next:
12734                 ret = btrfs_next_item(extent_root, &path);
12735                 if (ret)
12736                         break;
12737         }
12738
12739 out:
12740         btrfs_release_path(&path);
12741
12742         if (total != used) {
12743                 error(
12744                 "block group[%llu %llu] used %llu but extent items used %llu",
12745                         bg_key.objectid, bg_key.offset, used, total);
12746                 err |= BG_ACCOUNTING_ERROR;
12747         }
12748         return err;
12749 }
12750
12751 /*
12752  * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
12753  * FIXME: We still need to repair error of dev_item.
12754  *
12755  * Returns error after repair.
12756  */
12757 static int repair_chunk_item(struct btrfs_trans_handle *trans,
12758                              struct btrfs_root *chunk_root,
12759                              struct btrfs_path *path, int err)
12760 {
12761         struct btrfs_chunk *chunk;
12762         struct btrfs_key chunk_key;
12763         struct extent_buffer *eb = path->nodes[0];
12764         u64 length;
12765         int slot = path->slots[0];
12766         u64 type;
12767         int ret = 0;
12768
12769         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12770         if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
12771                 return err;
12772         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12773         type = btrfs_chunk_type(path->nodes[0], chunk);
12774         length = btrfs_chunk_length(eb, chunk);
12775
12776         if (err & REFERENCER_MISSING) {
12777                 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
12778                                              type, chunk_key.offset, length);
12779                 if (ret) {
12780                         error("fail to add block group item[%llu %llu]",
12781                               chunk_key.offset, length);
12782                         goto out;
12783                 } else {
12784                         err &= ~REFERENCER_MISSING;
12785                         printf("Added block group item[%llu %llu]\n",
12786                                chunk_key.offset, length);
12787                 }
12788         }
12789
12790 out:
12791         return err;
12792 }
12793
12794 /*
12795  * Check a chunk item.
12796  * Including checking all referred dev_extents and block group
12797  */
12798 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12799                             struct extent_buffer *eb, int slot)
12800 {
12801         struct btrfs_root *extent_root = fs_info->extent_root;
12802         struct btrfs_root *dev_root = fs_info->dev_root;
12803         struct btrfs_path path;
12804         struct btrfs_key chunk_key;
12805         struct btrfs_key bg_key;
12806         struct btrfs_key devext_key;
12807         struct btrfs_chunk *chunk;
12808         struct extent_buffer *leaf;
12809         struct btrfs_block_group_item *bi;
12810         struct btrfs_block_group_item bg_item;
12811         struct btrfs_dev_extent *ptr;
12812         u64 length;
12813         u64 chunk_end;
12814         u64 stripe_len;
12815         u64 type;
12816         int num_stripes;
12817         u64 offset;
12818         u64 objectid;
12819         int i;
12820         int ret;
12821         int err = 0;
12822
12823         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12824         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12825         length = btrfs_chunk_length(eb, chunk);
12826         chunk_end = chunk_key.offset + length;
12827         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12828                                       chunk_key.offset);
12829         if (ret < 0) {
12830                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12831                         chunk_end);
12832                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12833                 goto out;
12834         }
12835         type = btrfs_chunk_type(eb, chunk);
12836
12837         bg_key.objectid = chunk_key.offset;
12838         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12839         bg_key.offset = length;
12840
12841         btrfs_init_path(&path);
12842         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12843         if (ret) {
12844                 error(
12845                 "chunk[%llu %llu) did not find the related block group item",
12846                         chunk_key.offset, chunk_end);
12847                 err |= REFERENCER_MISSING;
12848         } else{
12849                 leaf = path.nodes[0];
12850                 bi = btrfs_item_ptr(leaf, path.slots[0],
12851                                     struct btrfs_block_group_item);
12852                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
12853                                    sizeof(bg_item));
12854                 if (btrfs_block_group_flags(&bg_item) != type) {
12855                         error(
12856 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12857                                 chunk_key.offset, chunk_end, type,
12858                                 btrfs_block_group_flags(&bg_item));
12859                         err |= REFERENCER_MISSING;
12860                 }
12861         }
12862
12863         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12864         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
12865         for (i = 0; i < num_stripes; i++) {
12866                 btrfs_release_path(&path);
12867                 btrfs_init_path(&path);
12868                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12869                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12870                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12871
12872                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12873                                         0, 0);
12874                 if (ret)
12875                         goto not_match_dev;
12876
12877                 leaf = path.nodes[0];
12878                 ptr = btrfs_item_ptr(leaf, path.slots[0],
12879                                      struct btrfs_dev_extent);
12880                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12881                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
12882                 if (objectid != chunk_key.objectid ||
12883                     offset != chunk_key.offset ||
12884                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12885                         goto not_match_dev;
12886                 continue;
12887 not_match_dev:
12888                 err |= BACKREF_MISSING;
12889                 error(
12890                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12891                         chunk_key.objectid, chunk_end, i);
12892                 continue;
12893         }
12894         btrfs_release_path(&path);
12895 out:
12896         return err;
12897 }
12898
12899 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
12900                                    struct btrfs_root *root,
12901                                    struct btrfs_path *path)
12902 {
12903         struct btrfs_key key;
12904         int ret = 0;
12905
12906         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
12907         btrfs_release_path(path);
12908         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
12909         if (ret) {
12910                 ret = -ENOENT;
12911                 goto out;
12912         }
12913
12914         ret = btrfs_del_item(trans, root, path);
12915         if (ret)
12916                 goto out;
12917
12918         if (path->slots[0] == 0)
12919                 btrfs_prev_leaf(root, path);
12920         else
12921                 path->slots[0]--;
12922 out:
12923         if (ret)
12924                 error("failed to delete root %llu item[%llu, %u, %llu]",
12925                       root->objectid, key.objectid, key.type, key.offset);
12926         else
12927                 printf("Deleted root %llu item[%llu, %u, %llu]\n",
12928                        root->objectid, key.objectid, key.type, key.offset);
12929         return ret;
12930 }
12931
12932 /*
12933  * Main entry function to check known items and update related accounting info
12934  */
12935 static int check_leaf_items(struct btrfs_trans_handle *trans,
12936                             struct btrfs_root *root, struct btrfs_path *path,
12937                             struct node_refs *nrefs, int account_bytes)
12938 {
12939         struct btrfs_fs_info *fs_info = root->fs_info;
12940         struct btrfs_key key;
12941         struct extent_buffer *eb;
12942         int slot;
12943         int type;
12944         struct btrfs_extent_data_ref *dref;
12945         int ret = 0;
12946         int err = 0;
12947
12948 again:
12949         eb = path->nodes[0];
12950         slot = path->slots[0];
12951         if (slot >= btrfs_header_nritems(eb)) {
12952                 if (slot == 0) {
12953                         error("empty leaf [%llu %u] root %llu", eb->start,
12954                                 root->fs_info->nodesize, root->objectid);
12955                         err |= EIO;
12956                 }
12957                 goto out;
12958         }
12959
12960         btrfs_item_key_to_cpu(eb, &key, slot);
12961         type = key.type;
12962
12963         switch (type) {
12964         case BTRFS_EXTENT_DATA_KEY:
12965                 ret = check_extent_data_item(root, path, nrefs, account_bytes);
12966                 if (repair && ret)
12967                         ret = repair_extent_data_item(trans, root, path, nrefs,
12968                                                       ret);
12969                 err |= ret;
12970                 break;
12971         case BTRFS_BLOCK_GROUP_ITEM_KEY:
12972                 ret = check_block_group_item(fs_info, eb, slot);
12973                 if (repair &&
12974                     ret & REFERENCER_MISSING)
12975                         ret = delete_extent_tree_item(trans, root, path);
12976                 err |= ret;
12977                 break;
12978         case BTRFS_DEV_ITEM_KEY:
12979                 ret = check_dev_item(fs_info, eb, slot);
12980                 err |= ret;
12981                 break;
12982         case BTRFS_CHUNK_ITEM_KEY:
12983                 ret = check_chunk_item(fs_info, eb, slot);
12984                 if (repair && ret)
12985                         ret = repair_chunk_item(trans, root, path, ret);
12986                 err |= ret;
12987                 break;
12988         case BTRFS_DEV_EXTENT_KEY:
12989                 ret = check_dev_extent_item(fs_info, eb, slot);
12990                 err |= ret;
12991                 break;
12992         case BTRFS_EXTENT_ITEM_KEY:
12993         case BTRFS_METADATA_ITEM_KEY:
12994                 ret = check_extent_item(trans, fs_info, path);
12995                 err |= ret;
12996                 break;
12997         case BTRFS_EXTENT_CSUM_KEY:
12998                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
12999                 err |= ret;
13000                 break;
13001         case BTRFS_TREE_BLOCK_REF_KEY:
13002                 ret = check_tree_block_backref(fs_info, key.offset,
13003                                                key.objectid, -1);
13004                 if (repair &&
13005                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13006                         ret = delete_extent_tree_item(trans, root, path);
13007                 err |= ret;
13008                 break;
13009         case BTRFS_EXTENT_DATA_REF_KEY:
13010                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
13011                 ret = check_extent_data_backref(fs_info,
13012                                 btrfs_extent_data_ref_root(eb, dref),
13013                                 btrfs_extent_data_ref_objectid(eb, dref),
13014                                 btrfs_extent_data_ref_offset(eb, dref),
13015                                 key.objectid, 0,
13016                                 btrfs_extent_data_ref_count(eb, dref));
13017                 if (repair &&
13018                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13019                         ret = delete_extent_tree_item(trans, root, path);
13020                 err |= ret;
13021                 break;
13022         case BTRFS_SHARED_BLOCK_REF_KEY:
13023                 ret = check_shared_block_backref(fs_info, key.offset,
13024                                                  key.objectid, -1);
13025                 if (repair &&
13026                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13027                         ret = delete_extent_tree_item(trans, root, path);
13028                 err |= ret;
13029                 break;
13030         case BTRFS_SHARED_DATA_REF_KEY:
13031                 ret = check_shared_data_backref(fs_info, key.offset,
13032                                                 key.objectid);
13033                 if (repair &&
13034                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13035                         ret = delete_extent_tree_item(trans, root, path);
13036                 err |= ret;
13037                 break;
13038         default:
13039                 break;
13040         }
13041
13042         ++path->slots[0];
13043         goto again;
13044 out:
13045         return err;
13046 }
13047
13048 /*
13049  * Low memory usage version check_chunks_and_extents.
13050  */
13051 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
13052 {
13053         struct btrfs_trans_handle *trans = NULL;
13054         struct btrfs_path path;
13055         struct btrfs_key old_key;
13056         struct btrfs_key key;
13057         struct btrfs_root *root1;
13058         struct btrfs_root *root;
13059         struct btrfs_root *cur_root;
13060         int err = 0;
13061         int ret;
13062
13063         root = fs_info->fs_root;
13064
13065         if (repair) {
13066                 trans = btrfs_start_transaction(fs_info->extent_root, 1);
13067                 if (IS_ERR(trans)) {
13068                         error("failed to start transaction before check");
13069                         return PTR_ERR(trans);
13070                 }
13071         }
13072
13073         root1 = root->fs_info->chunk_root;
13074         ret = check_btrfs_root(trans, root1, 0, 1);
13075         err |= ret;
13076
13077         root1 = root->fs_info->tree_root;
13078         ret = check_btrfs_root(trans, root1, 0, 1);
13079         err |= ret;
13080
13081         btrfs_init_path(&path);
13082         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
13083         key.offset = 0;
13084         key.type = BTRFS_ROOT_ITEM_KEY;
13085
13086         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
13087         if (ret) {
13088                 error("cannot find extent tree in tree_root");
13089                 goto out;
13090         }
13091
13092         while (1) {
13093                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13094                 if (key.type != BTRFS_ROOT_ITEM_KEY)
13095                         goto next;
13096                 old_key = key;
13097                 key.offset = (u64)-1;
13098
13099                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13100                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
13101                                         &key);
13102                 else
13103                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
13104                 if (IS_ERR(cur_root) || !cur_root) {
13105                         error("failed to read tree: %lld", key.objectid);
13106                         goto next;
13107                 }
13108
13109                 ret = check_btrfs_root(trans, cur_root, 0, 1);
13110                 err |= ret;
13111
13112                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13113                         btrfs_free_fs_root(cur_root);
13114
13115                 btrfs_release_path(&path);
13116                 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
13117                                         &old_key, &path, 0, 0);
13118                 if (ret)
13119                         goto out;
13120 next:
13121                 ret = btrfs_next_item(root1, &path);
13122                 if (ret)
13123                         goto out;
13124         }
13125 out:
13126
13127         /* if repair, update block accounting */
13128         if (repair) {
13129                 ret = btrfs_fix_block_accounting(trans, root);
13130                 if (ret)
13131                         err |= ret;
13132                 else
13133                         err &= ~BG_ACCOUNTING_ERROR;
13134         }
13135
13136         if (trans)
13137                 btrfs_commit_transaction(trans, root->fs_info->extent_root);
13138
13139         btrfs_release_path(&path);
13140
13141         return err;
13142 }
13143
13144 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
13145 {
13146         int ret;
13147
13148         if (!ctx.progress_enabled)
13149                 fprintf(stderr, "checking extents\n");
13150         if (check_mode == CHECK_MODE_LOWMEM)
13151                 ret = check_chunks_and_extents_v2(fs_info);
13152         else
13153                 ret = check_chunks_and_extents(fs_info);
13154
13155         /* Also repair device size related problems */
13156         if (repair && !ret) {
13157                 ret = btrfs_fix_device_and_super_size(fs_info);
13158                 if (ret > 0)
13159                         ret = 0;
13160         }
13161         return ret;
13162 }
13163
13164 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
13165                            struct btrfs_root *root, int overwrite)
13166 {
13167         struct extent_buffer *c;
13168         struct extent_buffer *old = root->node;
13169         int level;
13170         int ret;
13171         struct btrfs_disk_key disk_key = {0,0,0};
13172
13173         level = 0;
13174
13175         if (overwrite) {
13176                 c = old;
13177                 extent_buffer_get(c);
13178                 goto init;
13179         }
13180         c = btrfs_alloc_free_block(trans, root,
13181                                    root->fs_info->nodesize,
13182                                    root->root_key.objectid,
13183                                    &disk_key, level, 0, 0);
13184         if (IS_ERR(c)) {
13185                 c = old;
13186                 extent_buffer_get(c);
13187                 overwrite = 1;
13188         }
13189 init:
13190         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
13191         btrfs_set_header_level(c, level);
13192         btrfs_set_header_bytenr(c, c->start);
13193         btrfs_set_header_generation(c, trans->transid);
13194         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
13195         btrfs_set_header_owner(c, root->root_key.objectid);
13196
13197         write_extent_buffer(c, root->fs_info->fsid,
13198                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
13199
13200         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
13201                             btrfs_header_chunk_tree_uuid(c),
13202                             BTRFS_UUID_SIZE);
13203
13204         btrfs_mark_buffer_dirty(c);
13205         /*
13206          * this case can happen in the following case:
13207          *
13208          * 1.overwrite previous root.
13209          *
13210          * 2.reinit reloc data root, this is because we skip pin
13211          * down reloc data tree before which means we can allocate
13212          * same block bytenr here.
13213          */
13214         if (old->start == c->start) {
13215                 btrfs_set_root_generation(&root->root_item,
13216                                           trans->transid);
13217                 root->root_item.level = btrfs_header_level(root->node);
13218                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
13219                                         &root->root_key, &root->root_item);
13220                 if (ret) {
13221                         free_extent_buffer(c);
13222                         return ret;
13223                 }
13224         }
13225         free_extent_buffer(old);
13226         root->node = c;
13227         add_root_to_dirty_list(root);
13228         return 0;
13229 }
13230
13231 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
13232                                 struct extent_buffer *eb, int tree_root)
13233 {
13234         struct extent_buffer *tmp;
13235         struct btrfs_root_item *ri;
13236         struct btrfs_key key;
13237         u64 bytenr;
13238         int level = btrfs_header_level(eb);
13239         int nritems;
13240         int ret;
13241         int i;
13242
13243         /*
13244          * If we have pinned this block before, don't pin it again.
13245          * This can not only avoid forever loop with broken filesystem
13246          * but also give us some speedups.
13247          */
13248         if (test_range_bit(&fs_info->pinned_extents, eb->start,
13249                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
13250                 return 0;
13251
13252         btrfs_pin_extent(fs_info, eb->start, eb->len);
13253
13254         nritems = btrfs_header_nritems(eb);
13255         for (i = 0; i < nritems; i++) {
13256                 if (level == 0) {
13257                         btrfs_item_key_to_cpu(eb, &key, i);
13258                         if (key.type != BTRFS_ROOT_ITEM_KEY)
13259                                 continue;
13260                         /* Skip the extent root and reloc roots */
13261                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13262                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13263                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
13264                                 continue;
13265                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13266                         bytenr = btrfs_disk_root_bytenr(eb, ri);
13267
13268                         /*
13269                          * If at any point we start needing the real root we
13270                          * will have to build a stump root for the root we are
13271                          * in, but for now this doesn't actually use the root so
13272                          * just pass in extent_root.
13273                          */
13274                         tmp = read_tree_block(fs_info, bytenr, 0);
13275                         if (!extent_buffer_uptodate(tmp)) {
13276                                 fprintf(stderr, "Error reading root block\n");
13277                                 return -EIO;
13278                         }
13279                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
13280                         free_extent_buffer(tmp);
13281                         if (ret)
13282                                 return ret;
13283                 } else {
13284                         bytenr = btrfs_node_blockptr(eb, i);
13285
13286                         /* If we aren't the tree root don't read the block */
13287                         if (level == 1 && !tree_root) {
13288                                 btrfs_pin_extent(fs_info, bytenr,
13289                                                 fs_info->nodesize);
13290                                 continue;
13291                         }
13292
13293                         tmp = read_tree_block(fs_info, bytenr, 0);
13294                         if (!extent_buffer_uptodate(tmp)) {
13295                                 fprintf(stderr, "Error reading tree block\n");
13296                                 return -EIO;
13297                         }
13298                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13299                         free_extent_buffer(tmp);
13300                         if (ret)
13301                                 return ret;
13302                 }
13303         }
13304
13305         return 0;
13306 }
13307
13308 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13309 {
13310         int ret;
13311
13312         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13313         if (ret)
13314                 return ret;
13315
13316         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
13317 }
13318
13319 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13320 {
13321         struct btrfs_block_group_cache *cache;
13322         struct btrfs_path path;
13323         struct extent_buffer *leaf;
13324         struct btrfs_chunk *chunk;
13325         struct btrfs_key key;
13326         int ret;
13327         u64 start;
13328
13329         btrfs_init_path(&path);
13330         key.objectid = 0;
13331         key.type = BTRFS_CHUNK_ITEM_KEY;
13332         key.offset = 0;
13333         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13334         if (ret < 0) {
13335                 btrfs_release_path(&path);
13336                 return ret;
13337         }
13338
13339         /*
13340          * We do this in case the block groups were screwed up and had alloc
13341          * bits that aren't actually set on the chunks.  This happens with
13342          * restored images every time and could happen in real life I guess.
13343          */
13344         fs_info->avail_data_alloc_bits = 0;
13345         fs_info->avail_metadata_alloc_bits = 0;
13346         fs_info->avail_system_alloc_bits = 0;
13347
13348         /* First we need to create the in-memory block groups */
13349         while (1) {
13350                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13351                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13352                         if (ret < 0) {
13353                                 btrfs_release_path(&path);
13354                                 return ret;
13355                         }
13356                         if (ret) {
13357                                 ret = 0;
13358                                 break;
13359                         }
13360                 }
13361                 leaf = path.nodes[0];
13362                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13363                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13364                         path.slots[0]++;
13365                         continue;
13366                 }
13367
13368                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13369                 btrfs_add_block_group(fs_info, 0,
13370                                       btrfs_chunk_type(leaf, chunk), key.offset,
13371                                       btrfs_chunk_length(leaf, chunk));
13372                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13373                                  key.offset + btrfs_chunk_length(leaf, chunk));
13374                 path.slots[0]++;
13375         }
13376         start = 0;
13377         while (1) {
13378                 cache = btrfs_lookup_first_block_group(fs_info, start);
13379                 if (!cache)
13380                         break;
13381                 cache->cached = 1;
13382                 start = cache->key.objectid + cache->key.offset;
13383         }
13384
13385         btrfs_release_path(&path);
13386         return 0;
13387 }
13388
13389 static int reset_balance(struct btrfs_trans_handle *trans,
13390                          struct btrfs_fs_info *fs_info)
13391 {
13392         struct btrfs_root *root = fs_info->tree_root;
13393         struct btrfs_path path;
13394         struct extent_buffer *leaf;
13395         struct btrfs_key key;
13396         int del_slot, del_nr = 0;
13397         int ret;
13398         int found = 0;
13399
13400         btrfs_init_path(&path);
13401         key.objectid = BTRFS_BALANCE_OBJECTID;
13402         key.type = BTRFS_BALANCE_ITEM_KEY;
13403         key.offset = 0;
13404         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13405         if (ret) {
13406                 if (ret > 0)
13407                         ret = 0;
13408                 if (!ret)
13409                         goto reinit_data_reloc;
13410                 else
13411                         goto out;
13412         }
13413
13414         ret = btrfs_del_item(trans, root, &path);
13415         if (ret)
13416                 goto out;
13417         btrfs_release_path(&path);
13418
13419         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13420         key.type = BTRFS_ROOT_ITEM_KEY;
13421         key.offset = 0;
13422         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13423         if (ret < 0)
13424                 goto out;
13425         while (1) {
13426                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13427                         if (!found)
13428                                 break;
13429
13430                         if (del_nr) {
13431                                 ret = btrfs_del_items(trans, root, &path,
13432                                                       del_slot, del_nr);
13433                                 del_nr = 0;
13434                                 if (ret)
13435                                         goto out;
13436                         }
13437                         key.offset++;
13438                         btrfs_release_path(&path);
13439
13440                         found = 0;
13441                         ret = btrfs_search_slot(trans, root, &key, &path,
13442                                                 -1, 1);
13443                         if (ret < 0)
13444                                 goto out;
13445                         continue;
13446                 }
13447                 found = 1;
13448                 leaf = path.nodes[0];
13449                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13450                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13451                         break;
13452                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13453                         path.slots[0]++;
13454                         continue;
13455                 }
13456                 if (!del_nr) {
13457                         del_slot = path.slots[0];
13458                         del_nr = 1;
13459                 } else {
13460                         del_nr++;
13461                 }
13462                 path.slots[0]++;
13463         }
13464
13465         if (del_nr) {
13466                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13467                 if (ret)
13468                         goto out;
13469         }
13470         btrfs_release_path(&path);
13471
13472 reinit_data_reloc:
13473         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13474         key.type = BTRFS_ROOT_ITEM_KEY;
13475         key.offset = (u64)-1;
13476         root = btrfs_read_fs_root(fs_info, &key);
13477         if (IS_ERR(root)) {
13478                 fprintf(stderr, "Error reading data reloc tree\n");
13479                 ret = PTR_ERR(root);
13480                 goto out;
13481         }
13482         record_root_in_trans(trans, root);
13483         ret = btrfs_fsck_reinit_root(trans, root, 0);
13484         if (ret)
13485                 goto out;
13486         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13487 out:
13488         btrfs_release_path(&path);
13489         return ret;
13490 }
13491
13492 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13493                               struct btrfs_fs_info *fs_info)
13494 {
13495         u64 start = 0;
13496         int ret;
13497
13498         /*
13499          * The only reason we don't do this is because right now we're just
13500          * walking the trees we find and pinning down their bytes, we don't look
13501          * at any of the leaves.  In order to do mixed groups we'd have to check
13502          * the leaves of any fs roots and pin down the bytes for any file
13503          * extents we find.  Not hard but why do it if we don't have to?
13504          */
13505         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13506                 fprintf(stderr, "We don't support re-initing the extent tree "
13507                         "for mixed block groups yet, please notify a btrfs "
13508                         "developer you want to do this so they can add this "
13509                         "functionality.\n");
13510                 return -EINVAL;
13511         }
13512
13513         /*
13514          * first we need to walk all of the trees except the extent tree and pin
13515          * down the bytes that are in use so we don't overwrite any existing
13516          * metadata.
13517          */
13518         ret = pin_metadata_blocks(fs_info);
13519         if (ret) {
13520                 fprintf(stderr, "error pinning down used bytes\n");
13521                 return ret;
13522         }
13523
13524         /*
13525          * Need to drop all the block groups since we're going to recreate all
13526          * of them again.
13527          */
13528         btrfs_free_block_groups(fs_info);
13529         ret = reset_block_groups(fs_info);
13530         if (ret) {
13531                 fprintf(stderr, "error resetting the block groups\n");
13532                 return ret;
13533         }
13534
13535         /* Ok we can allocate now, reinit the extent root */
13536         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13537         if (ret) {
13538                 fprintf(stderr, "extent root initialization failed\n");
13539                 /*
13540                  * When the transaction code is updated we should end the
13541                  * transaction, but for now progs only knows about commit so
13542                  * just return an error.
13543                  */
13544                 return ret;
13545         }
13546
13547         /*
13548          * Now we have all the in-memory block groups setup so we can make
13549          * allocations properly, and the metadata we care about is safe since we
13550          * pinned all of it above.
13551          */
13552         while (1) {
13553                 struct btrfs_block_group_cache *cache;
13554
13555                 cache = btrfs_lookup_first_block_group(fs_info, start);
13556                 if (!cache)
13557                         break;
13558                 start = cache->key.objectid + cache->key.offset;
13559                 ret = btrfs_insert_item(trans, fs_info->extent_root,
13560                                         &cache->key, &cache->item,
13561                                         sizeof(cache->item));
13562                 if (ret) {
13563                         fprintf(stderr, "Error adding block group\n");
13564                         return ret;
13565                 }
13566                 btrfs_extent_post_op(trans, fs_info->extent_root);
13567         }
13568
13569         ret = reset_balance(trans, fs_info);
13570         if (ret)
13571                 fprintf(stderr, "error resetting the pending balance\n");
13572
13573         return ret;
13574 }
13575
13576 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13577 {
13578         struct btrfs_path path;
13579         struct btrfs_trans_handle *trans;
13580         struct btrfs_key key;
13581         int ret;
13582
13583         printf("Recowing metadata block %llu\n", eb->start);
13584         key.objectid = btrfs_header_owner(eb);
13585         key.type = BTRFS_ROOT_ITEM_KEY;
13586         key.offset = (u64)-1;
13587
13588         root = btrfs_read_fs_root(root->fs_info, &key);
13589         if (IS_ERR(root)) {
13590                 fprintf(stderr, "Couldn't find owner root %llu\n",
13591                         key.objectid);
13592                 return PTR_ERR(root);
13593         }
13594
13595         trans = btrfs_start_transaction(root, 1);
13596         if (IS_ERR(trans))
13597                 return PTR_ERR(trans);
13598
13599         btrfs_init_path(&path);
13600         path.lowest_level = btrfs_header_level(eb);
13601         if (path.lowest_level)
13602                 btrfs_node_key_to_cpu(eb, &key, 0);
13603         else
13604                 btrfs_item_key_to_cpu(eb, &key, 0);
13605
13606         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13607         btrfs_commit_transaction(trans, root);
13608         btrfs_release_path(&path);
13609         return ret;
13610 }
13611
13612 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13613 {
13614         struct btrfs_path path;
13615         struct btrfs_trans_handle *trans;
13616         struct btrfs_key key;
13617         int ret;
13618
13619         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13620                bad->key.type, bad->key.offset);
13621         key.objectid = bad->root_id;
13622         key.type = BTRFS_ROOT_ITEM_KEY;
13623         key.offset = (u64)-1;
13624
13625         root = btrfs_read_fs_root(root->fs_info, &key);
13626         if (IS_ERR(root)) {
13627                 fprintf(stderr, "Couldn't find owner root %llu\n",
13628                         key.objectid);
13629                 return PTR_ERR(root);
13630         }
13631
13632         trans = btrfs_start_transaction(root, 1);
13633         if (IS_ERR(trans))
13634                 return PTR_ERR(trans);
13635
13636         btrfs_init_path(&path);
13637         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13638         if (ret) {
13639                 if (ret > 0)
13640                         ret = 0;
13641                 goto out;
13642         }
13643         ret = btrfs_del_item(trans, root, &path);
13644 out:
13645         btrfs_commit_transaction(trans, root);
13646         btrfs_release_path(&path);
13647         return ret;
13648 }
13649
13650 static int zero_log_tree(struct btrfs_root *root)
13651 {
13652         struct btrfs_trans_handle *trans;
13653         int ret;
13654
13655         trans = btrfs_start_transaction(root, 1);
13656         if (IS_ERR(trans)) {
13657                 ret = PTR_ERR(trans);
13658                 return ret;
13659         }
13660         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13661         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13662         ret = btrfs_commit_transaction(trans, root);
13663         return ret;
13664 }
13665
13666 static int populate_csum(struct btrfs_trans_handle *trans,
13667                          struct btrfs_root *csum_root, char *buf, u64 start,
13668                          u64 len)
13669 {
13670         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13671         u64 offset = 0;
13672         u64 sectorsize;
13673         int ret = 0;
13674
13675         while (offset < len) {
13676                 sectorsize = fs_info->sectorsize;
13677                 ret = read_extent_data(fs_info, buf, start + offset,
13678                                        &sectorsize, 0);
13679                 if (ret)
13680                         break;
13681                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13682                                             start + offset, buf, sectorsize);
13683                 if (ret)
13684                         break;
13685                 offset += sectorsize;
13686         }
13687         return ret;
13688 }
13689
13690 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13691                                       struct btrfs_root *csum_root,
13692                                       struct btrfs_root *cur_root)
13693 {
13694         struct btrfs_path path;
13695         struct btrfs_key key;
13696         struct extent_buffer *node;
13697         struct btrfs_file_extent_item *fi;
13698         char *buf = NULL;
13699         u64 start = 0;
13700         u64 len = 0;
13701         int slot = 0;
13702         int ret = 0;
13703
13704         buf = malloc(cur_root->fs_info->sectorsize);
13705         if (!buf)
13706                 return -ENOMEM;
13707
13708         btrfs_init_path(&path);
13709         key.objectid = 0;
13710         key.offset = 0;
13711         key.type = 0;
13712         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13713         if (ret < 0)
13714                 goto out;
13715         /* Iterate all regular file extents and fill its csum */
13716         while (1) {
13717                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13718
13719                 if (key.type != BTRFS_EXTENT_DATA_KEY)
13720                         goto next;
13721                 node = path.nodes[0];
13722                 slot = path.slots[0];
13723                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13724                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
13725                         goto next;
13726                 start = btrfs_file_extent_disk_bytenr(node, fi);
13727                 len = btrfs_file_extent_disk_num_bytes(node, fi);
13728
13729                 ret = populate_csum(trans, csum_root, buf, start, len);
13730                 if (ret == -EEXIST)
13731                         ret = 0;
13732                 if (ret < 0)
13733                         goto out;
13734 next:
13735                 /*
13736                  * TODO: if next leaf is corrupted, jump to nearest next valid
13737                  * leaf.
13738                  */
13739                 ret = btrfs_next_item(cur_root, &path);
13740                 if (ret < 0)
13741                         goto out;
13742                 if (ret > 0) {
13743                         ret = 0;
13744                         goto out;
13745                 }
13746         }
13747
13748 out:
13749         btrfs_release_path(&path);
13750         free(buf);
13751         return ret;
13752 }
13753
13754 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13755                                   struct btrfs_root *csum_root)
13756 {
13757         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13758         struct btrfs_path path;
13759         struct btrfs_root *tree_root = fs_info->tree_root;
13760         struct btrfs_root *cur_root;
13761         struct extent_buffer *node;
13762         struct btrfs_key key;
13763         int slot = 0;
13764         int ret = 0;
13765
13766         btrfs_init_path(&path);
13767         key.objectid = BTRFS_FS_TREE_OBJECTID;
13768         key.offset = 0;
13769         key.type = BTRFS_ROOT_ITEM_KEY;
13770         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13771         if (ret < 0)
13772                 goto out;
13773         if (ret > 0) {
13774                 ret = -ENOENT;
13775                 goto out;
13776         }
13777
13778         while (1) {
13779                 node = path.nodes[0];
13780                 slot = path.slots[0];
13781                 btrfs_item_key_to_cpu(node, &key, slot);
13782                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13783                         goto out;
13784                 if (key.type != BTRFS_ROOT_ITEM_KEY)
13785                         goto next;
13786                 if (!is_fstree(key.objectid))
13787                         goto next;
13788                 key.offset = (u64)-1;
13789
13790                 cur_root = btrfs_read_fs_root(fs_info, &key);
13791                 if (IS_ERR(cur_root) || !cur_root) {
13792                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13793                                 key.objectid);
13794                         goto out;
13795                 }
13796                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13797                                 cur_root);
13798                 if (ret < 0)
13799                         goto out;
13800 next:
13801                 ret = btrfs_next_item(tree_root, &path);
13802                 if (ret > 0) {
13803                         ret = 0;
13804                         goto out;
13805                 }
13806                 if (ret < 0)
13807                         goto out;
13808         }
13809
13810 out:
13811         btrfs_release_path(&path);
13812         return ret;
13813 }
13814
13815 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13816                                       struct btrfs_root *csum_root)
13817 {
13818         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13819         struct btrfs_path path;
13820         struct btrfs_extent_item *ei;
13821         struct extent_buffer *leaf;
13822         char *buf;
13823         struct btrfs_key key;
13824         int ret;
13825
13826         btrfs_init_path(&path);
13827         key.objectid = 0;
13828         key.type = BTRFS_EXTENT_ITEM_KEY;
13829         key.offset = 0;
13830         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13831         if (ret < 0) {
13832                 btrfs_release_path(&path);
13833                 return ret;
13834         }
13835
13836         buf = malloc(csum_root->fs_info->sectorsize);
13837         if (!buf) {
13838                 btrfs_release_path(&path);
13839                 return -ENOMEM;
13840         }
13841
13842         while (1) {
13843                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13844                         ret = btrfs_next_leaf(extent_root, &path);
13845                         if (ret < 0)
13846                                 break;
13847                         if (ret) {
13848                                 ret = 0;
13849                                 break;
13850                         }
13851                 }
13852                 leaf = path.nodes[0];
13853
13854                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13855                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13856                         path.slots[0]++;
13857                         continue;
13858                 }
13859
13860                 ei = btrfs_item_ptr(leaf, path.slots[0],
13861                                     struct btrfs_extent_item);
13862                 if (!(btrfs_extent_flags(leaf, ei) &
13863                       BTRFS_EXTENT_FLAG_DATA)) {
13864                         path.slots[0]++;
13865                         continue;
13866                 }
13867
13868                 ret = populate_csum(trans, csum_root, buf, key.objectid,
13869                                     key.offset);
13870                 if (ret)
13871                         break;
13872                 path.slots[0]++;
13873         }
13874
13875         btrfs_release_path(&path);
13876         free(buf);
13877         return ret;
13878 }
13879
/*
 * Recompute data checksums and insert them into the csum tree.
 *
 * When the extent tree has been re-initialized its extent info is gone, so it
 * cannot be used as the source of data extents.  In that case the caller sets
 * @search_fs_tree and the fs/subvolume trees are walked instead; otherwise
 * the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root) :
				fill_csum_tree_from_extent(trans, csum_root);
}
13896
13897 static void free_roots_info_cache(void)
13898 {
13899         if (!roots_info_cache)
13900                 return;
13901
13902         while (!cache_tree_empty(roots_info_cache)) {
13903                 struct cache_extent *entry;
13904                 struct root_item_info *rii;
13905
13906                 entry = first_cache_extent(roots_info_cache);
13907                 if (!entry)
13908                         break;
13909                 remove_cache_extent(roots_info_cache, entry);
13910                 rii = container_of(entry, struct root_item_info, cache_extent);
13911                 free(rii);
13912         }
13913
13914         free(roots_info_cache);
13915         roots_info_cache = NULL;
13916 }
13917
13918 static int build_roots_info_cache(struct btrfs_fs_info *info)
13919 {
13920         int ret = 0;
13921         struct btrfs_key key;
13922         struct extent_buffer *leaf;
13923         struct btrfs_path path;
13924
13925         if (!roots_info_cache) {
13926                 roots_info_cache = malloc(sizeof(*roots_info_cache));
13927                 if (!roots_info_cache)
13928                         return -ENOMEM;
13929                 cache_tree_init(roots_info_cache);
13930         }
13931
13932         btrfs_init_path(&path);
13933         key.objectid = 0;
13934         key.type = BTRFS_EXTENT_ITEM_KEY;
13935         key.offset = 0;
13936         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13937         if (ret < 0)
13938                 goto out;
13939         leaf = path.nodes[0];
13940
13941         while (1) {
13942                 struct btrfs_key found_key;
13943                 struct btrfs_extent_item *ei;
13944                 struct btrfs_extent_inline_ref *iref;
13945                 int slot = path.slots[0];
13946                 int type;
13947                 u64 flags;
13948                 u64 root_id;
13949                 u8 level;
13950                 struct cache_extent *entry;
13951                 struct root_item_info *rii;
13952
13953                 if (slot >= btrfs_header_nritems(leaf)) {
13954                         ret = btrfs_next_leaf(info->extent_root, &path);
13955                         if (ret < 0) {
13956                                 break;
13957                         } else if (ret) {
13958                                 ret = 0;
13959                                 break;
13960                         }
13961                         leaf = path.nodes[0];
13962                         slot = path.slots[0];
13963                 }
13964
13965                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13966
13967                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13968                     found_key.type != BTRFS_METADATA_ITEM_KEY)
13969                         goto next;
13970
13971                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13972                 flags = btrfs_extent_flags(leaf, ei);
13973
13974                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13975                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
13976                         goto next;
13977
13978                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13979                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13980                         level = found_key.offset;
13981                 } else {
13982                         struct btrfs_tree_block_info *binfo;
13983
13984                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
13985                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13986                         level = btrfs_tree_block_level(leaf, binfo);
13987                 }
13988
13989                 /*
13990                  * For a root extent, it must be of the following type and the
13991                  * first (and only one) iref in the item.
13992                  */
13993                 type = btrfs_extent_inline_ref_type(leaf, iref);
13994                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
13995                         goto next;
13996
13997                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13998                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13999                 if (!entry) {
14000                         rii = malloc(sizeof(struct root_item_info));
14001                         if (!rii) {
14002                                 ret = -ENOMEM;
14003                                 goto out;
14004                         }
14005                         rii->cache_extent.start = root_id;
14006                         rii->cache_extent.size = 1;
14007                         rii->level = (u8)-1;
14008                         entry = &rii->cache_extent;
14009                         ret = insert_cache_extent(roots_info_cache, entry);
14010                         ASSERT(ret == 0);
14011                 } else {
14012                         rii = container_of(entry, struct root_item_info,
14013                                            cache_extent);
14014                 }
14015
14016                 ASSERT(rii->cache_extent.start == root_id);
14017                 ASSERT(rii->cache_extent.size == 1);
14018
14019                 if (level > rii->level || rii->level == (u8)-1) {
14020                         rii->level = level;
14021                         rii->bytenr = found_key.objectid;
14022                         rii->gen = btrfs_extent_generation(leaf, ei);
14023                         rii->node_count = 1;
14024                 } else if (level == rii->level) {
14025                         rii->node_count++;
14026                 }
14027 next:
14028                 path.slots[0]++;
14029         }
14030
14031 out:
14032         btrfs_release_path(&path);
14033
14034         return ret;
14035 }
14036
14037 static int maybe_repair_root_item(struct btrfs_path *path,
14038                                   const struct btrfs_key *root_key,
14039                                   const int read_only_mode)
14040 {
14041         const u64 root_id = root_key->objectid;
14042         struct cache_extent *entry;
14043         struct root_item_info *rii;
14044         struct btrfs_root_item ri;
14045         unsigned long offset;
14046
14047         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14048         if (!entry) {
14049                 fprintf(stderr,
14050                         "Error: could not find extent items for root %llu\n",
14051                         root_key->objectid);
14052                 return -ENOENT;
14053         }
14054
14055         rii = container_of(entry, struct root_item_info, cache_extent);
14056         ASSERT(rii->cache_extent.start == root_id);
14057         ASSERT(rii->cache_extent.size == 1);
14058
14059         if (rii->node_count != 1) {
14060                 fprintf(stderr,
14061                         "Error: could not find btree root extent for root %llu\n",
14062                         root_id);
14063                 return -ENOENT;
14064         }
14065
14066         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
14067         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
14068
14069         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
14070             btrfs_root_level(&ri) != rii->level ||
14071             btrfs_root_generation(&ri) != rii->gen) {
14072
14073                 /*
14074                  * If we're in repair mode but our caller told us to not update
14075                  * the root item, i.e. just check if it needs to be updated, don't
14076                  * print this message, since the caller will call us again shortly
14077                  * for the same root item without read only mode (the caller will
14078                  * open a transaction first).
14079                  */
14080                 if (!(read_only_mode && repair))
14081                         fprintf(stderr,
14082                                 "%sroot item for root %llu,"
14083                                 " current bytenr %llu, current gen %llu, current level %u,"
14084                                 " new bytenr %llu, new gen %llu, new level %u\n",
14085                                 (read_only_mode ? "" : "fixing "),
14086                                 root_id,
14087                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
14088                                 btrfs_root_level(&ri),
14089                                 rii->bytenr, rii->gen, rii->level);
14090
14091                 if (btrfs_root_generation(&ri) > rii->gen) {
14092                         fprintf(stderr,
14093                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
14094                                 root_id, btrfs_root_generation(&ri), rii->gen);
14095                         return -EINVAL;
14096                 }
14097
14098                 if (!read_only_mode) {
14099                         btrfs_set_root_bytenr(&ri, rii->bytenr);
14100                         btrfs_set_root_level(&ri, rii->level);
14101                         btrfs_set_root_generation(&ri, rii->gen);
14102                         write_extent_buffer(path->nodes[0], &ri,
14103                                             offset, sizeof(ri));
14104                 }
14105
14106                 return 1;
14107         }
14108
14109         return 0;
14110 }
14111
14112 /*
14113  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
14114  * caused read-only snapshots to be corrupted if they were created at a moment
14115  * when the source subvolume/snapshot had orphan items. The issue was that the
14116  * on-disk root items became incorrect, referring to the pre orphan cleanup root
14117  * node instead of the post orphan cleanup root node.
14118  * So this function, and its callees, just detects and fixes those cases. Even
14119  * though the regression was for read-only snapshots, this function applies to
14120  * any snapshot/subvolume root.
14121  * This must be run before any other repair code - not doing it so, makes other
14122  * repair code delete or modify backrefs in the extent tree for example, which
14123  * will result in an inconsistent fs after repairing the root items.
14124  */
14125 static int repair_root_items(struct btrfs_fs_info *info)
14126 {
14127         struct btrfs_path path;
14128         struct btrfs_key key;
14129         struct extent_buffer *leaf;
14130         struct btrfs_trans_handle *trans = NULL;
14131         int ret = 0;
14132         int bad_roots = 0;
14133         int need_trans = 0;
14134
14135         btrfs_init_path(&path);
14136
14137         ret = build_roots_info_cache(info);
14138         if (ret)
14139                 goto out;
14140
14141         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
14142         key.type = BTRFS_ROOT_ITEM_KEY;
14143         key.offset = 0;
14144
14145 again:
14146         /*
14147          * Avoid opening and committing transactions if a leaf doesn't have
14148          * any root items that need to be fixed, so that we avoid rotating
14149          * backup roots unnecessarily.
14150          */
14151         if (need_trans) {
14152                 trans = btrfs_start_transaction(info->tree_root, 1);
14153                 if (IS_ERR(trans)) {
14154                         ret = PTR_ERR(trans);
14155                         goto out;
14156                 }
14157         }
14158
14159         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
14160                                 0, trans ? 1 : 0);
14161         if (ret < 0)
14162                 goto out;
14163         leaf = path.nodes[0];
14164
14165         while (1) {
14166                 struct btrfs_key found_key;
14167
14168                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
14169                         int no_more_keys = find_next_key(&path, &key);
14170
14171                         btrfs_release_path(&path);
14172                         if (trans) {
14173                                 ret = btrfs_commit_transaction(trans,
14174                                                                info->tree_root);
14175                                 trans = NULL;
14176                                 if (ret < 0)
14177                                         goto out;
14178                         }
14179                         need_trans = 0;
14180                         if (no_more_keys)
14181                                 break;
14182                         goto again;
14183                 }
14184
14185                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14186
14187                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
14188                         goto next;
14189                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
14190                         goto next;
14191
14192                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
14193                 if (ret < 0)
14194                         goto out;
14195                 if (ret) {
14196                         if (!trans && repair) {
14197                                 need_trans = 1;
14198                                 key = found_key;
14199                                 btrfs_release_path(&path);
14200                                 goto again;
14201                         }
14202                         bad_roots++;
14203                 }
14204 next:
14205                 path.slots[0]++;
14206         }
14207         ret = 0;
14208 out:
14209         free_roots_info_cache();
14210         btrfs_release_path(&path);
14211         if (trans)
14212                 btrfs_commit_transaction(trans, info->tree_root);
14213         if (ret < 0)
14214                 return ret;
14215
14216         return bad_roots;
14217 }
14218
14219 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
14220 {
14221         struct btrfs_trans_handle *trans;
14222         struct btrfs_block_group_cache *bg_cache;
14223         u64 current = 0;
14224         int ret = 0;
14225
14226         /* Clear all free space cache inodes and its extent data */
14227         while (1) {
14228                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
14229                 if (!bg_cache)
14230                         break;
14231                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
14232                 if (ret < 0)
14233                         return ret;
14234                 current = bg_cache->key.objectid + bg_cache->key.offset;
14235         }
14236
14237         /* Don't forget to set cache_generation to -1 */
14238         trans = btrfs_start_transaction(fs_info->tree_root, 0);
14239         if (IS_ERR(trans)) {
14240                 error("failed to update super block cache generation");
14241                 return PTR_ERR(trans);
14242         }
14243         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
14244         btrfs_commit_transaction(trans, fs_info->tree_root);
14245
14246         return ret;
14247 }
14248
14249 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
14250                 int clear_version)
14251 {
14252         int ret = 0;
14253
14254         if (clear_version == 1) {
14255                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14256                         error(
14257                 "free space cache v2 detected, use --clear-space-cache v2");
14258                         ret = 1;
14259                         goto close_out;
14260                 }
14261                 printf("Clearing free space cache\n");
14262                 ret = clear_free_space_cache(fs_info);
14263                 if (ret) {
14264                         error("failed to clear free space cache");
14265                         ret = 1;
14266                 } else {
14267                         printf("Free space cache cleared\n");
14268                 }
14269         } else if (clear_version == 2) {
14270                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14271                         printf("no free space cache v2 to clear\n");
14272                         ret = 0;
14273                         goto close_out;
14274                 }
14275                 printf("Clear free space cache v2\n");
14276                 ret = btrfs_clear_free_space_tree(fs_info);
14277                 if (ret) {
14278                         error("failed to clear free space cache v2: %d", ret);
14279                         ret = 1;
14280                 } else {
14281                         printf("free space cache v2 cleared\n");
14282                 }
14283         }
14284 close_out:
14285         return ret;
14286 }
14287
/*
 * Help text for "btrfs check": a NULL-terminated list of lines.  The list
 * opens with the synopsis and a one-line summary, followed by the longer
 * description, an empty separator line and one or more lines per option.
 */
const char * const cmd_check_usage[] = {
	/* Synopsis and short summary */
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",

	/* Long description */
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",

	/* Superblock / root selection */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",

	/* Operation mode */
	"--force                     skip mount checks, repair is not possible",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",

	/* Extra checks and reports */
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",

	/* Explicit tree locations, progress, cache handling */
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",

	NULL
};
14319
14320 int cmd_check(int argc, char **argv)
14321 {
14322         struct cache_tree root_cache;
14323         struct btrfs_root *root;
14324         struct btrfs_fs_info *info;
14325         u64 bytenr = 0;
14326         u64 subvolid = 0;
14327         u64 tree_root_bytenr = 0;
14328         u64 chunk_root_bytenr = 0;
14329         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14330         int ret = 0;
14331         int err = 0;
14332         u64 num;
14333         int init_csum_tree = 0;
14334         int readonly = 0;
14335         int clear_space_cache = 0;
14336         int qgroup_report = 0;
14337         int qgroups_repaired = 0;
14338         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
14339         int force = 0;
14340
14341         while(1) {
14342                 int c;
14343                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14344                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14345                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14346                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14347                         GETOPT_VAL_FORCE };
14348                 static const struct option long_options[] = {
14349                         { "super", required_argument, NULL, 's' },
14350                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14351                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14352                         { "init-csum-tree", no_argument, NULL,
14353                                 GETOPT_VAL_INIT_CSUM },
14354                         { "init-extent-tree", no_argument, NULL,
14355                                 GETOPT_VAL_INIT_EXTENT },
14356                         { "check-data-csum", no_argument, NULL,
14357                                 GETOPT_VAL_CHECK_CSUM },
14358                         { "backup", no_argument, NULL, 'b' },
14359                         { "subvol-extents", required_argument, NULL, 'E' },
14360                         { "qgroup-report", no_argument, NULL, 'Q' },
14361                         { "tree-root", required_argument, NULL, 'r' },
14362                         { "chunk-root", required_argument, NULL,
14363                                 GETOPT_VAL_CHUNK_TREE },
14364                         { "progress", no_argument, NULL, 'p' },
14365                         { "mode", required_argument, NULL,
14366                                 GETOPT_VAL_MODE },
14367                         { "clear-space-cache", required_argument, NULL,
14368                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
14369                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14370                         { NULL, 0, NULL, 0}
14371                 };
14372
14373                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14374                 if (c < 0)
14375                         break;
14376                 switch(c) {
14377                         case 'a': /* ignored */ break;
14378                         case 'b':
14379                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14380                                 break;
14381                         case 's':
14382                                 num = arg_strtou64(optarg);
14383                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14384                                         error(
14385                                         "super mirror should be less than %d",
14386                                                 BTRFS_SUPER_MIRROR_MAX);
14387                                         exit(1);
14388                                 }
14389                                 bytenr = btrfs_sb_offset(((int)num));
14390                                 printf("using SB copy %llu, bytenr %llu\n", num,
14391                                        (unsigned long long)bytenr);
14392                                 break;
14393                         case 'Q':
14394                                 qgroup_report = 1;
14395                                 break;
14396                         case 'E':
14397                                 subvolid = arg_strtou64(optarg);
14398                                 break;
14399                         case 'r':
14400                                 tree_root_bytenr = arg_strtou64(optarg);
14401                                 break;
14402                         case GETOPT_VAL_CHUNK_TREE:
14403                                 chunk_root_bytenr = arg_strtou64(optarg);
14404                                 break;
14405                         case 'p':
14406                                 ctx.progress_enabled = true;
14407                                 break;
14408                         case '?':
14409                         case 'h':
14410                                 usage(cmd_check_usage);
14411                         case GETOPT_VAL_REPAIR:
14412                                 printf("enabling repair mode\n");
14413                                 repair = 1;
14414                                 ctree_flags |= OPEN_CTREE_WRITES;
14415                                 break;
14416                         case GETOPT_VAL_READONLY:
14417                                 readonly = 1;
14418                                 break;
14419                         case GETOPT_VAL_INIT_CSUM:
14420                                 printf("Creating a new CRC tree\n");
14421                                 init_csum_tree = 1;
14422                                 repair = 1;
14423                                 ctree_flags |= OPEN_CTREE_WRITES;
14424                                 break;
14425                         case GETOPT_VAL_INIT_EXTENT:
14426                                 init_extent_tree = 1;
14427                                 ctree_flags |= (OPEN_CTREE_WRITES |
14428                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
14429                                 repair = 1;
14430                                 break;
14431                         case GETOPT_VAL_CHECK_CSUM:
14432                                 check_data_csum = 1;
14433                                 break;
14434                         case GETOPT_VAL_MODE:
14435                                 check_mode = parse_check_mode(optarg);
14436                                 if (check_mode == CHECK_MODE_UNKNOWN) {
14437                                         error("unknown mode: %s", optarg);
14438                                         exit(1);
14439                                 }
14440                                 break;
14441                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
14442                                 if (strcmp(optarg, "v1") == 0) {
14443                                         clear_space_cache = 1;
14444                                 } else if (strcmp(optarg, "v2") == 0) {
14445                                         clear_space_cache = 2;
14446                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14447                                 } else {
14448                                         error(
14449                 "invalid argument to --clear-space-cache, must be v1 or v2");
14450                                         exit(1);
14451                                 }
14452                                 ctree_flags |= OPEN_CTREE_WRITES;
14453                                 break;
14454                         case GETOPT_VAL_FORCE:
14455                                 force = 1;
14456                                 break;
14457                 }
14458         }
14459
14460         if (check_argc_exact(argc - optind, 1))
14461                 usage(cmd_check_usage);
14462
14463         if (ctx.progress_enabled) {
14464                 ctx.tp = TASK_NOTHING;
14465                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14466         }
14467
14468         /* This check is the only reason for --readonly to exist */
14469         if (readonly && repair) {
14470                 error("repair options are not compatible with --readonly");
14471                 exit(1);
14472         }
14473
14474         /*
14475          * experimental and dangerous
14476          */
14477         if (repair && check_mode == CHECK_MODE_LOWMEM)
14478                 warning("low-memory mode repair support is only partial");
14479
14480         radix_tree_init();
14481         cache_tree_init(&root_cache);
14482
14483         ret = check_mounted(argv[optind]);
14484         if (!force) {
14485                 if (ret < 0) {
14486                         error("could not check mount status: %s",
14487                                         strerror(-ret));
14488                         err |= !!ret;
14489                         goto err_out;
14490                 } else if (ret) {
14491                         error(
14492 "%s is currently mounted, use --force if you really intend to check the filesystem",
14493                                 argv[optind]);
14494                         ret = -EBUSY;
14495                         err |= !!ret;
14496                         goto err_out;
14497                 }
14498         } else {
14499                 if (repair) {
14500                         error("repair and --force is not yet supported");
14501                         ret = 1;
14502                         err |= !!ret;
14503                         goto err_out;
14504                 }
14505                 if (ret < 0) {
14506                         warning(
14507 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14508                                 argv[optind]);
14509                 } else if (ret) {
14510                         warning(
14511                         "filesystem mounted, continuing because of --force");
14512                 }
14513                 /* A block device is mounted in exclusive mode by kernel */
14514                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14515         }
14516
14517         /* only allow partial opening under repair mode */
14518         if (repair)
14519                 ctree_flags |= OPEN_CTREE_PARTIAL;
14520
14521         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14522                                   chunk_root_bytenr, ctree_flags);
14523         if (!info) {
14524                 error("cannot open file system");
14525                 ret = -EIO;
14526                 err |= !!ret;
14527                 goto err_out;
14528         }
14529
14530         global_info = info;
14531         root = info->fs_root;
14532         uuid_unparse(info->super_copy->fsid, uuidbuf);
14533
14534         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14535
14536         /*
14537          * Check the bare minimum before starting anything else that could rely
14538          * on it, namely the tree roots, any local consistency checks
14539          */
14540         if (!extent_buffer_uptodate(info->tree_root->node) ||
14541             !extent_buffer_uptodate(info->dev_root->node) ||
14542             !extent_buffer_uptodate(info->chunk_root->node)) {
14543                 error("critical roots corrupted, unable to check the filesystem");
14544                 err |= !!ret;
14545                 ret = -EIO;
14546                 goto close_out;
14547         }
14548
14549         if (clear_space_cache) {
14550                 ret = do_clear_free_space_cache(info, clear_space_cache);
14551                 err |= !!ret;
14552                 goto close_out;
14553         }
14554
14555         /*
14556          * repair mode will force us to commit transaction which
14557          * will make us fail to load log tree when mounting.
14558          */
14559         if (repair && btrfs_super_log_root(info->super_copy)) {
14560                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14561                 if (!ret) {
14562                         ret = 1;
14563                         err |= !!ret;
14564                         goto close_out;
14565                 }
14566                 ret = zero_log_tree(root);
14567                 err |= !!ret;
14568                 if (ret) {
14569                         error("failed to zero log tree: %d", ret);
14570                         goto close_out;
14571                 }
14572         }
14573
14574         if (qgroup_report) {
14575                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14576                        uuidbuf);
14577                 ret = qgroup_verify_all(info);
14578                 err |= !!ret;
14579                 if (ret == 0)
14580                         report_qgroups(1);
14581                 goto close_out;
14582         }
14583         if (subvolid) {
14584                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14585                        subvolid, argv[optind], uuidbuf);
14586                 ret = print_extent_state(info, subvolid);
14587                 err |= !!ret;
14588                 goto close_out;
14589         }
14590
14591         if (init_extent_tree || init_csum_tree) {
14592                 struct btrfs_trans_handle *trans;
14593
14594                 trans = btrfs_start_transaction(info->extent_root, 0);
14595                 if (IS_ERR(trans)) {
14596                         error("error starting transaction");
14597                         ret = PTR_ERR(trans);
14598                         err |= !!ret;
14599                         goto close_out;
14600                 }
14601
14602                 if (init_extent_tree) {
14603                         printf("Creating a new extent tree\n");
14604                         ret = reinit_extent_tree(trans, info);
14605                         err |= !!ret;
14606                         if (ret)
14607                                 goto close_out;
14608                 }
14609
14610                 if (init_csum_tree) {
14611                         printf("Reinitialize checksum tree\n");
14612                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14613                         if (ret) {
14614                                 error("checksum tree initialization failed: %d",
14615                                                 ret);
14616                                 ret = -EIO;
14617                                 err |= !!ret;
14618                                 goto close_out;
14619                         }
14620
14621                         ret = fill_csum_tree(trans, info->csum_root,
14622                                              init_extent_tree);
14623                         err |= !!ret;
14624                         if (ret) {
14625                                 error("checksum tree refilling failed: %d", ret);
14626                                 return -EIO;
14627                         }
14628                 }
14629                 /*
14630                  * Ok now we commit and run the normal fsck, which will add
14631                  * extent entries for all of the items it finds.
14632                  */
14633                 ret = btrfs_commit_transaction(trans, info->extent_root);
14634                 err |= !!ret;
14635                 if (ret)
14636                         goto close_out;
14637         }
14638         if (!extent_buffer_uptodate(info->extent_root->node)) {
14639                 error("critical: extent_root, unable to check the filesystem");
14640                 ret = -EIO;
14641                 err |= !!ret;
14642                 goto close_out;
14643         }
14644         if (!extent_buffer_uptodate(info->csum_root->node)) {
14645                 error("critical: csum_root, unable to check the filesystem");
14646                 ret = -EIO;
14647                 err |= !!ret;
14648                 goto close_out;
14649         }
14650
14651         if (!init_extent_tree) {
14652                 ret = repair_root_items(info);
14653                 if (ret < 0) {
14654                         err = !!ret;
14655                         error("failed to repair root items: %s", strerror(-ret));
14656                         goto close_out;
14657                 }
14658                 if (repair) {
14659                         fprintf(stderr, "Fixed %d roots.\n", ret);
14660                         ret = 0;
14661                 } else if (ret > 0) {
14662                         fprintf(stderr,
14663                                 "Found %d roots with an outdated root item.\n",
14664                                 ret);
14665                         fprintf(stderr,
14666         "Please run a filesystem check with the option --repair to fix them.\n");
14667                         ret = 1;
14668                         err |= ret;
14669                         goto close_out;
14670                 }
14671         }
14672
14673         ret = do_check_chunks_and_extents(info);
14674         err |= !!ret;
14675         if (ret)
14676                 error(
14677                 "errors found in extent allocation tree or chunk allocation");
14678
14679         /* Only re-check super size after we checked and repaired the fs */
14680         err |= !is_super_size_valid(info);
14681
14682         if (!ctx.progress_enabled) {
14683                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14684                         fprintf(stderr, "checking free space tree\n");
14685                 else
14686                         fprintf(stderr, "checking free space cache\n");
14687         }
14688         ret = check_space_cache(root);
14689         err |= !!ret;
14690         if (ret) {
14691                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14692                         error("errors found in free space tree");
14693                 else
14694                         error("errors found in free space cache");
14695                 goto out;
14696         }
14697
14698         /*
14699          * We used to have to have these hole extents in between our real
14700          * extents so if we don't have this flag set we need to make sure there
14701          * are no gaps in the file extents for inodes, otherwise we can just
14702          * ignore it when this happens.
14703          */
14704         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14705         ret = do_check_fs_roots(info, &root_cache);
14706         err |= !!ret;
14707         if (ret) {
14708                 error("errors found in fs roots");
14709                 goto out;
14710         }
14711
14712         fprintf(stderr, "checking csums\n");
14713         ret = check_csums(root);
14714         err |= !!ret;
14715         if (ret) {
14716                 error("errors found in csum tree");
14717                 goto out;
14718         }
14719
14720         fprintf(stderr, "checking root refs\n");
14721         /* For low memory mode, check_fs_roots_v2 handles root refs */
14722         if (check_mode != CHECK_MODE_LOWMEM) {
14723                 ret = check_root_refs(root, &root_cache);
14724                 err |= !!ret;
14725                 if (ret) {
14726                         error("errors found in root refs");
14727                         goto out;
14728                 }
14729         }
14730
14731         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14732                 struct extent_buffer *eb;
14733
14734                 eb = list_first_entry(&root->fs_info->recow_ebs,
14735                                       struct extent_buffer, recow);
14736                 list_del_init(&eb->recow);
14737                 ret = recow_extent_buffer(root, eb);
14738                 err |= !!ret;
14739                 if (ret) {
14740                         error("fails to fix transid errors");
14741                         break;
14742                 }
14743         }
14744
14745         while (!list_empty(&delete_items)) {
14746                 struct bad_item *bad;
14747
14748                 bad = list_first_entry(&delete_items, struct bad_item, list);
14749                 list_del_init(&bad->list);
14750                 if (repair) {
14751                         ret = delete_bad_item(root, bad);
14752                         err |= !!ret;
14753                 }
14754                 free(bad);
14755         }
14756
14757         if (info->quota_enabled) {
14758                 fprintf(stderr, "checking quota groups\n");
14759                 ret = qgroup_verify_all(info);
14760                 err |= !!ret;
14761                 if (ret) {
14762                         error("failed to check quota groups");
14763                         goto out;
14764                 }
14765                 report_qgroups(0);
14766                 ret = repair_qgroups(info, &qgroups_repaired);
14767                 err |= !!ret;
14768                 if (err) {
14769                         error("failed to repair quota groups");
14770                         goto out;
14771                 }
14772                 ret = 0;
14773         }
14774
14775         if (!list_empty(&root->fs_info->recow_ebs)) {
14776                 error("transid errors in file system");
14777                 ret = 1;
14778                 err |= !!ret;
14779         }
14780 out:
14781         printf("found %llu bytes used, ",
14782                (unsigned long long)bytes_used);
14783         if (err)
14784                 printf("error(s) found\n");
14785         else
14786                 printf("no error found\n");
14787         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14788         printf("total tree bytes: %llu\n",
14789                (unsigned long long)total_btree_bytes);
14790         printf("total fs tree bytes: %llu\n",
14791                (unsigned long long)total_fs_tree_bytes);
14792         printf("total extent tree bytes: %llu\n",
14793                (unsigned long long)total_extent_tree_bytes);
14794         printf("btree space waste bytes: %llu\n",
14795                (unsigned long long)btree_space_waste);
14796         printf("file data blocks allocated: %llu\n referenced %llu\n",
14797                 (unsigned long long)data_bytes_allocated,
14798                 (unsigned long long)data_bytes_referenced);
14799
14800         free_qgroup_counts();
14801         free_root_recs_tree(&root_cache);
14802 close_out:
14803         close_ctree(root);
14804 err_out:
14805         if (ctx.progress_enabled)
14806                 task_deinit(ctx.info);
14807
14808         return err;
14809 }