btrfs-progs: check: Move fs_root_objectid function to check/common.h
[platform/upstream/btrfs-progs.git] / check / main.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46 #include "check/original.h"
47 #include "check/lowmem.h"
48 #include "check/common.h"
49
/*
 * Check phases for which print_status_check() has a progress message.
 * The value is used as an index into its message table, so TASK_NOTHING
 * (which has no message) must stay the last enumerator.
 */
enum task_position {
	TASK_EXTENTS = 0,
	TASK_FREE_SPACE = 1,
	TASK_FS_ROOTS = 2,
	TASK_NOTHING = 3, /* have to be the last element */
};
56
/* Context passed as the opaque argument to the progress callbacks. */
struct task_ctx {
        /* NOTE(review): not read in this part of the file - confirm users */
        int progress_enabled;
        /* phase whose message print_status_check() prints */
        enum task_position tp;

        /* handle given to task_period_start() / task_period_wait() */
        struct task_info *info;
};
63
/*
 * File-scope state of the check run.
 * NOTE(review): apart from no_holes, none of these are referenced in the
 * part of the file reviewed here; confirm their meaning against their users.
 */
u64 bytes_used = 0;
u64 total_csum_bytes = 0;
u64 total_btree_bytes = 0;
u64 total_fs_tree_bytes = 0;
u64 total_extent_tree_bytes = 0;
u64 btree_space_waste = 0;
u64 data_bytes_allocated = 0;
u64 data_bytes_referenced = 0;
LIST_HEAD(duplicate_extents);
LIST_HEAD(delete_items);
/* when non-zero, maybe_free_inode_rec() skips its file extent gap check */
int no_holes = 0;
int init_extent_tree = 0;
int check_data_csum = 0;
struct btrfs_fs_info *global_info;
/* progress reporting context, see struct task_ctx */
struct task_ctx ctx = { 0 };
struct cache_tree *roots_info_cache = NULL;
80
/*
 * Check implementations that parse_check_mode() can select.
 * CHECK_MODE_UNKNOWN is what parse_check_mode() returns for a name it
 * does not recognize; CHECK_MODE_DEFAULT is an alias for the original mode.
 */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL = 0,
	CHECK_MODE_LOWMEM = 1,
	CHECK_MODE_UNKNOWN = 2,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect for this run; starts out as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
89
90 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
91 {
92         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
93         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
94         struct data_backref *back1 = to_data_backref(ext1);
95         struct data_backref *back2 = to_data_backref(ext2);
96
97         WARN_ON(!ext1->is_data);
98         WARN_ON(!ext2->is_data);
99
100         /* parent and root are a union, so this covers both */
101         if (back1->parent > back2->parent)
102                 return 1;
103         if (back1->parent < back2->parent)
104                 return -1;
105
106         /* This is a full backref and the parents match. */
107         if (back1->node.full_backref)
108                 return 0;
109
110         if (back1->owner > back2->owner)
111                 return 1;
112         if (back1->owner < back2->owner)
113                 return -1;
114
115         if (back1->offset > back2->offset)
116                 return 1;
117         if (back1->offset < back2->offset)
118                 return -1;
119
120         if (back1->found_ref && back2->found_ref) {
121                 if (back1->disk_bytenr > back2->disk_bytenr)
122                         return 1;
123                 if (back1->disk_bytenr < back2->disk_bytenr)
124                         return -1;
125
126                 if (back1->bytes > back2->bytes)
127                         return 1;
128                 if (back1->bytes < back2->bytes)
129                         return -1;
130         }
131
132         return 0;
133 }
134
135 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
136 {
137         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
138         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
139         struct tree_backref *back1 = to_tree_backref(ext1);
140         struct tree_backref *back2 = to_tree_backref(ext2);
141
142         WARN_ON(ext1->is_data);
143         WARN_ON(ext2->is_data);
144
145         /* parent and root are a union, so this covers both */
146         if (back1->parent > back2->parent)
147                 return 1;
148         if (back1->parent < back2->parent)
149                 return -1;
150
151         return 0;
152 }
153
154 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
155 {
156         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
157         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
158
159         if (ext1->is_data > ext2->is_data)
160                 return 1;
161
162         if (ext1->is_data < ext2->is_data)
163                 return -1;
164
165         if (ext1->full_backref > ext2->full_backref)
166                 return 1;
167         if (ext1->full_backref < ext2->full_backref)
168                 return -1;
169
170         if (ext1->is_data)
171                 return compare_data_backref(node1, node2);
172         else
173                 return compare_tree_backref(node1, node2);
174 }
175
176
177 static void *print_status_check(void *p)
178 {
179         struct task_ctx *priv = p;
180         const char work_indicator[] = { '.', 'o', 'O', 'o' };
181         uint32_t count = 0;
182         static char *task_position_string[] = {
183                 "checking extents",
184                 "checking free space cache",
185                 "checking fs roots",
186         };
187
188         task_period_start(priv->info, 1000 /* 1s */);
189
190         if (priv->tp == TASK_NOTHING)
191                 return NULL;
192
193         while (1) {
194                 printf("%s [%c]\r", task_position_string[priv->tp],
195                                 work_indicator[count % 4]);
196                 count++;
197                 fflush(stdout);
198                 task_period_wait(priv->info);
199         }
200         return NULL;
201 }
202
/*
 * Terminate the progress line by emitting a newline and flushing stdout.
 * @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
	(void)p;

	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
210
211 static enum btrfs_check_mode parse_check_mode(const char *str)
212 {
213         if (strcmp(str, "lowmem") == 0)
214                 return CHECK_MODE_LOWMEM;
215         if (strcmp(str, "orig") == 0)
216                 return CHECK_MODE_ORIGINAL;
217         if (strcmp(str, "original") == 0)
218                 return CHECK_MODE_ORIGINAL;
219
220         return CHECK_MODE_UNKNOWN;
221 }
222
223 /* Compatible function to allow reuse of old codes */
224 static u64 first_extent_gap(struct rb_root *holes)
225 {
226         struct file_extent_hole *hole;
227
228         if (RB_EMPTY_ROOT(holes))
229                 return (u64)-1;
230
231         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
232         return hole->start;
233 }
234
235 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
236 {
237         struct file_extent_hole *hole1;
238         struct file_extent_hole *hole2;
239
240         hole1 = rb_entry(node1, struct file_extent_hole, node);
241         hole2 = rb_entry(node2, struct file_extent_hole, node);
242
243         if (hole1->start > hole2->start)
244                 return -1;
245         if (hole1->start < hole2->start)
246                 return 1;
247         /* Now hole1->start == hole2->start */
248         if (hole1->len >= hole2->len)
249                 /*
250                  * Hole 1 will be merge center
251                  * Same hole will be merged later
252                  */
253                 return -1;
254         /* Hole 2 will be merge center */
255         return 1;
256 }
257
258 /*
259  * Add a hole to the record
260  *
261  * This will do hole merge for copy_file_extent_holes(),
262  * which will ensure there won't be continuous holes.
263  */
264 static int add_file_extent_hole(struct rb_root *holes,
265                                 u64 start, u64 len)
266 {
267         struct file_extent_hole *hole;
268         struct file_extent_hole *prev = NULL;
269         struct file_extent_hole *next = NULL;
270
271         hole = malloc(sizeof(*hole));
272         if (!hole)
273                 return -ENOMEM;
274         hole->start = start;
275         hole->len = len;
276         /* Since compare will not return 0, no -EEXIST will happen */
277         rb_insert(holes, &hole->node, compare_hole);
278
279         /* simple merge with previous hole */
280         if (rb_prev(&hole->node))
281                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
282                                 node);
283         if (prev && prev->start + prev->len >= hole->start) {
284                 hole->len = hole->start + hole->len - prev->start;
285                 hole->start = prev->start;
286                 rb_erase(&prev->node, holes);
287                 free(prev);
288                 prev = NULL;
289         }
290
291         /* iterate merge with next holes */
292         while (1) {
293                 if (!rb_next(&hole->node))
294                         break;
295                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
296                                         node);
297                 if (hole->start + hole->len >= next->start) {
298                         if (hole->start + hole->len <= next->start + next->len)
299                                 hole->len = next->start + next->len -
300                                             hole->start;
301                         rb_erase(&next->node, holes);
302                         free(next);
303                         next = NULL;
304                 } else
305                         break;
306         }
307         return 0;
308 }
309
310 static int compare_hole_range(struct rb_node *node, void *data)
311 {
312         struct file_extent_hole *hole;
313         u64 start;
314
315         hole = (struct file_extent_hole *)data;
316         start = hole->start;
317
318         hole = rb_entry(node, struct file_extent_hole, node);
319         if (start < hole->start)
320                 return -1;
321         if (start >= hole->start && start < hole->start + hole->len)
322                 return 0;
323         return 1;
324 }
325
326 /*
327  * Delete a hole in the record
328  *
329  * This will do the hole split and is much restrict than add.
330  */
331 static int del_file_extent_hole(struct rb_root *holes,
332                                 u64 start, u64 len)
333 {
334         struct file_extent_hole *hole;
335         struct file_extent_hole tmp;
336         u64 prev_start = 0;
337         u64 prev_len = 0;
338         u64 next_start = 0;
339         u64 next_len = 0;
340         struct rb_node *node;
341         int have_prev = 0;
342         int have_next = 0;
343         int ret = 0;
344
345         tmp.start = start;
346         tmp.len = len;
347         node = rb_search(holes, &tmp, compare_hole_range, NULL);
348         if (!node)
349                 return -EEXIST;
350         hole = rb_entry(node, struct file_extent_hole, node);
351         if (start + len > hole->start + hole->len)
352                 return -EEXIST;
353
354         /*
355          * Now there will be no overlap, delete the hole and re-add the
356          * split(s) if they exists.
357          */
358         if (start > hole->start) {
359                 prev_start = hole->start;
360                 prev_len = start - hole->start;
361                 have_prev = 1;
362         }
363         if (hole->start + hole->len > start + len) {
364                 next_start = start + len;
365                 next_len = hole->start + hole->len - start - len;
366                 have_next = 1;
367         }
368         rb_erase(node, holes);
369         free(hole);
370         if (have_prev) {
371                 ret = add_file_extent_hole(holes, prev_start, prev_len);
372                 if (ret < 0)
373                         return ret;
374         }
375         if (have_next) {
376                 ret = add_file_extent_hole(holes, next_start, next_len);
377                 if (ret < 0)
378                         return ret;
379         }
380         return 0;
381 }
382
383 static int copy_file_extent_holes(struct rb_root *dst,
384                                   struct rb_root *src)
385 {
386         struct file_extent_hole *hole;
387         struct rb_node *node;
388         int ret = 0;
389
390         node = rb_first(src);
391         while (node) {
392                 hole = rb_entry(node, struct file_extent_hole, node);
393                 ret = add_file_extent_hole(dst, hole->start, hole->len);
394                 if (ret)
395                         break;
396                 node = rb_next(node);
397         }
398         return ret;
399 }
400
401 static void free_file_extent_holes(struct rb_root *holes)
402 {
403         struct rb_node *node;
404         struct file_extent_hole *hole;
405
406         node = rb_first(holes);
407         while (node) {
408                 hole = rb_entry(node, struct file_extent_hole, node);
409                 rb_erase(node, holes);
410                 free(hole);
411                 node = rb_first(holes);
412         }
413 }
414
415 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
416
417 static void record_root_in_trans(struct btrfs_trans_handle *trans,
418                                  struct btrfs_root *root)
419 {
420         if (root->last_trans != trans->transid) {
421                 root->track_dirty = 1;
422                 root->last_trans = trans->transid;
423                 root->commit_root = root->node;
424                 extent_buffer_get(root->node);
425         }
426 }
427
428 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
429 {
430         struct device_record *rec1;
431         struct device_record *rec2;
432
433         rec1 = rb_entry(node1, struct device_record, node);
434         rec2 = rb_entry(node2, struct device_record, node);
435         if (rec1->devid > rec2->devid)
436                 return -1;
437         else if (rec1->devid < rec2->devid)
438                 return 1;
439         else
440                 return 0;
441 }
442
443 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
444 {
445         struct inode_record *rec;
446         struct inode_backref *backref;
447         struct inode_backref *orig;
448         struct inode_backref *tmp;
449         struct orphan_data_extent *src_orphan;
450         struct orphan_data_extent *dst_orphan;
451         struct rb_node *rb;
452         size_t size;
453         int ret;
454
455         rec = malloc(sizeof(*rec));
456         if (!rec)
457                 return ERR_PTR(-ENOMEM);
458         memcpy(rec, orig_rec, sizeof(*rec));
459         rec->refs = 1;
460         INIT_LIST_HEAD(&rec->backrefs);
461         INIT_LIST_HEAD(&rec->orphan_extents);
462         rec->holes = RB_ROOT;
463
464         list_for_each_entry(orig, &orig_rec->backrefs, list) {
465                 size = sizeof(*orig) + orig->namelen + 1;
466                 backref = malloc(size);
467                 if (!backref) {
468                         ret = -ENOMEM;
469                         goto cleanup;
470                 }
471                 memcpy(backref, orig, size);
472                 list_add_tail(&backref->list, &rec->backrefs);
473         }
474         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
475                 dst_orphan = malloc(sizeof(*dst_orphan));
476                 if (!dst_orphan) {
477                         ret = -ENOMEM;
478                         goto cleanup;
479                 }
480                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
481                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
482         }
483         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
484         if (ret < 0)
485                 goto cleanup_rb;
486
487         return rec;
488
489 cleanup_rb:
490         rb = rb_first(&rec->holes);
491         while (rb) {
492                 struct file_extent_hole *hole;
493
494                 hole = rb_entry(rb, struct file_extent_hole, node);
495                 rb = rb_next(rb);
496                 free(hole);
497         }
498
499 cleanup:
500         if (!list_empty(&rec->backrefs))
501                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
502                         list_del(&orig->list);
503                         free(orig);
504                 }
505
506         if (!list_empty(&rec->orphan_extents))
507                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
508                         list_del(&orig->list);
509                         free(orig);
510                 }
511
512         free(rec);
513
514         return ERR_PTR(ret);
515 }
516
517 static void print_orphan_data_extents(struct list_head *orphan_extents,
518                                       u64 objectid)
519 {
520         struct orphan_data_extent *orphan;
521
522         if (list_empty(orphan_extents))
523                 return;
524         printf("The following data extent is lost in tree %llu:\n",
525                objectid);
526         list_for_each_entry(orphan, orphan_extents, list) {
527                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
528                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
529                        orphan->disk_len);
530         }
531 }
532
533 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
534 {
535         u64 root_objectid = root->root_key.objectid;
536         int errors = rec->errors;
537
538         if (!errors)
539                 return;
540         /* reloc root errors, we print its corresponding fs root objectid*/
541         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
542                 root_objectid = root->root_key.offset;
543                 fprintf(stderr, "reloc");
544         }
545         fprintf(stderr, "root %llu inode %llu errors %x",
546                 (unsigned long long) root_objectid,
547                 (unsigned long long) rec->ino, rec->errors);
548
549         if (errors & I_ERR_NO_INODE_ITEM)
550                 fprintf(stderr, ", no inode item");
551         if (errors & I_ERR_NO_ORPHAN_ITEM)
552                 fprintf(stderr, ", no orphan item");
553         if (errors & I_ERR_DUP_INODE_ITEM)
554                 fprintf(stderr, ", dup inode item");
555         if (errors & I_ERR_DUP_DIR_INDEX)
556                 fprintf(stderr, ", dup dir index");
557         if (errors & I_ERR_ODD_DIR_ITEM)
558                 fprintf(stderr, ", odd dir item");
559         if (errors & I_ERR_ODD_FILE_EXTENT)
560                 fprintf(stderr, ", odd file extent");
561         if (errors & I_ERR_BAD_FILE_EXTENT)
562                 fprintf(stderr, ", bad file extent");
563         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
564                 fprintf(stderr, ", file extent overlap");
565         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
566                 fprintf(stderr, ", file extent discount");
567         if (errors & I_ERR_DIR_ISIZE_WRONG)
568                 fprintf(stderr, ", dir isize wrong");
569         if (errors & I_ERR_FILE_NBYTES_WRONG)
570                 fprintf(stderr, ", nbytes wrong");
571         if (errors & I_ERR_ODD_CSUM_ITEM)
572                 fprintf(stderr, ", odd csum item");
573         if (errors & I_ERR_SOME_CSUM_MISSING)
574                 fprintf(stderr, ", some csum missing");
575         if (errors & I_ERR_LINK_COUNT_WRONG)
576                 fprintf(stderr, ", link count wrong");
577         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
578                 fprintf(stderr, ", orphan file extent");
579         fprintf(stderr, "\n");
580         /* Print the orphan extents if needed */
581         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
582                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
583
584         /* Print the holes if needed */
585         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
586                 struct file_extent_hole *hole;
587                 struct rb_node *node;
588                 int found = 0;
589
590                 node = rb_first(&rec->holes);
591                 fprintf(stderr, "Found file extent holes:\n");
592                 while (node) {
593                         found = 1;
594                         hole = rb_entry(node, struct file_extent_hole, node);
595                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
596                                 hole->start, hole->len);
597                         node = rb_next(node);
598                 }
599                 if (!found)
600                         fprintf(stderr, "\tstart: 0, len: %llu\n",
601                                 round_up(rec->isize,
602                                          root->fs_info->sectorsize));
603         }
604 }
605
606 static void print_ref_error(int errors)
607 {
608         if (errors & REF_ERR_NO_DIR_ITEM)
609                 fprintf(stderr, ", no dir item");
610         if (errors & REF_ERR_NO_DIR_INDEX)
611                 fprintf(stderr, ", no dir index");
612         if (errors & REF_ERR_NO_INODE_REF)
613                 fprintf(stderr, ", no inode ref");
614         if (errors & REF_ERR_DUP_DIR_ITEM)
615                 fprintf(stderr, ", dup dir item");
616         if (errors & REF_ERR_DUP_DIR_INDEX)
617                 fprintf(stderr, ", dup dir index");
618         if (errors & REF_ERR_DUP_INODE_REF)
619                 fprintf(stderr, ", dup inode ref");
620         if (errors & REF_ERR_INDEX_UNMATCH)
621                 fprintf(stderr, ", index mismatch");
622         if (errors & REF_ERR_FILETYPE_UNMATCH)
623                 fprintf(stderr, ", filetype mismatch");
624         if (errors & REF_ERR_NAME_TOO_LONG)
625                 fprintf(stderr, ", name too long");
626         if (errors & REF_ERR_NO_ROOT_REF)
627                 fprintf(stderr, ", no root ref");
628         if (errors & REF_ERR_NO_ROOT_BACKREF)
629                 fprintf(stderr, ", no root backref");
630         if (errors & REF_ERR_DUP_ROOT_REF)
631                 fprintf(stderr, ", dup root ref");
632         if (errors & REF_ERR_DUP_ROOT_BACKREF)
633                 fprintf(stderr, ", dup root backref");
634         fprintf(stderr, "\n");
635 }
636
637 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
638                                           u64 ino, int mod)
639 {
640         struct ptr_node *node;
641         struct cache_extent *cache;
642         struct inode_record *rec = NULL;
643         int ret;
644
645         cache = lookup_cache_extent(inode_cache, ino, 1);
646         if (cache) {
647                 node = container_of(cache, struct ptr_node, cache);
648                 rec = node->data;
649                 if (mod && rec->refs > 1) {
650                         node->data = clone_inode_rec(rec);
651                         if (IS_ERR(node->data))
652                                 return node->data;
653                         rec->refs--;
654                         rec = node->data;
655                 }
656         } else if (mod) {
657                 rec = calloc(1, sizeof(*rec));
658                 if (!rec)
659                         return ERR_PTR(-ENOMEM);
660                 rec->ino = ino;
661                 rec->extent_start = (u64)-1;
662                 rec->refs = 1;
663                 INIT_LIST_HEAD(&rec->backrefs);
664                 INIT_LIST_HEAD(&rec->orphan_extents);
665                 rec->holes = RB_ROOT;
666
667                 node = malloc(sizeof(*node));
668                 if (!node) {
669                         free(rec);
670                         return ERR_PTR(-ENOMEM);
671                 }
672                 node->cache.start = ino;
673                 node->cache.size = 1;
674                 node->data = rec;
675
676                 if (ino == BTRFS_FREE_INO_OBJECTID)
677                         rec->found_link = 1;
678
679                 ret = insert_cache_extent(inode_cache, &node->cache);
680                 if (ret)
681                         return ERR_PTR(-EEXIST);
682         }
683         return rec;
684 }
685
686 static void free_orphan_data_extents(struct list_head *orphan_extents)
687 {
688         struct orphan_data_extent *orphan;
689
690         while (!list_empty(orphan_extents)) {
691                 orphan = list_entry(orphan_extents->next,
692                                     struct orphan_data_extent, list);
693                 list_del(&orphan->list);
694                 free(orphan);
695         }
696 }
697
698 static void free_inode_rec(struct inode_record *rec)
699 {
700         struct inode_backref *backref;
701
702         if (--rec->refs > 0)
703                 return;
704
705         while (!list_empty(&rec->backrefs)) {
706                 backref = to_inode_backref(rec->backrefs.next);
707                 list_del(&backref->list);
708                 free(backref);
709         }
710         free_orphan_data_extents(&rec->orphan_extents);
711         free_file_extent_holes(&rec->holes);
712         free(rec);
713 }
714
715 static int can_free_inode_rec(struct inode_record *rec)
716 {
717         if (!rec->errors && rec->checked && rec->found_inode_item &&
718             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
719                 return 1;
720         return 0;
721 }
722
723 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
724                                  struct inode_record *rec)
725 {
726         struct cache_extent *cache;
727         struct inode_backref *tmp, *backref;
728         struct ptr_node *node;
729         u8 filetype;
730
731         if (!rec->found_inode_item)
732                 return;
733
734         filetype = imode_to_type(rec->imode);
735         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
736                 if (backref->found_dir_item && backref->found_dir_index) {
737                         if (backref->filetype != filetype)
738                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
739                         if (!backref->errors && backref->found_inode_ref &&
740                             rec->nlink == rec->found_link) {
741                                 list_del(&backref->list);
742                                 free(backref);
743                         }
744                 }
745         }
746
747         if (!rec->checked || rec->merging)
748                 return;
749
750         if (S_ISDIR(rec->imode)) {
751                 if (rec->found_size != rec->isize)
752                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
753                 if (rec->found_file_extent)
754                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
755         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
756                 if (rec->found_dir_item)
757                         rec->errors |= I_ERR_ODD_DIR_ITEM;
758                 if (rec->found_size != rec->nbytes)
759                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
760                 if (rec->nlink > 0 && !no_holes &&
761                     (rec->extent_end < rec->isize ||
762                      first_extent_gap(&rec->holes) < rec->isize))
763                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
764         }
765
766         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
767                 if (rec->found_csum_item && rec->nodatasum)
768                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
769                 if (rec->some_csum_missing && !rec->nodatasum)
770                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
771         }
772
773         BUG_ON(rec->refs != 1);
774         if (can_free_inode_rec(rec)) {
775                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
776                 node = container_of(cache, struct ptr_node, cache);
777                 BUG_ON(node->data != rec);
778                 remove_cache_extent(inode_cache, &node->cache);
779                 free(node);
780                 free_inode_rec(rec);
781         }
782 }
783
784 static int check_orphan_item(struct btrfs_root *root, u64 ino)
785 {
786         struct btrfs_path path;
787         struct btrfs_key key;
788         int ret;
789
790         key.objectid = BTRFS_ORPHAN_OBJECTID;
791         key.type = BTRFS_ORPHAN_ITEM_KEY;
792         key.offset = ino;
793
794         btrfs_init_path(&path);
795         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
796         btrfs_release_path(&path);
797         if (ret > 0)
798                 ret = -ENOENT;
799         return ret;
800 }
801
802 static int process_inode_item(struct extent_buffer *eb,
803                               int slot, struct btrfs_key *key,
804                               struct shared_node *active_node)
805 {
806         struct inode_record *rec;
807         struct btrfs_inode_item *item;
808
809         rec = active_node->current;
810         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
811         if (rec->found_inode_item) {
812                 rec->errors |= I_ERR_DUP_INODE_ITEM;
813                 return 1;
814         }
815         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
816         rec->nlink = btrfs_inode_nlink(eb, item);
817         rec->isize = btrfs_inode_size(eb, item);
818         rec->nbytes = btrfs_inode_nbytes(eb, item);
819         rec->imode = btrfs_inode_mode(eb, item);
820         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
821                 rec->nodatasum = 1;
822         rec->found_inode_item = 1;
823         if (rec->nlink == 0)
824                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
825         maybe_free_inode_rec(&active_node->inode_cache, rec);
826         return 0;
827 }
828
829 static struct inode_backref *get_inode_backref(struct inode_record *rec,
830                                                 const char *name,
831                                                 int namelen, u64 dir)
832 {
833         struct inode_backref *backref;
834
835         list_for_each_entry(backref, &rec->backrefs, list) {
836                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
837                         break;
838                 if (backref->dir != dir || backref->namelen != namelen)
839                         continue;
840                 if (memcmp(name, backref->name, namelen))
841                         continue;
842                 return backref;
843         }
844
845         backref = malloc(sizeof(*backref) + namelen + 1);
846         if (!backref)
847                 return NULL;
848         memset(backref, 0, sizeof(*backref));
849         backref->dir = dir;
850         backref->namelen = namelen;
851         memcpy(backref->name, name, namelen);
852         backref->name[namelen] = '\0';
853         list_add_tail(&backref->list, &rec->backrefs);
854         return backref;
855 }
856
857 static int add_inode_backref(struct cache_tree *inode_cache,
858                              u64 ino, u64 dir, u64 index,
859                              const char *name, int namelen,
860                              u8 filetype, u8 itemtype, int errors)
861 {
862         struct inode_record *rec;
863         struct inode_backref *backref;
864
865         rec = get_inode_rec(inode_cache, ino, 1);
866         BUG_ON(IS_ERR(rec));
867         backref = get_inode_backref(rec, name, namelen, dir);
868         BUG_ON(!backref);
869         if (errors)
870                 backref->errors |= errors;
871         if (itemtype == BTRFS_DIR_INDEX_KEY) {
872                 if (backref->found_dir_index)
873                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
874                 if (backref->found_inode_ref && backref->index != index)
875                         backref->errors |= REF_ERR_INDEX_UNMATCH;
876                 if (backref->found_dir_item && backref->filetype != filetype)
877                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
878
879                 backref->index = index;
880                 backref->filetype = filetype;
881                 backref->found_dir_index = 1;
882         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
883                 rec->found_link++;
884                 if (backref->found_dir_item)
885                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
886                 if (backref->found_dir_index && backref->filetype != filetype)
887                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
888
889                 backref->filetype = filetype;
890                 backref->found_dir_item = 1;
891         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
892                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
893                 if (backref->found_inode_ref)
894                         backref->errors |= REF_ERR_DUP_INODE_REF;
895                 if (backref->found_dir_index && backref->index != index)
896                         backref->errors |= REF_ERR_INDEX_UNMATCH;
897                 else
898                         backref->index = index;
899
900                 backref->ref_type = itemtype;
901                 backref->found_inode_ref = 1;
902         } else {
903                 BUG_ON(1);
904         }
905
906         maybe_free_inode_rec(inode_cache, rec);
907         return 0;
908 }
909
910 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
911                             struct cache_tree *dst_cache)
912 {
913         struct inode_backref *backref;
914         u32 dir_count = 0;
915         int ret = 0;
916
917         dst->merging = 1;
918         list_for_each_entry(backref, &src->backrefs, list) {
919                 if (backref->found_dir_index) {
920                         add_inode_backref(dst_cache, dst->ino, backref->dir,
921                                         backref->index, backref->name,
922                                         backref->namelen, backref->filetype,
923                                         BTRFS_DIR_INDEX_KEY, backref->errors);
924                 }
925                 if (backref->found_dir_item) {
926                         dir_count++;
927                         add_inode_backref(dst_cache, dst->ino,
928                                         backref->dir, 0, backref->name,
929                                         backref->namelen, backref->filetype,
930                                         BTRFS_DIR_ITEM_KEY, backref->errors);
931                 }
932                 if (backref->found_inode_ref) {
933                         add_inode_backref(dst_cache, dst->ino,
934                                         backref->dir, backref->index,
935                                         backref->name, backref->namelen, 0,
936                                         backref->ref_type, backref->errors);
937                 }
938         }
939
940         if (src->found_dir_item)
941                 dst->found_dir_item = 1;
942         if (src->found_file_extent)
943                 dst->found_file_extent = 1;
944         if (src->found_csum_item)
945                 dst->found_csum_item = 1;
946         if (src->some_csum_missing)
947                 dst->some_csum_missing = 1;
948         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
949                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
950                 if (ret < 0)
951                         return ret;
952         }
953
954         BUG_ON(src->found_link < dir_count);
955         dst->found_link += src->found_link - dir_count;
956         dst->found_size += src->found_size;
957         if (src->extent_start != (u64)-1) {
958                 if (dst->extent_start == (u64)-1) {
959                         dst->extent_start = src->extent_start;
960                         dst->extent_end = src->extent_end;
961                 } else {
962                         if (dst->extent_end > src->extent_start)
963                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
964                         else if (dst->extent_end < src->extent_start) {
965                                 ret = add_file_extent_hole(&dst->holes,
966                                         dst->extent_end,
967                                         src->extent_start - dst->extent_end);
968                         }
969                         if (dst->extent_end < src->extent_end)
970                                 dst->extent_end = src->extent_end;
971                 }
972         }
973
974         dst->errors |= src->errors;
975         if (src->found_inode_item) {
976                 if (!dst->found_inode_item) {
977                         dst->nlink = src->nlink;
978                         dst->isize = src->isize;
979                         dst->nbytes = src->nbytes;
980                         dst->imode = src->imode;
981                         dst->nodatasum = src->nodatasum;
982                         dst->found_inode_item = 1;
983                 } else {
984                         dst->errors |= I_ERR_DUP_INODE_ITEM;
985                 }
986         }
987         dst->merging = 0;
988
989         return 0;
990 }
991
992 static int splice_shared_node(struct shared_node *src_node,
993                               struct shared_node *dst_node)
994 {
995         struct cache_extent *cache;
996         struct ptr_node *node, *ins;
997         struct cache_tree *src, *dst;
998         struct inode_record *rec, *conflict;
999         u64 current_ino = 0;
1000         int splice = 0;
1001         int ret;
1002
1003         if (--src_node->refs == 0)
1004                 splice = 1;
1005         if (src_node->current)
1006                 current_ino = src_node->current->ino;
1007
1008         src = &src_node->root_cache;
1009         dst = &dst_node->root_cache;
1010 again:
1011         cache = search_cache_extent(src, 0);
1012         while (cache) {
1013                 node = container_of(cache, struct ptr_node, cache);
1014                 rec = node->data;
1015                 cache = next_cache_extent(cache);
1016
1017                 if (splice) {
1018                         remove_cache_extent(src, &node->cache);
1019                         ins = node;
1020                 } else {
1021                         ins = malloc(sizeof(*ins));
1022                         BUG_ON(!ins);
1023                         ins->cache.start = node->cache.start;
1024                         ins->cache.size = node->cache.size;
1025                         ins->data = rec;
1026                         rec->refs++;
1027                 }
1028                 ret = insert_cache_extent(dst, &ins->cache);
1029                 if (ret == -EEXIST) {
1030                         conflict = get_inode_rec(dst, rec->ino, 1);
1031                         BUG_ON(IS_ERR(conflict));
1032                         merge_inode_recs(rec, conflict, dst);
1033                         if (rec->checked) {
1034                                 conflict->checked = 1;
1035                                 if (dst_node->current == conflict)
1036                                         dst_node->current = NULL;
1037                         }
1038                         maybe_free_inode_rec(dst, conflict);
1039                         free_inode_rec(rec);
1040                         free(ins);
1041                 } else {
1042                         BUG_ON(ret);
1043                 }
1044         }
1045
1046         if (src == &src_node->root_cache) {
1047                 src = &src_node->inode_cache;
1048                 dst = &dst_node->inode_cache;
1049                 goto again;
1050         }
1051
1052         if (current_ino > 0 && (!dst_node->current ||
1053             current_ino > dst_node->current->ino)) {
1054                 if (dst_node->current) {
1055                         dst_node->current->checked = 1;
1056                         maybe_free_inode_rec(dst, dst_node->current);
1057                 }
1058                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1059                 BUG_ON(IS_ERR(dst_node->current));
1060         }
1061         return 0;
1062 }
1063
1064 static void free_inode_ptr(struct cache_extent *cache)
1065 {
1066         struct ptr_node *node;
1067         struct inode_record *rec;
1068
1069         node = container_of(cache, struct ptr_node, cache);
1070         rec = node->data;
1071         free_inode_rec(rec);
1072         free(node);
1073 }
1074
1075 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1076
1077 static struct shared_node *find_shared_node(struct cache_tree *shared,
1078                                             u64 bytenr)
1079 {
1080         struct cache_extent *cache;
1081         struct shared_node *node;
1082
1083         cache = lookup_cache_extent(shared, bytenr, 1);
1084         if (cache) {
1085                 node = container_of(cache, struct shared_node, cache);
1086                 return node;
1087         }
1088         return NULL;
1089 }
1090
1091 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1092 {
1093         int ret;
1094         struct shared_node *node;
1095
1096         node = calloc(1, sizeof(*node));
1097         if (!node)
1098                 return -ENOMEM;
1099         node->cache.start = bytenr;
1100         node->cache.size = 1;
1101         cache_tree_init(&node->root_cache);
1102         cache_tree_init(&node->inode_cache);
1103         node->refs = refs;
1104
1105         ret = insert_cache_extent(shared, &node->cache);
1106
1107         return ret;
1108 }
1109
1110 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1111                              struct walk_control *wc, int level)
1112 {
1113         struct shared_node *node;
1114         struct shared_node *dest;
1115         int ret;
1116
1117         if (level == wc->active_node)
1118                 return 0;
1119
1120         BUG_ON(wc->active_node <= level);
1121         node = find_shared_node(&wc->shared, bytenr);
1122         if (!node) {
1123                 ret = add_shared_node(&wc->shared, bytenr, refs);
1124                 BUG_ON(ret);
1125                 node = find_shared_node(&wc->shared, bytenr);
1126                 wc->nodes[level] = node;
1127                 wc->active_node = level;
1128                 return 0;
1129         }
1130
1131         if (wc->root_level == wc->active_node &&
1132             btrfs_root_refs(&root->root_item) == 0) {
1133                 if (--node->refs == 0) {
1134                         free_inode_recs_tree(&node->root_cache);
1135                         free_inode_recs_tree(&node->inode_cache);
1136                         remove_cache_extent(&wc->shared, &node->cache);
1137                         free(node);
1138                 }
1139                 return 1;
1140         }
1141
1142         dest = wc->nodes[wc->active_node];
1143         splice_shared_node(node, dest);
1144         if (node->refs == 0) {
1145                 remove_cache_extent(&wc->shared, &node->cache);
1146                 free(node);
1147         }
1148         return 1;
1149 }
1150
1151 static int leave_shared_node(struct btrfs_root *root,
1152                              struct walk_control *wc, int level)
1153 {
1154         struct shared_node *node;
1155         struct shared_node *dest;
1156         int i;
1157
1158         if (level == wc->root_level)
1159                 return 0;
1160
1161         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1162                 if (wc->nodes[i])
1163                         break;
1164         }
1165         BUG_ON(i >= BTRFS_MAX_LEVEL);
1166
1167         node = wc->nodes[wc->active_node];
1168         wc->nodes[wc->active_node] = NULL;
1169         wc->active_node = i;
1170
1171         dest = wc->nodes[wc->active_node];
1172         if (wc->active_node < wc->root_level ||
1173             btrfs_root_refs(&root->root_item) > 0) {
1174                 BUG_ON(node->refs <= 1);
1175                 splice_shared_node(node, dest);
1176         } else {
1177                 BUG_ON(node->refs < 2);
1178                 node->refs--;
1179         }
1180         return 0;
1181 }
1182
1183 /*
1184  * Returns:
1185  * < 0 - on error
1186  * 1   - if the root with id child_root_id is a child of root parent_root_id
1187  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1188  *       has other root(s) as parent(s)
1189  * 2   - if the root child_root_id doesn't have any parent roots
1190  */
1191 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1192                          u64 child_root_id)
1193 {
1194         struct btrfs_path path;
1195         struct btrfs_key key;
1196         struct extent_buffer *leaf;
1197         int has_parent = 0;
1198         int ret;
1199
1200         btrfs_init_path(&path);
1201
1202         key.objectid = parent_root_id;
1203         key.type = BTRFS_ROOT_REF_KEY;
1204         key.offset = child_root_id;
1205         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1206                                 0, 0);
1207         if (ret < 0)
1208                 return ret;
1209         btrfs_release_path(&path);
1210         if (!ret)
1211                 return 1;
1212
1213         key.objectid = child_root_id;
1214         key.type = BTRFS_ROOT_BACKREF_KEY;
1215         key.offset = 0;
1216         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1217                                 0, 0);
1218         if (ret < 0)
1219                 goto out;
1220
1221         while (1) {
1222                 leaf = path.nodes[0];
1223                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1224                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1225                         if (ret)
1226                                 break;
1227                         leaf = path.nodes[0];
1228                 }
1229
1230                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1231                 if (key.objectid != child_root_id ||
1232                     key.type != BTRFS_ROOT_BACKREF_KEY)
1233                         break;
1234
1235                 has_parent = 1;
1236
1237                 if (key.offset == parent_root_id) {
1238                         btrfs_release_path(&path);
1239                         return 1;
1240                 }
1241
1242                 path.slots[0]++;
1243         }
1244 out:
1245         btrfs_release_path(&path);
1246         if (ret < 0)
1247                 return ret;
1248         return has_parent ? 0 : 2;
1249 }
1250
1251 static int process_dir_item(struct extent_buffer *eb,
1252                             int slot, struct btrfs_key *key,
1253                             struct shared_node *active_node)
1254 {
1255         u32 total;
1256         u32 cur = 0;
1257         u32 len;
1258         u32 name_len;
1259         u32 data_len;
1260         int error;
1261         int nritems = 0;
1262         u8 filetype;
1263         struct btrfs_dir_item *di;
1264         struct inode_record *rec;
1265         struct cache_tree *root_cache;
1266         struct cache_tree *inode_cache;
1267         struct btrfs_key location;
1268         char namebuf[BTRFS_NAME_LEN];
1269
1270         root_cache = &active_node->root_cache;
1271         inode_cache = &active_node->inode_cache;
1272         rec = active_node->current;
1273         rec->found_dir_item = 1;
1274
1275         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1276         total = btrfs_item_size_nr(eb, slot);
1277         while (cur < total) {
1278                 nritems++;
1279                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1280                 name_len = btrfs_dir_name_len(eb, di);
1281                 data_len = btrfs_dir_data_len(eb, di);
1282                 filetype = btrfs_dir_type(eb, di);
1283
1284                 rec->found_size += name_len;
1285                 if (cur + sizeof(*di) + name_len > total ||
1286                     name_len > BTRFS_NAME_LEN) {
1287                         error = REF_ERR_NAME_TOO_LONG;
1288
1289                         if (cur + sizeof(*di) > total)
1290                                 break;
1291                         len = min_t(u32, total - cur - sizeof(*di),
1292                                     BTRFS_NAME_LEN);
1293                 } else {
1294                         len = name_len;
1295                         error = 0;
1296                 }
1297
1298                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1299
1300                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1301                     key->offset != btrfs_name_hash(namebuf, len)) {
1302                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1303                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1304                         key->objectid, key->offset, namebuf, len, filetype,
1305                         key->offset, btrfs_name_hash(namebuf, len));
1306                 }
1307
1308                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1309                         add_inode_backref(inode_cache, location.objectid,
1310                                           key->objectid, key->offset, namebuf,
1311                                           len, filetype, key->type, error);
1312                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1313                         add_inode_backref(root_cache, location.objectid,
1314                                           key->objectid, key->offset,
1315                                           namebuf, len, filetype,
1316                                           key->type, error);
1317                 } else {
1318                         fprintf(stderr,
1319                                 "unknown location type %d in DIR_ITEM[%llu %llu]\n",
1320                                 location.type, key->objectid, key->offset);
1321                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1322                                           key->objectid, key->offset, namebuf,
1323                                           len, filetype, key->type, error);
1324                 }
1325
1326                 len = sizeof(*di) + name_len + data_len;
1327                 di = (struct btrfs_dir_item *)((char *)di + len);
1328                 cur += len;
1329         }
1330         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1331                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1332
1333         return 0;
1334 }
1335
1336 static int process_inode_ref(struct extent_buffer *eb,
1337                              int slot, struct btrfs_key *key,
1338                              struct shared_node *active_node)
1339 {
1340         u32 total;
1341         u32 cur = 0;
1342         u32 len;
1343         u32 name_len;
1344         u64 index;
1345         int error;
1346         struct cache_tree *inode_cache;
1347         struct btrfs_inode_ref *ref;
1348         char namebuf[BTRFS_NAME_LEN];
1349
1350         inode_cache = &active_node->inode_cache;
1351
1352         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1353         total = btrfs_item_size_nr(eb, slot);
1354         while (cur < total) {
1355                 name_len = btrfs_inode_ref_name_len(eb, ref);
1356                 index = btrfs_inode_ref_index(eb, ref);
1357
1358                 /* inode_ref + namelen should not cross item boundary */
1359                 if (cur + sizeof(*ref) + name_len > total ||
1360                     name_len > BTRFS_NAME_LEN) {
1361                         if (total < cur + sizeof(*ref))
1362                                 break;
1363
1364                         /* Still try to read out the remaining part */
1365                         len = min_t(u32, total - cur - sizeof(*ref),
1366                                     BTRFS_NAME_LEN);
1367                         error = REF_ERR_NAME_TOO_LONG;
1368                 } else {
1369                         len = name_len;
1370                         error = 0;
1371                 }
1372
1373                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1374                 add_inode_backref(inode_cache, key->objectid, key->offset,
1375                                   index, namebuf, len, 0, key->type, error);
1376
1377                 len = sizeof(*ref) + name_len;
1378                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1379                 cur += len;
1380         }
1381         return 0;
1382 }
1383
1384 static int process_inode_extref(struct extent_buffer *eb,
1385                                 int slot, struct btrfs_key *key,
1386                                 struct shared_node *active_node)
1387 {
1388         u32 total;
1389         u32 cur = 0;
1390         u32 len;
1391         u32 name_len;
1392         u64 index;
1393         u64 parent;
1394         int error;
1395         struct cache_tree *inode_cache;
1396         struct btrfs_inode_extref *extref;
1397         char namebuf[BTRFS_NAME_LEN];
1398
1399         inode_cache = &active_node->inode_cache;
1400
1401         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1402         total = btrfs_item_size_nr(eb, slot);
1403         while (cur < total) {
1404                 name_len = btrfs_inode_extref_name_len(eb, extref);
1405                 index = btrfs_inode_extref_index(eb, extref);
1406                 parent = btrfs_inode_extref_parent(eb, extref);
1407                 if (name_len <= BTRFS_NAME_LEN) {
1408                         len = name_len;
1409                         error = 0;
1410                 } else {
1411                         len = BTRFS_NAME_LEN;
1412                         error = REF_ERR_NAME_TOO_LONG;
1413                 }
1414                 read_extent_buffer(eb, namebuf,
1415                                    (unsigned long)(extref + 1), len);
1416                 add_inode_backref(inode_cache, key->objectid, parent,
1417                                   index, namebuf, len, 0, key->type, error);
1418
1419                 len = sizeof(*extref) + name_len;
1420                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1421                 cur += len;
1422         }
1423         return 0;
1424
1425 }
1426
1427 static int count_csum_range(struct btrfs_root *root, u64 start,
1428                             u64 len, u64 *found)
1429 {
1430         struct btrfs_key key;
1431         struct btrfs_path path;
1432         struct extent_buffer *leaf;
1433         int ret;
1434         size_t size;
1435         *found = 0;
1436         u64 csum_end;
1437         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1438
1439         btrfs_init_path(&path);
1440
1441         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1442         key.offset = start;
1443         key.type = BTRFS_EXTENT_CSUM_KEY;
1444
1445         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1446                                 &key, &path, 0, 0);
1447         if (ret < 0)
1448                 goto out;
1449         if (ret > 0 && path.slots[0] > 0) {
1450                 leaf = path.nodes[0];
1451                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1452                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1453                     key.type == BTRFS_EXTENT_CSUM_KEY)
1454                         path.slots[0]--;
1455         }
1456
1457         while (len > 0) {
1458                 leaf = path.nodes[0];
1459                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1460                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1461                         if (ret > 0)
1462                                 break;
1463                         else if (ret < 0)
1464                                 goto out;
1465                         leaf = path.nodes[0];
1466                 }
1467
1468                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1469                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1470                     key.type != BTRFS_EXTENT_CSUM_KEY)
1471                         break;
1472
1473                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1474                 if (key.offset >= start + len)
1475                         break;
1476
1477                 if (key.offset > start)
1478                         start = key.offset;
1479
1480                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1481                 csum_end = key.offset + (size / csum_size) *
1482                            root->fs_info->sectorsize;
1483                 if (csum_end > start) {
1484                         size = min(csum_end - start, len);
1485                         len -= size;
1486                         start += size;
1487                         *found += size;
1488                 }
1489
1490                 path.slots[0]++;
1491         }
1492 out:
1493         btrfs_release_path(&path);
1494         if (ret < 0)
1495                 return ret;
1496         return 0;
1497 }
1498
1499 static int process_file_extent(struct btrfs_root *root,
1500                                 struct extent_buffer *eb,
1501                                 int slot, struct btrfs_key *key,
1502                                 struct shared_node *active_node)
1503 {
1504         struct inode_record *rec;
1505         struct btrfs_file_extent_item *fi;
1506         u64 num_bytes = 0;
1507         u64 disk_bytenr = 0;
1508         u64 extent_offset = 0;
1509         u64 mask = root->fs_info->sectorsize - 1;
1510         int extent_type;
1511         int ret;
1512
1513         rec = active_node->current;
1514         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1515         rec->found_file_extent = 1;
1516
1517         if (rec->extent_start == (u64)-1) {
1518                 rec->extent_start = key->offset;
1519                 rec->extent_end = key->offset;
1520         }
1521
1522         if (rec->extent_end > key->offset)
1523                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1524         else if (rec->extent_end < key->offset) {
1525                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1526                                            key->offset - rec->extent_end);
1527                 if (ret < 0)
1528                         return ret;
1529         }
1530
1531         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1532         extent_type = btrfs_file_extent_type(eb, fi);
1533
1534         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1535                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1536                 if (num_bytes == 0)
1537                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1538                 rec->found_size += num_bytes;
1539                 num_bytes = (num_bytes + mask) & ~mask;
1540         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1541                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1542                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1543                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1544                 extent_offset = btrfs_file_extent_offset(eb, fi);
1545                 if (num_bytes == 0 || (num_bytes & mask))
1546                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1547                 if (num_bytes + extent_offset >
1548                     btrfs_file_extent_ram_bytes(eb, fi))
1549                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1550                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1551                     (btrfs_file_extent_compression(eb, fi) ||
1552                      btrfs_file_extent_encryption(eb, fi) ||
1553                      btrfs_file_extent_other_encoding(eb, fi)))
1554                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1555                 if (disk_bytenr > 0)
1556                         rec->found_size += num_bytes;
1557         } else {
1558                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1559         }
1560         rec->extent_end = key->offset + num_bytes;
1561
1562         /*
1563          * The data reloc tree will copy full extents into its inode and then
1564          * copy the corresponding csums.  Because the extent it copied could be
1565          * a preallocated extent that hasn't been written to yet there may be no
1566          * csums to copy, ergo we won't have csums for our file extent.  This is
1567          * ok so just don't bother checking csums if the inode belongs to the
1568          * data reloc tree.
1569          */
1570         if (disk_bytenr > 0 &&
1571             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1572                 u64 found;
1573                 if (btrfs_file_extent_compression(eb, fi))
1574                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1575                 else
1576                         disk_bytenr += extent_offset;
1577
1578                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1579                 if (ret < 0)
1580                         return ret;
1581                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1582                         if (found > 0)
1583                                 rec->found_csum_item = 1;
1584                         if (found < num_bytes)
1585                                 rec->some_csum_missing = 1;
1586                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1587                         if (found > 0)
1588                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1589                 }
1590         }
1591         return 0;
1592 }
1593
1594 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1595                             struct walk_control *wc)
1596 {
1597         struct btrfs_key key;
1598         u32 nritems;
1599         int i;
1600         int ret = 0;
1601         struct cache_tree *inode_cache;
1602         struct shared_node *active_node;
1603
1604         if (wc->root_level == wc->active_node &&
1605             btrfs_root_refs(&root->root_item) == 0)
1606                 return 0;
1607
1608         active_node = wc->nodes[wc->active_node];
1609         inode_cache = &active_node->inode_cache;
1610         nritems = btrfs_header_nritems(eb);
1611         for (i = 0; i < nritems; i++) {
1612                 btrfs_item_key_to_cpu(eb, &key, i);
1613
1614                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1615                         continue;
1616                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1617                         continue;
1618
1619                 if (active_node->current == NULL ||
1620                     active_node->current->ino < key.objectid) {
1621                         if (active_node->current) {
1622                                 active_node->current->checked = 1;
1623                                 maybe_free_inode_rec(inode_cache,
1624                                                      active_node->current);
1625                         }
1626                         active_node->current = get_inode_rec(inode_cache,
1627                                                              key.objectid, 1);
1628                         BUG_ON(IS_ERR(active_node->current));
1629                 }
1630                 switch (key.type) {
1631                 case BTRFS_DIR_ITEM_KEY:
1632                 case BTRFS_DIR_INDEX_KEY:
1633                         ret = process_dir_item(eb, i, &key, active_node);
1634                         break;
1635                 case BTRFS_INODE_REF_KEY:
1636                         ret = process_inode_ref(eb, i, &key, active_node);
1637                         break;
1638                 case BTRFS_INODE_EXTREF_KEY:
1639                         ret = process_inode_extref(eb, i, &key, active_node);
1640                         break;
1641                 case BTRFS_INODE_ITEM_KEY:
1642                         ret = process_inode_item(eb, i, &key, active_node);
1643                         break;
1644                 case BTRFS_EXTENT_DATA_KEY:
1645                         ret = process_file_extent(root, eb, i, &key,
1646                                                   active_node);
1647                         break;
1648                 default:
1649                         break;
1650                 };
1651         }
1652         return ret;
1653 }
1654
1655 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1656                              struct extent_buffer *eb, struct node_refs *nrefs,
1657                              u64 level, int check_all);
1658 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1659                             unsigned int ext_ref);
1660
1661 /*
1662  * Returns >0  Found error, not fatal, should continue
1663  * Returns <0  Fatal error, must exit the whole check
1664  * Returns 0   No errors found
1665  */
1666 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1667                                struct node_refs *nrefs, int *level, int ext_ref)
1668 {
1669         struct extent_buffer *cur = path->nodes[0];
1670         struct btrfs_key key;
1671         u64 cur_bytenr;
1672         u32 nritems;
1673         u64 first_ino = 0;
1674         int root_level = btrfs_header_level(root->node);
1675         int i;
1676         int ret = 0; /* Final return value */
1677         int err = 0; /* Positive error bitmap */
1678
1679         cur_bytenr = cur->start;
1680
1681         /* skip to first inode item or the first inode number change */
1682         nritems = btrfs_header_nritems(cur);
1683         for (i = 0; i < nritems; i++) {
1684                 btrfs_item_key_to_cpu(cur, &key, i);
1685                 if (i == 0)
1686                         first_ino = key.objectid;
1687                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1688                     (first_ino && first_ino != key.objectid))
1689                         break;
1690         }
1691         if (i == nritems) {
1692                 path->slots[0] = nritems;
1693                 return 0;
1694         }
1695         path->slots[0] = i;
1696
1697 again:
1698         err |= check_inode_item(root, path, ext_ref);
1699
1700         /* modify cur since check_inode_item may change path */
1701         cur = path->nodes[0];
1702
1703         if (err & LAST_ITEM)
1704                 goto out;
1705
1706         /* still have inode items in thie leaf */
1707         if (cur->start == cur_bytenr)
1708                 goto again;
1709
1710         /*
1711          * we have switched to another leaf, above nodes may
1712          * have changed, here walk down the path, if a node
1713          * or leaf is shared, check whether we can skip this
1714          * node or leaf.
1715          */
1716         for (i = root_level; i >= 0; i--) {
1717                 if (path->nodes[i]->start == nrefs->bytenr[i])
1718                         continue;
1719
1720                 ret = update_nodes_refs(root, path->nodes[i]->start,
1721                                 path->nodes[i], nrefs, i, 0);
1722                 if (ret)
1723                         goto out;
1724
1725                 if (!nrefs->need_check[i]) {
1726                         *level += 1;
1727                         break;
1728                 }
1729         }
1730
1731         for (i = 0; i < *level; i++) {
1732                 free_extent_buffer(path->nodes[i]);
1733                 path->nodes[i] = NULL;
1734         }
1735 out:
1736         err &= ~LAST_ITEM;
1737         if (err && !ret)
1738                 ret = err;
1739         return ret;
1740 }
1741
1742 static void reada_walk_down(struct btrfs_root *root,
1743                             struct extent_buffer *node, int slot)
1744 {
1745         struct btrfs_fs_info *fs_info = root->fs_info;
1746         u64 bytenr;
1747         u64 ptr_gen;
1748         u32 nritems;
1749         int i;
1750         int level;
1751
1752         level = btrfs_header_level(node);
1753         if (level != 1)
1754                 return;
1755
1756         nritems = btrfs_header_nritems(node);
1757         for (i = slot; i < nritems; i++) {
1758                 bytenr = btrfs_node_blockptr(node, i);
1759                 ptr_gen = btrfs_node_ptr_generation(node, i);
1760                 readahead_tree_block(fs_info, bytenr, ptr_gen);
1761         }
1762 }
1763
1764 /*
1765  * Check the child node/leaf by the following condition:
1766  * 1. the first item key of the node/leaf should be the same with the one
1767  *    in parent.
1768  * 2. block in parent node should match the child node/leaf.
1769  * 3. generation of parent node and child's header should be consistent.
1770  *
1771  * Or the child node/leaf pointed by the key in parent is not valid.
1772  *
1773  * We hope to check leaf owner too, but since subvol may share leaves,
1774  * which makes leaf owner check not so strong, key check should be
1775  * sufficient enough for that case.
1776  */
1777 static int check_child_node(struct extent_buffer *parent, int slot,
1778                             struct extent_buffer *child)
1779 {
1780         struct btrfs_key parent_key;
1781         struct btrfs_key child_key;
1782         int ret = 0;
1783
1784         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1785         if (btrfs_header_level(child) == 0)
1786                 btrfs_item_key_to_cpu(child, &child_key, 0);
1787         else
1788                 btrfs_node_key_to_cpu(child, &child_key, 0);
1789
1790         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1791                 ret = -EINVAL;
1792                 fprintf(stderr,
1793                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1794                         parent_key.objectid, parent_key.type, parent_key.offset,
1795                         child_key.objectid, child_key.type, child_key.offset);
1796         }
1797         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1798                 ret = -EINVAL;
1799                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1800                         btrfs_node_blockptr(parent, slot),
1801                         btrfs_header_bytenr(child));
1802         }
1803         if (btrfs_node_ptr_generation(parent, slot) !=
1804             btrfs_header_generation(child)) {
1805                 ret = -EINVAL;
1806                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1807                         btrfs_header_generation(child),
1808                         btrfs_node_ptr_generation(parent, slot));
1809         }
1810         return ret;
1811 }
1812
1813 /*
1814  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
1815  * in every fs or file tree check. Here we find its all root ids, and only check
1816  * it in the fs or file tree which has the smallest root id.
1817  */
1818 static int need_check(struct btrfs_root *root, struct ulist *roots)
1819 {
1820         struct rb_node *node;
1821         struct ulist_node *u;
1822
1823         /*
1824          * @roots can be empty if it belongs to tree reloc tree
1825          * In that case, we should always check the leaf, as we can't use
1826          * the tree owner to ensure some other root will check it.
1827          */
1828         if (roots->nnodes == 1 || roots->nnodes == 0)
1829                 return 1;
1830
1831         node = rb_first(&roots->root);
1832         u = rb_entry(node, struct ulist_node, rb_node);
1833         /*
1834          * current root id is not smallest, we skip it and let it be checked
1835          * in the fs or file tree who hash the smallest root id.
1836          */
1837         if (root->objectid != u->val)
1838                 return 0;
1839
1840         return 1;
1841 }
1842
1843 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
1844                                u64 *flags_ret)
1845 {
1846         struct btrfs_root *extent_root = root->fs_info->extent_root;
1847         struct btrfs_root_item *ri = &root->root_item;
1848         struct btrfs_extent_inline_ref *iref;
1849         struct btrfs_extent_item *ei;
1850         struct btrfs_key key;
1851         struct btrfs_path *path = NULL;
1852         unsigned long ptr;
1853         unsigned long end;
1854         u64 flags;
1855         u64 owner = 0;
1856         u64 offset;
1857         int slot;
1858         int type;
1859         int ret = 0;
1860
1861         /*
1862          * Except file/reloc tree, we can not have FULL BACKREF MODE
1863          */
1864         if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
1865                 goto normal;
1866
1867         /* root node */
1868         if (eb->start == btrfs_root_bytenr(ri))
1869                 goto normal;
1870
1871         if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
1872                 goto full_backref;
1873
1874         owner = btrfs_header_owner(eb);
1875         if (owner == root->objectid)
1876                 goto normal;
1877
1878         path = btrfs_alloc_path();
1879         if (!path)
1880                 return -ENOMEM;
1881
1882         key.objectid = btrfs_header_bytenr(eb);
1883         key.type = (u8)-1;
1884         key.offset = (u64)-1;
1885
1886         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
1887         if (ret <= 0) {
1888                 ret = -EIO;
1889                 goto out;
1890         }
1891
1892         if (ret > 0) {
1893                 ret = btrfs_previous_extent_item(extent_root, path,
1894                                                  key.objectid);
1895                 if (ret)
1896                         goto full_backref;
1897
1898         }
1899         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1900
1901         eb = path->nodes[0];
1902         slot = path->slots[0];
1903         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
1904
1905         flags = btrfs_extent_flags(eb, ei);
1906         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
1907                 goto full_backref;
1908
1909         ptr = (unsigned long)(ei + 1);
1910         end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
1911
1912         if (key.type == BTRFS_EXTENT_ITEM_KEY)
1913                 ptr += sizeof(struct btrfs_tree_block_info);
1914
1915 next:
1916         /* Reached extent item ends normally */
1917         if (ptr == end)
1918                 goto full_backref;
1919
1920         /* Beyond extent item end, wrong item size */
1921         if (ptr > end) {
1922                 error("extent item at bytenr %llu slot %d has wrong size",
1923                         eb->start, slot);
1924                 goto full_backref;
1925         }
1926
1927         iref = (struct btrfs_extent_inline_ref *)ptr;
1928         offset = btrfs_extent_inline_ref_offset(eb, iref);
1929         type = btrfs_extent_inline_ref_type(eb, iref);
1930
1931         if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
1932                 goto normal;
1933         ptr += btrfs_extent_inline_ref_size(type);
1934         goto next;
1935
1936 normal:
1937         *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
1938         goto out;
1939
1940 full_backref:
1941         *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
1942 out:
1943         btrfs_free_path(path);
1944         return ret;
1945 }
1946
1947 /*
1948  * for a tree node or leaf, we record its reference count, so later if we still
1949  * process this node or leaf, don't need to compute its reference count again.
1950  *
1951  * @bytenr  if @bytenr == (u64)-1, only update nrefs->full_backref[level]
1952  */
1953 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1954                              struct extent_buffer *eb, struct node_refs *nrefs,
1955                              u64 level, int check_all)
1956 {
1957         struct ulist *roots;
1958         u64 refs = 0;
1959         u64 flags = 0;
1960         int root_level = btrfs_header_level(root->node);
1961         int check;
1962         int ret;
1963
1964         if (nrefs->bytenr[level] == bytenr)
1965                 return 0;
1966
1967         if (bytenr != (u64)-1) {
1968                 /* the return value of this function seems a mistake */
1969                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1970                                        level, 1, &refs, &flags);
1971                 /* temporary fix */
1972                 if (ret < 0 && !check_all)
1973                         return ret;
1974
1975                 nrefs->bytenr[level] = bytenr;
1976                 nrefs->refs[level] = refs;
1977                 nrefs->full_backref[level] = 0;
1978                 nrefs->checked[level] = 0;
1979
1980                 if (refs > 1) {
1981                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
1982                                                    0, &roots);
1983                         if (ret)
1984                                 return -EIO;
1985
1986                         check = need_check(root, roots);
1987                         ulist_free(roots);
1988                         nrefs->need_check[level] = check;
1989                 } else {
1990                         if (!check_all) {
1991                                 nrefs->need_check[level] = 1;
1992                         } else {
1993                                 if (level == root_level) {
1994                                         nrefs->need_check[level] = 1;
1995                                 } else {
1996                                         /*
1997                                          * The node refs may have not been
1998                                          * updated if upper needs checking (the
1999                                          * lowest root_objectid) the node can
2000                                          * be checked.
2001                                          */
2002                                         nrefs->need_check[level] =
2003                                                 nrefs->need_check[level + 1];
2004                                 }
2005                         }
2006                 }
2007         }
2008
2009         if (check_all && eb) {
2010                 calc_extent_flag_v2(root, eb, &flags);
2011                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2012                         nrefs->full_backref[level] = 1;
2013         }
2014
2015         return 0;
2016 }
2017
2018 /*
2019  * @level           if @level == -1 means extent data item
2020  *                  else normal treeblocl.
2021  */
2022 static int should_check_extent_strictly(struct btrfs_root *root,
2023                                         struct node_refs *nrefs, int level)
2024 {
2025         int root_level = btrfs_header_level(root->node);
2026
2027         if (level > root_level || level < -1)
2028                 return 1;
2029         if (level == root_level)
2030                 return 1;
2031         /*
2032          * if the upper node is marked full backref, it should contain shared
2033          * backref of the parent (except owner == root->objectid).
2034          */
2035         while (++level <= root_level)
2036                 if (nrefs->refs[level] > 1)
2037                         return 0;
2038
2039         return 1;
2040 }
2041
2042 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2043                           struct walk_control *wc, int *level,
2044                           struct node_refs *nrefs)
2045 {
2046         enum btrfs_tree_block_status status;
2047         u64 bytenr;
2048         u64 ptr_gen;
2049         struct btrfs_fs_info *fs_info = root->fs_info;
2050         struct extent_buffer *next;
2051         struct extent_buffer *cur;
2052         int ret, err = 0;
2053         u64 refs;
2054
2055         WARN_ON(*level < 0);
2056         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2057
2058         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2059                 refs = nrefs->refs[*level];
2060                 ret = 0;
2061         } else {
2062                 ret = btrfs_lookup_extent_info(NULL, root,
2063                                        path->nodes[*level]->start,
2064                                        *level, 1, &refs, NULL);
2065                 if (ret < 0) {
2066                         err = ret;
2067                         goto out;
2068                 }
2069                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2070                 nrefs->refs[*level] = refs;
2071         }
2072
2073         if (refs > 1) {
2074                 ret = enter_shared_node(root, path->nodes[*level]->start,
2075                                         refs, wc, *level);
2076                 if (ret > 0) {
2077                         err = ret;
2078                         goto out;
2079                 }
2080         }
2081
2082         while (*level >= 0) {
2083                 WARN_ON(*level < 0);
2084                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2085                 cur = path->nodes[*level];
2086
2087                 if (btrfs_header_level(cur) != *level)
2088                         WARN_ON(1);
2089
2090                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2091                         break;
2092                 if (*level == 0) {
2093                         ret = process_one_leaf(root, cur, wc);
2094                         if (ret < 0)
2095                                 err = ret;
2096                         break;
2097                 }
2098                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2099                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2100
2101                 if (bytenr == nrefs->bytenr[*level - 1]) {
2102                         refs = nrefs->refs[*level - 1];
2103                 } else {
2104                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2105                                         *level - 1, 1, &refs, NULL);
2106                         if (ret < 0) {
2107                                 refs = 0;
2108                         } else {
2109                                 nrefs->bytenr[*level - 1] = bytenr;
2110                                 nrefs->refs[*level - 1] = refs;
2111                         }
2112                 }
2113
2114                 if (refs > 1) {
2115                         ret = enter_shared_node(root, bytenr, refs,
2116                                                 wc, *level - 1);
2117                         if (ret > 0) {
2118                                 path->slots[*level]++;
2119                                 continue;
2120                         }
2121                 }
2122
2123                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2124                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2125                         free_extent_buffer(next);
2126                         reada_walk_down(root, cur, path->slots[*level]);
2127                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2128                         if (!extent_buffer_uptodate(next)) {
2129                                 struct btrfs_key node_key;
2130
2131                                 btrfs_node_key_to_cpu(path->nodes[*level],
2132                                                       &node_key,
2133                                                       path->slots[*level]);
2134                                 btrfs_add_corrupt_extent_record(root->fs_info,
2135                                                 &node_key,
2136                                                 path->nodes[*level]->start,
2137                                                 root->fs_info->nodesize,
2138                                                 *level);
2139                                 err = -EIO;
2140                                 goto out;
2141                         }
2142                 }
2143
2144                 ret = check_child_node(cur, path->slots[*level], next);
2145                 if (ret) {
2146                         free_extent_buffer(next);
2147                         err = ret;
2148                         goto out;
2149                 }
2150
2151                 if (btrfs_is_leaf(next))
2152                         status = btrfs_check_leaf(root, NULL, next);
2153                 else
2154                         status = btrfs_check_node(root, NULL, next);
2155                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2156                         free_extent_buffer(next);
2157                         err = -EIO;
2158                         goto out;
2159                 }
2160
2161                 *level = *level - 1;
2162                 free_extent_buffer(path->nodes[*level]);
2163                 path->nodes[*level] = next;
2164                 path->slots[*level] = 0;
2165         }
2166 out:
2167         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2168         return err;
2169 }
2170
2171 /*
2172  * Update global fs information.
2173  */
2174 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2175                          int level)
2176 {
2177         u32 free_nrs;
2178         struct extent_buffer *eb = path->nodes[level];
2179
2180         total_btree_bytes += eb->len;
2181         if (fs_root_objectid(root->objectid))
2182                 total_fs_tree_bytes += eb->len;
2183         if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2184                 total_extent_tree_bytes += eb->len;
2185
2186         if (level == 0) {
2187                 btree_space_waste += btrfs_leaf_free_space(root, eb);
2188         } else {
2189                 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root->fs_info) -
2190                             btrfs_header_nritems(eb));
2191                 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2192         }
2193 }
2194
2195 /*
2196  * This function only handles BACKREF_MISSING,
2197  * If corresponding extent item exists, increase the ref, else insert an extent
2198  * item and backref.
2199  *
2200  * Returns error bits after repair.
2201  */
2202 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2203                                  struct btrfs_root *root,
2204                                  struct extent_buffer *node,
2205                                  struct node_refs *nrefs, int level, int err)
2206 {
2207         struct btrfs_fs_info *fs_info = root->fs_info;
2208         struct btrfs_root *extent_root = fs_info->extent_root;
2209         struct btrfs_path path;
2210         struct btrfs_extent_item *ei;
2211         struct btrfs_tree_block_info *bi;
2212         struct btrfs_key key;
2213         struct extent_buffer *eb;
2214         u32 size = sizeof(*ei);
2215         u32 node_size = root->fs_info->nodesize;
2216         int insert_extent = 0;
2217         int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2218         int root_level = btrfs_header_level(root->node);
2219         int generation;
2220         int ret;
2221         u64 owner;
2222         u64 bytenr;
2223         u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
2224         u64 parent = 0;
2225
2226         if ((err & BACKREF_MISSING) == 0)
2227                 return err;
2228
2229         WARN_ON(level > BTRFS_MAX_LEVEL);
2230         WARN_ON(level < 0);
2231
2232         btrfs_init_path(&path);
2233         bytenr = btrfs_header_bytenr(node);
2234         owner = btrfs_header_owner(node);
2235         generation = btrfs_header_generation(node);
2236
2237         key.objectid = bytenr;
2238         key.type = (u8)-1;
2239         key.offset = (u64)-1;
2240
2241         /* Search for the extent item */
2242         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2243         if (ret <= 0) {
2244                 ret = -EIO;
2245                 goto out;
2246         }
2247
2248         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2249         if (ret)
2250                 insert_extent = 1;
2251
2252         /* calculate if the extent item flag is full backref or not */
2253         if (nrefs->full_backref[level] != 0)
2254                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2255
2256         /* insert an extent item */
2257         if (insert_extent) {
2258                 struct btrfs_disk_key copy_key;
2259
2260                 generation = btrfs_header_generation(node);
2261
2262                 if (level < root_level && nrefs->full_backref[level + 1] &&
2263                     owner != root->objectid) {
2264                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2265                 }
2266
2267                 key.objectid = bytenr;
2268                 if (!skinny_metadata) {
2269                         key.type = BTRFS_EXTENT_ITEM_KEY;
2270                         key.offset = node_size;
2271                         size += sizeof(*bi);
2272                 } else {
2273                         key.type = BTRFS_METADATA_ITEM_KEY;
2274                         key.offset = level;
2275                 }
2276
2277                 btrfs_release_path(&path);
2278                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2279                                               size);
2280                 if (ret)
2281                         goto out;
2282
2283                 eb = path.nodes[0];
2284                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
2285
2286                 btrfs_set_extent_refs(eb, ei, 0);
2287                 btrfs_set_extent_generation(eb, ei, generation);
2288                 btrfs_set_extent_flags(eb, ei, flags);
2289
2290                 if (!skinny_metadata) {
2291                         bi = (struct btrfs_tree_block_info *)(ei + 1);
2292                         memset_extent_buffer(eb, 0, (unsigned long)bi,
2293                                              sizeof(*bi));
2294                         btrfs_set_disk_key_objectid(&copy_key, root->objectid);
2295                         btrfs_set_disk_key_type(&copy_key, 0);
2296                         btrfs_set_disk_key_offset(&copy_key, 0);
2297
2298                         btrfs_set_tree_block_level(eb, bi, level);
2299                         btrfs_set_tree_block_key(eb, bi, &copy_key);
2300                 }
2301                 btrfs_mark_buffer_dirty(eb);
2302                 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2303                 btrfs_update_block_group(extent_root, bytenr, node_size, 1, 0);
2304
2305                 nrefs->refs[level] = 0;
2306                 nrefs->full_backref[level] =
2307                         flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2308                 btrfs_release_path(&path);
2309         }
2310
2311         if (level < root_level && nrefs->full_backref[level + 1] &&
2312             owner != root->objectid)
2313                 parent = nrefs->bytenr[level + 1];
2314
2315         /* increase the ref */
2316         ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2317                         parent, root->objectid, level, 0);
2318
2319         nrefs->refs[level]++;
2320 out:
2321         btrfs_release_path(&path);
2322         if (ret) {
2323                 error(
2324         "failed to repair tree block ref start %llu root %llu due to %s",
2325                       bytenr, root->objectid, strerror(-ret));
2326         } else {
2327                 printf("Added one tree block ref start %llu %s %llu\n",
2328                        bytenr, parent ? "parent" : "root",
2329                        parent ? parent : root->objectid);
2330                 err &= ~BACKREF_MISSING;
2331         }
2332
2333         return err;
2334 }
2335
2336 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2337                             unsigned int ext_ref);
2338 static int check_tree_block_ref(struct btrfs_root *root,
2339                                 struct extent_buffer *eb, u64 bytenr,
2340                                 int level, u64 owner, struct node_refs *nrefs);
2341 static int check_leaf_items(struct btrfs_trans_handle *trans,
2342                             struct btrfs_root *root, struct btrfs_path *path,
2343                             struct node_refs *nrefs, int account_bytes);
2344
2345 /*
2346  * @trans      just for lowmem repair mode
2347  * @check_all  if not 0 then check all tree block backrefs and items
2348  *             0 then just check relationship of items in fs tree(s)
2349  *
2350  * Returns >0  Found error, should continue
2351  * Returns <0  Fatal error, must exit the whole check
2352  * Returns 0   No errors found
2353  */
2354 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2355                              struct btrfs_root *root, struct btrfs_path *path,
2356                              int *level, struct node_refs *nrefs, int ext_ref,
2357                              int check_all)
2358
2359 {
2360         enum btrfs_tree_block_status status;
2361         u64 bytenr;
2362         u64 ptr_gen;
2363         struct btrfs_fs_info *fs_info = root->fs_info;
2364         struct extent_buffer *next;
2365         struct extent_buffer *cur;
2366         int ret;
2367         int err = 0;
2368         int check;
2369         int account_file_data = 0;
2370
2371         WARN_ON(*level < 0);
2372         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2373
2374         ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2375                                 path->nodes[*level], nrefs, *level, check_all);
2376         if (ret < 0)
2377                 return ret;
2378
2379         while (*level >= 0) {
2380                 WARN_ON(*level < 0);
2381                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2382                 cur = path->nodes[*level];
2383                 bytenr = btrfs_header_bytenr(cur);
2384                 check = nrefs->need_check[*level];
2385
2386                 if (btrfs_header_level(cur) != *level)
2387                         WARN_ON(1);
2388                /*
2389                 * Update bytes accounting and check tree block ref
2390                 * NOTE: Doing accounting and check before checking nritems
2391                 * is necessary because of empty node/leaf.
2392                 */
2393                 if ((check_all && !nrefs->checked[*level]) ||
2394                     (!check_all && nrefs->need_check[*level])) {
2395                         ret = check_tree_block_ref(root, cur,
2396                            btrfs_header_bytenr(cur), btrfs_header_level(cur),
2397                            btrfs_header_owner(cur), nrefs);
2398
2399                         if (repair && ret)
2400                                 ret = repair_tree_block_ref(trans, root,
2401                                     path->nodes[*level], nrefs, *level, ret);
2402                         err |= ret;
2403
2404                         if (check_all && nrefs->need_check[*level] &&
2405                                 nrefs->refs[*level]) {
2406                                 account_bytes(root, path, *level);
2407                                 account_file_data = 1;
2408                         }
2409                         nrefs->checked[*level] = 1;
2410                 }
2411
2412                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2413                         break;
2414
2415                 /* Don't forgot to check leaf/node validation */
2416                 if (*level == 0) {
2417                         /* skip duplicate check */
2418                         if (check || !check_all) {
2419                                 ret = btrfs_check_leaf(root, NULL, cur);
2420                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2421                                         err |= -EIO;
2422                                         break;
2423                                 }
2424                         }
2425
2426                         ret = 0;
2427                         if (!check_all)
2428                                 ret = process_one_leaf_v2(root, path, nrefs,
2429                                                           level, ext_ref);
2430                         else
2431                                 ret = check_leaf_items(trans, root, path,
2432                                                nrefs, account_file_data);
2433                         err |= ret;
2434                         break;
2435                 } else {
2436                         if (check || !check_all) {
2437                                 ret = btrfs_check_node(root, NULL, cur);
2438                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2439                                         err |= -EIO;
2440                                         break;
2441                                 }
2442                         }
2443                 }
2444
2445                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2446                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2447
2448                 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2449                                         check_all);
2450                 if (ret < 0)
2451                         break;
2452                 /*
2453                  * check all trees in check_chunks_and_extent_v2
2454                  * check shared node once in check_fs_roots
2455                  */
2456                 if (!check_all && !nrefs->need_check[*level - 1]) {
2457                         path->slots[*level]++;
2458                         continue;
2459                 }
2460
2461                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2462                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2463                         free_extent_buffer(next);
2464                         reada_walk_down(root, cur, path->slots[*level]);
2465                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2466                         if (!extent_buffer_uptodate(next)) {
2467                                 struct btrfs_key node_key;
2468
2469                                 btrfs_node_key_to_cpu(path->nodes[*level],
2470                                                       &node_key,
2471                                                       path->slots[*level]);
2472                                 btrfs_add_corrupt_extent_record(fs_info,
2473                                         &node_key, path->nodes[*level]->start,
2474                                         fs_info->nodesize, *level);
2475                                 err |= -EIO;
2476                                 break;
2477                         }
2478                 }
2479
2480                 ret = check_child_node(cur, path->slots[*level], next);
2481                 err |= ret;
2482                 if (ret < 0) 
2483                         break;
2484
2485                 if (btrfs_is_leaf(next))
2486                         status = btrfs_check_leaf(root, NULL, next);
2487                 else
2488                         status = btrfs_check_node(root, NULL, next);
2489                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2490                         free_extent_buffer(next);
2491                         err |= -EIO;
2492                         break;
2493                 }
2494
2495                 *level = *level - 1;
2496                 free_extent_buffer(path->nodes[*level]);
2497                 path->nodes[*level] = next;
2498                 path->slots[*level] = 0;
2499                 account_file_data = 0;
2500
2501                 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
2502         }
2503         return err;
2504 }
2505
2506 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2507                         struct walk_control *wc, int *level)
2508 {
2509         int i;
2510         struct extent_buffer *leaf;
2511
2512         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2513                 leaf = path->nodes[i];
2514                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2515                         path->slots[i]++;
2516                         *level = i;
2517                         return 0;
2518                 } else {
2519                         free_extent_buffer(path->nodes[*level]);
2520                         path->nodes[*level] = NULL;
2521                         BUG_ON(*level > wc->active_node);
2522                         if (*level == wc->active_node)
2523                                 leave_shared_node(root, wc, *level);
2524                         *level = i + 1;
2525                 }
2526         }
2527         return 1;
2528 }
2529
2530 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2531                            int *level)
2532 {
2533         int i;
2534         struct extent_buffer *leaf;
2535
2536         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2537                 leaf = path->nodes[i];
2538                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2539                         path->slots[i]++;
2540                         *level = i;
2541                         return 0;
2542                 } else {
2543                         free_extent_buffer(path->nodes[*level]);
2544                         path->nodes[*level] = NULL;
2545                         *level = i + 1;
2546                 }
2547         }
2548         return 1;
2549 }
2550
2551 static int check_root_dir(struct inode_record *rec)
2552 {
2553         struct inode_backref *backref;
2554         int ret = -1;
2555
2556         if (!rec->found_inode_item || rec->errors)
2557                 goto out;
2558         if (rec->nlink != 1 || rec->found_link != 0)
2559                 goto out;
2560         if (list_empty(&rec->backrefs))
2561                 goto out;
2562         backref = to_inode_backref(rec->backrefs.next);
2563         if (!backref->found_inode_ref)
2564                 goto out;
2565         if (backref->index != 0 || backref->namelen != 2 ||
2566             memcmp(backref->name, "..", 2))
2567                 goto out;
2568         if (backref->found_dir_index || backref->found_dir_item)
2569                 goto out;
2570         ret = 0;
2571 out:
2572         return ret;
2573 }
2574
2575 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2576                               struct btrfs_root *root, struct btrfs_path *path,
2577                               struct inode_record *rec)
2578 {
2579         struct btrfs_inode_item *ei;
2580         struct btrfs_key key;
2581         int ret;
2582
2583         key.objectid = rec->ino;
2584         key.type = BTRFS_INODE_ITEM_KEY;
2585         key.offset = (u64)-1;
2586
2587         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2588         if (ret < 0)
2589                 goto out;
2590         if (ret) {
2591                 if (!path->slots[0]) {
2592                         ret = -ENOENT;
2593                         goto out;
2594                 }
2595                 path->slots[0]--;
2596                 ret = 0;
2597         }
2598         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2599         if (key.objectid != rec->ino) {
2600                 ret = -ENOENT;
2601                 goto out;
2602         }
2603
2604         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2605                             struct btrfs_inode_item);
2606         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2607         btrfs_mark_buffer_dirty(path->nodes[0]);
2608         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2609         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2610                root->root_key.objectid);
2611 out:
2612         btrfs_release_path(path);
2613         return ret;
2614 }
2615
2616 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2617                                     struct btrfs_root *root,
2618                                     struct btrfs_path *path,
2619                                     struct inode_record *rec)
2620 {
2621         int ret;
2622
2623         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2624         btrfs_release_path(path);
2625         if (!ret)
2626                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2627         return ret;
2628 }
2629
2630 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2631                                struct btrfs_root *root,
2632                                struct btrfs_path *path,
2633                                struct inode_record *rec)
2634 {
2635         struct btrfs_inode_item *ei;
2636         struct btrfs_key key;
2637         int ret = 0;
2638
2639         key.objectid = rec->ino;
2640         key.type = BTRFS_INODE_ITEM_KEY;
2641         key.offset = 0;
2642
2643         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2644         if (ret) {
2645                 if (ret > 0)
2646                         ret = -ENOENT;
2647                 goto out;
2648         }
2649
2650         /* Since ret == 0, no need to check anything */
2651         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2652                             struct btrfs_inode_item);
2653         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2654         btrfs_mark_buffer_dirty(path->nodes[0]);
2655         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2656         printf("reset nbytes for ino %llu root %llu\n",
2657                rec->ino, root->root_key.objectid);
2658 out:
2659         btrfs_release_path(path);
2660         return ret;
2661 }
2662
2663 static int add_missing_dir_index(struct btrfs_root *root,
2664                                  struct cache_tree *inode_cache,
2665                                  struct inode_record *rec,
2666                                  struct inode_backref *backref)
2667 {
2668         struct btrfs_path path;
2669         struct btrfs_trans_handle *trans;
2670         struct btrfs_dir_item *dir_item;
2671         struct extent_buffer *leaf;
2672         struct btrfs_key key;
2673         struct btrfs_disk_key disk_key;
2674         struct inode_record *dir_rec;
2675         unsigned long name_ptr;
2676         u32 data_size = sizeof(*dir_item) + backref->namelen;
2677         int ret;
2678
2679         trans = btrfs_start_transaction(root, 1);
2680         if (IS_ERR(trans))
2681                 return PTR_ERR(trans);
2682
2683         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2684                 (unsigned long long)rec->ino);
2685
2686         btrfs_init_path(&path);
2687         key.objectid = backref->dir;
2688         key.type = BTRFS_DIR_INDEX_KEY;
2689         key.offset = backref->index;
2690         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2691         BUG_ON(ret);
2692
2693         leaf = path.nodes[0];
2694         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2695
2696         disk_key.objectid = cpu_to_le64(rec->ino);
2697         disk_key.type = BTRFS_INODE_ITEM_KEY;
2698         disk_key.offset = 0;
2699
2700         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2701         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2702         btrfs_set_dir_data_len(leaf, dir_item, 0);
2703         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2704         name_ptr = (unsigned long)(dir_item + 1);
2705         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2706         btrfs_mark_buffer_dirty(leaf);
2707         btrfs_release_path(&path);
2708         btrfs_commit_transaction(trans, root);
2709
2710         backref->found_dir_index = 1;
2711         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2712         BUG_ON(IS_ERR(dir_rec));
2713         if (!dir_rec)
2714                 return 0;
2715         dir_rec->found_size += backref->namelen;
2716         if (dir_rec->found_size == dir_rec->isize &&
2717             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2718                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2719         if (dir_rec->found_size != dir_rec->isize)
2720                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2721
2722         return 0;
2723 }
2724
2725 static int delete_dir_index(struct btrfs_root *root,
2726                             struct inode_backref *backref)
2727 {
2728         struct btrfs_trans_handle *trans;
2729         struct btrfs_dir_item *di;
2730         struct btrfs_path path;
2731         int ret = 0;
2732
2733         trans = btrfs_start_transaction(root, 1);
2734         if (IS_ERR(trans))
2735                 return PTR_ERR(trans);
2736
2737         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2738                 (unsigned long long)backref->dir,
2739                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2740                 (unsigned long long)root->objectid);
2741
2742         btrfs_init_path(&path);
2743         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2744                                     backref->name, backref->namelen,
2745                                     backref->index, -1);
2746         if (IS_ERR(di)) {
2747                 ret = PTR_ERR(di);
2748                 btrfs_release_path(&path);
2749                 btrfs_commit_transaction(trans, root);
2750                 if (ret == -ENOENT)
2751                         return 0;
2752                 return ret;
2753         }
2754
2755         if (!di)
2756                 ret = btrfs_del_item(trans, root, &path);
2757         else
2758                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2759         BUG_ON(ret);
2760         btrfs_release_path(&path);
2761         btrfs_commit_transaction(trans, root);
2762         return ret;
2763 }
2764
2765 static int __create_inode_item(struct btrfs_trans_handle *trans,
2766                                struct btrfs_root *root, u64 ino, u64 size,
2767                                u64 nbytes, u64 nlink, u32 mode)
2768 {
2769         struct btrfs_inode_item ii;
2770         time_t now = time(NULL);
2771         int ret;
2772
2773         btrfs_set_stack_inode_size(&ii, size);
2774         btrfs_set_stack_inode_nbytes(&ii, nbytes);
2775         btrfs_set_stack_inode_nlink(&ii, nlink);
2776         btrfs_set_stack_inode_mode(&ii, mode);
2777         btrfs_set_stack_inode_generation(&ii, trans->transid);
2778         btrfs_set_stack_timespec_nsec(&ii.atime, 0);
2779         btrfs_set_stack_timespec_sec(&ii.ctime, now);
2780         btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
2781         btrfs_set_stack_timespec_sec(&ii.mtime, now);
2782         btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
2783         btrfs_set_stack_timespec_sec(&ii.otime, 0);
2784         btrfs_set_stack_timespec_nsec(&ii.otime, 0);
2785
2786         ret = btrfs_insert_inode(trans, root, ino, &ii);
2787         ASSERT(!ret);
2788
2789         warning("root %llu inode %llu recreating inode item, this may "
2790                 "be incomplete, please check permissions and content after "
2791                 "the fsck completes.\n", (unsigned long long)root->objectid,
2792                 (unsigned long long)ino);
2793
2794         return 0;
2795 }
2796
2797 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2798                                     struct btrfs_root *root, u64 ino,
2799                                     u8 filetype)
2800 {
2801         u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2802
2803         return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
2804 }
2805
2806 static int create_inode_item(struct btrfs_root *root,
2807                              struct inode_record *rec, int root_dir)
2808 {
2809         struct btrfs_trans_handle *trans;
2810         u64 nlink = 0;
2811         u32 mode = 0;
2812         u64 size = 0;
2813         int ret;
2814
2815         trans = btrfs_start_transaction(root, 1);
2816         if (IS_ERR(trans)) {
2817                 ret = PTR_ERR(trans);
2818                 return ret;
2819         }
2820
2821         nlink = root_dir ? 1 : rec->found_link;
2822         if (rec->found_dir_item) {
2823                 if (rec->found_file_extent)
2824                         fprintf(stderr, "root %llu inode %llu has both a dir "
2825                                 "item and extents, unsure if it is a dir or a "
2826                                 "regular file so setting it as a directory\n",
2827                                 (unsigned long long)root->objectid,
2828                                 (unsigned long long)rec->ino);
2829                 mode = S_IFDIR | 0755;
2830                 size = rec->found_size;
2831         } else if (!rec->found_dir_item) {
2832                 size = rec->extent_end;
2833                 mode =  S_IFREG | 0755;
2834         }
2835
2836         ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
2837                                   nlink, mode);
2838         btrfs_commit_transaction(trans, root);
2839         return 0;
2840 }
2841
2842 static int repair_inode_backrefs(struct btrfs_root *root,
2843                                  struct inode_record *rec,
2844                                  struct cache_tree *inode_cache,
2845                                  int delete)
2846 {
2847         struct inode_backref *tmp, *backref;
2848         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2849         int ret = 0;
2850         int repaired = 0;
2851
2852         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2853                 if (!delete && rec->ino == root_dirid) {
2854                         if (!rec->found_inode_item) {
2855                                 ret = create_inode_item(root, rec, 1);
2856                                 if (ret)
2857                                         break;
2858                                 repaired++;
2859                         }
2860                 }
2861
2862                 /* Index 0 for root dir's are special, don't mess with it */
2863                 if (rec->ino == root_dirid && backref->index == 0)
2864                         continue;
2865
2866                 if (delete &&
2867                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2868                      (backref->found_dir_index && backref->found_inode_ref &&
2869                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2870                         ret = delete_dir_index(root, backref);
2871                         if (ret)
2872                                 break;
2873                         repaired++;
2874                         list_del(&backref->list);
2875                         free(backref);
2876                         continue;
2877                 }
2878
2879                 if (!delete && !backref->found_dir_index &&
2880                     backref->found_dir_item && backref->found_inode_ref) {
2881                         ret = add_missing_dir_index(root, inode_cache, rec,
2882                                                     backref);
2883                         if (ret)
2884                                 break;
2885                         repaired++;
2886                         if (backref->found_dir_item &&
2887                             backref->found_dir_index) {
2888                                 if (!backref->errors &&
2889                                     backref->found_inode_ref) {
2890                                         list_del(&backref->list);
2891                                         free(backref);
2892                                         continue;
2893                                 }
2894                         }
2895                 }
2896
2897                 if (!delete && (!backref->found_dir_index &&
2898                                 !backref->found_dir_item &&
2899                                 backref->found_inode_ref)) {
2900                         struct btrfs_trans_handle *trans;
2901                         struct btrfs_key location;
2902
2903                         ret = check_dir_conflict(root, backref->name,
2904                                                  backref->namelen,
2905                                                  backref->dir,
2906                                                  backref->index);
2907                         if (ret) {
2908                                 /*
2909                                  * let nlink fixing routine to handle it,
2910                                  * which can do it better.
2911                                  */
2912                                 ret = 0;
2913                                 break;
2914                         }
2915                         location.objectid = rec->ino;
2916                         location.type = BTRFS_INODE_ITEM_KEY;
2917                         location.offset = 0;
2918
2919                         trans = btrfs_start_transaction(root, 1);
2920                         if (IS_ERR(trans)) {
2921                                 ret = PTR_ERR(trans);
2922                                 break;
2923                         }
2924                         fprintf(stderr, "adding missing dir index/item pair "
2925                                 "for inode %llu\n",
2926                                 (unsigned long long)rec->ino);
2927                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2928                                                     backref->namelen,
2929                                                     backref->dir, &location,
2930                                                     imode_to_type(rec->imode),
2931                                                     backref->index);
2932                         BUG_ON(ret);
2933                         btrfs_commit_transaction(trans, root);
2934                         repaired++;
2935                 }
2936
2937                 if (!delete && (backref->found_inode_ref &&
2938                                 backref->found_dir_index &&
2939                                 backref->found_dir_item &&
2940                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2941                                 !rec->found_inode_item)) {
2942                         ret = create_inode_item(root, rec, 0);
2943                         if (ret)
2944                                 break;
2945                         repaired++;
2946                 }
2947
2948         }
2949         return ret ? ret : repaired;
2950 }
2951
2952 /*
2953  * To determine the file type for nlink/inode_item repair
2954  *
2955  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2956  * Return -ENOENT if file type is not found.
2957  */
2958 static int find_file_type(struct inode_record *rec, u8 *type)
2959 {
2960         struct inode_backref *backref;
2961
2962         /* For inode item recovered case */
2963         if (rec->found_inode_item) {
2964                 *type = imode_to_type(rec->imode);
2965                 return 0;
2966         }
2967
2968         list_for_each_entry(backref, &rec->backrefs, list) {
2969                 if (backref->found_dir_index || backref->found_dir_item) {
2970                         *type = backref->filetype;
2971                         return 0;
2972                 }
2973         }
2974         return -ENOENT;
2975 }
2976
2977 /*
2978  * To determine the file name for nlink repair
2979  *
2980  * Return 0 if file name is found, set name and namelen.
2981  * Return -ENOENT if file name is not found.
2982  */
2983 static int find_file_name(struct inode_record *rec,
2984                           char *name, int *namelen)
2985 {
2986         struct inode_backref *backref;
2987
2988         list_for_each_entry(backref, &rec->backrefs, list) {
2989                 if (backref->found_dir_index || backref->found_dir_item ||
2990                     backref->found_inode_ref) {
2991                         memcpy(name, backref->name, backref->namelen);
2992                         *namelen = backref->namelen;
2993                         return 0;
2994                 }
2995         }
2996         return -ENOENT;
2997 }
2998
2999 /* Reset the nlink of the inode to the correct one */
3000 static int reset_nlink(struct btrfs_trans_handle *trans,
3001                        struct btrfs_root *root,
3002                        struct btrfs_path *path,
3003                        struct inode_record *rec)
3004 {
3005         struct inode_backref *backref;
3006         struct inode_backref *tmp;
3007         struct btrfs_key key;
3008         struct btrfs_inode_item *inode_item;
3009         int ret = 0;
3010
3011         /* We don't believe this either, reset it and iterate backref */
3012         rec->found_link = 0;
3013
3014         /* Remove all backref including the valid ones */
3015         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3016                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
3017                                    backref->index, backref->name,
3018                                    backref->namelen, 0);
3019                 if (ret < 0)
3020                         goto out;
3021
3022                 /* remove invalid backref, so it won't be added back */
3023                 if (!(backref->found_dir_index &&
3024                       backref->found_dir_item &&
3025                       backref->found_inode_ref)) {
3026                         list_del(&backref->list);
3027                         free(backref);
3028                 } else {
3029                         rec->found_link++;
3030                 }
3031         }
3032
3033         /* Set nlink to 0 */
3034         key.objectid = rec->ino;
3035         key.type = BTRFS_INODE_ITEM_KEY;
3036         key.offset = 0;
3037         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3038         if (ret < 0)
3039                 goto out;
3040         if (ret > 0) {
3041                 ret = -ENOENT;
3042                 goto out;
3043         }
3044         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3045                                     struct btrfs_inode_item);
3046         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
3047         btrfs_mark_buffer_dirty(path->nodes[0]);
3048         btrfs_release_path(path);
3049
3050         /*
3051          * Add back valid inode_ref/dir_item/dir_index,
3052          * add_link() will handle the nlink inc, so new nlink must be correct
3053          */
3054         list_for_each_entry(backref, &rec->backrefs, list) {
3055                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
3056                                      backref->name, backref->namelen,
3057                                      backref->filetype, &backref->index, 1, 0);
3058                 if (ret < 0)
3059                         goto out;
3060         }
3061 out:
3062         btrfs_release_path(path);
3063         return ret;
3064 }
3065
3066 static int get_highest_inode(struct btrfs_trans_handle *trans,
3067                                 struct btrfs_root *root,
3068                                 struct btrfs_path *path,
3069                                 u64 *highest_ino)
3070 {
3071         struct btrfs_key key, found_key;
3072         int ret;
3073
3074         btrfs_init_path(path);
3075         key.objectid = BTRFS_LAST_FREE_OBJECTID;
3076         key.offset = -1;
3077         key.type = BTRFS_INODE_ITEM_KEY;
3078         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3079         if (ret == 1) {
3080                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3081                                 path->slots[0] - 1);
3082                 *highest_ino = found_key.objectid;
3083                 ret = 0;
3084         }
3085         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3086                 ret = -EOVERFLOW;
3087         btrfs_release_path(path);
3088         return ret;
3089 }
3090
3091 /*
3092  * Link inode to dir 'lost+found'. Increase @ref_count.
3093  *
3094  * Returns 0 means success.
3095  * Returns <0 means failure.
3096  */
3097 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3098                                    struct btrfs_root *root,
3099                                    struct btrfs_path *path,
3100                                    u64 ino, char *namebuf, u32 name_len,
3101                                    u8 filetype, u64 *ref_count)
3102 {
3103         char *dir_name = "lost+found";
3104         u64 lost_found_ino;
3105         int ret;
3106         u32 mode = 0700;
3107
3108         btrfs_release_path(path);
3109         ret = get_highest_inode(trans, root, path, &lost_found_ino);
3110         if (ret < 0)
3111                 goto out;
3112         lost_found_ino++;
3113
3114         ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3115                           BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3116                           mode);
3117         if (ret < 0) {
3118                 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3119                 goto out;
3120         }
3121         ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3122                              namebuf, name_len, filetype, NULL, 1, 0);
3123         /*
3124          * Add ".INO" suffix several times to handle case where
3125          * "FILENAME.INO" is already taken by another file.
3126          */
3127         while (ret == -EEXIST) {
3128                 /*
3129                  * Conflicting file name, add ".INO" as suffix * +1 for '.'
3130                  */
3131                 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3132                         ret = -EFBIG;
3133                         goto out;
3134                 }
3135                 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3136                          ".%llu", ino);
3137                 name_len += count_digits(ino) + 1;
3138                 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3139                                      name_len, filetype, NULL, 1, 0);
3140         }
3141         if (ret < 0) {
3142                 error("failed to link the inode %llu to %s dir: %s",
3143                       ino, dir_name, strerror(-ret));
3144                 goto out;
3145         }
3146
3147         ++*ref_count;
3148         printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3149                name_len, namebuf, dir_name);
3150 out:
3151         btrfs_release_path(path);
3152         if (ret)
3153                 error("failed to move file '%.*s' to '%s' dir", name_len,
3154                                 namebuf, dir_name);
3155         return ret;
3156 }
3157
3158 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3159                                struct btrfs_root *root,
3160                                struct btrfs_path *path,
3161                                struct inode_record *rec)
3162 {
3163         char namebuf[BTRFS_NAME_LEN] = {0};
3164         u8 type = 0;
3165         int namelen = 0;
3166         int name_recovered = 0;
3167         int type_recovered = 0;
3168         int ret = 0;
3169
3170         /*
3171          * Get file name and type first before these invalid inode ref
3172          * are deleted by remove_all_invalid_backref()
3173          */
3174         name_recovered = !find_file_name(rec, namebuf, &namelen);
3175         type_recovered = !find_file_type(rec, &type);
3176
3177         if (!name_recovered) {
3178                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3179                        rec->ino, rec->ino);
3180                 namelen = count_digits(rec->ino);
3181                 sprintf(namebuf, "%llu", rec->ino);
3182                 name_recovered = 1;
3183         }
3184         if (!type_recovered) {
3185                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3186                        rec->ino);
3187                 type = BTRFS_FT_REG_FILE;
3188                 type_recovered = 1;
3189         }
3190
3191         ret = reset_nlink(trans, root, path, rec);
3192         if (ret < 0) {
3193                 fprintf(stderr,
3194                         "Failed to reset nlink for inode %llu: %s\n",
3195                         rec->ino, strerror(-ret));
3196                 goto out;
3197         }
3198
3199         if (rec->found_link == 0) {
3200                 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3201                                               namebuf, namelen, type,
3202                                               (u64 *)&rec->found_link);
3203                 if (ret)
3204                         goto out;
3205         }
3206         printf("Fixed the nlink of inode %llu\n", rec->ino);
3207 out:
3208         /*
3209          * Clear the flag anyway, or we will loop forever for the same inode
3210          * as it will not be removed from the bad inode list and the dead loop
3211          * happens.
3212          */
3213         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3214         btrfs_release_path(path);
3215         return ret;
3216 }
3217
3218 /*
3219  * Check if there is any normal(reg or prealloc) file extent for given
3220  * ino.
3221  * This is used to determine the file type when neither its dir_index/item or
3222  * inode_item exists.
3223  *
3224  * This will *NOT* report error, if any error happens, just consider it does
3225  * not have any normal file extent.
3226  */
3227 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3228 {
3229         struct btrfs_path path;
3230         struct btrfs_key key;
3231         struct btrfs_key found_key;
3232         struct btrfs_file_extent_item *fi;
3233         u8 type;
3234         int ret = 0;
3235
3236         btrfs_init_path(&path);
3237         key.objectid = ino;
3238         key.type = BTRFS_EXTENT_DATA_KEY;
3239         key.offset = 0;
3240
3241         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3242         if (ret < 0) {
3243                 ret = 0;
3244                 goto out;
3245         }
3246         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3247                 ret = btrfs_next_leaf(root, &path);
3248                 if (ret) {
3249                         ret = 0;
3250                         goto out;
3251                 }
3252         }
3253         while (1) {
3254                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3255                                       path.slots[0]);
3256                 if (found_key.objectid != ino ||
3257                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3258                         break;
3259                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3260                                     struct btrfs_file_extent_item);
3261                 type = btrfs_file_extent_type(path.nodes[0], fi);
3262                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3263                         ret = 1;
3264                         goto out;
3265                 }
3266         }
3267 out:
3268         btrfs_release_path(&path);
3269         return ret;
3270 }
3271
3272 static u32 btrfs_type_to_imode(u8 type)
3273 {
3274         static u32 imode_by_btrfs_type[] = {
3275                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3276                 [BTRFS_FT_DIR]          = S_IFDIR,
3277                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3278                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3279                 [BTRFS_FT_FIFO]         = S_IFIFO,
3280                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3281                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3282         };
3283
3284         return imode_by_btrfs_type[(type)];
3285 }
3286
3287 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3288                                 struct btrfs_root *root,
3289                                 struct btrfs_path *path,
3290                                 struct inode_record *rec)
3291 {
3292         u8 filetype;
3293         u32 mode = 0700;
3294         int type_recovered = 0;
3295         int ret = 0;
3296
3297         printf("Trying to rebuild inode:%llu\n", rec->ino);
3298
3299         type_recovered = !find_file_type(rec, &filetype);
3300
3301         /*
3302          * Try to determine inode type if type not found.
3303          *
3304          * For found regular file extent, it must be FILE.
3305          * For found dir_item/index, it must be DIR.
3306          *
3307          * For undetermined one, use FILE as fallback.
3308          *
3309          * TODO:
3310          * 1. If found backref(inode_index/item is already handled) to it,
3311          *    it must be DIR.
3312          *    Need new inode-inode ref structure to allow search for that.
3313          */
3314         if (!type_recovered) {
3315                 if (rec->found_file_extent &&
3316                     find_normal_file_extent(root, rec->ino)) {
3317                         type_recovered = 1;
3318                         filetype = BTRFS_FT_REG_FILE;
3319                 } else if (rec->found_dir_item) {
3320                         type_recovered = 1;
3321                         filetype = BTRFS_FT_DIR;
3322                 } else if (!list_empty(&rec->orphan_extents)) {
3323                         type_recovered = 1;
3324                         filetype = BTRFS_FT_REG_FILE;
3325                 } else{
3326                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3327                                rec->ino);
3328                         type_recovered = 1;
3329                         filetype = BTRFS_FT_REG_FILE;
3330                 }
3331         }
3332
3333         ret = btrfs_new_inode(trans, root, rec->ino,
3334                               mode | btrfs_type_to_imode(filetype));
3335         if (ret < 0)
3336                 goto out;
3337
3338         /*
3339          * Here inode rebuild is done, we only rebuild the inode item,
3340          * don't repair the nlink(like move to lost+found).
3341          * That is the job of nlink repair.
3342          *
3343          * We just fill the record and return
3344          */
3345         rec->found_dir_item = 1;
3346         rec->imode = mode | btrfs_type_to_imode(filetype);
3347         rec->nlink = 0;
3348         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3349         /* Ensure the inode_nlinks repair function will be called */
3350         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3351 out:
3352         return ret;
3353 }
3354
3355 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3356                                       struct btrfs_root *root,
3357                                       struct btrfs_path *path,
3358                                       struct inode_record *rec)
3359 {
3360         struct orphan_data_extent *orphan;
3361         struct orphan_data_extent *tmp;
3362         int ret = 0;
3363
3364         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3365                 /*
3366                  * Check for conflicting file extents
3367                  *
3368                  * Here we don't know whether the extents is compressed or not,
3369                  * so we can only assume it not compressed nor data offset,
3370                  * and use its disk_len as extent length.
3371                  */
3372                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3373                                        orphan->offset, orphan->disk_len, 0);
3374                 btrfs_release_path(path);
3375                 if (ret < 0)
3376                         goto out;
3377                 if (!ret) {
3378                         fprintf(stderr,
3379                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3380                                 orphan->disk_bytenr, orphan->disk_len);
3381                         ret = btrfs_free_extent(trans,
3382                                         root->fs_info->extent_root,
3383                                         orphan->disk_bytenr, orphan->disk_len,
3384                                         0, root->objectid, orphan->objectid,
3385                                         orphan->offset);
3386                         if (ret < 0)
3387                                 goto out;
3388                 }
3389                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3390                                 orphan->offset, orphan->disk_bytenr,
3391                                 orphan->disk_len, orphan->disk_len);
3392                 if (ret < 0)
3393                         goto out;
3394
3395                 /* Update file size info */
3396                 rec->found_size += orphan->disk_len;
3397                 if (rec->found_size == rec->nbytes)
3398                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3399
3400                 /* Update the file extent hole info too */
3401                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3402                                            orphan->disk_len);
3403                 if (ret < 0)
3404                         goto out;
3405                 if (RB_EMPTY_ROOT(&rec->holes))
3406                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3407
3408                 list_del(&orphan->list);
3409                 free(orphan);
3410         }
3411         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3412 out:
3413         return ret;
3414 }
3415
3416 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3417                                         struct btrfs_root *root,
3418                                         struct btrfs_path *path,
3419                                         struct inode_record *rec)
3420 {
3421         struct rb_node *node;
3422         struct file_extent_hole *hole;
3423         int found = 0;
3424         int ret = 0;
3425
3426         node = rb_first(&rec->holes);
3427
3428         while (node) {
3429                 found = 1;
3430                 hole = rb_entry(node, struct file_extent_hole, node);
3431                 ret = btrfs_punch_hole(trans, root, rec->ino,
3432                                        hole->start, hole->len);
3433                 if (ret < 0)
3434                         goto out;
3435                 ret = del_file_extent_hole(&rec->holes, hole->start,
3436                                            hole->len);
3437                 if (ret < 0)
3438                         goto out;
3439                 if (RB_EMPTY_ROOT(&rec->holes))
3440                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3441                 node = rb_first(&rec->holes);
3442         }
3443         /* special case for a file losing all its file extent */
3444         if (!found) {
3445                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3446                                        round_up(rec->isize,
3447                                                 root->fs_info->sectorsize));
3448                 if (ret < 0)
3449                         goto out;
3450         }
3451         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3452                rec->ino, root->objectid);
3453 out:
3454         return ret;
3455 }
3456
3457 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3458 {
3459         struct btrfs_trans_handle *trans;
3460         struct btrfs_path path;
3461         int ret = 0;
3462
3463         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3464                              I_ERR_NO_ORPHAN_ITEM |
3465                              I_ERR_LINK_COUNT_WRONG |
3466                              I_ERR_NO_INODE_ITEM |
3467                              I_ERR_FILE_EXTENT_ORPHAN |
3468                              I_ERR_FILE_EXTENT_DISCOUNT|
3469                              I_ERR_FILE_NBYTES_WRONG)))
3470                 return rec->errors;
3471
3472         /*
3473          * For nlink repair, it may create a dir and add link, so
3474          * 2 for parent(256)'s dir_index and dir_item
3475          * 2 for lost+found dir's inode_item and inode_ref
3476          * 1 for the new inode_ref of the file
3477          * 2 for lost+found dir's dir_index and dir_item for the file
3478          */
3479         trans = btrfs_start_transaction(root, 7);
3480         if (IS_ERR(trans))
3481                 return PTR_ERR(trans);
3482
3483         btrfs_init_path(&path);
3484         if (rec->errors & I_ERR_NO_INODE_ITEM)
3485                 ret = repair_inode_no_item(trans, root, &path, rec);
3486         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3487                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3488         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3489                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3490         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3491                 ret = repair_inode_isize(trans, root, &path, rec);
3492         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3493                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3494         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3495                 ret = repair_inode_nlinks(trans, root, &path, rec);
3496         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3497                 ret = repair_inode_nbytes(trans, root, &path, rec);
3498         btrfs_commit_transaction(trans, root);
3499         btrfs_release_path(&path);
3500         return ret;
3501 }
3502
3503 static int check_inode_recs(struct btrfs_root *root,
3504                             struct cache_tree *inode_cache)
3505 {
3506         struct cache_extent *cache;
3507         struct ptr_node *node;
3508         struct inode_record *rec;
3509         struct inode_backref *backref;
3510         int stage = 0;
3511         int ret = 0;
3512         int err = 0;
3513         u64 error = 0;
3514         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3515
3516         if (btrfs_root_refs(&root->root_item) == 0) {
3517                 if (!cache_tree_empty(inode_cache))
3518                         fprintf(stderr, "warning line %d\n", __LINE__);
3519                 return 0;
3520         }
3521
3522         /*
3523          * We need to repair backrefs first because we could change some of the
3524          * errors in the inode recs.
3525          *
3526          * We also need to go through and delete invalid backrefs first and then
3527          * add the correct ones second.  We do this because we may get EEXIST
3528          * when adding back the correct index because we hadn't yet deleted the
3529          * invalid index.
3530          *
3531          * For example, if we were missing a dir index then the directories
3532          * isize would be wrong, so if we fixed the isize to what we thought it
3533          * would be and then fixed the backref we'd still have a invalid fs, so
3534          * we need to add back the dir index and then check to see if the isize
3535          * is still wrong.
3536          */
3537         while (stage < 3) {
3538                 stage++;
3539                 if (stage == 3 && !err)
3540                         break;
3541
3542                 cache = search_cache_extent(inode_cache, 0);
3543                 while (repair && cache) {
3544                         node = container_of(cache, struct ptr_node, cache);
3545                         rec = node->data;
3546                         cache = next_cache_extent(cache);
3547
3548                         /* Need to free everything up and rescan */
3549                         if (stage == 3) {
3550                                 remove_cache_extent(inode_cache, &node->cache);
3551                                 free(node);
3552                                 free_inode_rec(rec);
3553                                 continue;
3554                         }
3555
3556                         if (list_empty(&rec->backrefs))
3557                                 continue;
3558
3559                         ret = repair_inode_backrefs(root, rec, inode_cache,
3560                                                     stage == 1);
3561                         if (ret < 0) {
3562                                 err = ret;
3563                                 stage = 2;
3564                                 break;
3565                         } if (ret > 0) {
3566                                 err = -EAGAIN;
3567                         }
3568                 }
3569         }
3570         if (err)
3571                 return err;
3572
3573         rec = get_inode_rec(inode_cache, root_dirid, 0);
3574         BUG_ON(IS_ERR(rec));
3575         if (rec) {
3576                 ret = check_root_dir(rec);
3577                 if (ret) {
3578                         fprintf(stderr, "root %llu root dir %llu error\n",
3579                                 (unsigned long long)root->root_key.objectid,
3580                                 (unsigned long long)root_dirid);
3581                         print_inode_error(root, rec);
3582                         error++;
3583                 }
3584         } else {
3585                 if (repair) {
3586                         struct btrfs_trans_handle *trans;
3587
3588                         trans = btrfs_start_transaction(root, 1);
3589                         if (IS_ERR(trans)) {
3590                                 err = PTR_ERR(trans);
3591                                 return err;
3592                         }
3593
3594                         fprintf(stderr,
3595                                 "root %llu missing its root dir, recreating\n",
3596                                 (unsigned long long)root->objectid);
3597
3598                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3599                         BUG_ON(ret);
3600
3601                         btrfs_commit_transaction(trans, root);
3602                         return -EAGAIN;
3603                 }
3604
3605                 fprintf(stderr, "root %llu root dir %llu not found\n",
3606                         (unsigned long long)root->root_key.objectid,
3607                         (unsigned long long)root_dirid);
3608         }
3609
3610         while (1) {
3611                 cache = search_cache_extent(inode_cache, 0);
3612                 if (!cache)
3613                         break;
3614                 node = container_of(cache, struct ptr_node, cache);
3615                 rec = node->data;
3616                 remove_cache_extent(inode_cache, &node->cache);
3617                 free(node);
3618                 if (rec->ino == root_dirid ||
3619                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3620                         free_inode_rec(rec);
3621                         continue;
3622                 }
3623
3624                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3625                         ret = check_orphan_item(root, rec->ino);
3626                         if (ret == 0)
3627                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3628                         if (can_free_inode_rec(rec)) {
3629                                 free_inode_rec(rec);
3630                                 continue;
3631                         }
3632                 }
3633
3634                 if (!rec->found_inode_item)
3635                         rec->errors |= I_ERR_NO_INODE_ITEM;
3636                 if (rec->found_link != rec->nlink)
3637                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3638                 if (repair) {
3639                         ret = try_repair_inode(root, rec);
3640                         if (ret == 0 && can_free_inode_rec(rec)) {
3641                                 free_inode_rec(rec);
3642                                 continue;
3643                         }
3644                         ret = 0;
3645                 }
3646
3647                 if (!(repair && ret == 0))
3648                         error++;
3649                 print_inode_error(root, rec);
3650                 list_for_each_entry(backref, &rec->backrefs, list) {
3651                         if (!backref->found_dir_item)
3652                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3653                         if (!backref->found_dir_index)
3654                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3655                         if (!backref->found_inode_ref)
3656                                 backref->errors |= REF_ERR_NO_INODE_REF;
3657                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3658                                 " namelen %u name %s filetype %d errors %x",
3659                                 (unsigned long long)backref->dir,
3660                                 (unsigned long long)backref->index,
3661                                 backref->namelen, backref->name,
3662                                 backref->filetype, backref->errors);
3663                         print_ref_error(backref->errors);
3664                 }
3665                 free_inode_rec(rec);
3666         }
3667         return (error > 0) ? -1 : 0;
3668 }
3669
3670 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3671                                         u64 objectid)
3672 {
3673         struct cache_extent *cache;
3674         struct root_record *rec = NULL;
3675         int ret;
3676
3677         cache = lookup_cache_extent(root_cache, objectid, 1);
3678         if (cache) {
3679                 rec = container_of(cache, struct root_record, cache);
3680         } else {
3681                 rec = calloc(1, sizeof(*rec));
3682                 if (!rec)
3683                         return ERR_PTR(-ENOMEM);
3684                 rec->objectid = objectid;
3685                 INIT_LIST_HEAD(&rec->backrefs);
3686                 rec->cache.start = objectid;
3687                 rec->cache.size = 1;
3688
3689                 ret = insert_cache_extent(root_cache, &rec->cache);
3690                 if (ret)
3691                         return ERR_PTR(-EEXIST);
3692         }
3693         return rec;
3694 }
3695
3696 static struct root_backref *get_root_backref(struct root_record *rec,
3697                                              u64 ref_root, u64 dir, u64 index,
3698                                              const char *name, int namelen)
3699 {
3700         struct root_backref *backref;
3701
3702         list_for_each_entry(backref, &rec->backrefs, list) {
3703                 if (backref->ref_root != ref_root || backref->dir != dir ||
3704                     backref->namelen != namelen)
3705                         continue;
3706                 if (memcmp(name, backref->name, namelen))
3707                         continue;
3708                 return backref;
3709         }
3710
3711         backref = calloc(1, sizeof(*backref) + namelen + 1);
3712         if (!backref)
3713                 return NULL;
3714         backref->ref_root = ref_root;
3715         backref->dir = dir;
3716         backref->index = index;
3717         backref->namelen = namelen;
3718         memcpy(backref->name, name, namelen);
3719         backref->name[namelen] = '\0';
3720         list_add_tail(&backref->list, &rec->backrefs);
3721         return backref;
3722 }
3723
3724 static void free_root_record(struct cache_extent *cache)
3725 {
3726         struct root_record *rec;
3727         struct root_backref *backref;
3728
3729         rec = container_of(cache, struct root_record, cache);
3730         while (!list_empty(&rec->backrefs)) {
3731                 backref = to_root_backref(rec->backrefs.next);
3732                 list_del(&backref->list);
3733                 free(backref);
3734         }
3735
3736         free(rec);
3737 }
3738
3739 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3740
3741 static int add_root_backref(struct cache_tree *root_cache,
3742                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3743                             const char *name, int namelen,
3744                             int item_type, int errors)
3745 {
3746         struct root_record *rec;
3747         struct root_backref *backref;
3748
3749         rec = get_root_rec(root_cache, root_id);
3750         BUG_ON(IS_ERR(rec));
3751         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3752         BUG_ON(!backref);
3753
3754         backref->errors |= errors;
3755
3756         if (item_type != BTRFS_DIR_ITEM_KEY) {
3757                 if (backref->found_dir_index || backref->found_back_ref ||
3758                     backref->found_forward_ref) {
3759                         if (backref->index != index)
3760                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3761                 } else {
3762                         backref->index = index;
3763                 }
3764         }
3765
3766         if (item_type == BTRFS_DIR_ITEM_KEY) {
3767                 if (backref->found_forward_ref)
3768                         rec->found_ref++;
3769                 backref->found_dir_item = 1;
3770         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3771                 backref->found_dir_index = 1;
3772         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3773                 if (backref->found_forward_ref)
3774                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3775                 else if (backref->found_dir_item)
3776                         rec->found_ref++;
3777                 backref->found_forward_ref = 1;
3778         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3779                 if (backref->found_back_ref)
3780                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3781                 backref->found_back_ref = 1;
3782         } else {
3783                 BUG_ON(1);
3784         }
3785
3786         if (backref->found_forward_ref && backref->found_dir_item)
3787                 backref->reachable = 1;
3788         return 0;
3789 }
3790
3791 static int merge_root_recs(struct btrfs_root *root,
3792                            struct cache_tree *src_cache,
3793                            struct cache_tree *dst_cache)
3794 {
3795         struct cache_extent *cache;
3796         struct ptr_node *node;
3797         struct inode_record *rec;
3798         struct inode_backref *backref;
3799         int ret = 0;
3800
3801         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3802                 free_inode_recs_tree(src_cache);
3803                 return 0;
3804         }
3805
3806         while (1) {
3807                 cache = search_cache_extent(src_cache, 0);
3808                 if (!cache)
3809                         break;
3810                 node = container_of(cache, struct ptr_node, cache);
3811                 rec = node->data;
3812                 remove_cache_extent(src_cache, &node->cache);
3813                 free(node);
3814
3815                 ret = is_child_root(root, root->objectid, rec->ino);
3816                 if (ret < 0)
3817                         break;
3818                 else if (ret == 0)
3819                         goto skip;
3820
3821                 list_for_each_entry(backref, &rec->backrefs, list) {
3822                         BUG_ON(backref->found_inode_ref);
3823                         if (backref->found_dir_item)
3824                                 add_root_backref(dst_cache, rec->ino,
3825                                         root->root_key.objectid, backref->dir,
3826                                         backref->index, backref->name,
3827                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3828                                         backref->errors);
3829                         if (backref->found_dir_index)
3830                                 add_root_backref(dst_cache, rec->ino,
3831                                         root->root_key.objectid, backref->dir,
3832                                         backref->index, backref->name,
3833                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3834                                         backref->errors);
3835                 }
3836 skip:
3837                 free_inode_rec(rec);
3838         }
3839         if (ret < 0)
3840                 return ret;
3841         return 0;
3842 }
3843
3844 static int check_root_refs(struct btrfs_root *root,
3845                            struct cache_tree *root_cache)
3846 {
3847         struct root_record *rec;
3848         struct root_record *ref_root;
3849         struct root_backref *backref;
3850         struct cache_extent *cache;
3851         int loop = 1;
3852         int ret;
3853         int error;
3854         int errors = 0;
3855
3856         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3857         BUG_ON(IS_ERR(rec));
3858         rec->found_ref = 1;
3859
3860         /* fixme: this can not detect circular references */
3861         while (loop) {
3862                 loop = 0;
3863                 cache = search_cache_extent(root_cache, 0);
3864                 while (1) {
3865                         if (!cache)
3866                                 break;
3867                         rec = container_of(cache, struct root_record, cache);
3868                         cache = next_cache_extent(cache);
3869
3870                         if (rec->found_ref == 0)
3871                                 continue;
3872
3873                         list_for_each_entry(backref, &rec->backrefs, list) {
3874                                 if (!backref->reachable)
3875                                         continue;
3876
3877                                 ref_root = get_root_rec(root_cache,
3878                                                         backref->ref_root);
3879                                 BUG_ON(IS_ERR(ref_root));
3880                                 if (ref_root->found_ref > 0)
3881                                         continue;
3882
3883                                 backref->reachable = 0;
3884                                 rec->found_ref--;
3885                                 if (rec->found_ref == 0)
3886                                         loop = 1;
3887                         }
3888                 }
3889         }
3890
3891         cache = search_cache_extent(root_cache, 0);
3892         while (1) {
3893                 if (!cache)
3894                         break;
3895                 rec = container_of(cache, struct root_record, cache);
3896                 cache = next_cache_extent(cache);
3897
3898                 if (rec->found_ref == 0 &&
3899                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3900                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3901                         ret = check_orphan_item(root->fs_info->tree_root,
3902                                                 rec->objectid);
3903                         if (ret == 0)
3904                                 continue;
3905
3906                         /*
3907                          * If we don't have a root item then we likely just have
3908                          * a dir item in a snapshot for this root but no actual
3909                          * ref key or anything so it's meaningless.
3910                          */
3911                         if (!rec->found_root_item)
3912                                 continue;
3913                         errors++;
3914                         fprintf(stderr, "fs tree %llu not referenced\n",
3915                                 (unsigned long long)rec->objectid);
3916                 }
3917
3918                 error = 0;
3919                 if (rec->found_ref > 0 && !rec->found_root_item)
3920                         error = 1;
3921                 list_for_each_entry(backref, &rec->backrefs, list) {
3922                         if (!backref->found_dir_item)
3923                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3924                         if (!backref->found_dir_index)
3925                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3926                         if (!backref->found_back_ref)
3927                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3928                         if (!backref->found_forward_ref)
3929                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3930                         if (backref->reachable && backref->errors)
3931                                 error = 1;
3932                 }
3933                 if (!error)
3934                         continue;
3935
3936                 errors++;
3937                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3938                         (unsigned long long)rec->objectid, rec->found_ref,
3939                          rec->found_root_item ? "" : "not found");
3940
3941                 list_for_each_entry(backref, &rec->backrefs, list) {
3942                         if (!backref->reachable)
3943                                 continue;
3944                         if (!backref->errors && rec->found_root_item)
3945                                 continue;
3946                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3947                                 " index %llu namelen %u name %s errors %x\n",
3948                                 (unsigned long long)backref->ref_root,
3949                                 (unsigned long long)backref->dir,
3950                                 (unsigned long long)backref->index,
3951                                 backref->namelen, backref->name,
3952                                 backref->errors);
3953                         print_ref_error(backref->errors);
3954                 }
3955         }
3956         return errors > 0 ? 1 : 0;
3957 }
3958
3959 static int process_root_ref(struct extent_buffer *eb, int slot,
3960                             struct btrfs_key *key,
3961                             struct cache_tree *root_cache)
3962 {
3963         u64 dirid;
3964         u64 index;
3965         u32 len;
3966         u32 name_len;
3967         struct btrfs_root_ref *ref;
3968         char namebuf[BTRFS_NAME_LEN];
3969         int error;
3970
3971         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3972
3973         dirid = btrfs_root_ref_dirid(eb, ref);
3974         index = btrfs_root_ref_sequence(eb, ref);
3975         name_len = btrfs_root_ref_name_len(eb, ref);
3976
3977         if (name_len <= BTRFS_NAME_LEN) {
3978                 len = name_len;
3979                 error = 0;
3980         } else {
3981                 len = BTRFS_NAME_LEN;
3982                 error = REF_ERR_NAME_TOO_LONG;
3983         }
3984         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3985
3986         if (key->type == BTRFS_ROOT_REF_KEY) {
3987                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3988                                  index, namebuf, len, key->type, error);
3989         } else {
3990                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3991                                  index, namebuf, len, key->type, error);
3992         }
3993         return 0;
3994 }
3995
3996 static void free_corrupt_block(struct cache_extent *cache)
3997 {
3998         struct btrfs_corrupt_block *corrupt;
3999
4000         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
4001         free(corrupt);
4002 }
4003
4004 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
4005
4006 /*
4007  * Repair the btree of the given root.
4008  *
4009  * The fix is to remove the node key in corrupt_blocks cache_tree.
4010  * and rebalance the tree.
4011  * After the fix, the btree should be writeable.
4012  */
4013 static int repair_btree(struct btrfs_root *root,
4014                         struct cache_tree *corrupt_blocks)
4015 {
4016         struct btrfs_trans_handle *trans;
4017         struct btrfs_path path;
4018         struct btrfs_corrupt_block *corrupt;
4019         struct cache_extent *cache;
4020         struct btrfs_key key;
4021         u64 offset;
4022         int level;
4023         int ret = 0;
4024
4025         if (cache_tree_empty(corrupt_blocks))
4026                 return 0;
4027
4028         trans = btrfs_start_transaction(root, 1);
4029         if (IS_ERR(trans)) {
4030                 ret = PTR_ERR(trans);
4031                 fprintf(stderr, "Error starting transaction: %s\n",
4032                         strerror(-ret));
4033                 return ret;
4034         }
4035         btrfs_init_path(&path);
4036         cache = first_cache_extent(corrupt_blocks);
4037         while (cache) {
4038                 corrupt = container_of(cache, struct btrfs_corrupt_block,
4039                                        cache);
4040                 level = corrupt->level;
4041                 path.lowest_level = level;
4042                 key.objectid = corrupt->key.objectid;
4043                 key.type = corrupt->key.type;
4044                 key.offset = corrupt->key.offset;
4045
4046                 /*
4047                  * Here we don't want to do any tree balance, since it may
4048                  * cause a balance with corrupted brother leaf/node,
4049                  * so ins_len set to 0 here.
4050                  * Balance will be done after all corrupt node/leaf is deleted.
4051                  */
4052                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
4053                 if (ret < 0)
4054                         goto out;
4055                 offset = btrfs_node_blockptr(path.nodes[level],
4056                                              path.slots[level]);
4057
4058                 /* Remove the ptr */
4059                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
4060                 if (ret < 0)
4061                         goto out;
4062                 /*
4063                  * Remove the corresponding extent
4064                  * return value is not concerned.
4065                  */
4066                 btrfs_release_path(&path);
4067                 ret = btrfs_free_extent(trans, root, offset,
4068                                 root->fs_info->nodesize, 0,
4069                                 root->root_key.objectid, level - 1, 0);
4070                 cache = next_cache_extent(cache);
4071         }
4072
4073         /* Balance the btree using btrfs_search_slot() */
4074         cache = first_cache_extent(corrupt_blocks);
4075         while (cache) {
4076                 corrupt = container_of(cache, struct btrfs_corrupt_block,
4077                                        cache);
4078                 memcpy(&key, &corrupt->key, sizeof(key));
4079                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4080                 if (ret < 0)
4081                         goto out;
4082                 /* return will always >0 since it won't find the item */
4083                 ret = 0;
4084                 btrfs_release_path(&path);
4085                 cache = next_cache_extent(cache);
4086         }
4087 out:
4088         btrfs_commit_transaction(trans, root);
4089         btrfs_release_path(&path);
4090         return ret;
4091 }
4092
4093 static int check_fs_root(struct btrfs_root *root,
4094                          struct cache_tree *root_cache,
4095                          struct walk_control *wc)
4096 {
4097         int ret = 0;
4098         int err = 0;
4099         int wret;
4100         int level;
4101         struct btrfs_path path;
4102         struct shared_node root_node;
4103         struct root_record *rec;
4104         struct btrfs_root_item *root_item = &root->root_item;
4105         struct cache_tree corrupt_blocks;
4106         struct orphan_data_extent *orphan;
4107         struct orphan_data_extent *tmp;
4108         enum btrfs_tree_block_status status;
4109         struct node_refs nrefs;
4110
4111         /*
4112          * Reuse the corrupt_block cache tree to record corrupted tree block
4113          *
4114          * Unlike the usage in extent tree check, here we do it in a per
4115          * fs/subvol tree base.
4116          */
4117         cache_tree_init(&corrupt_blocks);
4118         root->fs_info->corrupt_blocks = &corrupt_blocks;
4119
4120         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4121                 rec = get_root_rec(root_cache, root->root_key.objectid);
4122                 BUG_ON(IS_ERR(rec));
4123                 if (btrfs_root_refs(root_item) > 0)
4124                         rec->found_root_item = 1;
4125         }
4126
4127         btrfs_init_path(&path);
4128         memset(&root_node, 0, sizeof(root_node));
4129         cache_tree_init(&root_node.root_cache);
4130         cache_tree_init(&root_node.inode_cache);
4131         memset(&nrefs, 0, sizeof(nrefs));
4132
4133         /* Move the orphan extent record to corresponding inode_record */
4134         list_for_each_entry_safe(orphan, tmp,
4135                                  &root->orphan_data_extents, list) {
4136                 struct inode_record *inode;
4137
4138                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4139                                       1);
4140                 BUG_ON(IS_ERR(inode));
4141                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4142                 list_move(&orphan->list, &inode->orphan_extents);
4143         }
4144
4145         level = btrfs_header_level(root->node);
4146         memset(wc->nodes, 0, sizeof(wc->nodes));
4147         wc->nodes[level] = &root_node;
4148         wc->active_node = level;
4149         wc->root_level = level;
4150
4151         /* We may not have checked the root block, lets do that now */
4152         if (btrfs_is_leaf(root->node))
4153                 status = btrfs_check_leaf(root, NULL, root->node);
4154         else
4155                 status = btrfs_check_node(root, NULL, root->node);
4156         if (status != BTRFS_TREE_BLOCK_CLEAN)
4157                 return -EIO;
4158
4159         if (btrfs_root_refs(root_item) > 0 ||
4160             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4161                 path.nodes[level] = root->node;
4162                 extent_buffer_get(root->node);
4163                 path.slots[level] = 0;
4164         } else {
4165                 struct btrfs_key key;
4166                 struct btrfs_disk_key found_key;
4167
4168                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4169                 level = root_item->drop_level;
4170                 path.lowest_level = level;
4171                 if (level > btrfs_header_level(root->node) ||
4172                     level >= BTRFS_MAX_LEVEL) {
4173                         error("ignoring invalid drop level: %u", level);
4174                         goto skip_walking;
4175                 }
4176                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4177                 if (wret < 0)
4178                         goto skip_walking;
4179                 btrfs_node_key(path.nodes[level], &found_key,
4180                                 path.slots[level]);
4181                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4182                                         sizeof(found_key)));
4183         }
4184
4185         while (1) {
4186                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4187                 if (wret < 0)
4188                         ret = wret;
4189                 if (wret != 0)
4190                         break;
4191
4192                 wret = walk_up_tree(root, &path, wc, &level);
4193                 if (wret < 0)
4194                         ret = wret;
4195                 if (wret != 0)
4196                         break;
4197         }
4198 skip_walking:
4199         btrfs_release_path(&path);
4200
4201         if (!cache_tree_empty(&corrupt_blocks)) {
4202                 struct cache_extent *cache;
4203                 struct btrfs_corrupt_block *corrupt;
4204
4205                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4206                        root->root_key.objectid);
4207                 cache = first_cache_extent(&corrupt_blocks);
4208                 while (cache) {
4209                         corrupt = container_of(cache,
4210                                                struct btrfs_corrupt_block,
4211                                                cache);
4212                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4213                                cache->start, corrupt->level,
4214                                corrupt->key.objectid, corrupt->key.type,
4215                                corrupt->key.offset);
4216                         cache = next_cache_extent(cache);
4217                 }
4218                 if (repair) {
4219                         printf("Try to repair the btree for root %llu\n",
4220                                root->root_key.objectid);
4221                         ret = repair_btree(root, &corrupt_blocks);
4222                         if (ret < 0)
4223                                 fprintf(stderr, "Failed to repair btree: %s\n",
4224                                         strerror(-ret));
4225                         if (!ret)
4226                                 printf("Btree for root %llu is fixed\n",
4227                                        root->root_key.objectid);
4228                 }
4229         }
4230
4231         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4232         if (err < 0)
4233                 ret = err;
4234
4235         if (root_node.current) {
4236                 root_node.current->checked = 1;
4237                 maybe_free_inode_rec(&root_node.inode_cache,
4238                                 root_node.current);
4239         }
4240
4241         err = check_inode_recs(root, &root_node.inode_cache);
4242         if (!ret)
4243                 ret = err;
4244
4245         free_corrupt_blocks_tree(&corrupt_blocks);
4246         root->fs_info->corrupt_blocks = NULL;
4247         free_orphan_data_extents(&root->orphan_data_extents);
4248         return ret;
4249 }
4250
4251 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4252                           struct cache_tree *root_cache)
4253 {
4254         struct btrfs_path path;
4255         struct btrfs_key key;
4256         struct walk_control wc;
4257         struct extent_buffer *leaf, *tree_node;
4258         struct btrfs_root *tmp_root;
4259         struct btrfs_root *tree_root = fs_info->tree_root;
4260         int ret;
4261         int err = 0;
4262
4263         if (ctx.progress_enabled) {
4264                 ctx.tp = TASK_FS_ROOTS;
4265                 task_start(ctx.info);
4266         }
4267
4268         /*
4269          * Just in case we made any changes to the extent tree that weren't
4270          * reflected into the free space cache yet.
4271          */
4272         if (repair)
4273                 reset_cached_block_groups(fs_info);
4274         memset(&wc, 0, sizeof(wc));
4275         cache_tree_init(&wc.shared);
4276         btrfs_init_path(&path);
4277
4278 again:
4279         key.offset = 0;
4280         key.objectid = 0;
4281         key.type = BTRFS_ROOT_ITEM_KEY;
4282         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4283         if (ret < 0) {
4284                 err = 1;
4285                 goto out;
4286         }
4287         tree_node = tree_root->node;
4288         while (1) {
4289                 if (tree_node != tree_root->node) {
4290                         free_root_recs_tree(root_cache);
4291                         btrfs_release_path(&path);
4292                         goto again;
4293                 }
4294                 leaf = path.nodes[0];
4295                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4296                         ret = btrfs_next_leaf(tree_root, &path);
4297                         if (ret) {
4298                                 if (ret < 0)
4299                                         err = 1;
4300                                 break;
4301                         }
4302                         leaf = path.nodes[0];
4303                 }
4304                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4305                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4306                     fs_root_objectid(key.objectid)) {
4307                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4308                                 tmp_root = btrfs_read_fs_root_no_cache(
4309                                                 fs_info, &key);
4310                         } else {
4311                                 key.offset = (u64)-1;
4312                                 tmp_root = btrfs_read_fs_root(
4313                                                 fs_info, &key);
4314                         }
4315                         if (IS_ERR(tmp_root)) {
4316                                 err = 1;
4317                                 goto next;
4318                         }
4319                         ret = check_fs_root(tmp_root, root_cache, &wc);
4320                         if (ret == -EAGAIN) {
4321                                 free_root_recs_tree(root_cache);
4322                                 btrfs_release_path(&path);
4323                                 goto again;
4324                         }
4325                         if (ret)
4326                                 err = 1;
4327                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4328                                 btrfs_free_fs_root(tmp_root);
4329                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4330                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4331                         process_root_ref(leaf, path.slots[0], &key,
4332                                          root_cache);
4333                 }
4334 next:
4335                 path.slots[0]++;
4336         }
4337 out:
4338         btrfs_release_path(&path);
4339         if (err)
4340                 free_extent_cache_tree(&wc.shared);
4341         if (!cache_tree_empty(&wc.shared))
4342                 fprintf(stderr, "warning line %d\n", __LINE__);
4343
4344         task_stop(ctx.info);
4345
4346         return err;
4347 }
4348
4349 /*
4350  * Find the @index according by @ino and name.
4351  * Notice:time efficiency is O(N)
4352  *
4353  * @root:       the root of the fs/file tree
4354  * @index_ret:  the index as return value
4355  * @namebuf:    the name to match
4356  * @name_len:   the length of name to match
4357  * @file_type:  the file_type of INODE_ITEM to match
4358  *
4359  * Returns 0 if found and *@index_ret will be modified with right value
4360  * Returns< 0 not found and *@index_ret will be (u64)-1
4361  */
4362 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4363                           u64 *index_ret, char *namebuf, u32 name_len,
4364                           u8 file_type)
4365 {
4366         struct btrfs_path path;
4367         struct extent_buffer *node;
4368         struct btrfs_dir_item *di;
4369         struct btrfs_key key;
4370         struct btrfs_key location;
4371         char name[BTRFS_NAME_LEN] = {0};
4372
4373         u32 total;
4374         u32 cur = 0;
4375         u32 len;
4376         u32 data_len;
4377         u8 filetype;
4378         int slot;
4379         int ret;
4380
4381         ASSERT(index_ret);
4382
4383         /* search from the last index */
4384         key.objectid = dirid;
4385         key.offset = (u64)-1;
4386         key.type = BTRFS_DIR_INDEX_KEY;
4387
4388         btrfs_init_path(&path);
4389         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4390         if (ret < 0)
4391                 return ret;
4392
4393 loop:
4394         ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4395         if (ret) {
4396                 ret = -ENOENT;
4397                 *index_ret = (64)-1;
4398                 goto out;
4399         }
4400         /* Check whether inode_id/filetype/name match */
4401         node = path.nodes[0];
4402         slot = path.slots[0];
4403         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4404         total = btrfs_item_size_nr(node, slot);
4405         while (cur < total) {
4406                 ret = -ENOENT;
4407                 len = btrfs_dir_name_len(node, di);
4408                 data_len = btrfs_dir_data_len(node, di);
4409
4410                 btrfs_dir_item_key_to_cpu(node, di, &location);
4411                 if (location.objectid != location_id ||
4412                     location.type != BTRFS_INODE_ITEM_KEY ||
4413                     location.offset != 0)
4414                         goto next;
4415
4416                 filetype = btrfs_dir_type(node, di);
4417                 if (file_type != filetype)
4418                         goto next;
4419
4420                 if (len > BTRFS_NAME_LEN)
4421                         len = BTRFS_NAME_LEN;
4422
4423                 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4424                 if (len != name_len || strncmp(namebuf, name, len))
4425                         goto next;
4426
4427                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4428                 *index_ret = key.offset;
4429                 ret = 0;
4430                 goto out;
4431 next:
4432                 len += sizeof(*di) + data_len;
4433                 di = (struct btrfs_dir_item *)((char *)di + len);
4434                 cur += len;
4435         }
4436         goto loop;
4437
4438 out:
4439         btrfs_release_path(&path);
4440         return ret;
4441 }
4442
4443 /*
4444  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4445  * INODE_REF/INODE_EXTREF match.
4446  *
4447  * @root:       the root of the fs/file tree
4448  * @key:        the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4449  *              value while find index
4450  * @location_key: location key of the struct btrfs_dir_item to match
4451  * @name:       the name to match
4452  * @namelen:    the length of name
4453  * @file_type:  the type of file to math
4454  *
4455  * Return 0 if no error occurred.
4456  * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4457  * DIR_ITEM/DIR_INDEX
4458  * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4459  * and DIR_ITEM/DIR_INDEX mismatch
4460  */
4461 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4462                          struct btrfs_key *location_key, char *name,
4463                          u32 namelen, u8 file_type)
4464 {
4465         struct btrfs_path path;
4466         struct extent_buffer *node;
4467         struct btrfs_dir_item *di;
4468         struct btrfs_key location;
4469         char namebuf[BTRFS_NAME_LEN] = {0};
4470         u32 total;
4471         u32 cur = 0;
4472         u32 len;
4473         u32 data_len;
4474         u8 filetype;
4475         int slot;
4476         int ret;
4477
4478         /* get the index by traversing all index */
4479         if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4480                 ret = find_dir_index(root, key->objectid,
4481                                      location_key->objectid, &key->offset,
4482                                      name, namelen, file_type);
4483                 if (ret)
4484                         ret = DIR_INDEX_MISSING;
4485                 return ret;
4486         }
4487
4488         btrfs_init_path(&path);
4489         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4490         if (ret) {
4491                 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4492                         DIR_INDEX_MISSING;
4493                 goto out;
4494         }
4495
4496         /* Check whether inode_id/filetype/name match */
4497         node = path.nodes[0];
4498         slot = path.slots[0];
4499         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4500         total = btrfs_item_size_nr(node, slot);
4501         while (cur < total) {
4502                 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4503                         DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4504
4505                 len = btrfs_dir_name_len(node, di);
4506                 data_len = btrfs_dir_data_len(node, di);
4507
4508                 btrfs_dir_item_key_to_cpu(node, di, &location);
4509                 if (location.objectid != location_key->objectid ||
4510                     location.type != location_key->type ||
4511                     location.offset != location_key->offset)
4512                         goto next;
4513
4514                 filetype = btrfs_dir_type(node, di);
4515                 if (file_type != filetype)
4516                         goto next;
4517
4518                 if (len > BTRFS_NAME_LEN) {
4519                         len = BTRFS_NAME_LEN;
4520                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4521                         root->objectid,
4522                         key->type == BTRFS_DIR_ITEM_KEY ?
4523                         "DIR_ITEM" : "DIR_INDEX",
4524                         key->objectid, key->offset, len);
4525                 }
4526                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4527                                    len);
4528                 if (len != namelen || strncmp(namebuf, name, len))
4529                         goto next;
4530
4531                 ret = 0;
4532                 goto out;
4533 next:
4534                 len += sizeof(*di) + data_len;
4535                 di = (struct btrfs_dir_item *)((char *)di + len);
4536                 cur += len;
4537         }
4538
4539 out:
4540         btrfs_release_path(&path);
4541         return ret;
4542 }
4543
4544 /*
4545  * Prints inode ref error message
4546  */
4547 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4548                                 u64 index, const char *namebuf, int name_len,
4549                                 u8 filetype, int err)
4550 {
4551         if (!err)
4552                 return;
4553
4554         /* root dir error */
4555         if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4556                 error(
4557         "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4558                       root->objectid, key->objectid, key->offset, namebuf);
4559                 return;
4560         }
4561
4562         /* normal error */
4563         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4564                 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4565                       root->objectid, key->offset,
4566                       btrfs_name_hash(namebuf, name_len),
4567                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4568                       namebuf, filetype);
4569         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4570                 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4571                       root->objectid, key->offset, index,
4572                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4573                       namebuf, filetype);
4574 }
4575
4576 /*
4577  * Insert the missing inode item.
4578  *
4579  * Returns 0 means success.
4580  * Returns <0 means error.
4581  */
4582 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4583                                      u8 filetype)
4584 {
4585         struct btrfs_key key;
4586         struct btrfs_trans_handle *trans;
4587         struct btrfs_path path;
4588         int ret;
4589
4590         key.objectid = ino;
4591         key.type = BTRFS_INODE_ITEM_KEY;
4592         key.offset = 0;
4593
4594         btrfs_init_path(&path);
4595         trans = btrfs_start_transaction(root, 1);
4596         if (IS_ERR(trans)) {
4597                 ret = -EIO;
4598                 goto out;
4599         }
4600
4601         ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4602         if (ret < 0 || !ret)
4603                 goto fail;
4604
4605         /* insert inode item */
4606         create_inode_item_lowmem(trans, root, ino, filetype);
4607         ret = 0;
4608 fail:
4609         btrfs_commit_transaction(trans, root);
4610 out:
4611         if (ret)
4612                 error("failed to repair root %llu INODE ITEM[%llu] missing",
4613                       root->objectid, ino);
4614         btrfs_release_path(&path);
4615         return ret;
4616 }
4617
4618 /*
4619  * The ternary means dir item, dir index and relative inode ref.
4620  * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4621  * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4622  * strategy:
4623  * If two of three is missing or mismatched, delete the existing one.
4624  * If one of three is missing or mismatched, add the missing one.
4625  *
4626  * returns 0 means success.
4627  * returns not 0 means on error;
4628  */
4629 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4630                           u64 index, char *name, int name_len, u8 filetype,
4631                           int err)
4632 {
4633         struct btrfs_trans_handle *trans;
4634         int stage = 0;
4635         int ret = 0;
4636
4637         /*
4638          * stage shall be one of following valild values:
4639          *      0: Fine, nothing to do.
4640          *      1: One of three is wrong, so add missing one.
4641          *      2: Two of three is wrong, so delete existed one.
4642          */
4643         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4644                 stage++;
4645         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4646                 stage++;
4647         if (err & (INODE_REF_MISSING))
4648                 stage++;
4649
4650         /* stage must be smllarer than 3 */
4651         ASSERT(stage < 3);
4652
4653         trans = btrfs_start_transaction(root, 1);
4654         if (stage == 2) {
4655                 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4656                                    name_len, 0);
4657                 goto out;
4658         }
4659         if (stage == 1) {
4660                 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4661                                filetype, &index, 1, 1);
4662                 goto out;
4663         }
4664 out:
4665         btrfs_commit_transaction(trans, root);
4666
4667         if (ret)
4668                 error("fail to repair inode %llu name %s filetype %u",
4669                       ino, name, filetype);
4670         else
4671                 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4672                        stage == 2 ? "Delete" : "Add",
4673                        ino, name, filetype);
4674
4675         return ret;
4676 }
4677
4678 /*
4679  * Traverse the given INODE_REF and call find_dir_item() to find related
4680  * DIR_ITEM/DIR_INDEX.
4681  *
4682  * @root:       the root of the fs/file tree
4683  * @ref_key:    the key of the INODE_REF
4684  * @path        the path provides node and slot
4685  * @refs:       the count of INODE_REF
4686  * @mode:       the st_mode of INODE_ITEM
4687  * @name_ret:   returns with the first ref's name
4688  * @name_len_ret:    len of the name_ret
4689  *
4690  * Return 0 if no error occurred.
4691  */
4692 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4693                            struct btrfs_path *path, char *name_ret,
4694                            u32 *namelen_ret, u64 *refs_ret, int mode)
4695 {
4696         struct btrfs_key key;
4697         struct btrfs_key location;
4698         struct btrfs_inode_ref *ref;
4699         struct extent_buffer *node;
4700         char namebuf[BTRFS_NAME_LEN] = {0};
4701         u32 total;
4702         u32 cur = 0;
4703         u32 len;
4704         u32 name_len;
4705         u64 index;
4706         int ret;
4707         int err = 0;
4708         int tmp_err;
4709         int slot;
4710         int need_research = 0;
4711         u64 refs;
4712
4713 begin:
4714         err = 0;
4715         cur = 0;
4716         refs = *refs_ret;
4717
4718         /* since after repair, path and the dir item may be changed */
4719         if (need_research) {
4720                 need_research = 0;
4721                 btrfs_release_path(path);
4722                 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4723                 /* the item was deleted, let path point to the last checked item */
4724                 if (ret > 0) {
4725                         if (path->slots[0] == 0)
4726                                 btrfs_prev_leaf(root, path);
4727                         else
4728                                 path->slots[0]--;
4729                 }
4730                 if (ret)
4731                         goto out;
4732         }
4733
4734         location.objectid = ref_key->objectid;
4735         location.type = BTRFS_INODE_ITEM_KEY;
4736         location.offset = 0;
4737         node = path->nodes[0];
4738         slot = path->slots[0];
4739
4740         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4741         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4742         total = btrfs_item_size_nr(node, slot);
4743
4744 next:
4745         /* Update inode ref count */
4746         refs++;
4747         tmp_err = 0;
4748         index = btrfs_inode_ref_index(node, ref);
4749         name_len = btrfs_inode_ref_name_len(node, ref);
4750
4751         if (name_len <= BTRFS_NAME_LEN) {
4752                 len = name_len;
4753         } else {
4754                 len = BTRFS_NAME_LEN;
4755                 warning("root %llu INODE_REF[%llu %llu] name too long",
4756                         root->objectid, ref_key->objectid, ref_key->offset);
4757         }
4758
4759         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4760
4761         /* copy the first name found to name_ret */
4762         if (refs == 1 && name_ret) {
4763                 memcpy(name_ret, namebuf, len);
4764                 *namelen_ret = len;
4765         }
4766
4767         /* Check root dir ref */
4768         if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4769                 if (index != 0 || len != strlen("..") ||
4770                     strncmp("..", namebuf, len) ||
4771                     ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4772                         /* set err bits then repair will delete the ref */
4773                         err |= DIR_INDEX_MISSING;
4774                         err |= DIR_ITEM_MISSING;
4775                 }
4776                 goto end;
4777         }
4778
4779         /* Find related DIR_INDEX */
4780         key.objectid = ref_key->offset;
4781         key.type = BTRFS_DIR_INDEX_KEY;
4782         key.offset = index;
4783         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4784                             imode_to_type(mode));
4785
4786         /* Find related dir_item */
4787         key.objectid = ref_key->offset;
4788         key.type = BTRFS_DIR_ITEM_KEY;
4789         key.offset = btrfs_name_hash(namebuf, len);
4790         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4791                             imode_to_type(mode));
4792 end:
4793         if (tmp_err && repair) {
4794                 ret = repair_ternary_lowmem(root, ref_key->offset,
4795                                             ref_key->objectid, index, namebuf,
4796                                             name_len, imode_to_type(mode),
4797                                             tmp_err);
4798                 if (!ret) {
4799                         need_research = 1;
4800                         goto begin;
4801                 }
4802         }
4803         print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4804                             imode_to_type(mode), tmp_err);
4805         err |= tmp_err;
4806         len = sizeof(*ref) + name_len;
4807         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4808         cur += len;
4809         if (cur < total)
4810                 goto next;
4811
4812 out:
4813         *refs_ret = refs;
4814         return err;
4815 }
4816
4817 /*
4818  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4819  * DIR_ITEM/DIR_INDEX.
4820  *
4821  * @root:       the root of the fs/file tree
4822  * @ref_key:    the key of the INODE_EXTREF
4823  * @refs:       the count of INODE_EXTREF
4824  * @mode:       the st_mode of INODE_ITEM
4825  *
4826  * Return 0 if no error occurred.
4827  */
4828 static int check_inode_extref(struct btrfs_root *root,
4829                               struct btrfs_key *ref_key,
4830                               struct extent_buffer *node, int slot, u64 *refs,
4831                               int mode)
4832 {
4833         struct btrfs_key key;
4834         struct btrfs_key location;
4835         struct btrfs_inode_extref *extref;
4836         char namebuf[BTRFS_NAME_LEN] = {0};
4837         u32 total;
4838         u32 cur = 0;
4839         u32 len;
4840         u32 name_len;
4841         u64 index;
4842         u64 parent;
4843         int ret;
4844         int err = 0;
4845
4846         location.objectid = ref_key->objectid;
4847         location.type = BTRFS_INODE_ITEM_KEY;
4848         location.offset = 0;
4849
4850         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4851         total = btrfs_item_size_nr(node, slot);
4852
4853 next:
4854         /* update inode ref count */
4855         (*refs)++;
4856         name_len = btrfs_inode_extref_name_len(node, extref);
4857         index = btrfs_inode_extref_index(node, extref);
4858         parent = btrfs_inode_extref_parent(node, extref);
4859         if (name_len <= BTRFS_NAME_LEN) {
4860                 len = name_len;
4861         } else {
4862                 len = BTRFS_NAME_LEN;
4863                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4864                         root->objectid, ref_key->objectid, ref_key->offset);
4865         }
4866         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4867
4868         /* Check root dir ref name */
4869         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4870                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4871                       root->objectid, ref_key->objectid, ref_key->offset,
4872                       namebuf);
4873                 err |= ROOT_DIR_ERROR;
4874         }
4875
4876         /* find related dir_index */
4877         key.objectid = parent;
4878         key.type = BTRFS_DIR_INDEX_KEY;
4879         key.offset = index;
4880         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4881         err |= ret;
4882
4883         /* find related dir_item */
4884         key.objectid = parent;
4885         key.type = BTRFS_DIR_ITEM_KEY;
4886         key.offset = btrfs_name_hash(namebuf, len);
4887         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4888         err |= ret;
4889
4890         len = sizeof(*extref) + name_len;
4891         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4892         cur += len;
4893
4894         if (cur < total)
4895                 goto next;
4896
4897         return err;
4898 }
4899
4900 /*
4901  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4902  * DIR_ITEM/DIR_INDEX match.
4903  * Return with @index_ret.
4904  *
4905  * @root:       the root of the fs/file tree
4906  * @key:        the key of the INODE_REF/INODE_EXTREF
4907  * @name:       the name in the INODE_REF/INODE_EXTREF
4908  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4909  * @index_ret:  the index in the INODE_REF/INODE_EXTREF,
4910  *              value (64)-1 means do not check index
4911  * @ext_ref:    the EXTENDED_IREF feature
4912  *
4913  * Return 0 if no error occurred.
4914  * Return >0 for error bitmap
4915  */
4916 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4917                           char *name, int namelen, u64 *index_ret,
4918                           unsigned int ext_ref)
4919 {
4920         struct btrfs_path path;
4921         struct btrfs_inode_ref *ref;
4922         struct btrfs_inode_extref *extref;
4923         struct extent_buffer *node;
4924         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4925         u32 total;
4926         u32 cur = 0;
4927         u32 len;
4928         u32 ref_namelen;
4929         u64 ref_index;
4930         u64 parent;
4931         u64 dir_id;
4932         int slot;
4933         int ret;
4934
4935         ASSERT(index_ret);
4936
4937         btrfs_init_path(&path);
4938         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4939         if (ret) {
4940                 ret = INODE_REF_MISSING;
4941                 goto extref;
4942         }
4943
4944         node = path.nodes[0];
4945         slot = path.slots[0];
4946
4947         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4948         total = btrfs_item_size_nr(node, slot);
4949
4950         /* Iterate all entry of INODE_REF */
4951         while (cur < total) {
4952                 ret = INODE_REF_MISSING;
4953
4954                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4955                 ref_index = btrfs_inode_ref_index(node, ref);
4956                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4957                         goto next_ref;
4958
4959                 if (cur + sizeof(*ref) + ref_namelen > total ||
4960                     ref_namelen > BTRFS_NAME_LEN) {
4961                         warning("root %llu INODE %s[%llu %llu] name too long",
4962                                 root->objectid,
4963                                 key->type == BTRFS_INODE_REF_KEY ?
4964                                         "REF" : "EXTREF",
4965                                 key->objectid, key->offset);
4966
4967                         if (cur + sizeof(*ref) > total)
4968                                 break;
4969                         len = min_t(u32, total - cur - sizeof(*ref),
4970                                     BTRFS_NAME_LEN);
4971                 } else {
4972                         len = ref_namelen;
4973                 }
4974
4975                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4976                                    len);
4977
4978                 if (len != namelen || strncmp(ref_namebuf, name, len))
4979                         goto next_ref;
4980
4981                 *index_ret = ref_index;
4982                 ret = 0;
4983                 goto out;
4984 next_ref:
4985                 len = sizeof(*ref) + ref_namelen;
4986                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4987                 cur += len;
4988         }
4989
4990 extref:
4991         /* Skip if not support EXTENDED_IREF feature */
4992         if (!ext_ref)
4993                 goto out;
4994
4995         btrfs_release_path(&path);
4996         btrfs_init_path(&path);
4997
4998         dir_id = key->offset;
4999         key->type = BTRFS_INODE_EXTREF_KEY;
5000         key->offset = btrfs_extref_hash(dir_id, name, namelen);
5001
5002         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5003         if (ret) {
5004                 ret = INODE_REF_MISSING;
5005                 goto out;
5006         }
5007
5008         node = path.nodes[0];
5009         slot = path.slots[0];
5010
5011         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5012         cur = 0;
5013         total = btrfs_item_size_nr(node, slot);
5014
5015         /* Iterate all entry of INODE_EXTREF */
5016         while (cur < total) {
5017                 ret = INODE_REF_MISSING;
5018
5019                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
5020                 ref_index = btrfs_inode_extref_index(node, extref);
5021                 parent = btrfs_inode_extref_parent(node, extref);
5022                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5023                         goto next_extref;
5024
5025                 if (parent != dir_id)
5026                         goto next_extref;
5027
5028                 if (ref_namelen <= BTRFS_NAME_LEN) {
5029                         len = ref_namelen;
5030                 } else {
5031                         len = BTRFS_NAME_LEN;
5032                         warning("root %llu INODE %s[%llu %llu] name too long",
5033                                 root->objectid,
5034                                 key->type == BTRFS_INODE_REF_KEY ?
5035                                         "REF" : "EXTREF",
5036                                 key->objectid, key->offset);
5037                 }
5038                 read_extent_buffer(node, ref_namebuf,
5039                                    (unsigned long)(extref + 1), len);
5040
5041                 if (len != namelen || strncmp(ref_namebuf, name, len))
5042                         goto next_extref;
5043
5044                 *index_ret = ref_index;
5045                 ret = 0;
5046                 goto out;
5047
5048 next_extref:
5049                 len = sizeof(*extref) + ref_namelen;
5050                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5051                 cur += len;
5052
5053         }
5054 out:
5055         btrfs_release_path(&path);
5056         return ret;
5057 }
5058
5059 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
5060                                u64 ino, u64 index, const char *namebuf,
5061                                int name_len, u8 filetype, int err)
5062 {
5063         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5064                 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5065                       root->objectid, key->objectid, key->offset, namebuf,
5066                       filetype,
5067                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5068         }
5069
5070         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5071                 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5072                       root->objectid, key->objectid, index, namebuf, filetype,
5073                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5074         }
5075
5076         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5077                 error(
5078                 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5079                       root->objectid, ino, index, namebuf, filetype,
5080                       err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
5081         }
5082
5083         if (err & INODE_REF_MISSING)
5084                 error(
5085                 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5086                       root->objectid, ino, key->objectid, namebuf, filetype);
5087
5088 }
5089
5090 /*
5091  * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5092  *
5093  * Returns error after repair
5094  */
5095 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5096                            u64 index, u8 filetype, char *namebuf, u32 name_len,
5097                            int err)
5098 {
5099         int ret;
5100
5101         if (err & INODE_ITEM_MISSING) {
5102                 ret = repair_inode_item_missing(root, ino, filetype);
5103                 if (!ret)
5104                         err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
5105         }
5106
5107         if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5108                 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5109                                             name_len, filetype, err);
5110                 if (!ret) {
5111                         err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5112                         err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5113                         err &= ~(INODE_REF_MISSING);
5114                 }
5115         }
5116         return err;
5117 }
5118
5119 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5120                 u64 *size_ret)
5121 {
5122         struct btrfs_key key;
5123         struct btrfs_path path;
5124         u32 len;
5125         struct btrfs_dir_item *di;
5126         int ret;
5127         int cur = 0;
5128         int total = 0;
5129
5130         ASSERT(size_ret);
5131         *size_ret = 0;
5132
5133         key.objectid = ino;
5134         key.type = type;
5135         key.offset = (u64)-1;
5136
5137         btrfs_init_path(&path);
5138         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5139         if (ret < 0) {
5140                 ret = -EIO;
5141                 goto out;
5142         }
5143         /* if found, go to spacial case */
5144         if (ret == 0)
5145                 goto special_case;
5146
5147 loop:
5148         ret = btrfs_previous_item(root, &path, ino, type);
5149
5150         if (ret) {
5151                 ret = 0;
5152                 goto out;
5153         }
5154
5155 special_case:
5156         di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5157         cur = 0;
5158         total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5159
5160         while (cur < total) {
5161                 len = btrfs_dir_name_len(path.nodes[0], di);
5162                 if (len > BTRFS_NAME_LEN)
5163                         len = BTRFS_NAME_LEN;
5164                 *size_ret += len;
5165
5166                 len += btrfs_dir_data_len(path.nodes[0], di);
5167                 len += sizeof(*di);
5168                 di = (struct btrfs_dir_item *)((char *)di + len);
5169                 cur += len;
5170         }
5171         goto loop;
5172
5173 out:
5174         btrfs_release_path(&path);
5175         return ret;
5176 }
5177
5178 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5179 {
5180         u64 item_size;
5181         u64 index_size;
5182         int ret;
5183
5184         ASSERT(size);
5185         ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5186         if (ret)
5187                 goto out;
5188
5189         ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5190         if (ret)
5191                 goto out;
5192
5193         *size = item_size + index_size;
5194
5195 out:
5196         if (ret)
5197                 error("failed to count root %llu INODE[%llu] root size",
5198                       root->objectid, ino);
5199         return ret;
5200 }
5201
5202 /*
5203  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5204  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5205  *
5206  * @root:       the root of the fs/file tree
5207  * @key:        the key of the INODE_REF/INODE_EXTREF
5208  * @path:       the path
5209  * @size:       the st_size of the INODE_ITEM
5210  * @ext_ref:    the EXTENDED_IREF feature
5211  *
5212  * Return 0 if no error occurred.
5213  * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
5214  */
5215 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5216                           struct btrfs_path *path, u64 *size,
5217                           unsigned int ext_ref)
5218 {
5219         struct btrfs_dir_item *di;
5220         struct btrfs_inode_item *ii;
5221         struct btrfs_key key;
5222         struct btrfs_key location;
5223         struct extent_buffer *node;
5224         int slot;
5225         char namebuf[BTRFS_NAME_LEN] = {0};
5226         u32 total;
5227         u32 cur = 0;
5228         u32 len;
5229         u32 name_len;
5230         u32 data_len;
5231         u8 filetype;
5232         u32 mode = 0;
5233         u64 index;
5234         int ret;
5235         int err;
5236         int tmp_err;
5237         int need_research = 0;
5238
5239         /*
5240          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5241          * ignore index check.
5242          */
5243         if (di_key->type == BTRFS_DIR_INDEX_KEY)
5244                 index = di_key->offset;
5245         else
5246                 index = (u64)-1;
5247 begin:
5248         err = 0;
5249         cur = 0;
5250
5251         /* since after repair, path and the dir item may be changed */
5252         if (need_research) {
5253                 need_research = 0;
5254                 err |= DIR_COUNT_AGAIN;
5255                 btrfs_release_path(path);
5256                 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5257                 /* the item was deleted, let path point the last checked item */
5258                 if (ret > 0) {
5259                         if (path->slots[0] == 0)
5260                                 btrfs_prev_leaf(root, path);
5261                         else
5262                                 path->slots[0]--;
5263                 }
5264                 if (ret)
5265                         goto out;
5266         }
5267
5268         node = path->nodes[0];
5269         slot = path->slots[0];
5270
5271         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5272         total = btrfs_item_size_nr(node, slot);
5273         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5274
5275         while (cur < total) {
5276                 data_len = btrfs_dir_data_len(node, di);
5277                 tmp_err = 0;
5278                 if (data_len)
5279                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5280                               root->objectid,
5281               di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5282                               di_key->objectid, di_key->offset, data_len);
5283
5284                 name_len = btrfs_dir_name_len(node, di);
5285                 if (name_len <= BTRFS_NAME_LEN) {
5286                         len = name_len;
5287                 } else {
5288                         len = BTRFS_NAME_LEN;
5289                         warning("root %llu %s[%llu %llu] name too long",
5290                                 root->objectid,
5291                 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5292                                 di_key->objectid, di_key->offset);
5293                 }
5294                 (*size) += name_len;
5295                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5296                                    len);
5297                 filetype = btrfs_dir_type(node, di);
5298
5299                 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5300                     di_key->offset != btrfs_name_hash(namebuf, len)) {
5301                         err |= -EIO;
5302                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5303                         root->objectid, di_key->objectid, di_key->offset,
5304                         namebuf, len, filetype, di_key->offset,
5305                         btrfs_name_hash(namebuf, len));
5306                 }
5307
5308                 btrfs_dir_item_key_to_cpu(node, di, &location);
5309                 /* Ignore related ROOT_ITEM check */
5310                 if (location.type == BTRFS_ROOT_ITEM_KEY)
5311                         goto next;
5312
5313                 btrfs_release_path(path);
5314                 /* Check relative INODE_ITEM(existence/filetype) */
5315                 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5316                 if (ret) {
5317                         tmp_err |= INODE_ITEM_MISSING;
5318                         goto next;
5319                 }
5320
5321                 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5322                                     struct btrfs_inode_item);
5323                 mode = btrfs_inode_mode(path->nodes[0], ii);
5324                 if (imode_to_type(mode) != filetype) {
5325                         tmp_err |= INODE_ITEM_MISMATCH;
5326                         goto next;
5327                 }
5328
5329                 /* Check relative INODE_REF/INODE_EXTREF */
5330                 key.objectid = location.objectid;
5331                 key.type = BTRFS_INODE_REF_KEY;
5332                 key.offset = di_key->objectid;
5333                 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5334                                           &index, ext_ref);
5335
5336                 /* check relative INDEX/ITEM */
5337                 key.objectid = di_key->objectid;
5338                 if (key.type == BTRFS_DIR_ITEM_KEY) {
5339                         key.type = BTRFS_DIR_INDEX_KEY;
5340                         key.offset = index;
5341                 } else {
5342                         key.type = BTRFS_DIR_ITEM_KEY;
5343                         key.offset = btrfs_name_hash(namebuf, name_len);
5344                 }
5345
5346                 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5347                                          name_len, filetype);
5348                 /* find_dir_item may find index */
5349                 if (key.type == BTRFS_DIR_INDEX_KEY)
5350                         index = key.offset;
5351 next:
5352
5353                 if (tmp_err && repair) {
5354                         ret = repair_dir_item(root, di_key->objectid,
5355                                               location.objectid, index,
5356                                               imode_to_type(mode), namebuf,
5357                                               name_len, tmp_err);
5358                         if (ret != tmp_err) {
5359                                 need_research = 1;
5360                                 goto begin;
5361                         }
5362                 }
5363                 btrfs_release_path(path);
5364                 print_dir_item_err(root, di_key, location.objectid, index,
5365                                    namebuf, name_len, filetype, tmp_err);
5366                 err |= tmp_err;
5367                 len = sizeof(*di) + name_len + data_len;
5368                 di = (struct btrfs_dir_item *)((char *)di + len);
5369                 cur += len;
5370
5371                 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5372                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5373                               root->objectid, di_key->objectid,
5374                               di_key->offset);
5375                         break;
5376                 }
5377         }
5378 out:
5379         /* research path */
5380         btrfs_release_path(path);
5381         ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5382         if (ret)
5383                 err |= ret > 0 ? -ENOENT : ret;
5384         return err;
5385 }
5386
5387 /*
5388  * Wrapper function of btrfs_punch_hole.
5389  *
5390  * Returns 0 means success.
5391  * Returns not 0 means error.
5392  */
5393 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5394                              u64 len)
5395 {
5396         struct btrfs_trans_handle *trans;
5397         int ret = 0;
5398
5399         trans = btrfs_start_transaction(root, 1);
5400         if (IS_ERR(trans))
5401                 return PTR_ERR(trans);
5402
5403         ret = btrfs_punch_hole(trans, root, ino, start, len);
5404         if (ret)
5405                 error("failed to add hole [%llu, %llu] in inode [%llu]",
5406                       start, len, ino);
5407         else
5408                 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5409                        ino);
5410
5411         btrfs_commit_transaction(trans, root);
5412         return ret;
5413 }
5414
5415 /*
5416  * Check file extent datasum/hole, update the size of the file extents,
5417  * check and update the last offset of the file extent.
5418  *
5419  * @root:       the root of fs/file tree.
5420  * @fkey:       the key of the file extent.
5421  * @nodatasum:  INODE_NODATASUM feature.
5422  * @size:       the sum of all EXTENT_DATA items size for this inode.
5423  * @end:        the offset of the last extent.
5424  *
5425  * Return 0 if no error occurred.
5426  */
5427 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5428                              struct extent_buffer *node, int slot,
5429                              unsigned int nodatasum, u64 *size, u64 *end)
5430 {
5431         struct btrfs_file_extent_item *fi;
5432         u64 disk_bytenr;
5433         u64 disk_num_bytes;
5434         u64 extent_num_bytes;
5435         u64 extent_offset;
5436         u64 csum_found;         /* In byte size, sectorsize aligned */
5437         u64 search_start;       /* Logical range start we search for csum */
5438         u64 search_len;         /* Logical range len we search for csum */
5439         unsigned int extent_type;
5440         unsigned int is_hole;
5441         int compressed = 0;
5442         int ret;
5443         int err = 0;
5444
5445         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5446
5447         /* Check inline extent */
5448         extent_type = btrfs_file_extent_type(node, fi);
5449         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5450                 struct btrfs_item *e = btrfs_item_nr(slot);
5451                 u32 item_inline_len;
5452
5453                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5454                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5455                 compressed = btrfs_file_extent_compression(node, fi);
5456                 if (extent_num_bytes == 0) {
5457                         error(
5458                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5459                                 root->objectid, fkey->objectid, fkey->offset);
5460                         err |= FILE_EXTENT_ERROR;
5461                 }
5462                 if (!compressed && extent_num_bytes != item_inline_len) {
5463                         error(
5464                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5465                                 root->objectid, fkey->objectid, fkey->offset,
5466                                 extent_num_bytes, item_inline_len);
5467                         err |= FILE_EXTENT_ERROR;
5468                 }
5469                 *end += extent_num_bytes;
5470                 *size += extent_num_bytes;
5471                 return err;
5472         }
5473
5474         /* Check extent type */
5475         if (extent_type != BTRFS_FILE_EXTENT_REG &&
5476                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5477                 err |= FILE_EXTENT_ERROR;
5478                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5479                       root->objectid, fkey->objectid, fkey->offset);
5480                 return err;
5481         }
5482
5483         /* Check REG_EXTENT/PREALLOC_EXTENT */
5484         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5485         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5486         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5487         extent_offset = btrfs_file_extent_offset(node, fi);
5488         compressed = btrfs_file_extent_compression(node, fi);
5489         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5490
5491         /*
5492          * Check EXTENT_DATA csum
5493          *
5494          * For plain (uncompressed) extent, we should only check the range
5495          * we're referring to, as it's possible that part of prealloc extent
5496          * has been written, and has csum:
5497          *
5498          * |<--- Original large preallocated extent A ---->|
5499          * |<- Prealloc File Extent ->|<- Regular Extent ->|
5500          *      No csum                         Has csum
5501          *
5502          * For compressed extent, we should check the whole range.
5503          */
5504         if (!compressed) {
5505                 search_start = disk_bytenr + extent_offset;
5506                 search_len = extent_num_bytes;
5507         } else {
5508                 search_start = disk_bytenr;
5509                 search_len = disk_num_bytes;
5510         }
5511         ret = count_csum_range(root, search_start, search_len, &csum_found);
5512         if (csum_found > 0 && nodatasum) {
5513                 err |= ODD_CSUM_ITEM;
5514                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5515                       root->objectid, fkey->objectid, fkey->offset);
5516         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5517                    !is_hole && (ret < 0 || csum_found < search_len)) {
5518                 err |= CSUM_ITEM_MISSING;
5519                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5520                       root->objectid, fkey->objectid, fkey->offset,
5521                       csum_found, search_len);
5522         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5523                 err |= ODD_CSUM_ITEM;
5524                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5525                       root->objectid, fkey->objectid, fkey->offset, csum_found);
5526         }
5527
5528         /* Check EXTENT_DATA hole */
5529         if (!no_holes && *end != fkey->offset) {
5530                 if (repair)
5531                         ret = punch_extent_hole(root, fkey->objectid,
5532                                                 *end, fkey->offset - *end);
5533                 if (!repair || ret) {
5534                         err |= FILE_EXTENT_ERROR;
5535                         error(
5536 "root %llu EXTENT_DATA[%llu %llu] gap exists, expected: EXTENT_DATA[%llu %llu]",
5537                                 root->objectid, fkey->objectid, fkey->offset,
5538                                 fkey->objectid, *end);
5539                 }
5540         }
5541
5542         *end += extent_num_bytes;
5543         if (!is_hole)
5544                 *size += extent_num_bytes;
5545
5546         return err;
5547 }
5548
5549 /*
5550  * Set inode item nbytes to @nbytes
5551  *
5552  * Returns  0     on success
5553  * Returns  != 0  on error
5554  */
5555 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5556                                       struct btrfs_path *path,
5557                                       u64 ino, u64 nbytes)
5558 {
5559         struct btrfs_trans_handle *trans;
5560         struct btrfs_inode_item *ii;
5561         struct btrfs_key key;
5562         struct btrfs_key research_key;
5563         int err = 0;
5564         int ret;
5565
5566         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5567
5568         key.objectid = ino;
5569         key.type = BTRFS_INODE_ITEM_KEY;
5570         key.offset = 0;
5571
5572         trans = btrfs_start_transaction(root, 1);
5573         if (IS_ERR(trans)) {
5574                 ret = PTR_ERR(trans);
5575                 err |= ret;
5576                 goto out;
5577         }
5578
5579         btrfs_release_path(path);
5580         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5581         if (ret > 0)
5582                 ret = -ENOENT;
5583         if (ret) {
5584                 err |= ret;
5585                 goto fail;
5586         }
5587
5588         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5589                             struct btrfs_inode_item);
5590         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5591         btrfs_mark_buffer_dirty(path->nodes[0]);
5592 fail:
5593         btrfs_commit_transaction(trans, root);
5594 out:
5595         if (ret)
5596                 error("failed to set nbytes in inode %llu root %llu",
5597                       ino, root->root_key.objectid);
5598         else
5599                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5600                        root->root_key.objectid, nbytes);
5601
5602         /* research path */
5603         btrfs_release_path(path);
5604         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5605         err |= ret;
5606
5607         return err;
5608 }
5609
5610 /*
5611  * Set directory inode isize to @isize.
5612  *
5613  * Returns 0     on success.
5614  * Returns != 0  on error.
5615  */
5616 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5617                                    struct btrfs_path *path,
5618                                    u64 ino, u64 isize)
5619 {
5620         struct btrfs_trans_handle *trans;
5621         struct btrfs_inode_item *ii;
5622         struct btrfs_key key;
5623         struct btrfs_key research_key;
5624         int ret;
5625         int err = 0;
5626
5627         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5628
5629         key.objectid = ino;
5630         key.type = BTRFS_INODE_ITEM_KEY;
5631         key.offset = 0;
5632
5633         trans = btrfs_start_transaction(root, 1);
5634         if (IS_ERR(trans)) {
5635                 ret = PTR_ERR(trans);
5636                 err |= ret;
5637                 goto out;
5638         }
5639
5640         btrfs_release_path(path);
5641         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5642         if (ret > 0)
5643                 ret = -ENOENT;
5644         if (ret) {
5645                 err |= ret;
5646                 goto fail;
5647         }
5648
5649         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5650                             struct btrfs_inode_item);
5651         btrfs_set_inode_size(path->nodes[0], ii, isize);
5652         btrfs_mark_buffer_dirty(path->nodes[0]);
5653 fail:
5654         btrfs_commit_transaction(trans, root);
5655 out:
5656         if (ret)
5657                 error("failed to set isize in inode %llu root %llu",
5658                       ino, root->root_key.objectid);
5659         else
5660                 printf("Set isize in inode %llu root %llu to %llu\n",
5661                        ino, root->root_key.objectid, isize);
5662
5663         btrfs_release_path(path);
5664         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5665         err |= ret;
5666
5667         return err;
5668 }
5669
5670 /*
5671  * Wrapper function for btrfs_add_orphan_item().
5672  *
5673  * Returns 0     on success.
5674  * Returns != 0  on error.
5675  */
5676 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5677                                            struct btrfs_path *path, u64 ino)
5678 {
5679         struct btrfs_trans_handle *trans;
5680         struct btrfs_key research_key;
5681         int ret;
5682         int err = 0;
5683
5684         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5685
5686         trans = btrfs_start_transaction(root, 1);
5687         if (IS_ERR(trans)) {
5688                 ret = PTR_ERR(trans);
5689                 err |= ret;
5690                 goto out;
5691         }
5692
5693         btrfs_release_path(path);
5694         ret = btrfs_add_orphan_item(trans, root, path, ino);
5695         err |= ret;
5696         btrfs_commit_transaction(trans, root);
5697 out:
5698         if (ret)
5699                 error("failed to add inode %llu as orphan item root %llu",
5700                       ino, root->root_key.objectid);
5701         else
5702                 printf("Added inode %llu as orphan item root %llu\n",
5703                        ino, root->root_key.objectid);
5704
5705         btrfs_release_path(path);
5706         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5707         err |= ret;
5708
5709         return err;
5710 }
5711
5712 /* Set inode_item nlink to @ref_count.
5713  * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
5714  *
5715  * Returns 0 on success
5716  */
5717 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
5718                                       struct btrfs_path *path, u64 ino,
5719                                       const char *name, u32 namelen,
5720                                       u64 ref_count, u8 filetype, u64 *nlink)
5721 {
5722         struct btrfs_trans_handle *trans;
5723         struct btrfs_inode_item *ii;
5724         struct btrfs_key key;
5725         struct btrfs_key old_key;
5726         char namebuf[BTRFS_NAME_LEN] = {0};
5727         int name_len;
5728         int ret;
5729         int ret2;
5730
5731         /* save the key */
5732         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
5733
5734         if (name && namelen) {
5735                 ASSERT(namelen <= BTRFS_NAME_LEN);
5736                 memcpy(namebuf, name, namelen);
5737                 name_len = namelen;
5738         } else {
5739                 sprintf(namebuf, "%llu", ino);
5740                 name_len = count_digits(ino);
5741                 printf("Can't find file name for inode %llu, use %s instead\n",
5742                        ino, namebuf);
5743         }
5744
5745         trans = btrfs_start_transaction(root, 1);
5746         if (IS_ERR(trans)) {
5747                 ret = PTR_ERR(trans);
5748                 goto out;
5749         }
5750
5751         btrfs_release_path(path);
5752         /* if refs is 0, put it into lostfound */
5753         if (ref_count == 0) {
5754                 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
5755                                               name_len, filetype, &ref_count);
5756                 if (ret)
5757                         goto fail;
5758         }
5759
5760         /* reset inode_item's nlink to ref_count */
5761         key.objectid = ino;
5762         key.type = BTRFS_INODE_ITEM_KEY;
5763         key.offset = 0;
5764
5765         btrfs_release_path(path);
5766         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5767         if (ret > 0)
5768                 ret = -ENOENT;
5769         if (ret)
5770                 goto fail;
5771
5772         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5773                             struct btrfs_inode_item);
5774         btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
5775         btrfs_mark_buffer_dirty(path->nodes[0]);
5776
5777         if (nlink)
5778                 *nlink = ref_count;
5779 fail:
5780         btrfs_commit_transaction(trans, root);
5781 out:
5782         if (ret)
5783                 error(
5784         "fail to repair nlink of inode %llu root %llu name %s filetype %u",
5785                        root->objectid, ino, namebuf, filetype);
5786         else
5787                 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
5788                        root->objectid, ino, namebuf, filetype);
5789
5790         /* research */
5791         btrfs_release_path(path);
5792         ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
5793         if (ret2 < 0)
5794                 return ret |= ret2;
5795         return ret;
5796 }
5797
5798 /*
5799  * Check INODE_ITEM and related ITEMs (the same inode number)
5800  * 1. check link count
5801  * 2. check inode ref/extref
5802  * 3. check dir item/index
5803  *
5804  * @ext_ref:    the EXTENDED_IREF feature
5805  *
5806  * Return 0 if no error occurred.
5807  * Return >0 for error or hit the traversal is done(by error bitmap)
5808  */
5809 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5810                             unsigned int ext_ref)
5811 {
5812         struct extent_buffer *node;
5813         struct btrfs_inode_item *ii;
5814         struct btrfs_key key;
5815         struct btrfs_key last_key;
5816         u64 inode_id;
5817         u32 mode;
5818         u64 nlink;
5819         u64 nbytes;
5820         u64 isize;
5821         u64 size = 0;
5822         u64 refs = 0;
5823         u64 extent_end = 0;
5824         u64 extent_size = 0;
5825         unsigned int dir;
5826         unsigned int nodatasum;
5827         int slot;
5828         int ret;
5829         int err = 0;
5830         char namebuf[BTRFS_NAME_LEN] = {0};
5831         u32 name_len = 0;
5832
5833         node = path->nodes[0];
5834         slot = path->slots[0];
5835
5836         btrfs_item_key_to_cpu(node, &key, slot);
5837         inode_id = key.objectid;
5838
5839         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5840                 ret = btrfs_next_item(root, path);
5841                 if (ret > 0)
5842                         err |= LAST_ITEM;
5843                 return err;
5844         }
5845
5846         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5847         isize = btrfs_inode_size(node, ii);
5848         nbytes = btrfs_inode_nbytes(node, ii);
5849         mode = btrfs_inode_mode(node, ii);
5850         dir = imode_to_type(mode) == BTRFS_FT_DIR;
5851         nlink = btrfs_inode_nlink(node, ii);
5852         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5853
5854         while (1) {
5855                 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
5856                 ret = btrfs_next_item(root, path);
5857                 if (ret < 0) {
5858                         /* out will fill 'err' rusing current statistics */
5859                         goto out;
5860                 } else if (ret > 0) {
5861                         err |= LAST_ITEM;
5862                         goto out;
5863                 }
5864
5865                 node = path->nodes[0];
5866                 slot = path->slots[0];
5867                 btrfs_item_key_to_cpu(node, &key, slot);
5868                 if (key.objectid != inode_id)
5869                         goto out;
5870
5871                 switch (key.type) {
5872                 case BTRFS_INODE_REF_KEY:
5873                         ret = check_inode_ref(root, &key, path, namebuf,
5874                                               &name_len, &refs, mode);
5875                         err |= ret;
5876                         break;
5877                 case BTRFS_INODE_EXTREF_KEY:
5878                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5879                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
5880                                         root->objectid, key.objectid,
5881                                         key.offset);
5882                         ret = check_inode_extref(root, &key, node, slot, &refs,
5883                                                  mode);
5884                         err |= ret;
5885                         break;
5886                 case BTRFS_DIR_ITEM_KEY:
5887                 case BTRFS_DIR_INDEX_KEY:
5888                         if (!dir) {
5889                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5890                                         root->objectid, inode_id,
5891                                         imode_to_type(mode), key.objectid,
5892                                         key.offset);
5893                         }
5894                         ret = check_dir_item(root, &key, path, &size, ext_ref);
5895                         err |= ret;
5896                         break;
5897                 case BTRFS_EXTENT_DATA_KEY:
5898                         if (dir) {
5899                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5900                                         root->objectid, inode_id, key.objectid,
5901                                         key.offset);
5902                         }
5903                         ret = check_file_extent(root, &key, node, slot,
5904                                                 nodatasum, &extent_size,
5905                                                 &extent_end);
5906                         err |= ret;
5907                         break;
5908                 case BTRFS_XATTR_ITEM_KEY:
5909                         break;
5910                 default:
5911                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5912                               key.objectid, key.type, key.offset);
5913                 }
5914         }
5915
5916 out:
5917         if (err & LAST_ITEM) {
5918                 btrfs_release_path(path);
5919                 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
5920                 if (ret)
5921                         return err;
5922         }
5923
5924         /* verify INODE_ITEM nlink/isize/nbytes */
5925         if (dir) {
5926                 if (repair && (err & DIR_COUNT_AGAIN)) {
5927                         err &= ~DIR_COUNT_AGAIN;
5928                         count_dir_isize(root, inode_id, &size);
5929                 }
5930
5931                 if ((nlink != 1 || refs != 1) && repair) {
5932                         ret = repair_inode_nlinks_lowmem(root, path, inode_id,
5933                                 namebuf, name_len, refs, imode_to_type(mode),
5934                                 &nlink);
5935                 }
5936
5937                 if (nlink != 1) {
5938                         err |= LINK_COUNT_ERROR;
5939                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5940                               root->objectid, inode_id, nlink);
5941                 }
5942
5943                 /*
5944                  * Just a warning, as dir inode nbytes is just an
5945                  * instructive value.
5946                  */
5947                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5948                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5949                                 root->objectid, inode_id,
5950                                 root->fs_info->nodesize);
5951                 }
5952
5953                 if (isize != size) {
5954                         if (repair)
5955                                 ret = repair_dir_isize_lowmem(root, path,
5956                                                               inode_id, size);
5957                         if (!repair || ret) {
5958                                 err |= ISIZE_ERROR;
5959                                 error(
5960                 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5961                                       root->objectid, inode_id, isize, size);
5962                         }
5963                 }
5964         } else {
5965                 if (nlink != refs) {
5966                         if (repair)
5967                                 ret = repair_inode_nlinks_lowmem(root, path,
5968                                          inode_id, namebuf, name_len, refs,
5969                                          imode_to_type(mode), &nlink);
5970                         if (!repair || ret) {
5971                                 err |= LINK_COUNT_ERROR;
5972                                 error(
5973                 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5974                                       root->objectid, inode_id, nlink, refs);
5975                         }
5976                 } else if (!nlink) {
5977                         if (repair)
5978                                 ret = repair_inode_orphan_item_lowmem(root,
5979                                                               path, inode_id);
5980                         if (!repair || ret) {
5981                                 err |= ORPHAN_ITEM;
5982                                 error("root %llu INODE[%llu] is orphan item",
5983                                       root->objectid, inode_id);
5984                         }
5985                 }
5986
5987                 if (!nbytes && !no_holes && extent_end < isize) {
5988                         if (repair)
5989                                 ret = punch_extent_hole(root, inode_id,
5990                                                 extent_end, isize - extent_end);
5991                         if (!repair || ret) {
5992                                 err |= NBYTES_ERROR;
5993                                 error(
5994         "root %llu INODE[%llu] size %llu should have a file extent hole",
5995                                       root->objectid, inode_id, isize);
5996                         }
5997                 }
5998
5999                 if (nbytes != extent_size) {
6000                         if (repair)
6001                                 ret = repair_inode_nbytes_lowmem(root, path,
6002                                                          inode_id, extent_size);
6003                         if (!repair || ret) {
6004                                 err |= NBYTES_ERROR;
6005                                 error(
6006         "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
6007                                       root->objectid, inode_id, nbytes,
6008                                       extent_size);
6009                         }
6010                 }
6011         }
6012
6013         if (err & LAST_ITEM)
6014                 btrfs_next_item(root, path);
6015         return err;
6016 }
6017
6018 /*
6019  * Insert the missing inode item and inode ref.
6020  *
6021  * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
6022  * Root dir should be handled specially because root dir is the root of fs.
6023  *
6024  * returns err (>0 or 0) after repair
6025  */
6026 static int repair_fs_first_inode(struct btrfs_root *root, int err)
6027 {
6028         struct btrfs_trans_handle *trans;
6029         struct btrfs_key key;
6030         struct btrfs_path path;
6031         int filetype = BTRFS_FT_DIR;
6032         int ret = 0;
6033
6034         btrfs_init_path(&path);
6035
6036         if (err & INODE_REF_MISSING) {
6037                 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6038                 key.type = BTRFS_INODE_REF_KEY;
6039                 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6040
6041                 trans = btrfs_start_transaction(root, 1);
6042                 if (IS_ERR(trans)) {
6043                         ret = PTR_ERR(trans);
6044                         goto out;
6045                 }
6046
6047                 btrfs_release_path(&path);
6048                 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
6049                 if (ret)
6050                         goto trans_fail;
6051
6052                 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
6053                                              BTRFS_FIRST_FREE_OBJECTID,
6054                                              BTRFS_FIRST_FREE_OBJECTID, 0);
6055                 if (ret)
6056                         goto trans_fail;
6057
6058                 printf("Add INODE_REF[%llu %llu] name %s\n",
6059                        BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
6060                        "..");
6061                 err &= ~INODE_REF_MISSING;
6062 trans_fail:
6063                 if (ret)
6064                         error("fail to insert first inode's ref");
6065                 btrfs_commit_transaction(trans, root);
6066         }
6067
6068         if (err & INODE_ITEM_MISSING) {
6069                 ret = repair_inode_item_missing(root,
6070                                         BTRFS_FIRST_FREE_OBJECTID, filetype);
6071                 if (ret)
6072                         goto out;
6073                 err &= ~INODE_ITEM_MISSING;
6074         }
6075 out:
6076         if (ret)
6077                 error("fail to repair first inode");
6078         btrfs_release_path(&path);
6079         return err;
6080 }
6081
6082 /*
6083  * check first root dir's inode_item and inode_ref
6084  *
6085  * returns 0 means no error
6086  * returns >0 means error
6087  * returns <0 means fatal error
6088  */
6089 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6090 {
6091         struct btrfs_path path;
6092         struct btrfs_key key;
6093         struct btrfs_inode_item *ii;
6094         u64 index;
6095         u32 mode;
6096         int err = 0;
6097         int ret;
6098
6099         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6100         key.type = BTRFS_INODE_ITEM_KEY;
6101         key.offset = 0;
6102
6103         /* For root being dropped, we don't need to check first inode */
6104         if (btrfs_root_refs(&root->root_item) == 0 &&
6105             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6106             BTRFS_FIRST_FREE_OBJECTID)
6107                 return 0;
6108
6109         btrfs_init_path(&path);
6110         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6111         if (ret < 0)
6112                 goto out;
6113         if (ret > 0) {
6114                 ret = 0;
6115                 err |= INODE_ITEM_MISSING;
6116         } else {
6117                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6118                                     struct btrfs_inode_item);
6119                 mode = btrfs_inode_mode(path.nodes[0], ii);
6120                 if (imode_to_type(mode) != BTRFS_FT_DIR)
6121                         err |= INODE_ITEM_MISMATCH;
6122         }
6123
6124         /* lookup first inode ref */
6125         key.offset = BTRFS_FIRST_FREE_OBJECTID;
6126         key.type = BTRFS_INODE_REF_KEY;
6127         /* special index value */
6128         index = 0;
6129
6130         ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6131         if (ret < 0)
6132                 goto out;
6133         err |= ret;
6134
6135 out:
6136         btrfs_release_path(&path);
6137
6138         if (err && repair)
6139                 err = repair_fs_first_inode(root, err);
6140
6141         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6142                 error("root dir INODE_ITEM is %s",
6143                       err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6144         if (err & INODE_REF_MISSING)
6145                 error("root dir INODE_REF is missing");
6146
6147         return ret < 0 ? ret : err;
6148 }
6149
6150 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6151                                                 u64 parent, u64 root)
6152 {
6153         struct rb_node *node;
6154         struct tree_backref *back = NULL;
6155         struct tree_backref match = {
6156                 .node = {
6157                         .is_data = 0,
6158                 },
6159         };
6160
6161         if (parent) {
6162                 match.parent = parent;
6163                 match.node.full_backref = 1;
6164         } else {
6165                 match.root = root;
6166         }
6167
6168         node = rb_search(&rec->backref_tree, &match.node.node,
6169                          (rb_compare_keys)compare_extent_backref, NULL);
6170         if (node)
6171                 back = to_tree_backref(rb_node_to_extent_backref(node));
6172
6173         return back;
6174 }
6175
6176 static struct data_backref *find_data_backref(struct extent_record *rec,
6177                                                 u64 parent, u64 root,
6178                                                 u64 owner, u64 offset,
6179                                                 int found_ref,
6180                                                 u64 disk_bytenr, u64 bytes)
6181 {
6182         struct rb_node *node;
6183         struct data_backref *back = NULL;
6184         struct data_backref match = {
6185                 .node = {
6186                         .is_data = 1,
6187                 },
6188                 .owner = owner,
6189                 .offset = offset,
6190                 .bytes = bytes,
6191                 .found_ref = found_ref,
6192                 .disk_bytenr = disk_bytenr,
6193         };
6194
6195         if (parent) {
6196                 match.parent = parent;
6197                 match.node.full_backref = 1;
6198         } else {
6199                 match.root = root;
6200         }
6201
6202         node = rb_search(&rec->backref_tree, &match.node.node,
6203                          (rb_compare_keys)compare_extent_backref, NULL);
6204         if (node)
6205                 back = to_data_backref(rb_node_to_extent_backref(node));
6206
6207         return back;
6208 }
6209 /*
6210  * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6211  * blocks and integrity of fs tree items.
6212  *
6213  * @root:         the root of the tree to be checked.
6214  * @ext_ref       feature EXTENDED_IREF is enable or not.
6215  * @account       if NOT 0 means check the tree (including tree)'s treeblocks.
6216  *                otherwise means check fs tree(s) items relationship and
6217  *                @root MUST be a fs tree root.
6218  * Returns 0      represents OK.
6219  * Returns not 0  represents error.
6220  */
6221 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6222                             struct btrfs_root *root, unsigned int ext_ref,
6223                             int check_all)
6224
6225 {
6226         struct btrfs_path path;
6227         struct node_refs nrefs;
6228         struct btrfs_root_item *root_item = &root->root_item;
6229         int ret;
6230         int level;
6231         int err = 0;
6232
6233         memset(&nrefs, 0, sizeof(nrefs));
6234         if (!check_all) {
6235                 /*
6236                  * We need to manually check the first inode item (256)
6237                  * As the following traversal function will only start from
6238                  * the first inode item in the leaf, if inode item (256) is
6239                  * missing we will skip it forever.
6240                  */
6241                 ret = check_fs_first_inode(root, ext_ref);
6242                 if (ret < 0)
6243                         return ret;
6244         }
6245
6246
6247         level = btrfs_header_level(root->node);
6248         btrfs_init_path(&path);
6249
6250         if (btrfs_root_refs(root_item) > 0 ||
6251             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6252                 path.nodes[level] = root->node;
6253                 path.slots[level] = 0;
6254                 extent_buffer_get(root->node);
6255         } else {
6256                 struct btrfs_key key;
6257
6258                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6259                 level = root_item->drop_level;
6260                 path.lowest_level = level;
6261                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6262                 if (ret < 0)
6263                         goto out;
6264                 ret = 0;
6265         }
6266
6267         while (1) {
6268                 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6269                                         ext_ref, check_all);
6270
6271                 err |= !!ret;
6272
6273                 /* if ret is negative, walk shall stop */
6274                 if (ret < 0) {
6275                         ret = err;
6276                         break;
6277                 }
6278
6279                 ret = walk_up_tree_v2(root, &path, &level);
6280                 if (ret != 0) {
6281                         /* Normal exit, reset ret to err */
6282                         ret = err;
6283                         break;
6284                 }
6285         }
6286
6287 out:
6288         btrfs_release_path(&path);
6289         return ret;
6290 }
6291
6292 /*
6293  * Iterate all items in the tree and call check_inode_item() to check.
6294  *
6295  * @root:       the root of the tree to be checked.
6296  * @ext_ref:    the EXTENDED_IREF feature
6297  *
6298  * Return 0 if no error found.
6299  * Return <0 for error.
6300  */
6301 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6302 {
6303         reset_cached_block_groups(root->fs_info);
6304         return check_btrfs_root(NULL, root, ext_ref, 0);
6305 }
6306
6307 /*
6308  * Find the relative ref for root_ref and root_backref.
6309  *
6310  * @root:       the root of the root tree.
6311  * @ref_key:    the key of the root ref.
6312  *
6313  * Return 0 if no error occurred.
6314  */
6315 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6316                           struct extent_buffer *node, int slot)
6317 {
6318         struct btrfs_path path;
6319         struct btrfs_key key;
6320         struct btrfs_root_ref *ref;
6321         struct btrfs_root_ref *backref;
6322         char ref_name[BTRFS_NAME_LEN] = {0};
6323         char backref_name[BTRFS_NAME_LEN] = {0};
6324         u64 ref_dirid;
6325         u64 ref_seq;
6326         u32 ref_namelen;
6327         u64 backref_dirid;
6328         u64 backref_seq;
6329         u32 backref_namelen;
6330         u32 len;
6331         int ret;
6332         int err = 0;
6333
6334         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6335         ref_dirid = btrfs_root_ref_dirid(node, ref);
6336         ref_seq = btrfs_root_ref_sequence(node, ref);
6337         ref_namelen = btrfs_root_ref_name_len(node, ref);
6338
6339         if (ref_namelen <= BTRFS_NAME_LEN) {
6340                 len = ref_namelen;
6341         } else {
6342                 len = BTRFS_NAME_LEN;
6343                 warning("%s[%llu %llu] ref_name too long",
6344                         ref_key->type == BTRFS_ROOT_REF_KEY ?
6345                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
6346                         ref_key->offset);
6347         }
6348         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6349
6350         /* Find relative root_ref */
6351         key.objectid = ref_key->offset;
6352         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6353         key.offset = ref_key->objectid;
6354
6355         btrfs_init_path(&path);
6356         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6357         if (ret) {
6358                 err |= ROOT_REF_MISSING;
6359                 error("%s[%llu %llu] couldn't find relative ref",
6360                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6361                       "ROOT_REF" : "ROOT_BACKREF",
6362                       ref_key->objectid, ref_key->offset);
6363                 goto out;
6364         }
6365
6366         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6367                                  struct btrfs_root_ref);
6368         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6369         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6370         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6371
6372         if (backref_namelen <= BTRFS_NAME_LEN) {
6373                 len = backref_namelen;
6374         } else {
6375                 len = BTRFS_NAME_LEN;
6376                 warning("%s[%llu %llu] ref_name too long",
6377                         key.type == BTRFS_ROOT_REF_KEY ?
6378                         "ROOT_REF" : "ROOT_BACKREF",
6379                         key.objectid, key.offset);
6380         }
6381         read_extent_buffer(path.nodes[0], backref_name,
6382                            (unsigned long)(backref + 1), len);
6383
6384         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6385             ref_namelen != backref_namelen ||
6386             strncmp(ref_name, backref_name, len)) {
6387                 err |= ROOT_REF_MISMATCH;
6388                 error("%s[%llu %llu] mismatch relative ref",
6389                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6390                       "ROOT_REF" : "ROOT_BACKREF",
6391                       ref_key->objectid, ref_key->offset);
6392         }
6393 out:
6394         btrfs_release_path(&path);
6395         return err;
6396 }
6397
6398 /*
6399  * Check all fs/file tree in low_memory mode.
6400  *
6401  * 1. for fs tree root item, call check_fs_root_v2()
6402  * 2. for fs tree root ref/backref, call check_root_ref()
6403  *
6404  * Return 0 if no error occurred.
6405  */
6406 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6407 {
6408         struct btrfs_root *tree_root = fs_info->tree_root;
6409         struct btrfs_root *cur_root = NULL;
6410         struct btrfs_path path;
6411         struct btrfs_key key;
6412         struct extent_buffer *node;
6413         unsigned int ext_ref;
6414         int slot;
6415         int ret;
6416         int err = 0;
6417
6418         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6419
6420         btrfs_init_path(&path);
6421         key.objectid = BTRFS_FS_TREE_OBJECTID;
6422         key.offset = 0;
6423         key.type = BTRFS_ROOT_ITEM_KEY;
6424
6425         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6426         if (ret < 0) {
6427                 err = ret;
6428                 goto out;
6429         } else if (ret > 0) {
6430                 err = -ENOENT;
6431                 goto out;
6432         }
6433
6434         while (1) {
6435                 node = path.nodes[0];
6436                 slot = path.slots[0];
6437                 btrfs_item_key_to_cpu(node, &key, slot);
6438                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6439                         goto out;
6440                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6441                     fs_root_objectid(key.objectid)) {
6442                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6443                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6444                                                                        &key);
6445                         } else {
6446                                 key.offset = (u64)-1;
6447                                 cur_root = btrfs_read_fs_root(fs_info, &key);
6448                         }
6449
6450                         if (IS_ERR(cur_root)) {
6451                                 error("Fail to read fs/subvol tree: %lld",
6452                                       key.objectid);
6453                                 err = -EIO;
6454                                 goto next;
6455                         }
6456
6457                         ret = check_fs_root_v2(cur_root, ext_ref);
6458                         err |= ret;
6459
6460                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6461                                 btrfs_free_fs_root(cur_root);
6462                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6463                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
6464                         ret = check_root_ref(tree_root, &key, node, slot);
6465                         err |= ret;
6466                 }
6467 next:
6468                 ret = btrfs_next_item(tree_root, &path);
6469                 if (ret > 0)
6470                         goto out;
6471                 if (ret < 0) {
6472                         err = ret;
6473                         goto out;
6474                 }
6475         }
6476
6477 out:
6478         btrfs_release_path(&path);
6479         return err;
6480 }
6481
6482 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6483                           struct cache_tree *root_cache)
6484 {
6485         int ret;
6486
6487         if (!ctx.progress_enabled)
6488                 fprintf(stderr, "checking fs roots\n");
6489         if (check_mode == CHECK_MODE_LOWMEM)
6490                 ret = check_fs_roots_v2(fs_info);
6491         else
6492                 ret = check_fs_roots(fs_info, root_cache);
6493
6494         return ret;
6495 }
6496
/*
 * Verify that every backref recorded for @rec is consistent.
 *
 * Walks rec->backref_tree and flags:
 *  - a backref without found_extent_tree set,
 *  - a tree backref without found_ref set,
 *  - a data backref whose found_ref differs from num_refs, whose disk_bytenr
 *    differs from rec->start, or whose bytes differ from rec->nr,
 *  - a total reference count (1 per tree backref, found_ref per data
 *    backref) that differs from rec->refs.
 *
 * @rec:        the extent record to verify.
 * @print_errs: if 0, return at the first problem without printing; otherwise
 *              report every problem on stderr and keep going.
 *
 * Returns 0 if everything matches, 1 otherwise.
 */
static int all_backpointers_checked(struct extent_record *rec, int print_errs)
{
        struct extent_backref *back, *tmp;
        struct tree_backref *tback;
        struct data_backref *dback;
        u64 found = 0;
        int err = 0;

        rbtree_postorder_for_each_entry_safe(back, tmp,
                                             &rec->backref_tree, node) {
                /* The backref was never marked as found in the extent tree. */
                if (!back->found_extent_tree) {
                        err = 1;
                        if (!print_errs)
                                goto out;
                        if (back->is_data) {
                                dback = to_data_backref(back);
                                fprintf(stderr, "Data backref %llu %s %llu"
                                        " owner %llu offset %llu num_refs %lu"
                                        " not found in extent tree\n",
                                        (unsigned long long)rec->start,
                                        back->full_backref ?
                                        "parent" : "root",
                                        back->full_backref ?
                                        (unsigned long long)dback->parent:
                                        (unsigned long long)dback->root,
                                        (unsigned long long)dback->owner,
                                        (unsigned long long)dback->offset,
                                        (unsigned long)dback->num_refs);
                        } else {
                                tback = to_tree_backref(back);
                                fprintf(stderr, "Tree backref %llu parent %llu"
                                        " root %llu not found in extent tree\n",
                                        (unsigned long long)rec->start,
                                        (unsigned long long)tback->parent,
                                        (unsigned long long)tback->root);
                        }
                }
                /* A tree backref that nothing referenced back. */
                if (!back->is_data && !back->found_ref) {
                        err = 1;
                        if (!print_errs)
                                goto out;
                        tback = to_tree_backref(back);
                        fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
                                (unsigned long long)rec->start,
                                back->full_backref ? "parent" : "root",
                                back->full_backref ?
                                (unsigned long long)tback->parent :
                                (unsigned long long)tback->root, back);
                }
                /* Data backrefs carry counts and geometry to cross-check. */
                if (back->is_data) {
                        dback = to_data_backref(back);
                        if (dback->found_ref != dback->num_refs) {
                                err = 1;
                                if (!print_errs)
                                        goto out;
                                fprintf(stderr, "Incorrect local backref count"
                                        " on %llu %s %llu owner %llu"
                                        " offset %llu found %u wanted %u back %p\n",
                                        (unsigned long long)rec->start,
                                        back->full_backref ?
                                        "parent" : "root",
                                        back->full_backref ?
                                        (unsigned long long)dback->parent:
                                        (unsigned long long)dback->root,
                                        (unsigned long long)dback->owner,
                                        (unsigned long long)dback->offset,
                                        dback->found_ref, dback->num_refs, back);
                        }
                        if (dback->disk_bytenr != rec->start) {
                                err = 1;
                                if (!print_errs)
                                        goto out;
                                fprintf(stderr, "Backref disk bytenr does not"
                                        " match extent record, bytenr=%llu, "
                                        "ref bytenr=%llu\n",
                                        (unsigned long long)rec->start,
                                        (unsigned long long)dback->disk_bytenr);
                        }

                        if (dback->bytes != rec->nr) {
                                err = 1;
                                if (!print_errs)
                                        goto out;
                                fprintf(stderr, "Backref bytes do not match "
                                        "extent backref, bytenr=%llu, ref "
                                        "bytes=%llu, backref bytes=%llu\n",
                                        (unsigned long long)rec->start,
                                        (unsigned long long)rec->nr,
                                        (unsigned long long)dback->bytes);
                        }
                }
                /* Accumulate the reference total compared against rec->refs. */
                if (!back->is_data) {
                        found += 1;
                } else {
                        dback = to_data_backref(back);
                        found += dback->found_ref;
                }
        }
        /* The summed references must equal the count stored in the record. */
        if (found != rec->refs) {
                err = 1;
                if (!print_errs)
                        goto out;
                fprintf(stderr, "Incorrect global backref count "
                        "on %llu found %llu wanted %llu\n",
                        (unsigned long long)rec->start,
                        (unsigned long long)found,
                        (unsigned long long)rec->refs);
        }
out:
        return err;
}
6608
/* rb_free_nodes() callback: free the extent_backref that embeds @node. */
static void __free_one_backref(struct rb_node *node)
{
        free(rb_node_to_extent_backref(node));
}
6615
6616 static void free_all_extent_backrefs(struct extent_record *rec)
6617 {
6618         rb_free_nodes(&rec->backref_tree, __free_one_backref);
6619 }
6620
6621 static void free_extent_record_cache(struct cache_tree *extent_cache)
6622 {
6623         struct cache_extent *cache;
6624         struct extent_record *rec;
6625
6626         while (1) {
6627                 cache = first_cache_extent(extent_cache);
6628                 if (!cache)
6629                         break;
6630                 rec = container_of(cache, struct extent_record, cache);
6631                 remove_cache_extent(extent_cache, cache);
6632                 free_all_extent_backrefs(rec);
6633                 free(rec);
6634         }
6635 }
6636
6637 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6638                                  struct extent_record *rec)
6639 {
6640         if (rec->content_checked && rec->owner_ref_checked &&
6641             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6642             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6643             !rec->bad_full_backref && !rec->crossing_stripes &&
6644             !rec->wrong_chunk_type) {
6645                 remove_cache_extent(extent_cache, &rec->cache);
6646                 free_all_extent_backrefs(rec);
6647                 list_del_init(&rec->list);
6648                 free(rec);
6649         }
6650         return 0;
6651 }
6652
6653 static int check_owner_ref(struct btrfs_root *root,
6654                             struct extent_record *rec,
6655                             struct extent_buffer *buf)
6656 {
6657         struct extent_backref *node, *tmp;
6658         struct tree_backref *back;
6659         struct btrfs_root *ref_root;
6660         struct btrfs_key key;
6661         struct btrfs_path path;
6662         struct extent_buffer *parent;
6663         int level;
6664         int found = 0;
6665         int ret;
6666
6667         rbtree_postorder_for_each_entry_safe(node, tmp,
6668                                              &rec->backref_tree, node) {
6669                 if (node->is_data)
6670                         continue;
6671                 if (!node->found_ref)
6672                         continue;
6673                 if (node->full_backref)
6674                         continue;
6675                 back = to_tree_backref(node);
6676                 if (btrfs_header_owner(buf) == back->root)
6677                         return 0;
6678         }
6679         BUG_ON(rec->is_root);
6680
6681         /* try to find the block by search corresponding fs tree */
6682         key.objectid = btrfs_header_owner(buf);
6683         key.type = BTRFS_ROOT_ITEM_KEY;
6684         key.offset = (u64)-1;
6685
6686         ref_root = btrfs_read_fs_root(root->fs_info, &key);
6687         if (IS_ERR(ref_root))
6688                 return 1;
6689
6690         level = btrfs_header_level(buf);
6691         if (level == 0)
6692                 btrfs_item_key_to_cpu(buf, &key, 0);
6693         else
6694                 btrfs_node_key_to_cpu(buf, &key, 0);
6695
6696         btrfs_init_path(&path);
6697         path.lowest_level = level + 1;
6698         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6699         if (ret < 0)
6700                 return 0;
6701
6702         parent = path.nodes[level + 1];
6703         if (parent && buf->start == btrfs_node_blockptr(parent,
6704                                                         path.slots[level + 1]))
6705                 found = 1;
6706
6707         btrfs_release_path(&path);
6708         return found ? 0 : 1;
6709 }
6710
6711 static int is_extent_tree_record(struct extent_record *rec)
6712 {
6713         struct extent_backref *node, *tmp;
6714         struct tree_backref *back;
6715         int is_extent = 0;
6716
6717         rbtree_postorder_for_each_entry_safe(node, tmp,
6718                                              &rec->backref_tree, node) {
6719                 if (node->is_data)
6720                         return 0;
6721                 back = to_tree_backref(node);
6722                 if (node->full_backref)
6723                         return 0;
6724                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
6725                         is_extent = 1;
6726         }
6727         return is_extent;
6728 }
6729
6730
6731 static int record_bad_block_io(struct btrfs_fs_info *info,
6732                                struct cache_tree *extent_cache,
6733                                u64 start, u64 len)
6734 {
6735         struct extent_record *rec;
6736         struct cache_extent *cache;
6737         struct btrfs_key key;
6738
6739         cache = lookup_cache_extent(extent_cache, start, len);
6740         if (!cache)
6741                 return 0;
6742
6743         rec = container_of(cache, struct extent_record, cache);
6744         if (!is_extent_tree_record(rec))
6745                 return 0;
6746
6747         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6748         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
6749 }
6750
/*
 * Swap the entries at @slot and @slot + 1 of the tree block @buf.
 *
 * For a node (header level != 0) the two key pointers are exchanged.  For a
 * leaf the item data, the item offsets/sizes and the item keys are exchanged.
 *
 * @root:       tree the block belongs to, forwarded to the key helpers.
 * @path:       path to the block; path->slots[0] is overwritten in the leaf
 *              case.
 * @buf:        the block to modify.
 * @slot:       index of the first of the two adjacent entries.
 *
 * Returns 0 on success, -ENOMEM if a temporary buffer cannot be allocated.
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
                       struct extent_buffer *buf, int slot)
{
        if (btrfs_header_level(buf)) {
                struct btrfs_key_ptr ptr1, ptr2;

                /* Read both key pointers, then write them back crossed. */
                read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
                                   sizeof(struct btrfs_key_ptr));
                read_extent_buffer(buf, &ptr2,
                                   btrfs_node_key_ptr_offset(slot + 1),
                                   sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr1,
                                    btrfs_node_key_ptr_offset(slot + 1),
                                    sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr2,
                                    btrfs_node_key_ptr_offset(slot),
                                    sizeof(struct btrfs_key_ptr));
                /* The first key of the node changed: fix up the levels above. */
                if (slot == 0) {
                        struct btrfs_disk_key key;
                        btrfs_node_key(buf, &key, 0);
                        btrfs_fixup_low_keys(root, path, &key,
                                             btrfs_header_level(buf) + 1);
                }
        } else {
                struct btrfs_item *item1, *item2;
                struct btrfs_key k1, k2;
                char *item1_data, *item2_data;
                u32 item1_offset, item2_offset, item1_size, item2_size;

                item1 = btrfs_item_nr(slot);
                item2 = btrfs_item_nr(slot + 1);
                btrfs_item_key_to_cpu(buf, &k1, slot);
                btrfs_item_key_to_cpu(buf, &k2, slot + 1);
                item1_offset = btrfs_item_offset(buf, item1);
                item2_offset = btrfs_item_offset(buf, item2);
                item1_size = btrfs_item_size(buf, item1);
                item2_size = btrfs_item_size(buf, item2);

                item1_data = malloc(item1_size);
                if (!item1_data)
                        return -ENOMEM;
                item2_data = malloc(item2_size);
                if (!item2_data) {
                        free(item1_data);
                        return -ENOMEM;
                }

                /*
                 * NOTE(review): the item offsets are passed to
                 * read/write_extent_buffer() as-is, while fix_item_offset()
                 * adds btrfs_leaf_data(buf) to item offsets before touching
                 * the buffer - confirm which base is intended here.
                 */
                read_extent_buffer(buf, item1_data, item1_offset, item1_size);
                read_extent_buffer(buf, item2_data, item2_offset, item2_size);

                /*
                 * NOTE(review): each write copies the *other* item's size out
                 * of a buffer allocated with its own size, so the larger
                 * length reads past the smaller allocation when the two
                 * sizes differ - confirm.
                 */
                write_extent_buffer(buf, item1_data, item2_offset, item2_size);
                write_extent_buffer(buf, item2_data, item1_offset, item1_size);
                free(item1_data);
                free(item2_data);

                /* Exchange the offset/size stored in the two item headers. */
                btrfs_set_item_offset(buf, item1, item2_offset);
                btrfs_set_item_offset(buf, item2, item1_offset);
                btrfs_set_item_size(buf, item1, item2_size);
                btrfs_set_item_size(buf, item2, item1_size);

                /* Exchange the keys; the helper acts on path->slots[0]. */
                path->slots[0] = slot;
                btrfs_set_item_key_unsafe(root, path, &k2);
                path->slots[0] = slot + 1;
                btrfs_set_item_key_unsafe(root, path, &k1);
        }
        return 0;
}
6818
6819 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6820 {
6821         struct extent_buffer *buf;
6822         struct btrfs_key k1, k2;
6823         int i;
6824         int level = path->lowest_level;
6825         int ret = -EIO;
6826
6827         buf = path->nodes[level];
6828         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6829                 if (level) {
6830                         btrfs_node_key_to_cpu(buf, &k1, i);
6831                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
6832                 } else {
6833                         btrfs_item_key_to_cpu(buf, &k1, i);
6834                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
6835                 }
6836                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6837                         continue;
6838                 ret = swap_values(root, path, buf, i);
6839                 if (ret)
6840                         break;
6841                 btrfs_mark_buffer_dirty(buf);
6842                 i = 0;
6843         }
6844         return ret;
6845 }
6846
6847 static int delete_bogus_item(struct btrfs_root *root,
6848                              struct btrfs_path *path,
6849                              struct extent_buffer *buf, int slot)
6850 {
6851         struct btrfs_key key;
6852         int nritems = btrfs_header_nritems(buf);
6853
6854         btrfs_item_key_to_cpu(buf, &key, slot);
6855
6856         /* These are all the keys we can deal with missing. */
6857         if (key.type != BTRFS_DIR_INDEX_KEY &&
6858             key.type != BTRFS_EXTENT_ITEM_KEY &&
6859             key.type != BTRFS_METADATA_ITEM_KEY &&
6860             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6861             key.type != BTRFS_EXTENT_DATA_REF_KEY)
6862                 return -1;
6863
6864         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6865                (unsigned long long)key.objectid, key.type,
6866                (unsigned long long)key.offset, slot, buf->start);
6867         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6868                               btrfs_item_nr_offset(slot + 1),
6869                               sizeof(struct btrfs_item) *
6870                               (nritems - slot - 1));
6871         btrfs_set_header_nritems(buf, nritems - 1);
6872         if (slot == 0) {
6873                 struct btrfs_disk_key disk_key;
6874
6875                 btrfs_item_key(buf, &disk_key, 0);
6876                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6877         }
6878         btrfs_mark_buffer_dirty(buf);
6879         return 0;
6880 }
6881
/*
 * Repair the item data offsets of the leaf at path->nodes[0].
 *
 * The first item must end at BTRFS_LEAF_DATA_SIZE and every later item must
 * end where the previous item begins.  An item that ends too far is handed to
 * delete_bogus_item() and the scan restarts; an item that ends too early has
 * its data moved towards the end of the leaf to close the gap.
 *
 * @root:       tree the leaf belongs to.
 * @path:       path to the leaf; path->lowest_level must be 0.
 *
 * Returns 0.  A failure sets ret to -EIO, which trips the final BUG_ON()
 * before the return (presumably aborting the program - see the comment
 * there).
 */
static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
{
        struct extent_buffer *buf;
        int i;
        int ret = 0;

        /* We should only get this for leaves */
        BUG_ON(path->lowest_level);
        buf = path->nodes[0];
again:
        for (i = 0; i < btrfs_header_nritems(buf); i++) {
                unsigned int shift = 0, offset;

                if (i == 0 && btrfs_item_end_nr(buf, i) !=
                    BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
                        /* First item: its end must match the leaf data size. */
                        if (btrfs_item_end_nr(buf, i) >
                            BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
                                /* Ends past the leaf: drop it and rescan. */
                                ret = delete_bogus_item(root, path, buf, i);
                                if (!ret)
                                        goto again;
                                fprintf(stderr, "item is off the end of the "
                                        "leaf, can't fix\n");
                                ret = -EIO;
                                break;
                        }
                        shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
                                btrfs_item_end_nr(buf, i);
                } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
                           btrfs_item_offset_nr(buf, i - 1)) {
                        /* Later item: its end must meet the previous start. */
                        if (btrfs_item_end_nr(buf, i) >
                            btrfs_item_offset_nr(buf, i - 1)) {
                                /* Overlaps the previous item: drop and rescan. */
                                ret = delete_bogus_item(root, path, buf, i);
                                if (!ret)
                                        goto again;
                                fprintf(stderr, "items overlap, can't fix\n");
                                ret = -EIO;
                                break;
                        }
                        shift = btrfs_item_offset_nr(buf, i - 1) -
                                btrfs_item_end_nr(buf, i);
                }
                /* No gap after this item: nothing to move. */
                if (!shift)
                        continue;

                printf("Shifting item nr %d by %u bytes in block %llu\n",
                       i, shift, (unsigned long long)buf->start);
                offset = btrfs_item_offset_nr(buf, i);
                /* Move the item data up by the gap, then record the new offset. */
                memmove_extent_buffer(buf,
                                      btrfs_leaf_data(buf) + offset + shift,
                                      btrfs_leaf_data(buf) + offset,
                                      btrfs_item_size_nr(buf, i));
                btrfs_set_item_offset(buf, btrfs_item_nr(i),
                                      offset + shift);
                btrfs_mark_buffer_dirty(buf);
        }

        /*
         * We may have moved things, in which case we want to exit so we don't
         * write those changes out.  Once we have proper abort functionality in
         * progs this can be changed to something nicer.
         */
        BUG_ON(ret);
        return ret;
}
6946
6947 /*
6948  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
6949  * then just return -EIO.
6950  */
6951 static int try_to_fix_bad_block(struct btrfs_root *root,
6952                                 struct extent_buffer *buf,
6953                                 enum btrfs_tree_block_status status)
6954 {
6955         struct btrfs_trans_handle *trans;
6956         struct ulist *roots;
6957         struct ulist_node *node;
6958         struct btrfs_root *search_root;
6959         struct btrfs_path path;
6960         struct ulist_iterator iter;
6961         struct btrfs_key root_key, key;
6962         int ret;
6963
6964         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6965             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6966                 return -EIO;
6967
6968         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6969         if (ret)
6970                 return -EIO;
6971
6972         btrfs_init_path(&path);
6973         ULIST_ITER_INIT(&iter);
6974         while ((node = ulist_next(roots, &iter))) {
6975                 root_key.objectid = node->val;
6976                 root_key.type = BTRFS_ROOT_ITEM_KEY;
6977                 root_key.offset = (u64)-1;
6978
6979                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6980                 if (IS_ERR(root)) {
6981                         ret = -EIO;
6982                         break;
6983                 }
6984
6985
6986                 trans = btrfs_start_transaction(search_root, 0);
6987                 if (IS_ERR(trans)) {
6988                         ret = PTR_ERR(trans);
6989                         break;
6990                 }
6991
6992                 path.lowest_level = btrfs_header_level(buf);
6993                 path.skip_check_block = 1;
6994                 if (path.lowest_level)
6995                         btrfs_node_key_to_cpu(buf, &key, 0);
6996                 else
6997                         btrfs_item_key_to_cpu(buf, &key, 0);
6998                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6999                 if (ret) {
7000                         ret = -EIO;
7001                         btrfs_commit_transaction(trans, search_root);
7002                         break;
7003                 }
7004                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
7005                         ret = fix_key_order(search_root, &path);
7006                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7007                         ret = fix_item_offset(search_root, &path);
7008                 if (ret) {
7009                         btrfs_commit_transaction(trans, search_root);
7010                         break;
7011                 }
7012                 btrfs_release_path(&path);
7013                 btrfs_commit_transaction(trans, search_root);
7014         }
7015         ulist_free(roots);
7016         btrfs_release_path(&path);
7017         return ret;
7018 }
7019
7020 static int check_block(struct btrfs_root *root,
7021                        struct cache_tree *extent_cache,
7022                        struct extent_buffer *buf, u64 flags)
7023 {
7024         struct extent_record *rec;
7025         struct cache_extent *cache;
7026         struct btrfs_key key;
7027         enum btrfs_tree_block_status status;
7028         int ret = 0;
7029         int level;
7030
7031         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
7032         if (!cache)
7033                 return 1;
7034         rec = container_of(cache, struct extent_record, cache);
7035         rec->generation = btrfs_header_generation(buf);
7036
7037         level = btrfs_header_level(buf);
7038         if (btrfs_header_nritems(buf) > 0) {
7039
7040                 if (level == 0)
7041                         btrfs_item_key_to_cpu(buf, &key, 0);
7042                 else
7043                         btrfs_node_key_to_cpu(buf, &key, 0);
7044
7045                 rec->info_objectid = key.objectid;
7046         }
7047         rec->info_level = level;
7048
7049         if (btrfs_is_leaf(buf))
7050                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
7051         else
7052                 status = btrfs_check_node(root, &rec->parent_key, buf);
7053
7054         if (status != BTRFS_TREE_BLOCK_CLEAN) {
7055                 if (repair)
7056                         status = try_to_fix_bad_block(root, buf, status);
7057                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7058                         ret = -EIO;
7059                         fprintf(stderr, "bad block %llu\n",
7060                                 (unsigned long long)buf->start);
7061                 } else {
7062                         /*
7063                          * Signal to callers we need to start the scan over
7064                          * again since we'll have cowed blocks.
7065                          */
7066                         ret = -EAGAIN;
7067                 }
7068         } else {
7069                 rec->content_checked = 1;
7070                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7071                         rec->owner_ref_checked = 1;
7072                 else {
7073                         ret = check_owner_ref(root, rec, buf);
7074                         if (!ret)
7075                                 rec->owner_ref_checked = 1;
7076                 }
7077         }
7078         if (!ret)
7079                 maybe_free_extent_rec(extent_cache, rec);
7080         return ret;
7081 }
7082
#if 0
/*
 * Compiled out.  Walks the rec->backrefs list looking for a tree backref
 * that matches either @parent (full backref, when @parent is non-zero) or
 * @root (normal backref).  Returns NULL when nothing matches.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
						u64 parent, u64 root)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs; pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct tree_backref *back;

		if (node->is_data)
			continue;
		back = to_tree_backref(node);

		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
		} else {
			if (!node->full_backref && back->root == root)
				return back;
		}
	}
	return NULL;
}
#endif
7112
7113 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7114                                                 u64 parent, u64 root)
7115 {
7116         struct tree_backref *ref = malloc(sizeof(*ref));
7117
7118         if (!ref)
7119                 return NULL;
7120         memset(&ref->node, 0, sizeof(ref->node));
7121         if (parent > 0) {
7122                 ref->parent = parent;
7123                 ref->node.full_backref = 1;
7124         } else {
7125                 ref->root = root;
7126                 ref->node.full_backref = 0;
7127         }
7128
7129         return ref;
7130 }
7131
#if 0
/*
 * Compiled out.  Walks the rec->backrefs list looking for a data backref.
 * With a non-zero @parent only full backrefs with that parent match;
 * otherwise root/owner/offset must match, and when both the caller and the
 * candidate have a found ref, bytes and disk_bytenr must match as well.
 * Returns NULL when nothing matches.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
						u64 parent, u64 root,
						u64 owner, u64 offset,
						int found_ref,
						u64 disk_bytenr, u64 bytes)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs; pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct data_backref *back;

		if (!node->is_data)
			continue;
		back = to_data_backref(node);

		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
			continue;
		}

		if (node->full_backref)
			continue;
		if (back->root != root || back->owner != owner ||
		    back->offset != offset)
			continue;
		if (found_ref && node->found_ref &&
		    (back->bytes != bytes ||
		    back->disk_bytenr != disk_bytenr))
			continue;
		return back;
	}
	return NULL;
}
#endif
7170
7171 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7172                                                 u64 parent, u64 root,
7173                                                 u64 owner, u64 offset,
7174                                                 u64 max_size)
7175 {
7176         struct data_backref *ref = malloc(sizeof(*ref));
7177
7178         if (!ref)
7179                 return NULL;
7180         memset(&ref->node, 0, sizeof(ref->node));
7181         ref->node.is_data = 1;
7182
7183         if (parent > 0) {
7184                 ref->parent = parent;
7185                 ref->owner = 0;
7186                 ref->offset = 0;
7187                 ref->node.full_backref = 1;
7188         } else {
7189                 ref->root = root;
7190                 ref->owner = owner;
7191                 ref->offset = offset;
7192                 ref->node.full_backref = 0;
7193         }
7194         ref->bytes = max_size;
7195         ref->found_ref = 0;
7196         ref->num_refs = 0;
7197         if (max_size > rec->max_size)
7198                 rec->max_size = max_size;
7199         return ref;
7200 }
7201
7202 /* Check if the type of extent matches with its chunk */
7203 static void check_extent_type(struct extent_record *rec)
7204 {
7205         struct btrfs_block_group_cache *bg_cache;
7206
7207         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7208         if (!bg_cache)
7209                 return;
7210
7211         /* data extent, check chunk directly*/
7212         if (!rec->metadata) {
7213                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7214                         rec->wrong_chunk_type = 1;
7215                 return;
7216         }
7217
7218         /* metadata extent, check the obvious case first */
7219         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7220                                  BTRFS_BLOCK_GROUP_METADATA))) {
7221                 rec->wrong_chunk_type = 1;
7222                 return;
7223         }
7224
7225         /*
7226          * Check SYSTEM extent, as it's also marked as metadata, we can only
7227          * make sure it's a SYSTEM extent by its backref
7228          */
7229         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7230                 struct extent_backref *node;
7231                 struct tree_backref *tback;
7232                 u64 bg_type;
7233
7234                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7235                 if (node->is_data) {
7236                         /* tree block shouldn't have data backref */
7237                         rec->wrong_chunk_type = 1;
7238                         return;
7239                 }
7240                 tback = container_of(node, struct tree_backref, node);
7241
7242                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7243                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7244                 else
7245                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
7246                 if (!(bg_cache->flags & bg_type))
7247                         rec->wrong_chunk_type = 1;
7248         }
7249 }
7250
7251 /*
7252  * Allocate a new extent record, fill default values from @tmpl and insert int
7253  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7254  * the cache, otherwise it fails.
7255  */
7256 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7257                 struct extent_record *tmpl)
7258 {
7259         struct extent_record *rec;
7260         int ret = 0;
7261
7262         BUG_ON(tmpl->max_size == 0);
7263         rec = malloc(sizeof(*rec));
7264         if (!rec)
7265                 return -ENOMEM;
7266         rec->start = tmpl->start;
7267         rec->max_size = tmpl->max_size;
7268         rec->nr = max(tmpl->nr, tmpl->max_size);
7269         rec->found_rec = tmpl->found_rec;
7270         rec->content_checked = tmpl->content_checked;
7271         rec->owner_ref_checked = tmpl->owner_ref_checked;
7272         rec->num_duplicates = 0;
7273         rec->metadata = tmpl->metadata;
7274         rec->flag_block_full_backref = FLAG_UNSET;
7275         rec->bad_full_backref = 0;
7276         rec->crossing_stripes = 0;
7277         rec->wrong_chunk_type = 0;
7278         rec->is_root = tmpl->is_root;
7279         rec->refs = tmpl->refs;
7280         rec->extent_item_refs = tmpl->extent_item_refs;
7281         rec->parent_generation = tmpl->parent_generation;
7282         INIT_LIST_HEAD(&rec->backrefs);
7283         INIT_LIST_HEAD(&rec->dups);
7284         INIT_LIST_HEAD(&rec->list);
7285         rec->backref_tree = RB_ROOT;
7286         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7287         rec->cache.start = tmpl->start;
7288         rec->cache.size = tmpl->nr;
7289         ret = insert_cache_extent(extent_cache, &rec->cache);
7290         if (ret) {
7291                 free(rec);
7292                 return ret;
7293         }
7294         bytes_used += rec->nr;
7295
7296         if (tmpl->metadata)
7297                 rec->crossing_stripes = check_crossing_stripes(global_info,
7298                                 rec->start, global_info->nodesize);
7299         check_extent_type(rec);
7300         return ret;
7301 }
7302
7303 /*
7304  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7305  * some are hints:
7306  * - refs              - if found, increase refs
7307  * - is_root           - if found, set
7308  * - content_checked   - if found, set
7309  * - owner_ref_checked - if found, set
7310  *
7311  * If not found, create a new one, initialize and insert.
7312  */
7313 static int add_extent_rec(struct cache_tree *extent_cache,
7314                 struct extent_record *tmpl)
7315 {
7316         struct extent_record *rec;
7317         struct cache_extent *cache;
7318         int ret = 0;
7319         int dup = 0;
7320
7321         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7322         if (cache) {
7323                 rec = container_of(cache, struct extent_record, cache);
7324                 if (tmpl->refs)
7325                         rec->refs++;
7326                 if (rec->nr == 1)
7327                         rec->nr = max(tmpl->nr, tmpl->max_size);
7328
7329                 /*
7330                  * We need to make sure to reset nr to whatever the extent
7331                  * record says was the real size, this way we can compare it to
7332                  * the backrefs.
7333                  */
7334                 if (tmpl->found_rec) {
7335                         if (tmpl->start != rec->start || rec->found_rec) {
7336                                 struct extent_record *tmp;
7337
7338                                 dup = 1;
7339                                 if (list_empty(&rec->list))
7340                                         list_add_tail(&rec->list,
7341                                                       &duplicate_extents);
7342
7343                                 /*
7344                                  * We have to do this song and dance in case we
7345                                  * find an extent record that falls inside of
7346                                  * our current extent record but does not have
7347                                  * the same objectid.
7348                                  */
7349                                 tmp = malloc(sizeof(*tmp));
7350                                 if (!tmp)
7351                                         return -ENOMEM;
7352                                 tmp->start = tmpl->start;
7353                                 tmp->max_size = tmpl->max_size;
7354                                 tmp->nr = tmpl->nr;
7355                                 tmp->found_rec = 1;
7356                                 tmp->metadata = tmpl->metadata;
7357                                 tmp->extent_item_refs = tmpl->extent_item_refs;
7358                                 INIT_LIST_HEAD(&tmp->list);
7359                                 list_add_tail(&tmp->list, &rec->dups);
7360                                 rec->num_duplicates++;
7361                         } else {
7362                                 rec->nr = tmpl->nr;
7363                                 rec->found_rec = 1;
7364                         }
7365                 }
7366
7367                 if (tmpl->extent_item_refs && !dup) {
7368                         if (rec->extent_item_refs) {
7369                                 fprintf(stderr, "block %llu rec "
7370                                         "extent_item_refs %llu, passed %llu\n",
7371                                         (unsigned long long)tmpl->start,
7372                                         (unsigned long long)
7373                                                         rec->extent_item_refs,
7374                                         (unsigned long long)tmpl->extent_item_refs);
7375                         }
7376                         rec->extent_item_refs = tmpl->extent_item_refs;
7377                 }
7378                 if (tmpl->is_root)
7379                         rec->is_root = 1;
7380                 if (tmpl->content_checked)
7381                         rec->content_checked = 1;
7382                 if (tmpl->owner_ref_checked)
7383                         rec->owner_ref_checked = 1;
7384                 memcpy(&rec->parent_key, &tmpl->parent_key,
7385                                 sizeof(tmpl->parent_key));
7386                 if (tmpl->parent_generation)
7387                         rec->parent_generation = tmpl->parent_generation;
7388                 if (rec->max_size < tmpl->max_size)
7389                         rec->max_size = tmpl->max_size;
7390
7391                 /*
7392                  * A metadata extent can't cross stripe_len boundary, otherwise
7393                  * kernel scrub won't be able to handle it.
7394                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7395                  * it.
7396                  */
7397                 if (tmpl->metadata)
7398                         rec->crossing_stripes = check_crossing_stripes(
7399                                         global_info, rec->start,
7400                                         global_info->nodesize);
7401                 check_extent_type(rec);
7402                 maybe_free_extent_rec(extent_cache, rec);
7403                 return ret;
7404         }
7405
7406         ret = add_extent_rec_nolookup(extent_cache, tmpl);
7407
7408         return ret;
7409 }
7410
7411 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7412                             u64 parent, u64 root, int found_ref)
7413 {
7414         struct extent_record *rec;
7415         struct tree_backref *back;
7416         struct cache_extent *cache;
7417         int ret;
7418         bool insert = false;
7419
7420         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7421         if (!cache) {
7422                 struct extent_record tmpl;
7423
7424                 memset(&tmpl, 0, sizeof(tmpl));
7425                 tmpl.start = bytenr;
7426                 tmpl.nr = 1;
7427                 tmpl.metadata = 1;
7428                 tmpl.max_size = 1;
7429
7430                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7431                 if (ret)
7432                         return ret;
7433
7434                 /* really a bug in cache_extent implement now */
7435                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7436                 if (!cache)
7437                         return -ENOENT;
7438         }
7439
7440         rec = container_of(cache, struct extent_record, cache);
7441         if (rec->start != bytenr) {
7442                 /*
7443                  * Several cause, from unaligned bytenr to over lapping extents
7444                  */
7445                 return -EEXIST;
7446         }
7447
7448         back = find_tree_backref(rec, parent, root);
7449         if (!back) {
7450                 back = alloc_tree_backref(rec, parent, root);
7451                 if (!back)
7452                         return -ENOMEM;
7453                 insert = true;
7454         }
7455
7456         if (found_ref) {
7457                 if (back->node.found_ref) {
7458                         fprintf(stderr, "Extent back ref already exists "
7459                                 "for %llu parent %llu root %llu \n",
7460                                 (unsigned long long)bytenr,
7461                                 (unsigned long long)parent,
7462                                 (unsigned long long)root);
7463                 }
7464                 back->node.found_ref = 1;
7465         } else {
7466                 if (back->node.found_extent_tree) {
7467                         fprintf(stderr, "Extent back ref already exists "
7468                                 "for %llu parent %llu root %llu \n",
7469                                 (unsigned long long)bytenr,
7470                                 (unsigned long long)parent,
7471                                 (unsigned long long)root);
7472                 }
7473                 back->node.found_extent_tree = 1;
7474         }
7475         if (insert)
7476                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7477                         compare_extent_backref));
7478         check_extent_type(rec);
7479         maybe_free_extent_rec(extent_cache, rec);
7480         return 0;
7481 }
7482
7483 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7484                             u64 parent, u64 root, u64 owner, u64 offset,
7485                             u32 num_refs, int found_ref, u64 max_size)
7486 {
7487         struct extent_record *rec;
7488         struct data_backref *back;
7489         struct cache_extent *cache;
7490         int ret;
7491         bool insert = false;
7492
7493         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7494         if (!cache) {
7495                 struct extent_record tmpl;
7496
7497                 memset(&tmpl, 0, sizeof(tmpl));
7498                 tmpl.start = bytenr;
7499                 tmpl.nr = 1;
7500                 tmpl.max_size = max_size;
7501
7502                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7503                 if (ret)
7504                         return ret;
7505
7506                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7507                 if (!cache)
7508                         abort();
7509         }
7510
7511         rec = container_of(cache, struct extent_record, cache);
7512         if (rec->max_size < max_size)
7513                 rec->max_size = max_size;
7514
7515         /*
7516          * If found_ref is set then max_size is the real size and must match the
7517          * existing refs.  So if we have already found a ref then we need to
7518          * make sure that this ref matches the existing one, otherwise we need
7519          * to add a new backref so we can notice that the backrefs don't match
7520          * and we need to figure out who is telling the truth.  This is to
7521          * account for that awful fsync bug I introduced where we'd end up with
7522          * a btrfs_file_extent_item that would have its length include multiple
7523          * prealloc extents or point inside of a prealloc extent.
7524          */
7525         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7526                                  bytenr, max_size);
7527         if (!back) {
7528                 back = alloc_data_backref(rec, parent, root, owner, offset,
7529                                           max_size);
7530                 BUG_ON(!back);
7531                 insert = true;
7532         }
7533
7534         if (found_ref) {
7535                 BUG_ON(num_refs != 1);
7536                 if (back->node.found_ref)
7537                         BUG_ON(back->bytes != max_size);
7538                 back->node.found_ref = 1;
7539                 back->found_ref += 1;
7540                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7541                         back->bytes = max_size;
7542                         back->disk_bytenr = bytenr;
7543
7544                         /* Need to reinsert if not already in the tree */
7545                         if (!insert) {
7546                                 rb_erase(&back->node.node, &rec->backref_tree);
7547                                 insert = true;
7548                         }
7549                 }
7550                 rec->refs += 1;
7551                 rec->content_checked = 1;
7552                 rec->owner_ref_checked = 1;
7553         } else {
7554                 if (back->node.found_extent_tree) {
7555                         fprintf(stderr, "Extent back ref already exists "
7556                                 "for %llu parent %llu root %llu "
7557                                 "owner %llu offset %llu num_refs %lu\n",
7558                                 (unsigned long long)bytenr,
7559                                 (unsigned long long)parent,
7560                                 (unsigned long long)root,
7561                                 (unsigned long long)owner,
7562                                 (unsigned long long)offset,
7563                                 (unsigned long)num_refs);
7564                 }
7565                 back->num_refs = num_refs;
7566                 back->node.found_extent_tree = 1;
7567         }
7568         if (insert)
7569                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7570                         compare_extent_backref));
7571
7572         maybe_free_extent_rec(extent_cache, rec);
7573         return 0;
7574 }
7575
7576 static int add_pending(struct cache_tree *pending,
7577                        struct cache_tree *seen, u64 bytenr, u32 size)
7578 {
7579         int ret;
7580         ret = add_cache_extent(seen, bytenr, size);
7581         if (ret)
7582                 return ret;
7583         add_cache_extent(pending, bytenr, size);
7584         return 0;
7585 }
7586
7587 static int pick_next_pending(struct cache_tree *pending,
7588                         struct cache_tree *reada,
7589                         struct cache_tree *nodes,
7590                         u64 last, struct block_info *bits, int bits_nr,
7591                         int *reada_bits)
7592 {
7593         unsigned long node_start = last;
7594         struct cache_extent *cache;
7595         int ret;
7596
7597         cache = search_cache_extent(reada, 0);
7598         if (cache) {
7599                 bits[0].start = cache->start;
7600                 bits[0].size = cache->size;
7601                 *reada_bits = 1;
7602                 return 1;
7603         }
7604         *reada_bits = 0;
7605         if (node_start > 32768)
7606                 node_start -= 32768;
7607
7608         cache = search_cache_extent(nodes, node_start);
7609         if (!cache)
7610                 cache = search_cache_extent(nodes, 0);
7611
7612         if (!cache) {
7613                  cache = search_cache_extent(pending, 0);
7614                  if (!cache)
7615                          return 0;
7616                  ret = 0;
7617                  do {
7618                          bits[ret].start = cache->start;
7619                          bits[ret].size = cache->size;
7620                          cache = next_cache_extent(cache);
7621                          ret++;
7622                  } while (cache && ret < bits_nr);
7623                  return ret;
7624         }
7625
7626         ret = 0;
7627         do {
7628                 bits[ret].start = cache->start;
7629                 bits[ret].size = cache->size;
7630                 cache = next_cache_extent(cache);
7631                 ret++;
7632         } while (cache && ret < bits_nr);
7633
7634         if (bits_nr - ret > 8) {
7635                 u64 lookup = bits[0].start + bits[0].size;
7636                 struct cache_extent *next;
7637                 next = search_cache_extent(pending, lookup);
7638                 while(next) {
7639                         if (next->start - lookup > 32768)
7640                                 break;
7641                         bits[ret].start = next->start;
7642                         bits[ret].size = next->size;
7643                         lookup = next->start + next->size;
7644                         ret++;
7645                         if (ret == bits_nr)
7646                                 break;
7647                         next = next_cache_extent(next);
7648                         if (!next)
7649                                 break;
7650                 }
7651         }
7652         return ret;
7653 }
7654
7655 static void free_chunk_record(struct cache_extent *cache)
7656 {
7657         struct chunk_record *rec;
7658
7659         rec = container_of(cache, struct chunk_record, cache);
7660         list_del_init(&rec->list);
7661         list_del_init(&rec->dextents);
7662         free(rec);
7663 }
7664
7665 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7666 {
7667         cache_tree_free_extents(chunk_cache, free_chunk_record);
7668 }
7669
7670 static void free_device_record(struct rb_node *node)
7671 {
7672         struct device_record *rec;
7673
7674         rec = container_of(node, struct device_record, node);
7675         free(rec);
7676 }
7677
7678 FREE_RB_BASED_TREE(device_cache, free_device_record);
7679
7680 int insert_block_group_record(struct block_group_tree *tree,
7681                               struct block_group_record *bg_rec)
7682 {
7683         int ret;
7684
7685         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7686         if (ret)
7687                 return ret;
7688
7689         list_add_tail(&bg_rec->list, &tree->block_groups);
7690         return 0;
7691 }
7692
7693 static void free_block_group_record(struct cache_extent *cache)
7694 {
7695         struct block_group_record *rec;
7696
7697         rec = container_of(cache, struct block_group_record, cache);
7698         list_del_init(&rec->list);
7699         free(rec);
7700 }
7701
7702 void free_block_group_tree(struct block_group_tree *tree)
7703 {
7704         cache_tree_free_extents(&tree->tree, free_block_group_record);
7705 }
7706
7707 int insert_device_extent_record(struct device_extent_tree *tree,
7708                                 struct device_extent_record *de_rec)
7709 {
7710         int ret;
7711
7712         /*
7713          * Device extent is a bit different from the other extents, because
7714          * the extents which belong to the different devices may have the
7715          * same start and size, so we need use the special extent cache
7716          * search/insert functions.
7717          */
7718         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7719         if (ret)
7720                 return ret;
7721
7722         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7723         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
7724         return 0;
7725 }
7726
7727 static void free_device_extent_record(struct cache_extent *cache)
7728 {
7729         struct device_extent_record *rec;
7730
7731         rec = container_of(cache, struct device_extent_record, cache);
7732         if (!list_empty(&rec->chunk_list))
7733                 list_del_init(&rec->chunk_list);
7734         if (!list_empty(&rec->device_list))
7735                 list_del_init(&rec->device_list);
7736         free(rec);
7737 }
7738
7739 void free_device_extent_tree(struct device_extent_tree *tree)
7740 {
7741         cache_tree_free_extents(&tree->tree, free_device_extent_record);
7742 }
7743
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Record the backref described by the v0 extent ref item at @slot of
 * @leaf into @extent_cache.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as
 * a tree backref; anything else is recorded as a data backref carrying
 * the ref count stored in the item.
 *
 * Returns the result of add_tree_backref() / add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                        key.offset, 0, 0, 0,
                                        btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset,
                                0, 0);
}
#endif
7764
7765 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7766                                             struct btrfs_key *key,
7767                                             int slot)
7768 {
7769         struct btrfs_chunk *ptr;
7770         struct chunk_record *rec;
7771         int num_stripes, i;
7772
7773         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7774         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7775
7776         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7777         if (!rec) {
7778                 fprintf(stderr, "memory allocation failed\n");
7779                 exit(-1);
7780         }
7781
7782         INIT_LIST_HEAD(&rec->list);
7783         INIT_LIST_HEAD(&rec->dextents);
7784         rec->bg_rec = NULL;
7785
7786         rec->cache.start = key->offset;
7787         rec->cache.size = btrfs_chunk_length(leaf, ptr);
7788
7789         rec->generation = btrfs_header_generation(leaf);
7790
7791         rec->objectid = key->objectid;
7792         rec->type = key->type;
7793         rec->offset = key->offset;
7794
7795         rec->length = rec->cache.size;
7796         rec->owner = btrfs_chunk_owner(leaf, ptr);
7797         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7798         rec->type_flags = btrfs_chunk_type(leaf, ptr);
7799         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7800         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7801         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7802         rec->num_stripes = num_stripes;
7803         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
7804
7805         for (i = 0; i < rec->num_stripes; ++i) {
7806                 rec->stripes[i].devid =
7807                         btrfs_stripe_devid_nr(leaf, ptr, i);
7808                 rec->stripes[i].offset =
7809                         btrfs_stripe_offset_nr(leaf, ptr, i);
7810                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7811                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
7812                                 BTRFS_UUID_SIZE);
7813         }
7814
7815         return rec;
7816 }
7817
7818 static int process_chunk_item(struct cache_tree *chunk_cache,
7819                               struct btrfs_key *key, struct extent_buffer *eb,
7820                               int slot)
7821 {
7822         struct chunk_record *rec;
7823         struct btrfs_chunk *chunk;
7824         int ret = 0;
7825
7826         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7827         /*
7828          * Do extra check for this chunk item,
7829          *
7830          * It's still possible one can craft a leaf with CHUNK_ITEM, with
7831          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7832          * and owner<->key_type check.
7833          */
7834         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7835                                       key->offset);
7836         if (ret < 0) {
7837                 error("chunk(%llu, %llu) is not valid, ignore it",
7838                       key->offset, btrfs_chunk_length(eb, chunk));
7839                 return 0;
7840         }
7841         rec = btrfs_new_chunk_record(eb, key, slot);
7842         ret = insert_cache_extent(chunk_cache, &rec->cache);
7843         if (ret) {
7844                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7845                         rec->offset, rec->length);
7846                 free(rec);
7847         }
7848
7849         return ret;
7850 }
7851
7852 static int process_device_item(struct rb_root *dev_cache,
7853                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7854 {
7855         struct btrfs_dev_item *ptr;
7856         struct device_record *rec;
7857         int ret = 0;
7858
7859         ptr = btrfs_item_ptr(eb,
7860                 slot, struct btrfs_dev_item);
7861
7862         rec = malloc(sizeof(*rec));
7863         if (!rec) {
7864                 fprintf(stderr, "memory allocation failed\n");
7865                 return -ENOMEM;
7866         }
7867
7868         rec->devid = key->offset;
7869         rec->generation = btrfs_header_generation(eb);
7870
7871         rec->objectid = key->objectid;
7872         rec->type = key->type;
7873         rec->offset = key->offset;
7874
7875         rec->devid = btrfs_device_id(eb, ptr);
7876         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7877         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7878
7879         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7880         if (ret) {
7881                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
7882                 free(rec);
7883         }
7884
7885         return ret;
7886 }
7887
7888 struct block_group_record *
7889 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7890                              int slot)
7891 {
7892         struct btrfs_block_group_item *ptr;
7893         struct block_group_record *rec;
7894
7895         rec = calloc(1, sizeof(*rec));
7896         if (!rec) {
7897                 fprintf(stderr, "memory allocation failed\n");
7898                 exit(-1);
7899         }
7900
7901         rec->cache.start = key->objectid;
7902         rec->cache.size = key->offset;
7903
7904         rec->generation = btrfs_header_generation(leaf);
7905
7906         rec->objectid = key->objectid;
7907         rec->type = key->type;
7908         rec->offset = key->offset;
7909
7910         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7911         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7912
7913         INIT_LIST_HEAD(&rec->list);
7914
7915         return rec;
7916 }
7917
7918 static int process_block_group_item(struct block_group_tree *block_group_cache,
7919                                     struct btrfs_key *key,
7920                                     struct extent_buffer *eb, int slot)
7921 {
7922         struct block_group_record *rec;
7923         int ret = 0;
7924
7925         rec = btrfs_new_block_group_record(eb, key, slot);
7926         ret = insert_block_group_record(block_group_cache, rec);
7927         if (ret) {
7928                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7929                         rec->objectid, rec->offset);
7930                 free(rec);
7931         }
7932
7933         return ret;
7934 }
7935
7936 struct device_extent_record *
7937 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7938                                struct btrfs_key *key, int slot)
7939 {
7940         struct device_extent_record *rec;
7941         struct btrfs_dev_extent *ptr;
7942
7943         rec = calloc(1, sizeof(*rec));
7944         if (!rec) {
7945                 fprintf(stderr, "memory allocation failed\n");
7946                 exit(-1);
7947         }
7948
7949         rec->cache.objectid = key->objectid;
7950         rec->cache.start = key->offset;
7951
7952         rec->generation = btrfs_header_generation(leaf);
7953
7954         rec->objectid = key->objectid;
7955         rec->type = key->type;
7956         rec->offset = key->offset;
7957
7958         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7959         rec->chunk_objecteid =
7960                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7961         rec->chunk_offset =
7962                 btrfs_dev_extent_chunk_offset(leaf, ptr);
7963         rec->length = btrfs_dev_extent_length(leaf, ptr);
7964         rec->cache.size = rec->length;
7965
7966         INIT_LIST_HEAD(&rec->chunk_list);
7967         INIT_LIST_HEAD(&rec->device_list);
7968
7969         return rec;
7970 }
7971
7972 static int
7973 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7974                            struct btrfs_key *key, struct extent_buffer *eb,
7975                            int slot)
7976 {
7977         struct device_extent_record *rec;
7978         int ret;
7979
7980         rec = btrfs_new_device_extent_record(eb, key, slot);
7981         ret = insert_device_extent_record(dev_extent_cache, rec);
7982         if (ret) {
7983                 fprintf(stderr,
7984                         "Device extent[%llu, %llu, %llu] existed.\n",
7985                         rec->objectid, rec->offset, rec->length);
7986                 free(rec);
7987         }
7988
7989         return ret;
7990 }
7991
7992 static int process_extent_item(struct btrfs_root *root,
7993                                struct cache_tree *extent_cache,
7994                                struct extent_buffer *eb, int slot)
7995 {
7996         struct btrfs_extent_item *ei;
7997         struct btrfs_extent_inline_ref *iref;
7998         struct btrfs_extent_data_ref *dref;
7999         struct btrfs_shared_data_ref *sref;
8000         struct btrfs_key key;
8001         struct extent_record tmpl;
8002         unsigned long end;
8003         unsigned long ptr;
8004         int ret;
8005         int type;
8006         u32 item_size = btrfs_item_size_nr(eb, slot);
8007         u64 refs = 0;
8008         u64 offset;
8009         u64 num_bytes;
8010         int metadata = 0;
8011
8012         btrfs_item_key_to_cpu(eb, &key, slot);
8013
8014         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8015                 metadata = 1;
8016                 num_bytes = root->fs_info->nodesize;
8017         } else {
8018                 num_bytes = key.offset;
8019         }
8020
8021         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
8022                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
8023                       key.objectid, root->fs_info->sectorsize);
8024                 return -EIO;
8025         }
8026         if (item_size < sizeof(*ei)) {
8027 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8028                 struct btrfs_extent_item_v0 *ei0;
8029                 if (item_size != sizeof(*ei0)) {
8030                         error(
8031         "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
8032                                 key.objectid, key.type, key.offset,
8033                                 btrfs_header_bytenr(eb), slot);
8034                         BUG();
8035                 }
8036                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
8037                 refs = btrfs_extent_refs_v0(eb, ei0);
8038 #else
8039                 BUG();
8040 #endif
8041                 memset(&tmpl, 0, sizeof(tmpl));
8042                 tmpl.start = key.objectid;
8043                 tmpl.nr = num_bytes;
8044                 tmpl.extent_item_refs = refs;
8045                 tmpl.metadata = metadata;
8046                 tmpl.found_rec = 1;
8047                 tmpl.max_size = num_bytes;
8048
8049                 return add_extent_rec(extent_cache, &tmpl);
8050         }
8051
8052         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
8053         refs = btrfs_extent_refs(eb, ei);
8054         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
8055                 metadata = 1;
8056         else
8057                 metadata = 0;
8058         if (metadata && num_bytes != root->fs_info->nodesize) {
8059                 error("ignore invalid metadata extent, length %llu does not equal to %u",
8060                       num_bytes, root->fs_info->nodesize);
8061                 return -EIO;
8062         }
8063         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
8064                 error("ignore invalid data extent, length %llu is not aligned to %u",
8065                       num_bytes, root->fs_info->sectorsize);
8066                 return -EIO;
8067         }
8068
8069         memset(&tmpl, 0, sizeof(tmpl));
8070         tmpl.start = key.objectid;
8071         tmpl.nr = num_bytes;
8072         tmpl.extent_item_refs = refs;
8073         tmpl.metadata = metadata;
8074         tmpl.found_rec = 1;
8075         tmpl.max_size = num_bytes;
8076         add_extent_rec(extent_cache, &tmpl);
8077
8078         ptr = (unsigned long)(ei + 1);
8079         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
8080             key.type == BTRFS_EXTENT_ITEM_KEY)
8081                 ptr += sizeof(struct btrfs_tree_block_info);
8082
8083         end = (unsigned long)ei + item_size;
8084         while (ptr < end) {
8085                 iref = (struct btrfs_extent_inline_ref *)ptr;
8086                 type = btrfs_extent_inline_ref_type(eb, iref);
8087                 offset = btrfs_extent_inline_ref_offset(eb, iref);
8088                 switch (type) {
8089                 case BTRFS_TREE_BLOCK_REF_KEY:
8090                         ret = add_tree_backref(extent_cache, key.objectid,
8091                                         0, offset, 0);
8092                         if (ret < 0)
8093                                 error(
8094                         "add_tree_backref failed (extent items tree block): %s",
8095                                       strerror(-ret));
8096                         break;
8097                 case BTRFS_SHARED_BLOCK_REF_KEY:
8098                         ret = add_tree_backref(extent_cache, key.objectid,
8099                                         offset, 0, 0);
8100                         if (ret < 0)
8101                                 error(
8102                         "add_tree_backref failed (extent items shared block): %s",
8103                                       strerror(-ret));
8104                         break;
8105                 case BTRFS_EXTENT_DATA_REF_KEY:
8106                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8107                         add_data_backref(extent_cache, key.objectid, 0,
8108                                         btrfs_extent_data_ref_root(eb, dref),
8109                                         btrfs_extent_data_ref_objectid(eb,
8110                                                                        dref),
8111                                         btrfs_extent_data_ref_offset(eb, dref),
8112                                         btrfs_extent_data_ref_count(eb, dref),
8113                                         0, num_bytes);
8114                         break;
8115                 case BTRFS_SHARED_DATA_REF_KEY:
8116                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
8117                         add_data_backref(extent_cache, key.objectid, offset,
8118                                         0, 0, 0,
8119                                         btrfs_shared_data_ref_count(eb, sref),
8120                                         0, num_bytes);
8121                         break;
8122                 default:
8123                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
8124                                 key.objectid, key.type, num_bytes);
8125                         goto out;
8126                 }
8127                 ptr += btrfs_extent_inline_ref_size(type);
8128         }
8129         WARN_ON(ptr > end);
8130 out:
8131         return 0;
8132 }
8133
8134 static int check_cache_range(struct btrfs_root *root,
8135                              struct btrfs_block_group_cache *cache,
8136                              u64 offset, u64 bytes)
8137 {
8138         struct btrfs_free_space *entry;
8139         u64 *logical;
8140         u64 bytenr;
8141         int stripe_len;
8142         int i, nr, ret;
8143
8144         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8145                 bytenr = btrfs_sb_offset(i);
8146                 ret = btrfs_rmap_block(root->fs_info,
8147                                        cache->key.objectid, bytenr, 0,
8148                                        &logical, &nr, &stripe_len);
8149                 if (ret)
8150                         return ret;
8151
8152                 while (nr--) {
8153                         if (logical[nr] + stripe_len <= offset)
8154                                 continue;
8155                         if (offset + bytes <= logical[nr])
8156                                 continue;
8157                         if (logical[nr] == offset) {
8158                                 if (stripe_len >= bytes) {
8159                                         free(logical);
8160                                         return 0;
8161                                 }
8162                                 bytes -= stripe_len;
8163                                 offset += stripe_len;
8164                         } else if (logical[nr] < offset) {
8165                                 if (logical[nr] + stripe_len >=
8166                                     offset + bytes) {
8167                                         free(logical);
8168                                         return 0;
8169                                 }
8170                                 bytes = (offset + bytes) -
8171                                         (logical[nr] + stripe_len);
8172                                 offset = logical[nr] + stripe_len;
8173                         } else {
8174                                 /*
8175                                  * Could be tricky, the super may land in the
8176                                  * middle of the area we're checking.  First
8177                                  * check the easiest case, it's at the end.
8178                                  */
8179                                 if (logical[nr] + stripe_len >=
8180                                     bytes + offset) {
8181                                         bytes = logical[nr] - offset;
8182                                         continue;
8183                                 }
8184
8185                                 /* Check the left side */
8186                                 ret = check_cache_range(root, cache,
8187                                                         offset,
8188                                                         logical[nr] - offset);
8189                                 if (ret) {
8190                                         free(logical);
8191                                         return ret;
8192                                 }
8193
8194                                 /* Now we continue with the right side */
8195                                 bytes = (offset + bytes) -
8196                                         (logical[nr] + stripe_len);
8197                                 offset = logical[nr] + stripe_len;
8198                         }
8199                 }
8200
8201                 free(logical);
8202         }
8203
8204         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8205         if (!entry) {
8206                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8207                         offset, offset+bytes);
8208                 return -EINVAL;
8209         }
8210
8211         if (entry->offset != offset) {
8212                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8213                         entry->offset);
8214                 return -EINVAL;
8215         }
8216
8217         if (entry->bytes != bytes) {
8218                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8219                         bytes, entry->bytes, offset);
8220                 return -EINVAL;
8221         }
8222
8223         unlink_free_space(cache->free_space_ctl, entry);
8224         free(entry);
8225         return 0;
8226 }
8227
8228 static int verify_space_cache(struct btrfs_root *root,
8229                               struct btrfs_block_group_cache *cache)
8230 {
8231         struct btrfs_path path;
8232         struct extent_buffer *leaf;
8233         struct btrfs_key key;
8234         u64 last;
8235         int ret = 0;
8236
8237         root = root->fs_info->extent_root;
8238
8239         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8240
8241         btrfs_init_path(&path);
8242         key.objectid = last;
8243         key.offset = 0;
8244         key.type = BTRFS_EXTENT_ITEM_KEY;
8245         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8246         if (ret < 0)
8247                 goto out;
8248         ret = 0;
8249         while (1) {
8250                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8251                         ret = btrfs_next_leaf(root, &path);
8252                         if (ret < 0)
8253                                 goto out;
8254                         if (ret > 0) {
8255                                 ret = 0;
8256                                 break;
8257                         }
8258                 }
8259                 leaf = path.nodes[0];
8260                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8261                 if (key.objectid >= cache->key.offset + cache->key.objectid)
8262                         break;
8263                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8264                     key.type != BTRFS_METADATA_ITEM_KEY) {
8265                         path.slots[0]++;
8266                         continue;
8267                 }
8268
8269                 if (last == key.objectid) {
8270                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
8271                                 last = key.objectid + key.offset;
8272                         else
8273                                 last = key.objectid + root->fs_info->nodesize;
8274                         path.slots[0]++;
8275                         continue;
8276                 }
8277
8278                 ret = check_cache_range(root, cache, last,
8279                                         key.objectid - last);
8280                 if (ret)
8281                         break;
8282                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8283                         last = key.objectid + key.offset;
8284                 else
8285                         last = key.objectid + root->fs_info->nodesize;
8286                 path.slots[0]++;
8287         }
8288
8289         if (last < cache->key.objectid + cache->key.offset)
8290                 ret = check_cache_range(root, cache, last,
8291                                         cache->key.objectid +
8292                                         cache->key.offset - last);
8293
8294 out:
8295         btrfs_release_path(&path);
8296
8297         if (!ret &&
8298             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8299                 fprintf(stderr, "There are still entries left in the space "
8300                         "cache\n");
8301                 ret = -EINVAL;
8302         }
8303
8304         return ret;
8305 }
8306
8307 static int check_space_cache(struct btrfs_root *root)
8308 {
8309         struct btrfs_block_group_cache *cache;
8310         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8311         int ret;
8312         int error = 0;
8313
8314         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8315             btrfs_super_generation(root->fs_info->super_copy) !=
8316             btrfs_super_cache_generation(root->fs_info->super_copy)) {
8317                 printf("cache and super generation don't match, space cache "
8318                        "will be invalidated\n");
8319                 return 0;
8320         }
8321
8322         if (ctx.progress_enabled) {
8323                 ctx.tp = TASK_FREE_SPACE;
8324                 task_start(ctx.info);
8325         }
8326
8327         while (1) {
8328                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
8329                 if (!cache)
8330                         break;
8331
8332                 start = cache->key.objectid + cache->key.offset;
8333                 if (!cache->free_space_ctl) {
8334                         if (btrfs_init_free_space_ctl(cache,
8335                                                 root->fs_info->sectorsize)) {
8336                                 ret = -ENOMEM;
8337                                 break;
8338                         }
8339                 } else {
8340                         btrfs_remove_free_space_cache(cache);
8341                 }
8342
8343                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8344                         ret = exclude_super_stripes(root, cache);
8345                         if (ret) {
8346                                 fprintf(stderr, "could not exclude super stripes: %s\n",
8347                                         strerror(-ret));
8348                                 error++;
8349                                 continue;
8350                         }
8351                         ret = load_free_space_tree(root->fs_info, cache);
8352                         free_excluded_extents(root, cache);
8353                         if (ret < 0) {
8354                                 fprintf(stderr, "could not load free space tree: %s\n",
8355                                         strerror(-ret));
8356                                 error++;
8357                                 continue;
8358                         }
8359                         error += ret;
8360                 } else {
8361                         ret = load_free_space_cache(root->fs_info, cache);
8362                         if (!ret)
8363                                 continue;
8364                 }
8365
8366                 ret = verify_space_cache(root, cache);
8367                 if (ret) {
8368                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
8369                                 cache->key.objectid);
8370                         error++;
8371                 }
8372         }
8373
8374         task_stop(ctx.info);
8375
8376         return error ? -EINVAL : 0;
8377 }
8378
8379 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8380                         u64 num_bytes, unsigned long leaf_offset,
8381                         struct extent_buffer *eb) {
8382
8383         struct btrfs_fs_info *fs_info = root->fs_info;
8384         u64 offset = 0;
8385         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8386         char *data;
8387         unsigned long csum_offset;
8388         u32 csum;
8389         u32 csum_expected;
8390         u64 read_len;
8391         u64 data_checked = 0;
8392         u64 tmp;
8393         int ret = 0;
8394         int mirror;
8395         int num_copies;
8396
8397         if (num_bytes % fs_info->sectorsize)
8398                 return -EINVAL;
8399
8400         data = malloc(num_bytes);
8401         if (!data)
8402                 return -ENOMEM;
8403
8404         while (offset < num_bytes) {
8405                 mirror = 0;
8406 again:
8407                 read_len = num_bytes - offset;
8408                 /* read as much space once a time */
8409                 ret = read_extent_data(fs_info, data + offset,
8410                                 bytenr + offset, &read_len, mirror);
8411                 if (ret)
8412                         goto out;
8413                 data_checked = 0;
8414                 /* verify every 4k data's checksum */
8415                 while (data_checked < read_len) {
8416                         csum = ~(u32)0;
8417                         tmp = offset + data_checked;
8418
8419                         csum = btrfs_csum_data((char *)data + tmp,
8420                                                csum, fs_info->sectorsize);
8421                         btrfs_csum_final(csum, (u8 *)&csum);
8422
8423                         csum_offset = leaf_offset +
8424                                  tmp / fs_info->sectorsize * csum_size;
8425                         read_extent_buffer(eb, (char *)&csum_expected,
8426                                            csum_offset, csum_size);
8427                         /* try another mirror */
8428                         if (csum != csum_expected) {
8429                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8430                                                 mirror, bytenr + tmp,
8431                                                 csum, csum_expected);
8432                                 num_copies = btrfs_num_copies(root->fs_info,
8433                                                 bytenr, num_bytes);
8434                                 if (mirror < num_copies - 1) {
8435                                         mirror += 1;
8436                                         goto again;
8437                                 }
8438                         }
8439                         data_checked += fs_info->sectorsize;
8440                 }
8441                 offset += read_len;
8442         }
8443 out:
8444         free(data);
8445         return ret;
8446 }
8447
8448 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8449                                u64 num_bytes)
8450 {
8451         struct btrfs_path path;
8452         struct extent_buffer *leaf;
8453         struct btrfs_key key;
8454         int ret;
8455
8456         btrfs_init_path(&path);
8457         key.objectid = bytenr;
8458         key.type = BTRFS_EXTENT_ITEM_KEY;
8459         key.offset = (u64)-1;
8460
8461 again:
8462         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8463                                 0, 0);
8464         if (ret < 0) {
8465                 fprintf(stderr, "Error looking up extent record %d\n", ret);
8466                 btrfs_release_path(&path);
8467                 return ret;
8468         } else if (ret) {
8469                 if (path.slots[0] > 0) {
8470                         path.slots[0]--;
8471                 } else {
8472                         ret = btrfs_prev_leaf(root, &path);
8473                         if (ret < 0) {
8474                                 goto out;
8475                         } else if (ret > 0) {
8476                                 ret = 0;
8477                                 goto out;
8478                         }
8479                 }
8480         }
8481
8482         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8483
8484         /*
8485          * Block group items come before extent items if they have the same
8486          * bytenr, so walk back one more just in case.  Dear future traveller,
8487          * first congrats on mastering time travel.  Now if it's not too much
8488          * trouble could you go back to 2006 and tell Chris to make the
8489          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8490          * EXTENT_ITEM_KEY please?
8491          */
8492         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8493                 if (path.slots[0] > 0) {
8494                         path.slots[0]--;
8495                 } else {
8496                         ret = btrfs_prev_leaf(root, &path);
8497                         if (ret < 0) {
8498                                 goto out;
8499                         } else if (ret > 0) {
8500                                 ret = 0;
8501                                 goto out;
8502                         }
8503                 }
8504                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8505         }
8506
8507         while (num_bytes) {
8508                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8509                         ret = btrfs_next_leaf(root, &path);
8510                         if (ret < 0) {
8511                                 fprintf(stderr, "Error going to next leaf "
8512                                         "%d\n", ret);
8513                                 btrfs_release_path(&path);
8514                                 return ret;
8515                         } else if (ret) {
8516                                 break;
8517                         }
8518                 }
8519                 leaf = path.nodes[0];
8520                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8521                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8522                         path.slots[0]++;
8523                         continue;
8524                 }
8525                 if (key.objectid + key.offset < bytenr) {
8526                         path.slots[0]++;
8527                         continue;
8528                 }
8529                 if (key.objectid > bytenr + num_bytes)
8530                         break;
8531
8532                 if (key.objectid == bytenr) {
8533                         if (key.offset >= num_bytes) {
8534                                 num_bytes = 0;
8535                                 break;
8536                         }
8537                         num_bytes -= key.offset;
8538                         bytenr += key.offset;
8539                 } else if (key.objectid < bytenr) {
8540                         if (key.objectid + key.offset >= bytenr + num_bytes) {
8541                                 num_bytes = 0;
8542                                 break;
8543                         }
8544                         num_bytes = (bytenr + num_bytes) -
8545                                 (key.objectid + key.offset);
8546                         bytenr = key.objectid + key.offset;
8547                 } else {
8548                         if (key.objectid + key.offset < bytenr + num_bytes) {
8549                                 u64 new_start = key.objectid + key.offset;
8550                                 u64 new_bytes = bytenr + num_bytes - new_start;
8551
8552                                 /*
8553                                  * Weird case, the extent is in the middle of
8554                                  * our range, we'll have to search one side
8555                                  * and then the other.  Not sure if this happens
8556                                  * in real life, but no harm in coding it up
8557                                  * anyway just in case.
8558                                  */
8559                                 btrfs_release_path(&path);
8560                                 ret = check_extent_exists(root, new_start,
8561                                                           new_bytes);
8562                                 if (ret) {
8563                                         fprintf(stderr, "Right section didn't "
8564                                                 "have a record\n");
8565                                         break;
8566                                 }
8567                                 num_bytes = key.objectid - bytenr;
8568                                 goto again;
8569                         }
8570                         num_bytes = key.objectid - bytenr;
8571                 }
8572                 path.slots[0]++;
8573         }
8574         ret = 0;
8575
8576 out:
8577         if (num_bytes && !ret) {
8578                 fprintf(stderr, "There are no extents for csum range "
8579                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8580                 ret = 1;
8581         }
8582
8583         btrfs_release_path(&path);
8584         return ret;
8585 }
8586
8587 static int check_csums(struct btrfs_root *root)
8588 {
8589         struct btrfs_path path;
8590         struct extent_buffer *leaf;
8591         struct btrfs_key key;
8592         u64 offset = 0, num_bytes = 0;
8593         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8594         int errors = 0;
8595         int ret;
8596         u64 data_len;
8597         unsigned long leaf_offset;
8598
8599         root = root->fs_info->csum_root;
8600         if (!extent_buffer_uptodate(root->node)) {
8601                 fprintf(stderr, "No valid csum tree found\n");
8602                 return -ENOENT;
8603         }
8604
8605         btrfs_init_path(&path);
8606         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8607         key.type = BTRFS_EXTENT_CSUM_KEY;
8608         key.offset = 0;
8609         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8610         if (ret < 0) {
8611                 fprintf(stderr, "Error searching csum tree %d\n", ret);
8612                 btrfs_release_path(&path);
8613                 return ret;
8614         }
8615
8616         if (ret > 0 && path.slots[0])
8617                 path.slots[0]--;
8618         ret = 0;
8619
8620         while (1) {
8621                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8622                         ret = btrfs_next_leaf(root, &path);
8623                         if (ret < 0) {
8624                                 fprintf(stderr, "Error going to next leaf "
8625                                         "%d\n", ret);
8626                                 break;
8627                         }
8628                         if (ret)
8629                                 break;
8630                 }
8631                 leaf = path.nodes[0];
8632
8633                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8634                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
8635                         path.slots[0]++;
8636                         continue;
8637                 }
8638
8639                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8640                               csum_size) * root->fs_info->sectorsize;
8641                 if (!check_data_csum)
8642                         goto skip_csum_check;
8643                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8644                 ret = check_extent_csums(root, key.offset, data_len,
8645                                          leaf_offset, leaf);
8646                 if (ret)
8647                         break;
8648 skip_csum_check:
8649                 if (!num_bytes) {
8650                         offset = key.offset;
8651                 } else if (key.offset != offset + num_bytes) {
8652                         ret = check_extent_exists(root, offset, num_bytes);
8653                         if (ret) {
8654                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8655                                         "there is no extent record\n",
8656                                         offset, offset+num_bytes);
8657                                 errors++;
8658                         }
8659                         offset = key.offset;
8660                         num_bytes = 0;
8661                 }
8662                 num_bytes += data_len;
8663                 path.slots[0]++;
8664         }
8665
8666         btrfs_release_path(&path);
8667         return errors;
8668 }
8669
8670 static int is_dropped_key(struct btrfs_key *key,
8671                           struct btrfs_key *drop_key) {
8672         if (key->objectid < drop_key->objectid)
8673                 return 1;
8674         else if (key->objectid == drop_key->objectid) {
8675                 if (key->type < drop_key->type)
8676                         return 1;
8677                 else if (key->type == drop_key->type) {
8678                         if (key->offset < drop_key->offset)
8679                                 return 1;
8680                 }
8681         }
8682         return 0;
8683 }
8684
8685 /*
8686  * Here are the rules for FULL_BACKREF.
8687  *
8688  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8689  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8690  *      FULL_BACKREF set.
8691  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
8692  *    if it happened after the relocation occurred since we'll have dropped the
8693  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8694  *    have no real way to know for sure.
8695  *
8696  * We process the blocks one root at a time, and we start from the lowest root
8697  * objectid and go to the highest.  So we can just lookup the owner backref for
8698  * the record and if we don't find it then we know it doesn't exist and we have
8699  * a FULL BACKREF.
8700  *
8701  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
8702  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8703  * be set or not and then we can check later once we've gathered all the refs.
8704  */
8705 static int calc_extent_flag(struct cache_tree *extent_cache,
8706                            struct extent_buffer *buf,
8707                            struct root_item_record *ri,
8708                            u64 *flags)
8709 {
8710         struct extent_record *rec;
8711         struct cache_extent *cache;
8712         struct tree_backref *tback;
8713         u64 owner = 0;
8714
8715         cache = lookup_cache_extent(extent_cache, buf->start, 1);
8716         /* we have added this extent before */
8717         if (!cache)
8718                 return -ENOENT;
8719
8720         rec = container_of(cache, struct extent_record, cache);
8721
8722         /*
8723          * Except file/reloc tree, we can not have
8724          * FULL BACKREF MODE
8725          */
8726         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8727                 goto normal;
8728         /*
8729          * root node
8730          */
8731         if (buf->start == ri->bytenr)
8732                 goto normal;
8733
8734         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8735                 goto full_backref;
8736
8737         owner = btrfs_header_owner(buf);
8738         if (owner == ri->objectid)
8739                 goto normal;
8740
8741         tback = find_tree_backref(rec, 0, owner);
8742         if (!tback)
8743                 goto full_backref;
8744 normal:
8745         *flags = 0;
8746         if (rec->flag_block_full_backref != FLAG_UNSET &&
8747             rec->flag_block_full_backref != 0)
8748                 rec->bad_full_backref = 1;
8749         return 0;
8750 full_backref:
8751         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8752         if (rec->flag_block_full_backref != FLAG_UNSET &&
8753             rec->flag_block_full_backref != 1)
8754                 rec->bad_full_backref = 1;
8755         return 0;
8756 }
8757
8758 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8759 {
8760         fprintf(stderr, "Invalid key type(");
8761         print_key_type(stderr, 0, key_type);
8762         fprintf(stderr, ") found in root(");
8763         print_objectid(stderr, rootid, 0);
8764         fprintf(stderr, ")\n");
8765 }
8766
8767 /*
8768  * Check if the key is valid with its extent buffer.
8769  *
8770  * This is a early check in case invalid key exists in a extent buffer
8771  * This is not comprehensive yet, but should prevent wrong key/item passed
8772  * further
8773  */
8774 static int check_type_with_root(u64 rootid, u8 key_type)
8775 {
8776         switch (key_type) {
8777         /* Only valid in chunk tree */
8778         case BTRFS_DEV_ITEM_KEY:
8779         case BTRFS_CHUNK_ITEM_KEY:
8780                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8781                         goto err;
8782                 break;
8783         /* valid in csum and log tree */
8784         case BTRFS_CSUM_TREE_OBJECTID:
8785                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8786                       is_fstree(rootid)))
8787                         goto err;
8788                 break;
8789         case BTRFS_EXTENT_ITEM_KEY:
8790         case BTRFS_METADATA_ITEM_KEY:
8791         case BTRFS_BLOCK_GROUP_ITEM_KEY:
8792                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8793                         goto err;
8794                 break;
8795         case BTRFS_ROOT_ITEM_KEY:
8796                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8797                         goto err;
8798                 break;
8799         case BTRFS_DEV_EXTENT_KEY:
8800                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8801                         goto err;
8802                 break;
8803         }
8804         return 0;
8805 err:
8806         report_mismatch_key_root(key_type, rootid);
8807         return -EINVAL;
8808 }
8809
8810 static int run_next_block(struct btrfs_root *root,
8811                           struct block_info *bits,
8812                           int bits_nr,
8813                           u64 *last,
8814                           struct cache_tree *pending,
8815                           struct cache_tree *seen,
8816                           struct cache_tree *reada,
8817                           struct cache_tree *nodes,
8818                           struct cache_tree *extent_cache,
8819                           struct cache_tree *chunk_cache,
8820                           struct rb_root *dev_cache,
8821                           struct block_group_tree *block_group_cache,
8822                           struct device_extent_tree *dev_extent_cache,
8823                           struct root_item_record *ri)
8824 {
8825         struct btrfs_fs_info *fs_info = root->fs_info;
8826         struct extent_buffer *buf;
8827         struct extent_record *rec = NULL;
8828         u64 bytenr;
8829         u32 size;
8830         u64 parent;
8831         u64 owner;
8832         u64 flags;
8833         u64 ptr;
8834         u64 gen = 0;
8835         int ret = 0;
8836         int i;
8837         int nritems;
8838         struct btrfs_key key;
8839         struct cache_extent *cache;
8840         int reada_bits;
8841
8842         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8843                                     bits_nr, &reada_bits);
8844         if (nritems == 0)
8845                 return 1;
8846
8847         if (!reada_bits) {
8848                 for(i = 0; i < nritems; i++) {
8849                         ret = add_cache_extent(reada, bits[i].start,
8850                                                bits[i].size);
8851                         if (ret == -EEXIST)
8852                                 continue;
8853
8854                         /* fixme, get the parent transid */
8855                         readahead_tree_block(fs_info, bits[i].start, 0);
8856                 }
8857         }
8858         *last = bits[0].start;
8859         bytenr = bits[0].start;
8860         size = bits[0].size;
8861
8862         cache = lookup_cache_extent(pending, bytenr, size);
8863         if (cache) {
8864                 remove_cache_extent(pending, cache);
8865                 free(cache);
8866         }
8867         cache = lookup_cache_extent(reada, bytenr, size);
8868         if (cache) {
8869                 remove_cache_extent(reada, cache);
8870                 free(cache);
8871         }
8872         cache = lookup_cache_extent(nodes, bytenr, size);
8873         if (cache) {
8874                 remove_cache_extent(nodes, cache);
8875                 free(cache);
8876         }
8877         cache = lookup_cache_extent(extent_cache, bytenr, size);
8878         if (cache) {
8879                 rec = container_of(cache, struct extent_record, cache);
8880                 gen = rec->parent_generation;
8881         }
8882
8883         /* fixme, get the real parent transid */
8884         buf = read_tree_block(root->fs_info, bytenr, gen);
8885         if (!extent_buffer_uptodate(buf)) {
8886                 record_bad_block_io(root->fs_info,
8887                                     extent_cache, bytenr, size);
8888                 goto out;
8889         }
8890
8891         nritems = btrfs_header_nritems(buf);
8892
8893         flags = 0;
8894         if (!init_extent_tree) {
8895                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8896                                        btrfs_header_level(buf), 1, NULL,
8897                                        &flags);
8898                 if (ret < 0) {
8899                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8900                         if (ret < 0) {
8901                                 fprintf(stderr, "Couldn't calc extent flags\n");
8902                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8903                         }
8904                 }
8905         } else {
8906                 flags = 0;
8907                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8908                 if (ret < 0) {
8909                         fprintf(stderr, "Couldn't calc extent flags\n");
8910                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8911                 }
8912         }
8913
8914         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8915                 if (ri != NULL &&
8916                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8917                     ri->objectid == btrfs_header_owner(buf)) {
8918                         /*
8919                          * Ok we got to this block from it's original owner and
8920                          * we have FULL_BACKREF set.  Relocation can leave
8921                          * converted blocks over so this is altogether possible,
8922                          * however it's not possible if the generation > the
8923                          * last snapshot, so check for this case.
8924                          */
8925                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8926                             btrfs_header_generation(buf) > ri->last_snapshot) {
8927                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8928                                 rec->bad_full_backref = 1;
8929                         }
8930                 }
8931         } else {
8932                 if (ri != NULL &&
8933                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8934                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8935                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8936                         rec->bad_full_backref = 1;
8937                 }
8938         }
8939
8940         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8941                 rec->flag_block_full_backref = 1;
8942                 parent = bytenr;
8943                 owner = 0;
8944         } else {
8945                 rec->flag_block_full_backref = 0;
8946                 parent = 0;
8947                 owner = btrfs_header_owner(buf);
8948         }
8949
8950         ret = check_block(root, extent_cache, buf, flags);
8951         if (ret)
8952                 goto out;
8953
8954         if (btrfs_is_leaf(buf)) {
8955                 btree_space_waste += btrfs_leaf_free_space(root, buf);
8956                 for (i = 0; i < nritems; i++) {
8957                         struct btrfs_file_extent_item *fi;
8958                         btrfs_item_key_to_cpu(buf, &key, i);
8959                         /*
8960                          * Check key type against the leaf owner.
8961                          * Could filter quite a lot of early error if
8962                          * owner is correct
8963                          */
8964                         if (check_type_with_root(btrfs_header_owner(buf),
8965                                                  key.type)) {
8966                                 fprintf(stderr, "ignoring invalid key\n");
8967                                 continue;
8968                         }
8969                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8970                                 process_extent_item(root, extent_cache, buf,
8971                                                     i);
8972                                 continue;
8973                         }
8974                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8975                                 process_extent_item(root, extent_cache, buf,
8976                                                     i);
8977                                 continue;
8978                         }
8979                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8980                                 total_csum_bytes +=
8981                                         btrfs_item_size_nr(buf, i);
8982                                 continue;
8983                         }
8984                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8985                                 process_chunk_item(chunk_cache, &key, buf, i);
8986                                 continue;
8987                         }
8988                         if (key.type == BTRFS_DEV_ITEM_KEY) {
8989                                 process_device_item(dev_cache, &key, buf, i);
8990                                 continue;
8991                         }
8992                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8993                                 process_block_group_item(block_group_cache,
8994                                         &key, buf, i);
8995                                 continue;
8996                         }
8997                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
8998                                 process_device_extent_item(dev_extent_cache,
8999                                         &key, buf, i);
9000                                 continue;
9001
9002                         }
9003                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
9004 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
9005                                 process_extent_ref_v0(extent_cache, buf, i);
9006 #else
9007                                 BUG();
9008 #endif
9009                                 continue;
9010                         }
9011
9012                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
9013                                 ret = add_tree_backref(extent_cache,
9014                                                 key.objectid, 0, key.offset, 0);
9015                                 if (ret < 0)
9016                                         error(
9017                                 "add_tree_backref failed (leaf tree block): %s",
9018                                               strerror(-ret));
9019                                 continue;
9020                         }
9021                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
9022                                 ret = add_tree_backref(extent_cache,
9023                                                 key.objectid, key.offset, 0, 0);
9024                                 if (ret < 0)
9025                                         error(
9026                                 "add_tree_backref failed (leaf shared block): %s",
9027                                               strerror(-ret));
9028                                 continue;
9029                         }
9030                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
9031                                 struct btrfs_extent_data_ref *ref;
9032                                 ref = btrfs_item_ptr(buf, i,
9033                                                 struct btrfs_extent_data_ref);
9034                                 add_data_backref(extent_cache,
9035                                         key.objectid, 0,
9036                                         btrfs_extent_data_ref_root(buf, ref),
9037                                         btrfs_extent_data_ref_objectid(buf,
9038                                                                        ref),
9039                                         btrfs_extent_data_ref_offset(buf, ref),
9040                                         btrfs_extent_data_ref_count(buf, ref),
9041                                         0, root->fs_info->sectorsize);
9042                                 continue;
9043                         }
9044                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
9045                                 struct btrfs_shared_data_ref *ref;
9046                                 ref = btrfs_item_ptr(buf, i,
9047                                                 struct btrfs_shared_data_ref);
9048                                 add_data_backref(extent_cache,
9049                                         key.objectid, key.offset, 0, 0, 0,
9050                                         btrfs_shared_data_ref_count(buf, ref),
9051                                         0, root->fs_info->sectorsize);
9052                                 continue;
9053                         }
9054                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
9055                                 struct bad_item *bad;
9056
9057                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
9058                                         continue;
9059                                 if (!owner)
9060                                         continue;
9061                                 bad = malloc(sizeof(struct bad_item));
9062                                 if (!bad)
9063                                         continue;
9064                                 INIT_LIST_HEAD(&bad->list);
9065                                 memcpy(&bad->key, &key,
9066                                        sizeof(struct btrfs_key));
9067                                 bad->root_id = owner;
9068                                 list_add_tail(&bad->list, &delete_items);
9069                                 continue;
9070                         }
9071                         if (key.type != BTRFS_EXTENT_DATA_KEY)
9072                                 continue;
9073                         fi = btrfs_item_ptr(buf, i,
9074                                             struct btrfs_file_extent_item);
9075                         if (btrfs_file_extent_type(buf, fi) ==
9076                             BTRFS_FILE_EXTENT_INLINE)
9077                                 continue;
9078                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
9079                                 continue;
9080
9081                         data_bytes_allocated +=
9082                                 btrfs_file_extent_disk_num_bytes(buf, fi);
9083                         if (data_bytes_allocated < root->fs_info->sectorsize) {
9084                                 abort();
9085                         }
9086                         data_bytes_referenced +=
9087                                 btrfs_file_extent_num_bytes(buf, fi);
9088                         add_data_backref(extent_cache,
9089                                 btrfs_file_extent_disk_bytenr(buf, fi),
9090                                 parent, owner, key.objectid, key.offset -
9091                                 btrfs_file_extent_offset(buf, fi), 1, 1,
9092                                 btrfs_file_extent_disk_num_bytes(buf, fi));
9093                 }
9094         } else {
9095                 int level;
9096                 struct btrfs_key first_key;
9097
9098                 first_key.objectid = 0;
9099
9100                 if (nritems > 0)
9101                         btrfs_item_key_to_cpu(buf, &first_key, 0);
9102                 level = btrfs_header_level(buf);
9103                 for (i = 0; i < nritems; i++) {
9104                         struct extent_record tmpl;
9105
9106                         ptr = btrfs_node_blockptr(buf, i);
9107                         size = root->fs_info->nodesize;
9108                         btrfs_node_key_to_cpu(buf, &key, i);
9109                         if (ri != NULL) {
9110                                 if ((level == ri->drop_level)
9111                                     && is_dropped_key(&key, &ri->drop_key)) {
9112                                         continue;
9113                                 }
9114                         }
9115
9116                         memset(&tmpl, 0, sizeof(tmpl));
9117                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9118                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9119                         tmpl.start = ptr;
9120                         tmpl.nr = size;
9121                         tmpl.refs = 1;
9122                         tmpl.metadata = 1;
9123                         tmpl.max_size = size;
9124                         ret = add_extent_rec(extent_cache, &tmpl);
9125                         if (ret < 0)
9126                                 goto out;
9127
9128                         ret = add_tree_backref(extent_cache, ptr, parent,
9129                                         owner, 1);
9130                         if (ret < 0) {
9131                                 error(
9132                                 "add_tree_backref failed (non-leaf block): %s",
9133                                       strerror(-ret));
9134                                 continue;
9135                         }
9136
9137                         if (level > 1) {
9138                                 add_pending(nodes, seen, ptr, size);
9139                         } else {
9140                                 add_pending(pending, seen, ptr, size);
9141                         }
9142                 }
9143                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
9144                                       nritems) * sizeof(struct btrfs_key_ptr);
9145         }
9146         total_btree_bytes += buf->len;
9147         if (fs_root_objectid(btrfs_header_owner(buf)))
9148                 total_fs_tree_bytes += buf->len;
9149         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9150                 total_extent_tree_bytes += buf->len;
9151 out:
9152         free_extent_buffer(buf);
9153         return ret;
9154 }
9155
9156 static int add_root_to_pending(struct extent_buffer *buf,
9157                                struct cache_tree *extent_cache,
9158                                struct cache_tree *pending,
9159                                struct cache_tree *seen,
9160                                struct cache_tree *nodes,
9161                                u64 objectid)
9162 {
9163         struct extent_record tmpl;
9164         int ret;
9165
9166         if (btrfs_header_level(buf) > 0)
9167                 add_pending(nodes, seen, buf->start, buf->len);
9168         else
9169                 add_pending(pending, seen, buf->start, buf->len);
9170
9171         memset(&tmpl, 0, sizeof(tmpl));
9172         tmpl.start = buf->start;
9173         tmpl.nr = buf->len;
9174         tmpl.is_root = 1;
9175         tmpl.refs = 1;
9176         tmpl.metadata = 1;
9177         tmpl.max_size = buf->len;
9178         add_extent_rec(extent_cache, &tmpl);
9179
9180         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9181             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9182                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9183                                 0, 1);
9184         else
9185                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9186                                 1);
9187         return ret;
9188 }
9189
9190 /* as we fix the tree, we might be deleting blocks that
9191  * we're tracking for repair.  This hook makes sure we
9192  * remove any backrefs for blocks as we are fixing them.
9193  */
9194 static int free_extent_hook(struct btrfs_trans_handle *trans,
9195                             struct btrfs_root *root,
9196                             u64 bytenr, u64 num_bytes, u64 parent,
9197                             u64 root_objectid, u64 owner, u64 offset,
9198                             int refs_to_drop)
9199 {
9200         struct extent_record *rec;
9201         struct cache_extent *cache;
9202         int is_data;
9203         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9204
9205         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9206         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9207         if (!cache)
9208                 return 0;
9209
9210         rec = container_of(cache, struct extent_record, cache);
9211         if (is_data) {
9212                 struct data_backref *back;
9213                 back = find_data_backref(rec, parent, root_objectid, owner,
9214                                          offset, 1, bytenr, num_bytes);
9215                 if (!back)
9216                         goto out;
9217                 if (back->node.found_ref) {
9218                         back->found_ref -= refs_to_drop;
9219                         if (rec->refs)
9220                                 rec->refs -= refs_to_drop;
9221                 }
9222                 if (back->node.found_extent_tree) {
9223                         back->num_refs -= refs_to_drop;
9224                         if (rec->extent_item_refs)
9225                                 rec->extent_item_refs -= refs_to_drop;
9226                 }
9227                 if (back->found_ref == 0)
9228                         back->node.found_ref = 0;
9229                 if (back->num_refs == 0)
9230                         back->node.found_extent_tree = 0;
9231
9232                 if (!back->node.found_extent_tree && back->node.found_ref) {
9233                         rb_erase(&back->node.node, &rec->backref_tree);
9234                         free(back);
9235                 }
9236         } else {
9237                 struct tree_backref *back;
9238                 back = find_tree_backref(rec, parent, root_objectid);
9239                 if (!back)
9240                         goto out;
9241                 if (back->node.found_ref) {
9242                         if (rec->refs)
9243                                 rec->refs--;
9244                         back->node.found_ref = 0;
9245                 }
9246                 if (back->node.found_extent_tree) {
9247                         if (rec->extent_item_refs)
9248                                 rec->extent_item_refs--;
9249                         back->node.found_extent_tree = 0;
9250                 }
9251                 if (!back->node.found_extent_tree && back->node.found_ref) {
9252                         rb_erase(&back->node.node, &rec->backref_tree);
9253                         free(back);
9254                 }
9255         }
9256         maybe_free_extent_rec(extent_cache, rec);
9257 out:
9258         return 0;
9259 }
9260
9261 static int delete_extent_records(struct btrfs_trans_handle *trans,
9262                                  struct btrfs_root *root,
9263                                  struct btrfs_path *path,
9264                                  u64 bytenr)
9265 {
9266         struct btrfs_key key;
9267         struct btrfs_key found_key;
9268         struct extent_buffer *leaf;
9269         int ret;
9270         int slot;
9271
9272
9273         key.objectid = bytenr;
9274         key.type = (u8)-1;
9275         key.offset = (u64)-1;
9276
9277         while(1) {
9278                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9279                                         &key, path, 0, 1);
9280                 if (ret < 0)
9281                         break;
9282
9283                 if (ret > 0) {
9284                         ret = 0;
9285                         if (path->slots[0] == 0)
9286                                 break;
9287                         path->slots[0]--;
9288                 }
9289                 ret = 0;
9290
9291                 leaf = path->nodes[0];
9292                 slot = path->slots[0];
9293
9294                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9295                 if (found_key.objectid != bytenr)
9296                         break;
9297
9298                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9299                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
9300                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9301                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9302                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9303                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9304                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9305                         btrfs_release_path(path);
9306                         if (found_key.type == 0) {
9307                                 if (found_key.offset == 0)
9308                                         break;
9309                                 key.offset = found_key.offset - 1;
9310                                 key.type = found_key.type;
9311                         }
9312                         key.type = found_key.type - 1;
9313                         key.offset = (u64)-1;
9314                         continue;
9315                 }
9316
9317                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9318                         found_key.objectid, found_key.type, found_key.offset);
9319
9320                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9321                 if (ret)
9322                         break;
9323                 btrfs_release_path(path);
9324
9325                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9326                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
9327                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9328                                 found_key.offset : root->fs_info->nodesize;
9329
9330                         ret = btrfs_update_block_group(root, bytenr,
9331                                                        bytes, 0, 0);
9332                         if (ret)
9333                                 break;
9334                 }
9335         }
9336
9337         btrfs_release_path(path);
9338         return ret;
9339 }
9340
9341 /*
9342  * for a single backref, this will allocate a new extent
9343  * and add the backref to it.
9344  */
9345 static int record_extent(struct btrfs_trans_handle *trans,
9346                          struct btrfs_fs_info *info,
9347                          struct btrfs_path *path,
9348                          struct extent_record *rec,
9349                          struct extent_backref *back,
9350                          int allocated, u64 flags)
9351 {
9352         int ret = 0;
9353         struct btrfs_root *extent_root = info->extent_root;
9354         struct extent_buffer *leaf;
9355         struct btrfs_key ins_key;
9356         struct btrfs_extent_item *ei;
9357         struct data_backref *dback;
9358         struct btrfs_tree_block_info *bi;
9359
9360         if (!back->is_data)
9361                 rec->max_size = max_t(u64, rec->max_size,
9362                                     info->nodesize);
9363
9364         if (!allocated) {
9365                 u32 item_size = sizeof(*ei);
9366
9367                 if (!back->is_data)
9368                         item_size += sizeof(*bi);
9369
9370                 ins_key.objectid = rec->start;
9371                 ins_key.offset = rec->max_size;
9372                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9373
9374                 ret = btrfs_insert_empty_item(trans, extent_root, path,
9375                                         &ins_key, item_size);
9376                 if (ret)
9377                         goto fail;
9378
9379                 leaf = path->nodes[0];
9380                 ei = btrfs_item_ptr(leaf, path->slots[0],
9381                                     struct btrfs_extent_item);
9382
9383                 btrfs_set_extent_refs(leaf, ei, 0);
9384                 btrfs_set_extent_generation(leaf, ei, rec->generation);
9385
9386                 if (back->is_data) {
9387                         btrfs_set_extent_flags(leaf, ei,
9388                                                BTRFS_EXTENT_FLAG_DATA);
9389                 } else {
9390                         struct btrfs_disk_key copy_key;;
9391
9392                         bi = (struct btrfs_tree_block_info *)(ei + 1);
9393                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
9394                                              sizeof(*bi));
9395
9396                         btrfs_set_disk_key_objectid(&copy_key,
9397                                                     rec->info_objectid);
9398                         btrfs_set_disk_key_type(&copy_key, 0);
9399                         btrfs_set_disk_key_offset(&copy_key, 0);
9400
9401                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9402                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
9403
9404                         btrfs_set_extent_flags(leaf, ei,
9405                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9406                 }
9407
9408                 btrfs_mark_buffer_dirty(leaf);
9409                 ret = btrfs_update_block_group(extent_root, rec->start,
9410                                                rec->max_size, 1, 0);
9411                 if (ret)
9412                         goto fail;
9413                 btrfs_release_path(path);
9414         }
9415
9416         if (back->is_data) {
9417                 u64 parent;
9418                 int i;
9419
9420                 dback = to_data_backref(back);
9421                 if (back->full_backref)
9422                         parent = dback->parent;
9423                 else
9424                         parent = 0;
9425
9426                 for (i = 0; i < dback->found_ref; i++) {
9427                         /* if parent != 0, we're doing a full backref
9428                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9429                          * just makes the backref allocator create a data
9430                          * backref
9431                          */
9432                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
9433                                                    rec->start, rec->max_size,
9434                                                    parent,
9435                                                    dback->root,
9436                                                    parent ?
9437                                                    BTRFS_FIRST_FREE_OBJECTID :
9438                                                    dback->owner,
9439                                                    dback->offset);
9440                         if (ret)
9441                                 break;
9442                 }
9443                 fprintf(stderr, "adding new data backref"
9444                                 " on %llu %s %llu owner %llu"
9445                                 " offset %llu found %d\n",
9446                                 (unsigned long long)rec->start,
9447                                 back->full_backref ?
9448                                 "parent" : "root",
9449                                 back->full_backref ?
9450                                 (unsigned long long)parent :
9451                                 (unsigned long long)dback->root,
9452                                 (unsigned long long)dback->owner,
9453                                 (unsigned long long)dback->offset,
9454                                 dback->found_ref);
9455         } else {
9456                 u64 parent;
9457                 struct tree_backref *tback;
9458
9459                 tback = to_tree_backref(back);
9460                 if (back->full_backref)
9461                         parent = tback->parent;
9462                 else
9463                         parent = 0;
9464
9465                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9466                                            rec->start, rec->max_size,
9467                                            parent, tback->root, 0, 0);
9468                 fprintf(stderr, "adding new tree backref on "
9469                         "start %llu len %llu parent %llu root %llu\n",
9470                         rec->start, rec->max_size, parent, tback->root);
9471         }
9472 fail:
9473         btrfs_release_path(path);
9474         return ret;
9475 }
9476
9477 static struct extent_entry *find_entry(struct list_head *entries,
9478                                        u64 bytenr, u64 bytes)
9479 {
9480         struct extent_entry *entry = NULL;
9481
9482         list_for_each_entry(entry, entries, list) {
9483                 if (entry->bytenr == bytenr && entry->bytes == bytes)
9484                         return entry;
9485         }
9486
9487         return NULL;
9488 }
9489
9490 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9491 {
9492         struct extent_entry *entry, *best = NULL, *prev = NULL;
9493
9494         list_for_each_entry(entry, entries, list) {
9495                 /*
9496                  * If there are as many broken entries as entries then we know
9497                  * not to trust this particular entry.
9498                  */
9499                 if (entry->broken == entry->count)
9500                         continue;
9501
9502                 /*
9503                  * Special case, when there are only two entries and 'best' is
9504                  * the first one
9505                  */
9506                 if (!prev) {
9507                         best = entry;
9508                         prev = entry;
9509                         continue;
9510                 }
9511
9512                 /*
9513                  * If our current entry == best then we can't be sure our best
9514                  * is really the best, so we need to keep searching.
9515                  */
9516                 if (best && best->count == entry->count) {
9517                         prev = entry;
9518                         best = NULL;
9519                         continue;
9520                 }
9521
9522                 /* Prev == entry, not good enough, have to keep searching */
9523                 if (!prev->broken && prev->count == entry->count)
9524                         continue;
9525
9526                 if (!best)
9527                         best = (prev->count > entry->count) ? prev : entry;
9528                 else if (best->count < entry->count)
9529                         best = entry;
9530                 prev = entry;
9531         }
9532
9533         return best;
9534 }
9535
9536 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9537                       struct data_backref *dback, struct extent_entry *entry)
9538 {
9539         struct btrfs_trans_handle *trans;
9540         struct btrfs_root *root;
9541         struct btrfs_file_extent_item *fi;
9542         struct extent_buffer *leaf;
9543         struct btrfs_key key;
9544         u64 bytenr, bytes;
9545         int ret, err;
9546
9547         key.objectid = dback->root;
9548         key.type = BTRFS_ROOT_ITEM_KEY;
9549         key.offset = (u64)-1;
9550         root = btrfs_read_fs_root(info, &key);
9551         if (IS_ERR(root)) {
9552                 fprintf(stderr, "Couldn't find root for our ref\n");
9553                 return -EINVAL;
9554         }
9555
9556         /*
9557          * The backref points to the original offset of the extent if it was
9558          * split, so we need to search down to the offset we have and then walk
9559          * forward until we find the backref we're looking for.
9560          */
9561         key.objectid = dback->owner;
9562         key.type = BTRFS_EXTENT_DATA_KEY;
9563         key.offset = dback->offset;
9564         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9565         if (ret < 0) {
9566                 fprintf(stderr, "Error looking up ref %d\n", ret);
9567                 return ret;
9568         }
9569
9570         while (1) {
9571                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9572                         ret = btrfs_next_leaf(root, path);
9573                         if (ret) {
9574                                 fprintf(stderr, "Couldn't find our ref, next\n");
9575                                 return -EINVAL;
9576                         }
9577                 }
9578                 leaf = path->nodes[0];
9579                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9580                 if (key.objectid != dback->owner ||
9581                     key.type != BTRFS_EXTENT_DATA_KEY) {
9582                         fprintf(stderr, "Couldn't find our ref, search\n");
9583                         return -EINVAL;
9584                 }
9585                 fi = btrfs_item_ptr(leaf, path->slots[0],
9586                                     struct btrfs_file_extent_item);
9587                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9588                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9589
9590                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9591                         break;
9592                 path->slots[0]++;
9593         }
9594
9595         btrfs_release_path(path);
9596
9597         trans = btrfs_start_transaction(root, 1);
9598         if (IS_ERR(trans))
9599                 return PTR_ERR(trans);
9600
9601         /*
9602          * Ok we have the key of the file extent we want to fix, now we can cow
9603          * down to the thing and fix it.
9604          */
9605         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9606         if (ret < 0) {
9607                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9608                         key.objectid, key.type, key.offset, ret);
9609                 goto out;
9610         }
9611         if (ret > 0) {
9612                 fprintf(stderr, "Well that's odd, we just found this key "
9613                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9614                         key.offset);
9615                 ret = -EINVAL;
9616                 goto out;
9617         }
9618         leaf = path->nodes[0];
9619         fi = btrfs_item_ptr(leaf, path->slots[0],
9620                             struct btrfs_file_extent_item);
9621
9622         if (btrfs_file_extent_compression(leaf, fi) &&
9623             dback->disk_bytenr != entry->bytenr) {
9624                 fprintf(stderr, "Ref doesn't match the record start and is "
9625                         "compressed, please take a btrfs-image of this file "
9626                         "system and send it to a btrfs developer so they can "
9627                         "complete this functionality for bytenr %Lu\n",
9628                         dback->disk_bytenr);
9629                 ret = -EINVAL;
9630                 goto out;
9631         }
9632
9633         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9634                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9635         } else if (dback->disk_bytenr > entry->bytenr) {
9636                 u64 off_diff, offset;
9637
9638                 off_diff = dback->disk_bytenr - entry->bytenr;
9639                 offset = btrfs_file_extent_offset(leaf, fi);
9640                 if (dback->disk_bytenr + offset +
9641                     btrfs_file_extent_num_bytes(leaf, fi) >
9642                     entry->bytenr + entry->bytes) {
9643                         fprintf(stderr, "Ref is past the entry end, please "
9644                                 "take a btrfs-image of this file system and "
9645                                 "send it to a btrfs developer, ref %Lu\n",
9646                                 dback->disk_bytenr);
9647                         ret = -EINVAL;
9648                         goto out;
9649                 }
9650                 offset += off_diff;
9651                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9652                 btrfs_set_file_extent_offset(leaf, fi, offset);
9653         } else if (dback->disk_bytenr < entry->bytenr) {
9654                 u64 offset;
9655
9656                 offset = btrfs_file_extent_offset(leaf, fi);
9657                 if (dback->disk_bytenr + offset < entry->bytenr) {
9658                         fprintf(stderr, "Ref is before the entry start, please"
9659                                 " take a btrfs-image of this file system and "
9660                                 "send it to a btrfs developer, ref %Lu\n",
9661                                 dback->disk_bytenr);
9662                         ret = -EINVAL;
9663                         goto out;
9664                 }
9665
9666                 offset += dback->disk_bytenr;
9667                 offset -= entry->bytenr;
9668                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9669                 btrfs_set_file_extent_offset(leaf, fi, offset);
9670         }
9671
9672         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9673
9674         /*
9675          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9676          * only do this if we aren't using compression, otherwise it's a
9677          * trickier case.
9678          */
9679         if (!btrfs_file_extent_compression(leaf, fi))
9680                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9681         else
9682                 printf("ram bytes may be wrong?\n");
9683         btrfs_mark_buffer_dirty(leaf);
9684 out:
9685         err = btrfs_commit_transaction(trans, root);
9686         btrfs_release_path(path);
9687         return ret ? ret : err;
9688 }
9689
/*
 * Check that the data backrefs of @rec agree on the extent's bytenr and size,
 * and try to repair the file extent items behind those that do not.
 *
 * Every non-full data backref with a found ref is grouped into a list of
 * (disk_bytenr, bytes) entries.  If more than one entry exists, or any
 * backref disagrees with the record or is marked broken, the most commonly
 * agreed-on entry is chosen (using the extent record as a tie breaker) and
 * repair_ref() is called for each backref that differs from it.
 *
 * Returns 0 when @rec is metadata or all backrefs agree, -EAGAIN after the
 * repair loop has run (the caller is expected to rescan), -ENOMEM on
 * allocation failure, -EINVAL when no entry can be chosen or the chosen entry
 * does not start at rec->start, or the error from repair_ref().
 */
static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
                           struct extent_record *rec)
{
        struct extent_backref *back, *tmp;
        struct data_backref *dback;
        struct extent_entry *entry, *best = NULL;
        LIST_HEAD(entries);
        int nr_entries = 0;
        int broken_entries = 0;
        int ret = 0;
        short mismatch = 0;

        /*
         * Metadata is easy and the backrefs should always agree on bytenr and
         * size, if not we've got bigger issues.
         */
        if (rec->metadata)
                return 0;

        rbtree_postorder_for_each_entry_safe(back, tmp,
                                             &rec->backref_tree, node) {
                if (back->full_backref || !back->is_data)
                        continue;

                dback = to_data_backref(back);

                /*
                 * We only pay attention to backrefs that we found a real
                 * backref for.
                 */
                if (dback->found_ref == 0)
                        continue;

                /*
                 * For now we only catch when the bytes don't match, not the
                 * bytenr.  We can easily do this at the same time, but I want
                 * to have a fs image to test on before we just add repair
                 * functionality willy-nilly so we know we won't screw up the
                 * repair.
                 */

                entry = find_entry(&entries, dback->disk_bytenr,
                                   dback->bytes);
                if (!entry) {
                        entry = malloc(sizeof(struct extent_entry));
                        if (!entry) {
                                ret = -ENOMEM;
                                goto out;
                        }
                        memset(entry, 0, sizeof(*entry));
                        entry->bytenr = dback->disk_bytenr;
                        entry->bytes = dback->bytes;
                        list_add_tail(&entry->list, &entries);
                        nr_entries++;
                }

                /*
                 * If we only have one entry we may think the entries agree
                 * when in reality they don't so we have to do some extra
                 * checking.
                 */
                if (dback->disk_bytenr != rec->start ||
                    dback->bytes != rec->nr || back->broken)
                        mismatch = 1;

                if (back->broken) {
                        entry->broken++;
                        broken_entries++;
                }

                entry->count++;
        }

        /* Yay all the backrefs agree, carry on good sir */
        if (nr_entries <= 1 && !mismatch)
                goto out;

        fprintf(stderr, "attempting to repair backref discrepency for bytenr "
                "%Lu\n", rec->start);

        /*
         * First we want to see if the backrefs can agree amongst themselves who
         * is right, so figure out which one of the entries has the highest
         * count.
         */
        best = find_most_right_entry(&entries);

        /*
         * Ok so we may have an even split between what the backrefs think, so
         * this is where we use the extent ref to see what it thinks.
         */
        if (!best) {
                entry = find_entry(&entries, rec->start, rec->nr);
                if (!entry && (!broken_entries || !rec->found_rec)) {
                        fprintf(stderr, "Backrefs don't agree with each other "
                                "and extent record doesn't agree with anybody,"
                                " so we can't fix bytenr %Lu bytes %Lu\n",
                                rec->start, rec->nr);
                        ret = -EINVAL;
                        goto out;
                } else if (!entry) {
                        /*
                         * Ok our backrefs were broken, we'll assume this is the
                         * correct value and add an entry for this range.
                         */
                        entry = malloc(sizeof(struct extent_entry));
                        if (!entry) {
                                ret = -ENOMEM;
                                goto out;
                        }
                        memset(entry, 0, sizeof(*entry));
                        entry->bytenr = rec->start;
                        entry->bytes = rec->nr;
                        list_add_tail(&entry->list, &entries);
                        nr_entries++;
                }
                /* Let the extent record cast its vote, then pick again. */
                entry->count++;
                best = find_most_right_entry(&entries);
                if (!best) {
                        fprintf(stderr, "Backrefs and extent record evenly "
                                "split on who is right, this is going to "
                                "require user input to fix bytenr %Lu bytes "
                                "%Lu\n", rec->start, rec->nr);
                        ret = -EINVAL;
                        goto out;
                }
        }

        /*
         * I don't think this can happen currently as we'll abort() if we catch
         * this case higher up, but in case somebody removes that we still can't
         * deal with it properly here yet, so just bail out if that's the case.
         */
        if (best->bytenr != rec->start) {
                fprintf(stderr, "Extent start and backref starts don't match, "
                        "please use btrfs-image on this file system and send "
                        "it to a btrfs developer so they can make fsck fix "
                        "this particular case.  bytenr is %Lu, bytes is %Lu\n",
                        rec->start, rec->nr);
                ret = -EINVAL;
                goto out;
        }

        /*
         * Ok great we all agreed on an extent record, let's go find the real
         * references and fix up the ones that don't match.
         */
        rbtree_postorder_for_each_entry_safe(back, tmp,
                                             &rec->backref_tree, node) {
                if (back->full_backref || !back->is_data)
                        continue;

                dback = to_data_backref(back);

                /*
                 * Still ignoring backrefs that don't have a real ref attached
                 * to them.
                 */
                if (dback->found_ref == 0)
                        continue;

                /* Already agrees with the chosen entry, nothing to fix. */
                if (dback->bytes == best->bytes &&
                    dback->disk_bytenr == best->bytenr)
                        continue;

                ret = repair_ref(info, path, dback, best);
                if (ret)
                        goto out;
        }

        /*
         * Ok we messed with the actual refs, which means we need to drop our
         * entire cache and go back and rescan.  I know this is a huge pain and
         * adds a lot of extra work, but it's the only way to be safe.  Once all
         * the backrefs agree we may not need to do anything to the extent
         * record itself.
         */
        ret = -EAGAIN;
out:
        /* Free the temporary entry list on every exit path. */
        while (!list_empty(&entries)) {
                entry = list_entry(entries.next, struct extent_entry, list);
                list_del_init(&entry->list);
                free(entry);
        }
        return ret;
}
9875
9876 static int process_duplicates(struct cache_tree *extent_cache,
9877                               struct extent_record *rec)
9878 {
9879         struct extent_record *good, *tmp;
9880         struct cache_extent *cache;
9881         int ret;
9882
9883         /*
9884          * If we found a extent record for this extent then return, or if we
9885          * have more than one duplicate we are likely going to need to delete
9886          * something.
9887          */
9888         if (rec->found_rec || rec->num_duplicates > 1)
9889                 return 0;
9890
9891         /* Shouldn't happen but just in case */
9892         BUG_ON(!rec->num_duplicates);
9893
9894         /*
9895          * So this happens if we end up with a backref that doesn't match the
9896          * actual extent entry.  So either the backref is bad or the extent
9897          * entry is bad.  Either way we want to have the extent_record actually
9898          * reflect what we found in the extent_tree, so we need to take the
9899          * duplicate out and use that as the extent_record since the only way we
9900          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9901          */
9902         remove_cache_extent(extent_cache, &rec->cache);
9903
9904         good = to_extent_record(rec->dups.next);
9905         list_del_init(&good->list);
9906         INIT_LIST_HEAD(&good->backrefs);
9907         INIT_LIST_HEAD(&good->dups);
9908         good->cache.start = good->start;
9909         good->cache.size = good->nr;
9910         good->content_checked = 0;
9911         good->owner_ref_checked = 0;
9912         good->num_duplicates = 0;
9913         good->refs = rec->refs;
9914         list_splice_init(&rec->backrefs, &good->backrefs);
9915         while (1) {
9916                 cache = lookup_cache_extent(extent_cache, good->start,
9917                                             good->nr);
9918                 if (!cache)
9919                         break;
9920                 tmp = container_of(cache, struct extent_record, cache);
9921
9922                 /*
9923                  * If we find another overlapping extent and it's found_rec is
9924                  * set then it's a duplicate and we need to try and delete
9925                  * something.
9926                  */
9927                 if (tmp->found_rec || tmp->num_duplicates > 0) {
9928                         if (list_empty(&good->list))
9929                                 list_add_tail(&good->list,
9930                                               &duplicate_extents);
9931                         good->num_duplicates += tmp->num_duplicates + 1;
9932                         list_splice_init(&tmp->dups, &good->dups);
9933                         list_del_init(&tmp->list);
9934                         list_add_tail(&tmp->list, &good->dups);
9935                         remove_cache_extent(extent_cache, &tmp->cache);
9936                         continue;
9937                 }
9938
9939                 /*
9940                  * Ok we have another non extent item backed extent rec, so lets
9941                  * just add it to this extent and carry on like we did above.
9942                  */
9943                 good->refs += tmp->refs;
9944                 list_splice_init(&tmp->backrefs, &good->backrefs);
9945                 remove_cache_extent(extent_cache, &tmp->cache);
9946                 free(tmp);
9947         }
9948         ret = insert_cache_extent(extent_cache, &good->cache);
9949         BUG_ON(ret);
9950         free(rec);
9951         return good->num_duplicates ? 0 : 1;
9952 }
9953
9954 static int delete_duplicate_records(struct btrfs_root *root,
9955                                     struct extent_record *rec)
9956 {
9957         struct btrfs_trans_handle *trans;
9958         LIST_HEAD(delete_list);
9959         struct btrfs_path path;
9960         struct extent_record *tmp, *good, *n;
9961         int nr_del = 0;
9962         int ret = 0, err;
9963         struct btrfs_key key;
9964
9965         btrfs_init_path(&path);
9966
9967         good = rec;
9968         /* Find the record that covers all of the duplicates. */
9969         list_for_each_entry(tmp, &rec->dups, list) {
9970                 if (good->start < tmp->start)
9971                         continue;
9972                 if (good->nr > tmp->nr)
9973                         continue;
9974
9975                 if (tmp->start + tmp->nr < good->start + good->nr) {
9976                         fprintf(stderr, "Ok we have overlapping extents that "
9977                                 "aren't completely covered by each other, this "
9978                                 "is going to require more careful thought.  "
9979                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9980                                 tmp->start, tmp->nr, good->start, good->nr);
9981                         abort();
9982                 }
9983                 good = tmp;
9984         }
9985
9986         if (good != rec)
9987                 list_add_tail(&rec->list, &delete_list);
9988
9989         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9990                 if (tmp == good)
9991                         continue;
9992                 list_move_tail(&tmp->list, &delete_list);
9993         }
9994
9995         root = root->fs_info->extent_root;
9996         trans = btrfs_start_transaction(root, 1);
9997         if (IS_ERR(trans)) {
9998                 ret = PTR_ERR(trans);
9999                 goto out;
10000         }
10001
10002         list_for_each_entry(tmp, &delete_list, list) {
10003                 if (tmp->found_rec == 0)
10004                         continue;
10005                 key.objectid = tmp->start;
10006                 key.type = BTRFS_EXTENT_ITEM_KEY;
10007                 key.offset = tmp->nr;
10008
10009                 /* Shouldn't happen but just in case */
10010                 if (tmp->metadata) {
10011                         fprintf(stderr, "Well this shouldn't happen, extent "
10012                                 "record overlaps but is metadata? "
10013                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
10014                         abort();
10015                 }
10016
10017                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10018                 if (ret) {
10019                         if (ret > 0)
10020                                 ret = -EINVAL;
10021                         break;
10022                 }
10023                 ret = btrfs_del_item(trans, root, &path);
10024                 if (ret)
10025                         break;
10026                 btrfs_release_path(&path);
10027                 nr_del++;
10028         }
10029         err = btrfs_commit_transaction(trans, root);
10030         if (err && !ret)
10031                 ret = err;
10032 out:
10033         while (!list_empty(&delete_list)) {
10034                 tmp = to_extent_record(delete_list.next);
10035                 list_del_init(&tmp->list);
10036                 if (tmp == rec)
10037                         continue;
10038                 free(tmp);
10039         }
10040
10041         while (!list_empty(&rec->dups)) {
10042                 tmp = to_extent_record(rec->dups.next);
10043                 list_del_init(&tmp->list);
10044                 free(tmp);
10045         }
10046
10047         btrfs_release_path(&path);
10048
10049         if (!ret && !nr_del)
10050                 rec->num_duplicates = 0;
10051
10052         return ret ? ret : nr_del;
10053 }
10054
10055 static int find_possible_backrefs(struct btrfs_fs_info *info,
10056                                   struct btrfs_path *path,
10057                                   struct cache_tree *extent_cache,
10058                                   struct extent_record *rec)
10059 {
10060         struct btrfs_root *root;
10061         struct extent_backref *back, *tmp;
10062         struct data_backref *dback;
10063         struct cache_extent *cache;
10064         struct btrfs_file_extent_item *fi;
10065         struct btrfs_key key;
10066         u64 bytenr, bytes;
10067         int ret;
10068
10069         rbtree_postorder_for_each_entry_safe(back, tmp,
10070                                              &rec->backref_tree, node) {
10071                 /* Don't care about full backrefs (poor unloved backrefs) */
10072                 if (back->full_backref || !back->is_data)
10073                         continue;
10074
10075                 dback = to_data_backref(back);
10076
10077                 /* We found this one, we don't need to do a lookup */
10078                 if (dback->found_ref)
10079                         continue;
10080
10081                 key.objectid = dback->root;
10082                 key.type = BTRFS_ROOT_ITEM_KEY;
10083                 key.offset = (u64)-1;
10084
10085                 root = btrfs_read_fs_root(info, &key);
10086
10087                 /* No root, definitely a bad ref, skip */
10088                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
10089                         continue;
10090                 /* Other err, exit */
10091                 if (IS_ERR(root))
10092                         return PTR_ERR(root);
10093
10094                 key.objectid = dback->owner;
10095                 key.type = BTRFS_EXTENT_DATA_KEY;
10096                 key.offset = dback->offset;
10097                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
10098                 if (ret) {
10099                         btrfs_release_path(path);
10100                         if (ret < 0)
10101                                 return ret;
10102                         /* Didn't find it, we can carry on */
10103                         ret = 0;
10104                         continue;
10105                 }
10106
10107                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10108                                     struct btrfs_file_extent_item);
10109                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10110                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10111                 btrfs_release_path(path);
10112                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
10113                 if (cache) {
10114                         struct extent_record *tmp;
10115                         tmp = container_of(cache, struct extent_record, cache);
10116
10117                         /*
10118                          * If we found an extent record for the bytenr for this
10119                          * particular backref then we can't add it to our
10120                          * current extent record.  We only want to add backrefs
10121                          * that don't have a corresponding extent item in the
10122                          * extent tree since they likely belong to this record
10123                          * and we need to fix it if it doesn't match bytenrs.
10124                          */
10125                         if  (tmp->found_rec)
10126                                 continue;
10127                 }
10128
10129                 dback->found_ref += 1;
10130                 dback->disk_bytenr = bytenr;
10131                 dback->bytes = bytes;
10132
10133                 /*
10134                  * Set this so the verify backref code knows not to trust the
10135                  * values in this backref.
10136                  */
10137                 back->broken = 1;
10138         }
10139
10140         return 0;
10141 }
10142
10143 /*
10144  * Record orphan data ref into corresponding root.
10145  *
10146  * Return 0 if the extent item contains data ref and recorded.
10147  * Return 1 if the extent item contains no useful data ref
10148  *   On that case, it may contains only shared_dataref or metadata backref
10149  *   or the file extent exists(this should be handled by the extent bytenr
10150  *   recovery routine)
10151  * Return <0 if something goes wrong.
10152  */
10153 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10154                                       struct extent_record *rec)
10155 {
10156         struct btrfs_key key;
10157         struct btrfs_root *dest_root;
10158         struct extent_backref *back, *tmp;
10159         struct data_backref *dback;
10160         struct orphan_data_extent *orphan;
10161         struct btrfs_path path;
10162         int recorded_data_ref = 0;
10163         int ret = 0;
10164
10165         if (rec->metadata)
10166                 return 1;
10167         btrfs_init_path(&path);
10168         rbtree_postorder_for_each_entry_safe(back, tmp,
10169                                              &rec->backref_tree, node) {
10170                 if (back->full_backref || !back->is_data ||
10171                     !back->found_extent_tree)
10172                         continue;
10173                 dback = to_data_backref(back);
10174                 if (dback->found_ref)
10175                         continue;
10176                 key.objectid = dback->root;
10177                 key.type = BTRFS_ROOT_ITEM_KEY;
10178                 key.offset = (u64)-1;
10179
10180                 dest_root = btrfs_read_fs_root(fs_info, &key);
10181
10182                 /* For non-exist root we just skip it */
10183                 if (IS_ERR(dest_root) || !dest_root)
10184                         continue;
10185
10186                 key.objectid = dback->owner;
10187                 key.type = BTRFS_EXTENT_DATA_KEY;
10188                 key.offset = dback->offset;
10189
10190                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10191                 btrfs_release_path(&path);
10192                 /*
10193                  * For ret < 0, it's OK since the fs-tree may be corrupted,
10194                  * we need to record it for inode/file extent rebuild.
10195                  * For ret > 0, we record it only for file extent rebuild.
10196                  * For ret == 0, the file extent exists but only bytenr
10197                  * mismatch, let the original bytenr fix routine to handle,
10198                  * don't record it.
10199                  */
10200                 if (ret == 0)
10201                         continue;
10202                 ret = 0;
10203                 orphan = malloc(sizeof(*orphan));
10204                 if (!orphan) {
10205                         ret = -ENOMEM;
10206                         goto out;
10207                 }
10208                 INIT_LIST_HEAD(&orphan->list);
10209                 orphan->root = dback->root;
10210                 orphan->objectid = dback->owner;
10211                 orphan->offset = dback->offset;
10212                 orphan->disk_bytenr = rec->cache.start;
10213                 orphan->disk_len = rec->cache.size;
10214                 list_add(&dest_root->orphan_data_extents, &orphan->list);
10215                 recorded_data_ref = 1;
10216         }
10217 out:
10218         btrfs_release_path(&path);
10219         if (!ret)
10220                 return !recorded_data_ref;
10221         else
10222                 return ret;
10223 }
10224
10225 /*
10226  * when an incorrect extent item is found, this will delete
10227  * all of the existing entries for it and recreate them
10228  * based on what the tree scan found.
10229  */
10230 static int fixup_extent_refs(struct btrfs_fs_info *info,
10231                              struct cache_tree *extent_cache,
10232                              struct extent_record *rec)
10233 {
10234         struct btrfs_trans_handle *trans = NULL;
10235         int ret;
10236         struct btrfs_path path;
10237         struct cache_extent *cache;
10238         struct extent_backref *back, *tmp;
10239         int allocated = 0;
10240         u64 flags = 0;
10241
10242         if (rec->flag_block_full_backref)
10243                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10244
10245         btrfs_init_path(&path);
10246         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10247                 /*
10248                  * Sometimes the backrefs themselves are so broken they don't
10249                  * get attached to any meaningful rec, so first go back and
10250                  * check any of our backrefs that we couldn't find and throw
10251                  * them into the list if we find the backref so that
10252                  * verify_backrefs can figure out what to do.
10253                  */
10254                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10255                 if (ret < 0)
10256                         goto out;
10257         }
10258
10259         /* step one, make sure all of the backrefs agree */
10260         ret = verify_backrefs(info, &path, rec);
10261         if (ret < 0)
10262                 goto out;
10263
10264         trans = btrfs_start_transaction(info->extent_root, 1);
10265         if (IS_ERR(trans)) {
10266                 ret = PTR_ERR(trans);
10267                 goto out;
10268         }
10269
10270         /* step two, delete all the existing records */
10271         ret = delete_extent_records(trans, info->extent_root, &path,
10272                                     rec->start);
10273
10274         if (ret < 0)
10275                 goto out;
10276
10277         /* was this block corrupt?  If so, don't add references to it */
10278         cache = lookup_cache_extent(info->corrupt_blocks,
10279                                     rec->start, rec->max_size);
10280         if (cache) {
10281                 ret = 0;
10282                 goto out;
10283         }
10284
10285         /* step three, recreate all the refs we did find */
10286         rbtree_postorder_for_each_entry_safe(back, tmp,
10287                                              &rec->backref_tree, node) {
10288                 /*
10289                  * if we didn't find any references, don't create a
10290                  * new extent record
10291                  */
10292                 if (!back->found_ref)
10293                         continue;
10294
10295                 rec->bad_full_backref = 0;
10296                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10297                 allocated = 1;
10298
10299                 if (ret)
10300                         goto out;
10301         }
10302 out:
10303         if (trans) {
10304                 int err = btrfs_commit_transaction(trans, info->extent_root);
10305                 if (!ret)
10306                         ret = err;
10307         }
10308
10309         if (!ret)
10310                 fprintf(stderr, "Repaired extent references for %llu\n",
10311                                 (unsigned long long)rec->start);
10312
10313         btrfs_release_path(&path);
10314         return ret;
10315 }
10316
10317 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10318                               struct extent_record *rec)
10319 {
10320         struct btrfs_trans_handle *trans;
10321         struct btrfs_root *root = fs_info->extent_root;
10322         struct btrfs_path path;
10323         struct btrfs_extent_item *ei;
10324         struct btrfs_key key;
10325         u64 flags;
10326         int ret = 0;
10327
10328         key.objectid = rec->start;
10329         if (rec->metadata) {
10330                 key.type = BTRFS_METADATA_ITEM_KEY;
10331                 key.offset = rec->info_level;
10332         } else {
10333                 key.type = BTRFS_EXTENT_ITEM_KEY;
10334                 key.offset = rec->max_size;
10335         }
10336
10337         trans = btrfs_start_transaction(root, 0);
10338         if (IS_ERR(trans))
10339                 return PTR_ERR(trans);
10340
10341         btrfs_init_path(&path);
10342         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10343         if (ret < 0) {
10344                 btrfs_release_path(&path);
10345                 btrfs_commit_transaction(trans, root);
10346                 return ret;
10347         } else if (ret) {
10348                 fprintf(stderr, "Didn't find extent for %llu\n",
10349                         (unsigned long long)rec->start);
10350                 btrfs_release_path(&path);
10351                 btrfs_commit_transaction(trans, root);
10352                 return -ENOENT;
10353         }
10354
10355         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10356                             struct btrfs_extent_item);
10357         flags = btrfs_extent_flags(path.nodes[0], ei);
10358         if (rec->flag_block_full_backref) {
10359                 fprintf(stderr, "setting full backref on %llu\n",
10360                         (unsigned long long)key.objectid);
10361                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10362         } else {
10363                 fprintf(stderr, "clearing full backref on %llu\n",
10364                         (unsigned long long)key.objectid);
10365                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10366         }
10367         btrfs_set_extent_flags(path.nodes[0], ei, flags);
10368         btrfs_mark_buffer_dirty(path.nodes[0]);
10369         btrfs_release_path(&path);
10370         ret = btrfs_commit_transaction(trans, root);
10371         if (!ret)
10372                 fprintf(stderr, "Repaired extent flags for %llu\n",
10373                                 (unsigned long long)rec->start);
10374
10375         return ret;
10376 }
10377
10378 /* right now we only prune from the extent allocation tree */
10379 static int prune_one_block(struct btrfs_trans_handle *trans,
10380                            struct btrfs_fs_info *info,
10381                            struct btrfs_corrupt_block *corrupt)
10382 {
10383         int ret;
10384         struct btrfs_path path;
10385         struct extent_buffer *eb;
10386         u64 found;
10387         int slot;
10388         int nritems;
10389         int level = corrupt->level + 1;
10390
10391         btrfs_init_path(&path);
10392 again:
10393         /* we want to stop at the parent to our busted block */
10394         path.lowest_level = level;
10395
10396         ret = btrfs_search_slot(trans, info->extent_root,
10397                                 &corrupt->key, &path, -1, 1);
10398
10399         if (ret < 0)
10400                 goto out;
10401
10402         eb = path.nodes[level];
10403         if (!eb) {
10404                 ret = -ENOENT;
10405                 goto out;
10406         }
10407
10408         /*
10409          * hopefully the search gave us the block we want to prune,
10410          * lets try that first
10411          */
10412         slot = path.slots[level];
10413         found =  btrfs_node_blockptr(eb, slot);
10414         if (found == corrupt->cache.start)
10415                 goto del_ptr;
10416
10417         nritems = btrfs_header_nritems(eb);
10418
10419         /* the search failed, lets scan this node and hope we find it */
10420         for (slot = 0; slot < nritems; slot++) {
10421                 found =  btrfs_node_blockptr(eb, slot);
10422                 if (found == corrupt->cache.start)
10423                         goto del_ptr;
10424         }
10425         /*
10426          * we couldn't find the bad block.  TODO, search all the nodes for pointers
10427          * to this block
10428          */
10429         if (eb == info->extent_root->node) {
10430                 ret = -ENOENT;
10431                 goto out;
10432         } else {
10433                 level++;
10434                 btrfs_release_path(&path);
10435                 goto again;
10436         }
10437
10438 del_ptr:
10439         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10440         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10441
10442 out:
10443         btrfs_release_path(&path);
10444         return ret;
10445 }
10446
10447 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10448 {
10449         struct btrfs_trans_handle *trans = NULL;
10450         struct cache_extent *cache;
10451         struct btrfs_corrupt_block *corrupt;
10452
10453         while (1) {
10454                 cache = search_cache_extent(info->corrupt_blocks, 0);
10455                 if (!cache)
10456                         break;
10457                 if (!trans) {
10458                         trans = btrfs_start_transaction(info->extent_root, 1);
10459                         if (IS_ERR(trans))
10460                                 return PTR_ERR(trans);
10461                 }
10462                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10463                 prune_one_block(trans, info, corrupt);
10464                 remove_cache_extent(info->corrupt_blocks, cache);
10465         }
10466         if (trans)
10467                 return btrfs_commit_transaction(trans, info->extent_root);
10468         return 0;
10469 }
10470
10471 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10472 {
10473         struct btrfs_block_group_cache *cache;
10474         u64 start, end;
10475         int ret;
10476
10477         while (1) {
10478                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10479                                             &start, &end, EXTENT_DIRTY);
10480                 if (ret)
10481                         break;
10482                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10483         }
10484
10485         start = 0;
10486         while (1) {
10487                 cache = btrfs_lookup_first_block_group(fs_info, start);
10488                 if (!cache)
10489                         break;
10490                 if (cache->cached)
10491                         cache->cached = 0;
10492                 start = cache->key.objectid + cache->key.offset;
10493         }
10494 }
10495
10496 static int check_extent_refs(struct btrfs_root *root,
10497                              struct cache_tree *extent_cache)
10498 {
10499         struct extent_record *rec;
10500         struct cache_extent *cache;
10501         int ret = 0;
10502         int had_dups = 0;
10503         int err = 0;
10504
10505         if (repair) {
10506                 /*
10507                  * if we're doing a repair, we have to make sure
10508                  * we don't allocate from the problem extents.
10509                  * In the worst case, this will be all the
10510                  * extents in the FS
10511                  */
10512                 cache = search_cache_extent(extent_cache, 0);
10513                 while(cache) {
10514                         rec = container_of(cache, struct extent_record, cache);
10515                         set_extent_dirty(root->fs_info->excluded_extents,
10516                                          rec->start,
10517                                          rec->start + rec->max_size - 1);
10518                         cache = next_cache_extent(cache);
10519                 }
10520
10521                 /* pin down all the corrupted blocks too */
10522                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10523                 while(cache) {
10524                         set_extent_dirty(root->fs_info->excluded_extents,
10525                                          cache->start,
10526                                          cache->start + cache->size - 1);
10527                         cache = next_cache_extent(cache);
10528                 }
10529                 prune_corrupt_blocks(root->fs_info);
10530                 reset_cached_block_groups(root->fs_info);
10531         }
10532
10533         reset_cached_block_groups(root->fs_info);
10534
10535         /*
10536          * We need to delete any duplicate entries we find first otherwise we
10537          * could mess up the extent tree when we have backrefs that actually
10538          * belong to a different extent item and not the weird duplicate one.
10539          */
10540         while (repair && !list_empty(&duplicate_extents)) {
10541                 rec = to_extent_record(duplicate_extents.next);
10542                 list_del_init(&rec->list);
10543
                /* Sometimes we can find a backref before we find an actual
                 * extent, so we need to process it a little bit to see if there
                 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
                 * if this is a backref screwup.  If we need to delete stuff
                 * process_duplicates() will return 0, otherwise it will return
                 * 1 and we skip to the next duplicate record.
                 */
10551                 if (process_duplicates(extent_cache, rec))
10552                         continue;
10553                 ret = delete_duplicate_records(root, rec);
10554                 if (ret < 0)
10555                         return ret;
10556                 /*
10557                  * delete_duplicate_records will return the number of entries
10558                  * deleted, so if it's greater than 0 then we know we actually
10559                  * did something and we need to remove.
10560                  */
10561                 if (ret)
10562                         had_dups = 1;
10563         }
10564
10565         if (had_dups)
10566                 return -EAGAIN;
10567
10568         while(1) {
10569                 int cur_err = 0;
10570                 int fix = 0;
10571
10572                 cache = search_cache_extent(extent_cache, 0);
10573                 if (!cache)
10574                         break;
10575                 rec = container_of(cache, struct extent_record, cache);
10576                 if (rec->num_duplicates) {
10577                         fprintf(stderr, "extent item %llu has multiple extent "
10578                                 "items\n", (unsigned long long)rec->start);
10579                         cur_err = 1;
10580                 }
10581
10582                 if (rec->refs != rec->extent_item_refs) {
10583                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
10584                                 (unsigned long long)rec->start,
10585                                 (unsigned long long)rec->nr);
10586                         fprintf(stderr, "extent item %llu, found %llu\n",
10587                                 (unsigned long long)rec->extent_item_refs,
10588                                 (unsigned long long)rec->refs);
10589                         ret = record_orphan_data_extents(root->fs_info, rec);
10590                         if (ret < 0)
10591                                 goto repair_abort;
10592                         fix = ret;
10593                         cur_err = 1;
10594                 }
10595                 if (all_backpointers_checked(rec, 1)) {
10596                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10597                                 (unsigned long long)rec->start,
10598                                 (unsigned long long)rec->nr);
10599                         fix = 1;
10600                         cur_err = 1;
10601                 }
10602                 if (!rec->owner_ref_checked) {
10603                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10604                                 (unsigned long long)rec->start,
10605                                 (unsigned long long)rec->nr);
10606                         fix = 1;
10607                         cur_err = 1;
10608                 }
10609
10610                 if (repair && fix) {
10611                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10612                         if (ret)
10613                                 goto repair_abort;
10614                 }
10615
10616
10617                 if (rec->bad_full_backref) {
10618                         fprintf(stderr, "bad full backref, on [%llu]\n",
10619                                 (unsigned long long)rec->start);
10620                         if (repair) {
10621                                 ret = fixup_extent_flags(root->fs_info, rec);
10622                                 if (ret)
10623                                         goto repair_abort;
10624                                 fix = 1;
10625                         }
10626                         cur_err = 1;
10627                 }
10628                 /*
10629                  * Although it's not a extent ref's problem, we reuse this
10630                  * routine for error reporting.
10631                  * No repair function yet.
10632                  */
10633                 if (rec->crossing_stripes) {
10634                         fprintf(stderr,
10635                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10636                                 rec->start, rec->start + rec->max_size);
10637                         cur_err = 1;
10638                 }
10639
10640                 if (rec->wrong_chunk_type) {
10641                         fprintf(stderr,
10642                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
10643                                 rec->start, rec->start + rec->max_size);
10644                         cur_err = 1;
10645                 }
10646
10647                 err = cur_err;
10648                 remove_cache_extent(extent_cache, cache);
10649                 free_all_extent_backrefs(rec);
10650                 if (!init_extent_tree && repair && (!cur_err || fix))
10651                         clear_extent_dirty(root->fs_info->excluded_extents,
10652                                            rec->start,
10653                                            rec->start + rec->max_size - 1);
10654                 free(rec);
10655         }
10656 repair_abort:
10657         if (repair) {
10658                 if (ret && ret != -EAGAIN) {
10659                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10660                         exit(1);
10661                 } else if (!ret) {
10662                         struct btrfs_trans_handle *trans;
10663
10664                         root = root->fs_info->extent_root;
10665                         trans = btrfs_start_transaction(root, 1);
10666                         if (IS_ERR(trans)) {
10667                                 ret = PTR_ERR(trans);
10668                                 goto repair_abort;
10669                         }
10670
10671                         ret = btrfs_fix_block_accounting(trans, root);
10672                         if (ret)
10673                                 goto repair_abort;
10674                         ret = btrfs_commit_transaction(trans, root);
10675                         if (ret)
10676                                 goto repair_abort;
10677                 }
10678                 return ret;
10679         }
10680
10681         if (err)
10682                 err = -EIO;
10683         return err;
10684 }
10685
10686 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10687 {
10688         u64 stripe_size;
10689
10690         if (type & BTRFS_BLOCK_GROUP_RAID0) {
10691                 stripe_size = length;
10692                 stripe_size /= num_stripes;
10693         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10694                 stripe_size = length * 2;
10695                 stripe_size /= num_stripes;
10696         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10697                 stripe_size = length;
10698                 stripe_size /= (num_stripes - 1);
10699         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10700                 stripe_size = length;
10701                 stripe_size /= (num_stripes - 2);
10702         } else {
10703                 stripe_size = length;
10704         }
10705         return stripe_size;
10706 }
10707
10708 /*
10709  * Check the chunk with its block group/dev list ref:
10710  * Return 0 if all refs seems valid.
10711  * Return 1 if part of refs seems valid, need later check for rebuild ref
10712  * like missing block group and needs to search extent tree to rebuild them.
10713  * Return -1 if essential refs are missing and unable to rebuild.
10714  */
10715 static int check_chunk_refs(struct chunk_record *chunk_rec,
10716                             struct block_group_tree *block_group_cache,
10717                             struct device_extent_tree *dev_extent_cache,
10718                             int silent)
10719 {
10720         struct cache_extent *block_group_item;
10721         struct block_group_record *block_group_rec;
10722         struct cache_extent *dev_extent_item;
10723         struct device_extent_record *dev_extent_rec;
10724         u64 devid;
10725         u64 offset;
10726         u64 length;
10727         int metadump_v2 = 0;
10728         int i;
10729         int ret = 0;
10730
10731         block_group_item = lookup_cache_extent(&block_group_cache->tree,
10732                                                chunk_rec->offset,
10733                                                chunk_rec->length);
10734         if (block_group_item) {
10735                 block_group_rec = container_of(block_group_item,
10736                                                struct block_group_record,
10737                                                cache);
10738                 if (chunk_rec->length != block_group_rec->offset ||
10739                     chunk_rec->offset != block_group_rec->objectid ||
10740                     (!metadump_v2 &&
10741                      chunk_rec->type_flags != block_group_rec->flags)) {
10742                         if (!silent)
10743                                 fprintf(stderr,
10744                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10745                                         chunk_rec->objectid,
10746                                         chunk_rec->type,
10747                                         chunk_rec->offset,
10748                                         chunk_rec->length,
10749                                         chunk_rec->offset,
10750                                         chunk_rec->type_flags,
10751                                         block_group_rec->objectid,
10752                                         block_group_rec->type,
10753                                         block_group_rec->offset,
10754                                         block_group_rec->offset,
10755                                         block_group_rec->objectid,
10756                                         block_group_rec->flags);
10757                         ret = -1;
10758                 } else {
10759                         list_del_init(&block_group_rec->list);
10760                         chunk_rec->bg_rec = block_group_rec;
10761                 }
10762         } else {
10763                 if (!silent)
10764                         fprintf(stderr,
10765                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10766                                 chunk_rec->objectid,
10767                                 chunk_rec->type,
10768                                 chunk_rec->offset,
10769                                 chunk_rec->length,
10770                                 chunk_rec->offset,
10771                                 chunk_rec->type_flags);
10772                 ret = 1;
10773         }
10774
10775         if (metadump_v2)
10776                 return ret;
10777
10778         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10779                                     chunk_rec->num_stripes);
10780         for (i = 0; i < chunk_rec->num_stripes; ++i) {
10781                 devid = chunk_rec->stripes[i].devid;
10782                 offset = chunk_rec->stripes[i].offset;
10783                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10784                                                        devid, offset, length);
10785                 if (dev_extent_item) {
10786                         dev_extent_rec = container_of(dev_extent_item,
10787                                                 struct device_extent_record,
10788                                                 cache);
10789                         if (dev_extent_rec->objectid != devid ||
10790                             dev_extent_rec->offset != offset ||
10791                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
10792                             dev_extent_rec->length != length) {
10793                                 if (!silent)
10794                                         fprintf(stderr,
10795                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10796                                                 chunk_rec->objectid,
10797                                                 chunk_rec->type,
10798                                                 chunk_rec->offset,
10799                                                 chunk_rec->stripes[i].devid,
10800                                                 chunk_rec->stripes[i].offset,
10801                                                 dev_extent_rec->objectid,
10802                                                 dev_extent_rec->offset,
10803                                                 dev_extent_rec->length);
10804                                 ret = -1;
10805                         } else {
10806                                 list_move(&dev_extent_rec->chunk_list,
10807                                           &chunk_rec->dextents);
10808                         }
10809                 } else {
10810                         if (!silent)
10811                                 fprintf(stderr,
10812                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10813                                         chunk_rec->objectid,
10814                                         chunk_rec->type,
10815                                         chunk_rec->offset,
10816                                         chunk_rec->stripes[i].devid,
10817                                         chunk_rec->stripes[i].offset);
10818                         ret = -1;
10819                 }
10820         }
10821         return ret;
10822 }
10823
10824 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
10825 int check_chunks(struct cache_tree *chunk_cache,
10826                  struct block_group_tree *block_group_cache,
10827                  struct device_extent_tree *dev_extent_cache,
10828                  struct list_head *good, struct list_head *bad,
10829                  struct list_head *rebuild, int silent)
10830 {
10831         struct cache_extent *chunk_item;
10832         struct chunk_record *chunk_rec;
10833         struct block_group_record *bg_rec;
10834         struct device_extent_record *dext_rec;
10835         int err;
10836         int ret = 0;
10837
10838         chunk_item = first_cache_extent(chunk_cache);
10839         while (chunk_item) {
10840                 chunk_rec = container_of(chunk_item, struct chunk_record,
10841                                          cache);
10842                 err = check_chunk_refs(chunk_rec, block_group_cache,
10843                                        dev_extent_cache, silent);
10844                 if (err < 0)
10845                         ret = err;
10846                 if (err == 0 && good)
10847                         list_add_tail(&chunk_rec->list, good);
10848                 if (err > 0 && rebuild)
10849                         list_add_tail(&chunk_rec->list, rebuild);
10850                 if (err < 0 && bad)
10851                         list_add_tail(&chunk_rec->list, bad);
10852                 chunk_item = next_cache_extent(chunk_item);
10853         }
10854
10855         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10856                 if (!silent)
10857                         fprintf(stderr,
10858                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10859                                 bg_rec->objectid,
10860                                 bg_rec->offset,
10861                                 bg_rec->flags);
10862                 if (!ret)
10863                         ret = 1;
10864         }
10865
10866         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10867                             chunk_list) {
10868                 if (!silent)
10869                         fprintf(stderr,
10870                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10871                                 dext_rec->objectid,
10872                                 dext_rec->offset,
10873                                 dext_rec->length);
10874                 if (!ret)
10875                         ret = 1;
10876         }
10877         return ret;
10878 }
10879
10880
10881 static int check_device_used(struct device_record *dev_rec,
10882                              struct device_extent_tree *dext_cache)
10883 {
10884         struct cache_extent *cache;
10885         struct device_extent_record *dev_extent_rec;
10886         u64 total_byte = 0;
10887
10888         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10889         while (cache) {
10890                 dev_extent_rec = container_of(cache,
10891                                               struct device_extent_record,
10892                                               cache);
10893                 if (dev_extent_rec->objectid != dev_rec->devid)
10894                         break;
10895
10896                 list_del_init(&dev_extent_rec->device_list);
10897                 total_byte += dev_extent_rec->length;
10898                 cache = next_cache_extent(cache);
10899         }
10900
10901         if (total_byte != dev_rec->byte_used) {
10902                 fprintf(stderr,
10903                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10904                         total_byte, dev_rec->byte_used, dev_rec->objectid,
10905                         dev_rec->type, dev_rec->offset);
10906                 return -1;
10907         } else {
10908                 return 0;
10909         }
10910 }
10911
10912 /*
10913  * Extra (optional) check for dev_item size to report possbile problem on a new
10914  * kernel.
10915  */
10916 static void check_dev_size_alignment(u64 devid, u64 total_bytes, u32 sectorsize)
10917 {
10918         if (!IS_ALIGNED(total_bytes, sectorsize)) {
10919                 warning(
10920 "unaligned total_bytes detected for devid %llu, have %llu should be aligned to %u",
10921                         devid, total_bytes, sectorsize);
10922                 warning(
10923 "this is OK for older kernel, but may cause kernel warning for newer kernels");
10924                 warning("this can be fixed by 'btrfs rescue fix-device-size'");
10925         }
10926 }
10927
10928 /*
10929  * Unlike device size alignment check above, some super total_bytes check
10930  * failure can lead to mount failure for newer kernel.
10931  *
10932  * So this function will return the error for a fatal super total_bytes problem.
10933  */
10934 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
10935 {
10936         struct btrfs_device *dev;
10937         struct list_head *dev_list = &fs_info->fs_devices->devices;
10938         u64 total_bytes = 0;
10939         u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
10940
10941         list_for_each_entry(dev, dev_list, dev_list)
10942                 total_bytes += dev->total_bytes;
10943
10944         /* Important check, which can cause unmountable fs */
10945         if (super_bytes < total_bytes) {
10946                 error("super total bytes %llu smaller than real device(s) size %llu",
10947                         super_bytes, total_bytes);
10948                 error("mounting this fs may fail for newer kernels");
10949                 error("this can be fixed by 'btrfs rescue fix-device-size'");
10950                 return false;
10951         }
10952
10953         /*
10954          * Optional check, just to make everything aligned and match with each
10955          * other.
10956          *
10957          * For a btrfs-image restored fs, we don't need to check it anyway.
10958          */
10959         if (btrfs_super_flags(fs_info->super_copy) &
10960             (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
10961                 return true;
10962         if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
10963             !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
10964             super_bytes != total_bytes) {
10965                 warning("minor unaligned/mismatch device size detected");
10966                 warning(
10967                 "recommended to use 'btrfs rescue fix-device-size' to fix it");
10968         }
10969         return true;
10970 }
10971
10972 /* check btrfs_dev_item -> btrfs_dev_extent */
10973 static int check_devices(struct rb_root *dev_cache,
10974                          struct device_extent_tree *dev_extent_cache)
10975 {
10976         struct rb_node *dev_node;
10977         struct device_record *dev_rec;
10978         struct device_extent_record *dext_rec;
10979         int err;
10980         int ret = 0;
10981
10982         dev_node = rb_first(dev_cache);
10983         while (dev_node) {
10984                 dev_rec = container_of(dev_node, struct device_record, node);
10985                 err = check_device_used(dev_rec, dev_extent_cache);
10986                 if (err)
10987                         ret = err;
10988
10989                 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
10990                                          global_info->sectorsize);
10991                 dev_node = rb_next(dev_node);
10992         }
10993         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10994                             device_list) {
10995                 fprintf(stderr,
10996                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10997                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
10998                 if (!ret)
10999                         ret = 1;
11000         }
11001         return ret;
11002 }
11003
11004 static int add_root_item_to_list(struct list_head *head,
11005                                   u64 objectid, u64 bytenr, u64 last_snapshot,
11006                                   u8 level, u8 drop_level,
11007                                   struct btrfs_key *drop_key)
11008 {
11009
11010         struct root_item_record *ri_rec;
11011         ri_rec = malloc(sizeof(*ri_rec));
11012         if (!ri_rec)
11013                 return -ENOMEM;
11014         ri_rec->bytenr = bytenr;
11015         ri_rec->objectid = objectid;
11016         ri_rec->level = level;
11017         ri_rec->drop_level = drop_level;
11018         ri_rec->last_snapshot = last_snapshot;
11019         if (drop_key)
11020                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
11021         list_add_tail(&ri_rec->list, head);
11022
11023         return 0;
11024 }
11025
11026 static void free_root_item_list(struct list_head *list)
11027 {
11028         struct root_item_record *ri_rec;
11029
11030         while (!list_empty(list)) {
11031                 ri_rec = list_first_entry(list, struct root_item_record,
11032                                           list);
11033                 list_del_init(&ri_rec->list);
11034                 free(ri_rec);
11035         }
11036 }
11037
11038 static int deal_root_from_list(struct list_head *list,
11039                                struct btrfs_root *root,
11040                                struct block_info *bits,
11041                                int bits_nr,
11042                                struct cache_tree *pending,
11043                                struct cache_tree *seen,
11044                                struct cache_tree *reada,
11045                                struct cache_tree *nodes,
11046                                struct cache_tree *extent_cache,
11047                                struct cache_tree *chunk_cache,
11048                                struct rb_root *dev_cache,
11049                                struct block_group_tree *block_group_cache,
11050                                struct device_extent_tree *dev_extent_cache)
11051 {
11052         int ret = 0;
11053         u64 last;
11054
11055         while (!list_empty(list)) {
11056                 struct root_item_record *rec;
11057                 struct extent_buffer *buf;
11058                 rec = list_entry(list->next,
11059                                  struct root_item_record, list);
11060                 last = 0;
11061                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
11062                 if (!extent_buffer_uptodate(buf)) {
11063                         free_extent_buffer(buf);
11064                         ret = -EIO;
11065                         break;
11066                 }
11067                 ret = add_root_to_pending(buf, extent_cache, pending,
11068                                     seen, nodes, rec->objectid);
11069                 if (ret < 0)
11070                         break;
11071                 /*
11072                  * To rebuild extent tree, we need deal with snapshot
11073                  * one by one, otherwise we deal with node firstly which
11074                  * can maximize readahead.
11075                  */
11076                 while (1) {
11077                         ret = run_next_block(root, bits, bits_nr, &last,
11078                                              pending, seen, reada, nodes,
11079                                              extent_cache, chunk_cache,
11080                                              dev_cache, block_group_cache,
11081                                              dev_extent_cache, rec);
11082                         if (ret != 0)
11083                                 break;
11084                 }
11085                 free_extent_buffer(buf);
11086                 list_del(&rec->list);
11087                 free(rec);
11088                 if (ret < 0)
11089                         break;
11090         }
11091         while (ret >= 0) {
11092                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
11093                                      reada, nodes, extent_cache, chunk_cache,
11094                                      dev_cache, block_group_cache,
11095                                      dev_extent_cache, NULL);
11096                 if (ret != 0) {
11097                         if (ret > 0)
11098                                 ret = 0;
11099                         break;
11100                 }
11101         }
11102         return ret;
11103 }
11104
11105 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11106 {
11107         struct rb_root dev_cache;
11108         struct cache_tree chunk_cache;
11109         struct block_group_tree block_group_cache;
11110         struct device_extent_tree dev_extent_cache;
11111         struct cache_tree extent_cache;
11112         struct cache_tree seen;
11113         struct cache_tree pending;
11114         struct cache_tree reada;
11115         struct cache_tree nodes;
11116         struct extent_io_tree excluded_extents;
11117         struct cache_tree corrupt_blocks;
11118         struct btrfs_path path;
11119         struct btrfs_key key;
11120         struct btrfs_key found_key;
11121         int ret, err = 0;
11122         struct block_info *bits;
11123         int bits_nr;
11124         struct extent_buffer *leaf;
11125         int slot;
11126         struct btrfs_root_item ri;
11127         struct list_head dropping_trees;
11128         struct list_head normal_trees;
11129         struct btrfs_root *root1;
11130         struct btrfs_root *root;
11131         u64 objectid;
11132         u8 level;
11133
11134         root = fs_info->fs_root;
11135         dev_cache = RB_ROOT;
11136         cache_tree_init(&chunk_cache);
11137         block_group_tree_init(&block_group_cache);
11138         device_extent_tree_init(&dev_extent_cache);
11139
11140         cache_tree_init(&extent_cache);
11141         cache_tree_init(&seen);
11142         cache_tree_init(&pending);
11143         cache_tree_init(&nodes);
11144         cache_tree_init(&reada);
11145         cache_tree_init(&corrupt_blocks);
11146         extent_io_tree_init(&excluded_extents);
11147         INIT_LIST_HEAD(&dropping_trees);
11148         INIT_LIST_HEAD(&normal_trees);
11149
11150         if (repair) {
11151                 fs_info->excluded_extents = &excluded_extents;
11152                 fs_info->fsck_extent_cache = &extent_cache;
11153                 fs_info->free_extent_hook = free_extent_hook;
11154                 fs_info->corrupt_blocks = &corrupt_blocks;
11155         }
11156
11157         bits_nr = 1024;
11158         bits = malloc(bits_nr * sizeof(struct block_info));
11159         if (!bits) {
11160                 perror("malloc");
11161                 exit(1);
11162         }
11163
11164         if (ctx.progress_enabled) {
11165                 ctx.tp = TASK_EXTENTS;
11166                 task_start(ctx.info);
11167         }
11168
11169 again:
11170         root1 = fs_info->tree_root;
11171         level = btrfs_header_level(root1->node);
11172         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11173                                     root1->node->start, 0, level, 0, NULL);
11174         if (ret < 0)
11175                 goto out;
11176         root1 = fs_info->chunk_root;
11177         level = btrfs_header_level(root1->node);
11178         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11179                                     root1->node->start, 0, level, 0, NULL);
11180         if (ret < 0)
11181                 goto out;
11182         btrfs_init_path(&path);
11183         key.offset = 0;
11184         key.objectid = 0;
11185         key.type = BTRFS_ROOT_ITEM_KEY;
11186         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11187         if (ret < 0)
11188                 goto out;
11189         while(1) {
11190                 leaf = path.nodes[0];
11191                 slot = path.slots[0];
11192                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
11193                         ret = btrfs_next_leaf(root, &path);
11194                         if (ret != 0)
11195                                 break;
11196                         leaf = path.nodes[0];
11197                         slot = path.slots[0];
11198                 }
11199                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11200                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11201                         unsigned long offset;
11202                         u64 last_snapshot;
11203
11204                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11205                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11206                         last_snapshot = btrfs_root_last_snapshot(&ri);
11207                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11208                                 level = btrfs_root_level(&ri);
11209                                 ret = add_root_item_to_list(&normal_trees,
11210                                                 found_key.objectid,
11211                                                 btrfs_root_bytenr(&ri),
11212                                                 last_snapshot, level,
11213                                                 0, NULL);
11214                                 if (ret < 0)
11215                                         goto out;
11216                         } else {
11217                                 level = btrfs_root_level(&ri);
11218                                 objectid = found_key.objectid;
11219                                 btrfs_disk_key_to_cpu(&found_key,
11220                                                       &ri.drop_progress);
11221                                 ret = add_root_item_to_list(&dropping_trees,
11222                                                 objectid,
11223                                                 btrfs_root_bytenr(&ri),
11224                                                 last_snapshot, level,
11225                                                 ri.drop_level, &found_key);
11226                                 if (ret < 0)
11227                                         goto out;
11228                         }
11229                 }
11230                 path.slots[0]++;
11231         }
11232         btrfs_release_path(&path);
11233
11234         /*
11235          * check_block can return -EAGAIN if it fixes something, please keep
11236          * this in mind when dealing with return values from these functions, if
11237          * we get -EAGAIN we want to fall through and restart the loop.
11238          */
11239         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11240                                   &seen, &reada, &nodes, &extent_cache,
11241                                   &chunk_cache, &dev_cache, &block_group_cache,
11242                                   &dev_extent_cache);
11243         if (ret < 0) {
11244                 if (ret == -EAGAIN)
11245                         goto loop;
11246                 goto out;
11247         }
11248         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11249                                   &pending, &seen, &reada, &nodes,
11250                                   &extent_cache, &chunk_cache, &dev_cache,
11251                                   &block_group_cache, &dev_extent_cache);
11252         if (ret < 0) {
11253                 if (ret == -EAGAIN)
11254                         goto loop;
11255                 goto out;
11256         }
11257
11258         ret = check_chunks(&chunk_cache, &block_group_cache,
11259                            &dev_extent_cache, NULL, NULL, NULL, 0);
11260         if (ret) {
11261                 if (ret == -EAGAIN)
11262                         goto loop;
11263                 err = ret;
11264         }
11265
11266         ret = check_extent_refs(root, &extent_cache);
11267         if (ret < 0) {
11268                 if (ret == -EAGAIN)
11269                         goto loop;
11270                 goto out;
11271         }
11272
11273         ret = check_devices(&dev_cache, &dev_extent_cache);
11274         if (ret && err)
11275                 ret = err;
11276
11277 out:
11278         task_stop(ctx.info);
11279         if (repair) {
11280                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11281                 extent_io_tree_cleanup(&excluded_extents);
11282                 fs_info->fsck_extent_cache = NULL;
11283                 fs_info->free_extent_hook = NULL;
11284                 fs_info->corrupt_blocks = NULL;
11285                 fs_info->excluded_extents = NULL;
11286         }
11287         free(bits);
11288         free_chunk_cache_tree(&chunk_cache);
11289         free_device_cache_tree(&dev_cache);
11290         free_block_group_tree(&block_group_cache);
11291         free_device_extent_tree(&dev_extent_cache);
11292         free_extent_cache_tree(&seen);
11293         free_extent_cache_tree(&pending);
11294         free_extent_cache_tree(&reada);
11295         free_extent_cache_tree(&nodes);
11296         free_root_item_list(&normal_trees);
11297         free_root_item_list(&dropping_trees);
11298         return ret;
11299 loop:
11300         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11301         free_extent_cache_tree(&seen);
11302         free_extent_cache_tree(&pending);
11303         free_extent_cache_tree(&reada);
11304         free_extent_cache_tree(&nodes);
11305         free_chunk_cache_tree(&chunk_cache);
11306         free_block_group_tree(&block_group_cache);
11307         free_device_cache_tree(&dev_cache);
11308         free_device_extent_tree(&dev_extent_cache);
11309         free_extent_record_cache(&extent_cache);
11310         free_root_item_list(&normal_trees);
11311         free_root_item_list(&dropping_trees);
11312         extent_io_tree_cleanup(&excluded_extents);
11313         goto again;
11314 }
11315
11316 static int check_extent_inline_ref(struct extent_buffer *eb,
11317                    struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11318 {
11319         int ret;
11320         u8 type = btrfs_extent_inline_ref_type(eb, iref);
11321
11322         switch (type) {
11323         case BTRFS_TREE_BLOCK_REF_KEY:
11324         case BTRFS_EXTENT_DATA_REF_KEY:
11325         case BTRFS_SHARED_BLOCK_REF_KEY:
11326         case BTRFS_SHARED_DATA_REF_KEY:
11327                 ret = 0;
11328                 break;
11329         default:
11330                 error("extent[%llu %u %llu] has unknown ref type: %d",
11331                       key->objectid, key->type, key->offset, type);
11332                 ret = UNKNOWN_TYPE;
11333                 break;
11334         }
11335
11336         return ret;
11337 }
11338
11339 /*
11340  * Check backrefs of a tree block given by @bytenr or @eb.
11341  *
11342  * @root:       the root containing the @bytenr or @eb
11343  * @eb:         tree block extent buffer, can be NULL
11344  * @bytenr:     bytenr of the tree block to search
11345  * @level:      tree level of the tree block
11346  * @owner:      owner of the tree block
11347  *
11348  * Return >0 for any error found and output error message
11349  * Return 0 for no error found
11350  */
11351 static int check_tree_block_ref(struct btrfs_root *root,
11352                                 struct extent_buffer *eb, u64 bytenr,
11353                                 int level, u64 owner, struct node_refs *nrefs)
11354 {
11355         struct btrfs_key key;
11356         struct btrfs_root *extent_root = root->fs_info->extent_root;
11357         struct btrfs_path path;
11358         struct btrfs_extent_item *ei;
11359         struct btrfs_extent_inline_ref *iref;
11360         struct extent_buffer *leaf;
11361         unsigned long end;
11362         unsigned long ptr;
11363         int slot;
11364         int skinny_level;
11365         int root_level = btrfs_header_level(root->node);
11366         int type;
11367         u32 nodesize = root->fs_info->nodesize;
11368         u32 item_size;
11369         u64 offset;
11370         int found_ref = 0;
11371         int err = 0;
11372         int ret;
11373         int strict = 1;
11374         int parent = 0;
11375
11376         btrfs_init_path(&path);
11377         key.objectid = bytenr;
11378         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11379                 key.type = BTRFS_METADATA_ITEM_KEY;
11380         else
11381                 key.type = BTRFS_EXTENT_ITEM_KEY;
11382         key.offset = (u64)-1;
11383
11384         /* Search for the backref in extent tree */
11385         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11386         if (ret < 0) {
11387                 err |= BACKREF_MISSING;
11388                 goto out;
11389         }
11390         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11391         if (ret) {
11392                 err |= BACKREF_MISSING;
11393                 goto out;
11394         }
11395
11396         leaf = path.nodes[0];
11397         slot = path.slots[0];
11398         btrfs_item_key_to_cpu(leaf, &key, slot);
11399
11400         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11401
11402         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11403                 skinny_level = (int)key.offset;
11404                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11405         } else {
11406                 struct btrfs_tree_block_info *info;
11407
11408                 info = (struct btrfs_tree_block_info *)(ei + 1);
11409                 skinny_level = btrfs_tree_block_level(leaf, info);
11410                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11411         }
11412
11413
11414         if (eb) {
11415                 u64 header_gen;
11416                 u64 extent_gen;
11417
11418                 /*
11419                  * Due to the feature of shared tree blocks, if the upper node
11420                  * is a fs root or shared node, the extent of checked node may
11421                  * not be updated until the next CoW.
11422                  */
11423                 if (nrefs)
11424                         strict = should_check_extent_strictly(root, nrefs,
11425                                         level);
11426                 if (!(btrfs_extent_flags(leaf, ei) &
11427                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11428                         error(
11429                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11430                                 key.objectid, nodesize,
11431                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11432                         err = BACKREF_MISMATCH;
11433                 }
11434                 header_gen = btrfs_header_generation(eb);
11435                 extent_gen = btrfs_extent_generation(leaf, ei);
11436                 if (header_gen != extent_gen) {
11437                         error(
11438         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11439                                 key.objectid, nodesize, header_gen,
11440                                 extent_gen);
11441                         err = BACKREF_MISMATCH;
11442                 }
11443                 if (level != skinny_level) {
11444                         error(
11445                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11446                                 key.objectid, nodesize, level, skinny_level);
11447                         err = BACKREF_MISMATCH;
11448                 }
11449                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11450                         error(
11451                         "extent[%llu %u] is referred by other roots than %llu",
11452                                 key.objectid, nodesize, root->objectid);
11453                         err = BACKREF_MISMATCH;
11454                 }
11455         }
11456
11457         /*
11458          * Iterate the extent/metadata item to find the exact backref
11459          */
11460         item_size = btrfs_item_size_nr(leaf, slot);
11461         ptr = (unsigned long)iref;
11462         end = (unsigned long)ei + item_size;
11463
11464         while (ptr < end) {
11465                 iref = (struct btrfs_extent_inline_ref *)ptr;
11466                 type = btrfs_extent_inline_ref_type(leaf, iref);
11467                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11468
11469                 ret = check_extent_inline_ref(leaf, &key, iref);
11470                 if (ret) {
11471                         err |= ret;
11472                         break;
11473                 }
11474                 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11475                         if (offset == root->objectid)
11476                                 found_ref = 1;
11477                         if (!strict && owner == offset)
11478                                 found_ref = 1;
11479                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11480                         /*
11481                          * Backref of tree reloc root points to itself, no need
11482                          * to check backref any more.
11483                          *
11484                          * This may be an error of loop backref, but extent tree
11485                          * checker should have already handled it.
11486                          * Here we only need to avoid infinite iteration.
11487                          */
11488                         if (offset == bytenr) {
11489                                 found_ref = 1;
11490                         } else {
11491                                 /*
11492                                  * Check if the backref points to valid
11493                                  * referencer
11494                                  */
11495                                 found_ref = !check_tree_block_ref( root, NULL,
11496                                                 offset, level + 1, owner,
11497                                                 NULL);
11498                         }
11499                 }
11500
11501                 if (found_ref)
11502                         break;
11503                 ptr += btrfs_extent_inline_ref_size(type);
11504         }
11505
11506         /*
11507          * Inlined extent item doesn't have what we need, check
11508          * TREE_BLOCK_REF_KEY
11509          */
11510         if (!found_ref) {
11511                 btrfs_release_path(&path);
11512                 key.objectid = bytenr;
11513                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11514                 key.offset = root->objectid;
11515
11516                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11517                 if (!ret)
11518                         found_ref = 1;
11519         }
11520         /*
11521          * Finally check SHARED BLOCK REF, any found will be good
11522          * Here we're not doing comprehensive extent backref checking,
11523          * only need to ensure there is some extent referring to this
11524          * tree block.
11525          */
11526         if (!found_ref) {
11527                 btrfs_release_path(&path);
11528                 key.objectid = bytenr;
11529                 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
11530                 key.offset = (u64)-1;
11531
11532                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11533                 if (ret < 0) {
11534                         err |= BACKREF_MISSING;
11535                         goto out;
11536                 }
11537                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11538                 if (ret) {
11539                         err |= BACKREF_MISSING;
11540                         goto out;
11541                 }
11542                 found_ref = 1;
11543         }
11544         if (!found_ref)
11545                 err |= BACKREF_MISSING;
11546 out:
11547         btrfs_release_path(&path);
11548         if (nrefs && strict &&
11549             level < root_level && nrefs->full_backref[level + 1])
11550                 parent = nrefs->bytenr[level + 1];
11551         if (eb && (err & BACKREF_MISSING))
11552                 error(
11553         "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11554                       bytenr, nodesize, owner, level,
11555                       parent ? "parent" : "root",
11556                       parent ? parent : root->objectid);
11557         return err;
11558 }
11559
11560 /*
11561  * If @err contains BACKREF_MISSING then add extent of the
11562  * file_extent_data_item.
11563  *
11564  * Returns error bits after reapir.
11565  */
11566 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11567                                    struct btrfs_root *root,
11568                                    struct btrfs_path *pathp,
11569                                    struct node_refs *nrefs,
11570                                    int err)
11571 {
11572         struct btrfs_file_extent_item *fi;
11573         struct btrfs_key fi_key;
11574         struct btrfs_key key;
11575         struct btrfs_extent_item *ei;
11576         struct btrfs_path path;
11577         struct btrfs_root *extent_root = root->fs_info->extent_root;
11578         struct extent_buffer *eb;
11579         u64 size;
11580         u64 disk_bytenr;
11581         u64 num_bytes;
11582         u64 parent;
11583         u64 offset;
11584         u64 extent_offset;
11585         u64 file_offset;
11586         int generation;
11587         int slot;
11588         int ret = 0;
11589
11590         eb = pathp->nodes[0];
11591         slot = pathp->slots[0];
11592         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11593         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11594
11595         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11596             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11597                 return err;
11598
11599         file_offset = fi_key.offset;
11600         generation = btrfs_file_extent_generation(eb, fi);
11601         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11602         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11603         extent_offset = btrfs_file_extent_offset(eb, fi);
11604         offset = file_offset - extent_offset;
11605
11606         /* now repair only adds backref */
11607         if ((err & BACKREF_MISSING) == 0)
11608                 return err;
11609
11610         /* search extent item */
11611         key.objectid = disk_bytenr;
11612         key.type = BTRFS_EXTENT_ITEM_KEY;
11613         key.offset = num_bytes;
11614
11615         btrfs_init_path(&path);
11616         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11617         if (ret < 0) {
11618                 ret = -EIO;
11619                 goto out;
11620         }
11621
11622         /* insert an extent item */
11623         if (ret > 0) {
11624                 key.objectid = disk_bytenr;
11625                 key.type = BTRFS_EXTENT_ITEM_KEY;
11626                 key.offset = num_bytes;
11627                 size = sizeof(*ei);
11628
11629                 btrfs_release_path(&path);
11630                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11631                                               size);
11632                 if (ret)
11633                         goto out;
11634                 eb = path.nodes[0];
11635                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
11636
11637                 btrfs_set_extent_refs(eb, ei, 0);
11638                 btrfs_set_extent_generation(eb, ei, generation);
11639                 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11640
11641                 btrfs_mark_buffer_dirty(eb);
11642                 ret = btrfs_update_block_group(extent_root, disk_bytenr,
11643                                                num_bytes, 1, 0);
11644                 btrfs_release_path(&path);
11645         }
11646
11647         if (nrefs->full_backref[0])
11648                 parent = btrfs_header_bytenr(eb);
11649         else
11650                 parent = 0;
11651
11652         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11653                                    root->objectid,
11654                    parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11655                                    offset);
11656         if (ret) {
11657                 error(
11658                 "failed to increase extent data backref[%llu %llu] root %llu",
11659                       disk_bytenr, num_bytes, root->objectid);
11660                 goto out;
11661         } else {
11662                 printf("Add one extent data backref [%llu %llu]\n",
11663                        disk_bytenr, num_bytes);
11664         }
11665
11666         err &= ~BACKREF_MISSING;
11667 out:
11668         if (ret)
11669                 error("can't repair root %llu extent data item[%llu %llu]",
11670                       root->objectid, disk_bytenr, num_bytes);
11671         return err;
11672 }
11673
11674 /*
11675  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11676  *
11677  * Return >0 any error found and output error message
11678  * Return 0 for no error found
11679  */
11680 static int check_extent_data_item(struct btrfs_root *root,
11681                                   struct btrfs_path *pathp,
11682                                   struct node_refs *nrefs,  int account_bytes)
11683 {
11684         struct btrfs_file_extent_item *fi;
11685         struct extent_buffer *eb = pathp->nodes[0];
11686         struct btrfs_path path;
11687         struct btrfs_root *extent_root = root->fs_info->extent_root;
11688         struct btrfs_key fi_key;
11689         struct btrfs_key dbref_key;
11690         struct extent_buffer *leaf;
11691         struct btrfs_extent_item *ei;
11692         struct btrfs_extent_inline_ref *iref;
11693         struct btrfs_extent_data_ref *dref;
11694         u64 owner;
11695         u64 disk_bytenr;
11696         u64 disk_num_bytes;
11697         u64 extent_num_bytes;
11698         u64 extent_flags;
11699         u64 offset;
11700         u32 item_size;
11701         unsigned long end;
11702         unsigned long ptr;
11703         int type;
11704         int found_dbackref = 0;
11705         int slot = pathp->slots[0];
11706         int err = 0;
11707         int ret;
11708         int strict;
11709
11710         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11711         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11712
11713         /* Nothing to check for hole and inline data extents */
11714         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11715             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11716                 return 0;
11717
11718         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11719         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11720         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11721         offset = btrfs_file_extent_offset(eb, fi);
11722
11723         /* Check unaligned disk_num_bytes and num_bytes */
11724         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11725                 error(
11726 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11727                         fi_key.objectid, fi_key.offset, disk_num_bytes,
11728                         root->fs_info->sectorsize);
11729                 err |= BYTES_UNALIGNED;
11730         } else if (account_bytes) {
11731                 data_bytes_allocated += disk_num_bytes;
11732         }
11733         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11734                 error(
11735 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11736                         fi_key.objectid, fi_key.offset, extent_num_bytes,
11737                         root->fs_info->sectorsize);
11738                 err |= BYTES_UNALIGNED;
11739         } else if (account_bytes) {
11740                 data_bytes_referenced += extent_num_bytes;
11741         }
11742         owner = btrfs_header_owner(eb);
11743
11744         /* Check the extent item of the file extent in extent tree */
11745         btrfs_init_path(&path);
11746         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11747         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11748         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11749
11750         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11751         if (ret)
11752                 goto out;
11753
11754         leaf = path.nodes[0];
11755         slot = path.slots[0];
11756         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11757
11758         extent_flags = btrfs_extent_flags(leaf, ei);
11759
11760         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11761                 error(
11762                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11763                     disk_bytenr, disk_num_bytes,
11764                     BTRFS_EXTENT_FLAG_DATA);
11765                 err |= BACKREF_MISMATCH;
11766         }
11767
11768         /* Check data backref inside that extent item */
11769         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11770         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11771         ptr = (unsigned long)iref;
11772         end = (unsigned long)ei + item_size;
11773         strict = should_check_extent_strictly(root, nrefs, -1);
11774
11775         while (ptr < end) {
11776                 u64 ref_root;
11777                 u64 ref_objectid;
11778                 u64 ref_offset;
11779                 bool match = false;
11780
11781                 iref = (struct btrfs_extent_inline_ref *)ptr;
11782                 type = btrfs_extent_inline_ref_type(leaf, iref);
11783                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11784
11785                 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
11786                 if (ret) {
11787                         err |= ret;
11788                         break;
11789                 }
11790                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11791                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
11792                         ref_objectid = btrfs_extent_data_ref_objectid(leaf, dref);
11793                         ref_offset = btrfs_extent_data_ref_offset(leaf, dref);
11794
11795                         if (ref_objectid == fi_key.objectid &&
11796                             ref_offset == fi_key.offset - offset)
11797                                 match = true;
11798                         if (ref_root == root->objectid && match)
11799                                 found_dbackref = 1;
11800                         else if (!strict && owner == ref_root && match)
11801                                 found_dbackref = 1;
11802                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
11803                         found_dbackref = !check_tree_block_ref(root, NULL,
11804                                 btrfs_extent_inline_ref_offset(leaf, iref),
11805                                 0, owner, NULL);
11806                 }
11807
11808                 if (found_dbackref)
11809                         break;
11810                 ptr += btrfs_extent_inline_ref_size(type);
11811         }
11812
11813         if (!found_dbackref) {
11814                 btrfs_release_path(&path);
11815
11816                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11817                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11818                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
11819                 dbref_key.offset = hash_extent_data_ref(root->objectid,
11820                                 fi_key.objectid, fi_key.offset - offset);
11821
11822                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11823                                         &dbref_key, &path, 0, 0);
11824                 if (!ret) {
11825                         found_dbackref = 1;
11826                         goto out;
11827                 }
11828
11829                 btrfs_release_path(&path);
11830
11831                 /*
11832                  * Neither inlined nor EXTENT_DATA_REF found, try
11833                  * SHARED_DATA_REF as last chance.
11834                  */
11835                 dbref_key.objectid = disk_bytenr;
11836                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11837                 dbref_key.offset = eb->start;
11838
11839                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11840                                         &dbref_key, &path, 0, 0);
11841                 if (!ret) {
11842                         found_dbackref = 1;
11843                         goto out;
11844                 }
11845         }
11846
11847 out:
11848         if (!found_dbackref)
11849                 err |= BACKREF_MISSING;
11850         btrfs_release_path(&path);
11851         if (err & BACKREF_MISSING) {
11852                 error("data extent[%llu %llu] backref lost",
11853                       disk_bytenr, disk_num_bytes);
11854         }
11855         return err;
11856 }
11857
11858 /*
11859  * Get real tree block level for the case like shared block
11860  * Return >= 0 as tree level
11861  * Return <0 for error
11862  */
11863 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11864 {
11865         struct extent_buffer *eb;
11866         struct btrfs_path path;
11867         struct btrfs_key key;
11868         struct btrfs_extent_item *ei;
11869         u64 flags;
11870         u64 transid;
11871         u8 backref_level;
11872         u8 header_level;
11873         int ret;
11874
11875         /* Search extent tree for extent generation and level */
11876         key.objectid = bytenr;
11877         key.type = BTRFS_METADATA_ITEM_KEY;
11878         key.offset = (u64)-1;
11879
11880         btrfs_init_path(&path);
11881         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11882         if (ret < 0)
11883                 goto release_out;
11884         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11885         if (ret < 0)
11886                 goto release_out;
11887         if (ret > 0) {
11888                 ret = -ENOENT;
11889                 goto release_out;
11890         }
11891
11892         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11893         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11894                             struct btrfs_extent_item);
11895         flags = btrfs_extent_flags(path.nodes[0], ei);
11896         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11897                 ret = -ENOENT;
11898                 goto release_out;
11899         }
11900
11901         /* Get transid for later read_tree_block() check */
11902         transid = btrfs_extent_generation(path.nodes[0], ei);
11903
11904         /* Get backref level as one source */
11905         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11906                 backref_level = key.offset;
11907         } else {
11908                 struct btrfs_tree_block_info *info;
11909
11910                 info = (struct btrfs_tree_block_info *)(ei + 1);
11911                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11912         }
11913         btrfs_release_path(&path);
11914
11915         /* Get level from tree block as an alternative source */
11916         eb = read_tree_block(fs_info, bytenr, transid);
11917         if (!extent_buffer_uptodate(eb)) {
11918                 free_extent_buffer(eb);
11919                 return -EIO;
11920         }
11921         header_level = btrfs_header_level(eb);
11922         free_extent_buffer(eb);
11923
11924         if (header_level != backref_level)
11925                 return -EIO;
11926         return header_level;
11927
11928 release_out:
11929         btrfs_release_path(&path);
11930         return ret;
11931 }
11932
11933 /*
11934  * Check if a tree block backref is valid (points to a valid tree block)
11935  * if level == -1, level will be resolved
11936  * Return >0 for any error found and print error message
11937  */
11938 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11939                                     u64 bytenr, int level)
11940 {
11941         struct btrfs_root *root;
11942         struct btrfs_key key;
11943         struct btrfs_path path;
11944         struct extent_buffer *eb;
11945         struct extent_buffer *node;
11946         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11947         int err = 0;
11948         int ret;
11949
11950         /* Query level for level == -1 special case */
11951         if (level == -1)
11952                 level = query_tree_block_level(fs_info, bytenr);
11953         if (level < 0) {
11954                 err |= REFERENCER_MISSING;
11955                 goto out;
11956         }
11957
11958         key.objectid = root_id;
11959         key.type = BTRFS_ROOT_ITEM_KEY;
11960         key.offset = (u64)-1;
11961
11962         root = btrfs_read_fs_root(fs_info, &key);
11963         if (IS_ERR(root)) {
11964                 err |= REFERENCER_MISSING;
11965                 goto out;
11966         }
11967
11968         /* Read out the tree block to get item/node key */
11969         eb = read_tree_block(fs_info, bytenr, 0);
11970         if (!extent_buffer_uptodate(eb)) {
11971                 err |= REFERENCER_MISSING;
11972                 free_extent_buffer(eb);
11973                 goto out;
11974         }
11975
11976         /* Empty tree, no need to check key */
11977         if (!btrfs_header_nritems(eb) && !level) {
11978                 free_extent_buffer(eb);
11979                 goto out;
11980         }
11981
11982         if (level)
11983                 btrfs_node_key_to_cpu(eb, &key, 0);
11984         else
11985                 btrfs_item_key_to_cpu(eb, &key, 0);
11986
11987         free_extent_buffer(eb);
11988
11989         btrfs_init_path(&path);
11990         path.lowest_level = level;
11991         /* Search with the first key, to ensure we can reach it */
11992         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11993         if (ret < 0) {
11994                 err |= REFERENCER_MISSING;
11995                 goto release_out;
11996         }
11997
11998         node = path.nodes[level];
11999         if (btrfs_header_bytenr(node) != bytenr) {
12000                 error(
12001         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
12002                         bytenr, nodesize, bytenr,
12003                         btrfs_header_bytenr(node));
12004                 err |= REFERENCER_MISMATCH;
12005         }
12006         if (btrfs_header_level(node) != level) {
12007                 error(
12008         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
12009                         bytenr, nodesize, level,
12010                         btrfs_header_level(node));
12011                 err |= REFERENCER_MISMATCH;
12012         }
12013
12014 release_out:
12015         btrfs_release_path(&path);
12016 out:
12017         if (err & REFERENCER_MISSING) {
12018                 if (level < 0)
12019                         error("extent [%llu %d] lost referencer (owner: %llu)",
12020                                 bytenr, nodesize, root_id);
12021                 else
12022                         error(
12023                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
12024                                 bytenr, nodesize, root_id, level);
12025         }
12026
12027         return err;
12028 }
12029
12030 /*
12031  * Check if tree block @eb is tree reloc root.
12032  * Return 0 if it's not or any problem happens
12033  * Return 1 if it's a tree reloc root
12034  */
12035 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
12036                                  struct extent_buffer *eb)
12037 {
12038         struct btrfs_root *tree_reloc_root;
12039         struct btrfs_key key;
12040         u64 bytenr = btrfs_header_bytenr(eb);
12041         u64 owner = btrfs_header_owner(eb);
12042         int ret = 0;
12043
12044         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12045         key.offset = owner;
12046         key.type = BTRFS_ROOT_ITEM_KEY;
12047
12048         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
12049         if (IS_ERR(tree_reloc_root))
12050                 return 0;
12051
12052         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
12053                 ret = 1;
12054         btrfs_free_fs_root(tree_reloc_root);
12055         return ret;
12056 }
12057
12058 /*
12059  * Check referencer for shared block backref
12060  * If level == -1, this function will resolve the level.
12061  */
12062 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
12063                                      u64 parent, u64 bytenr, int level)
12064 {
12065         struct extent_buffer *eb;
12066         u32 nr;
12067         int found_parent = 0;
12068         int i;
12069
12070         eb = read_tree_block(fs_info, parent, 0);
12071         if (!extent_buffer_uptodate(eb))
12072                 goto out;
12073
12074         if (level == -1)
12075                 level = query_tree_block_level(fs_info, bytenr);
12076         if (level < 0)
12077                 goto out;
12078
12079         /* It's possible it's a tree reloc root */
12080         if (parent == bytenr) {
12081                 if (is_tree_reloc_root(fs_info, eb))
12082                         found_parent = 1;
12083                 goto out;
12084         }
12085
12086         if (level + 1 != btrfs_header_level(eb))
12087                 goto out;
12088
12089         nr = btrfs_header_nritems(eb);
12090         for (i = 0; i < nr; i++) {
12091                 if (bytenr == btrfs_node_blockptr(eb, i)) {
12092                         found_parent = 1;
12093                         break;
12094                 }
12095         }
12096 out:
12097         free_extent_buffer(eb);
12098         if (!found_parent) {
12099                 error(
12100         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
12101                         bytenr, fs_info->nodesize, parent, level);
12102                 return REFERENCER_MISSING;
12103         }
12104         return 0;
12105 }
12106
12107 /*
12108  * Check referencer for normal (inlined) data ref
12109  * If len == 0, it will be resolved by searching in extent tree
12110  */
12111 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
12112                                      u64 root_id, u64 objectid, u64 offset,
12113                                      u64 bytenr, u64 len, u32 count)
12114 {
12115         struct btrfs_root *root;
12116         struct btrfs_root *extent_root = fs_info->extent_root;
12117         struct btrfs_key key;
12118         struct btrfs_path path;
12119         struct extent_buffer *leaf;
12120         struct btrfs_file_extent_item *fi;
12121         u32 found_count = 0;
12122         int slot;
12123         int ret = 0;
12124
12125         if (!len) {
12126                 key.objectid = bytenr;
12127                 key.type = BTRFS_EXTENT_ITEM_KEY;
12128                 key.offset = (u64)-1;
12129
12130                 btrfs_init_path(&path);
12131                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12132                 if (ret < 0)
12133                         goto out;
12134                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
12135                 if (ret)
12136                         goto out;
12137                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12138                 if (key.objectid != bytenr ||
12139                     key.type != BTRFS_EXTENT_ITEM_KEY)
12140                         goto out;
12141                 len = key.offset;
12142                 btrfs_release_path(&path);
12143         }
12144         key.objectid = root_id;
12145         key.type = BTRFS_ROOT_ITEM_KEY;
12146         key.offset = (u64)-1;
12147         btrfs_init_path(&path);
12148
12149         root = btrfs_read_fs_root(fs_info, &key);
12150         if (IS_ERR(root))
12151                 goto out;
12152
12153         key.objectid = objectid;
12154         key.type = BTRFS_EXTENT_DATA_KEY;
12155         /*
12156          * It can be nasty as data backref offset is
12157          * file offset - file extent offset, which is smaller or
12158          * equal to original backref offset.  The only special case is
12159          * overflow.  So we need to special check and do further search.
12160          */
12161         key.offset = offset & (1ULL << 63) ? 0 : offset;
12162
12163         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12164         if (ret < 0)
12165                 goto out;
12166
12167         /*
12168          * Search afterwards to get correct one
12169          * NOTE: As we must do a comprehensive check on the data backref to
12170          * make sure the dref count also matches, we must iterate all file
12171          * extents for that inode.
12172          */
12173         while (1) {
12174                 leaf = path.nodes[0];
12175                 slot = path.slots[0];
12176
12177                 if (slot >= btrfs_header_nritems(leaf) ||
12178                     btrfs_header_owner(leaf) != root_id)
12179                         goto next;
12180                 btrfs_item_key_to_cpu(leaf, &key, slot);
12181                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
12182                         break;
12183                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
12184                 /*
12185                  * Except normal disk bytenr and disk num bytes, we still
12186                  * need to do extra check on dbackref offset as
12187                  * dbackref offset = file_offset - file_extent_offset
12188                  *
12189                  * Also, we must check the leaf owner.
12190                  * In case of shared tree blocks (snapshots) we can inherit
12191                  * leaves from source snapshot.
12192                  * In that case, reference from source snapshot should not
12193                  * count.
12194                  */
12195                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
12196                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
12197                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
12198                     offset && btrfs_header_owner(leaf) == root_id)
12199                         found_count++;
12200
12201 next:
12202                 ret = btrfs_next_item(root, &path);
12203                 if (ret)
12204                         break;
12205         }
12206 out:
12207         btrfs_release_path(&path);
12208         if (found_count != count) {
12209                 error(
12210 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
12211                         bytenr, len, root_id, objectid, offset, count, found_count);
12212                 return REFERENCER_MISSING;
12213         }
12214         return 0;
12215 }
12216
12217 /*
12218  * Check if the referencer of a shared data backref exists
12219  */
12220 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12221                                      u64 parent, u64 bytenr)
12222 {
12223         struct extent_buffer *eb;
12224         struct btrfs_key key;
12225         struct btrfs_file_extent_item *fi;
12226         u32 nr;
12227         int found_parent = 0;
12228         int i;
12229
12230         eb = read_tree_block(fs_info, parent, 0);
12231         if (!extent_buffer_uptodate(eb))
12232                 goto out;
12233
12234         nr = btrfs_header_nritems(eb);
12235         for (i = 0; i < nr; i++) {
12236                 btrfs_item_key_to_cpu(eb, &key, i);
12237                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12238                         continue;
12239
12240                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
12241                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12242                         continue;
12243
12244                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12245                         found_parent = 1;
12246                         break;
12247                 }
12248         }
12249
12250 out:
12251         free_extent_buffer(eb);
12252         if (!found_parent) {
12253                 error("shared extent %llu referencer lost (parent: %llu)",
12254                         bytenr, parent);
12255                 return REFERENCER_MISSING;
12256         }
12257         return 0;
12258 }
12259
12260 /*
12261  * Only delete backref if REFERENCER_MISSING now
12262  *
12263  * Returns <0   the extent was deleted
12264  * Returns >0   the backref was deleted but extent still exists, returned value
12265  *               means error after repair
12266  * Returns  0   nothing happened
12267  */
12268 static int repair_extent_item(struct btrfs_trans_handle *trans,
12269                       struct btrfs_root *root, struct btrfs_path *path,
12270                       u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12271                       u64 owner, u64 offset, int err)
12272 {
12273         struct btrfs_key old_key;
12274         int freed = 0;
12275         int ret;
12276
12277         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
12278
12279         if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12280                 /* delete the backref */
12281                 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12282                           num_bytes, parent, root_objectid, owner, offset);
12283                 if (!ret) {
12284                         freed = 1;
12285                         err &= ~REFERENCER_MISSING;
12286                         printf("Delete backref in extent [%llu %llu]\n",
12287                                bytenr, num_bytes);
12288                 } else {
12289                         error("fail to delete backref in extent [%llu %llu]",
12290                                bytenr, num_bytes);
12291                 }
12292         }
12293
12294         /* btrfs_free_extent may delete the extent */
12295         btrfs_release_path(path);
12296         ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12297
12298         if (ret)
12299                 ret = -ENOENT;
12300         else if (freed)
12301                 ret = err;
12302         return ret;
12303 }
12304
12305 /*
12306  * This function will check a given extent item, including its backref and
12307  * itself (like crossing stripe boundary and type)
12308  *
12309  * Since we don't use extent_record anymore, introduce new error bit
12310  */
12311 static int check_extent_item(struct btrfs_trans_handle *trans,
12312                              struct btrfs_fs_info *fs_info,
12313                              struct btrfs_path *path)
12314 {
12315         struct btrfs_extent_item *ei;
12316         struct btrfs_extent_inline_ref *iref;
12317         struct btrfs_extent_data_ref *dref;
12318         struct extent_buffer *eb = path->nodes[0];
12319         unsigned long end;
12320         unsigned long ptr;
12321         int slot = path->slots[0];
12322         int type;
12323         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12324         u32 item_size = btrfs_item_size_nr(eb, slot);
12325         u64 flags;
12326         u64 offset;
12327         u64 parent;
12328         u64 num_bytes;
12329         u64 root_objectid;
12330         u64 owner;
12331         u64 owner_offset;
12332         int metadata = 0;
12333         int level;
12334         struct btrfs_key key;
12335         int ret;
12336         int err = 0;
12337
12338         btrfs_item_key_to_cpu(eb, &key, slot);
12339         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12340                 bytes_used += key.offset;
12341                 num_bytes = key.offset;
12342         } else {
12343                 bytes_used += nodesize;
12344                 num_bytes = nodesize;
12345         }
12346
12347         if (item_size < sizeof(*ei)) {
12348                 /*
12349                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12350                  * old thing when on disk format is still un-determined.
12351                  * No need to care about it anymore
12352                  */
12353                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12354                 return -ENOTTY;
12355         }
12356
12357         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12358         flags = btrfs_extent_flags(eb, ei);
12359
12360         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12361                 metadata = 1;
12362         if (metadata && check_crossing_stripes(global_info, key.objectid,
12363                                                eb->len)) {
12364                 error("bad metadata [%llu, %llu) crossing stripe boundary",
12365                       key.objectid, key.objectid + nodesize);
12366                 err |= CROSSING_STRIPE_BOUNDARY;
12367         }
12368
12369         ptr = (unsigned long)(ei + 1);
12370
12371         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12372                 /* Old EXTENT_ITEM metadata */
12373                 struct btrfs_tree_block_info *info;
12374
12375                 info = (struct btrfs_tree_block_info *)ptr;
12376                 level = btrfs_tree_block_level(eb, info);
12377                 ptr += sizeof(struct btrfs_tree_block_info);
12378         } else {
12379                 /* New METADATA_ITEM */
12380                 level = key.offset;
12381         }
12382         end = (unsigned long)ei + item_size;
12383
12384 next:
12385         /* Reached extent item end normally */
12386         if (ptr == end)
12387                 goto out;
12388
12389         /* Beyond extent item end, wrong item size */
12390         if (ptr > end) {
12391                 err |= ITEM_SIZE_MISMATCH;
12392                 error("extent item at bytenr %llu slot %d has wrong size",
12393                         eb->start, slot);
12394                 goto out;
12395         }
12396
12397         parent = 0;
12398         root_objectid = 0;
12399         owner = 0;
12400         owner_offset = 0;
12401         /* Now check every backref in this extent item */
12402         iref = (struct btrfs_extent_inline_ref *)ptr;
12403         type = btrfs_extent_inline_ref_type(eb, iref);
12404         offset = btrfs_extent_inline_ref_offset(eb, iref);
12405         switch (type) {
12406         case BTRFS_TREE_BLOCK_REF_KEY:
12407                 root_objectid = offset;
12408                 owner = level;
12409                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12410                                                level);
12411                 err |= ret;
12412                 break;
12413         case BTRFS_SHARED_BLOCK_REF_KEY:
12414                 parent = offset;
12415                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12416                                                  level);
12417                 err |= ret;
12418                 break;
12419         case BTRFS_EXTENT_DATA_REF_KEY:
12420                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12421                 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12422                 owner = btrfs_extent_data_ref_objectid(eb, dref);
12423                 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12424                 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12425                                         owner_offset, key.objectid, key.offset,
12426                                         btrfs_extent_data_ref_count(eb, dref));
12427                 err |= ret;
12428                 break;
12429         case BTRFS_SHARED_DATA_REF_KEY:
12430                 parent = offset;
12431                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12432                 err |= ret;
12433                 break;
12434         default:
12435                 error("extent[%llu %d %llu] has unknown ref type: %d",
12436                         key.objectid, key.type, key.offset, type);
12437                 ret = UNKNOWN_TYPE;
12438                 err |= ret;
12439                 goto out;
12440         }
12441
12442         if (err && repair) {
12443                 ret = repair_extent_item(trans, fs_info->extent_root, path,
12444                          key.objectid, num_bytes, parent, root_objectid,
12445                          owner, owner_offset, ret);
12446                 if (ret < 0)
12447                         goto out;
12448                 if (ret) {
12449                         goto next;
12450                         err = ret;
12451                 }
12452         }
12453
12454         ptr += btrfs_extent_inline_ref_size(type);
12455         goto next;
12456
12457 out:
12458         return err;
12459 }
12460
12461 /*
12462  * Check if a dev extent item is referred correctly by its chunk
12463  */
12464 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12465                                  struct extent_buffer *eb, int slot)
12466 {
12467         struct btrfs_root *chunk_root = fs_info->chunk_root;
12468         struct btrfs_dev_extent *ptr;
12469         struct btrfs_path path;
12470         struct btrfs_key chunk_key;
12471         struct btrfs_key devext_key;
12472         struct btrfs_chunk *chunk;
12473         struct extent_buffer *l;
12474         int num_stripes;
12475         u64 length;
12476         int i;
12477         int found_chunk = 0;
12478         int ret;
12479
12480         btrfs_item_key_to_cpu(eb, &devext_key, slot);
12481         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12482         length = btrfs_dev_extent_length(eb, ptr);
12483
12484         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12485         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12486         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12487
12488         btrfs_init_path(&path);
12489         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12490         if (ret)
12491                 goto out;
12492
12493         l = path.nodes[0];
12494         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12495         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12496                                       chunk_key.offset);
12497         if (ret < 0)
12498                 goto out;
12499
12500         if (btrfs_stripe_length(fs_info, l, chunk) != length)
12501                 goto out;
12502
12503         num_stripes = btrfs_chunk_num_stripes(l, chunk);
12504         for (i = 0; i < num_stripes; i++) {
12505                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12506                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12507
12508                 if (devid == devext_key.objectid &&
12509                     offset == devext_key.offset) {
12510                         found_chunk = 1;
12511                         break;
12512                 }
12513         }
12514 out:
12515         btrfs_release_path(&path);
12516         if (!found_chunk) {
12517                 error(
12518                 "device extent[%llu, %llu, %llu] did not find the related chunk",
12519                         devext_key.objectid, devext_key.offset, length);
12520                 return REFERENCER_MISSING;
12521         }
12522         return 0;
12523 }
12524
12525 /*
12526  * Check if the used space is correct with the dev item
12527  */
12528 static int check_dev_item(struct btrfs_fs_info *fs_info,
12529                           struct extent_buffer *eb, int slot)
12530 {
12531         struct btrfs_root *dev_root = fs_info->dev_root;
12532         struct btrfs_dev_item *dev_item;
12533         struct btrfs_path path;
12534         struct btrfs_key key;
12535         struct btrfs_dev_extent *ptr;
12536         u64 total_bytes;
12537         u64 dev_id;
12538         u64 used;
12539         u64 total = 0;
12540         int ret;
12541
12542         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12543         dev_id = btrfs_device_id(eb, dev_item);
12544         used = btrfs_device_bytes_used(eb, dev_item);
12545         total_bytes = btrfs_device_total_bytes(eb, dev_item);
12546
12547         key.objectid = dev_id;
12548         key.type = BTRFS_DEV_EXTENT_KEY;
12549         key.offset = 0;
12550
12551         btrfs_init_path(&path);
12552         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
12553         if (ret < 0) {
12554                 btrfs_item_key_to_cpu(eb, &key, slot);
12555                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12556                         key.objectid, key.type, key.offset);
12557                 btrfs_release_path(&path);
12558                 return REFERENCER_MISSING;
12559         }
12560
12561         /* Iterate dev_extents to calculate the used space of a device */
12562         while (1) {
12563                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12564                         goto next;
12565
12566                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12567                 if (key.objectid > dev_id)
12568                         break;
12569                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12570                         goto next;
12571
12572                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12573                                      struct btrfs_dev_extent);
12574                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12575 next:
12576                 ret = btrfs_next_item(dev_root, &path);
12577                 if (ret)
12578                         break;
12579         }
12580         btrfs_release_path(&path);
12581
12582         if (used != total) {
12583                 btrfs_item_key_to_cpu(eb, &key, slot);
12584                 error(
12585 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12586                         total, used, BTRFS_ROOT_TREE_OBJECTID,
12587                         BTRFS_DEV_EXTENT_KEY, dev_id);
12588                 return ACCOUNTING_MISMATCH;
12589         }
12590         check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize);
12591
12592         return 0;
12593 }
12594
12595 /*
12596  * Check a block group item with its referener (chunk) and its used space
12597  * with extent/metadata item
12598  */
12599 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12600                                   struct extent_buffer *eb, int slot)
12601 {
12602         struct btrfs_root *extent_root = fs_info->extent_root;
12603         struct btrfs_root *chunk_root = fs_info->chunk_root;
12604         struct btrfs_block_group_item *bi;
12605         struct btrfs_block_group_item bg_item;
12606         struct btrfs_path path;
12607         struct btrfs_key bg_key;
12608         struct btrfs_key chunk_key;
12609         struct btrfs_key extent_key;
12610         struct btrfs_chunk *chunk;
12611         struct extent_buffer *leaf;
12612         struct btrfs_extent_item *ei;
12613         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12614         u64 flags;
12615         u64 bg_flags;
12616         u64 used;
12617         u64 total = 0;
12618         int ret;
12619         int err = 0;
12620
12621         btrfs_item_key_to_cpu(eb, &bg_key, slot);
12622         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
12623         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12624         used = btrfs_block_group_used(&bg_item);
12625         bg_flags = btrfs_block_group_flags(&bg_item);
12626
12627         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12628         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12629         chunk_key.offset = bg_key.objectid;
12630
12631         btrfs_init_path(&path);
12632         /* Search for the referencer chunk */
12633         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12634         if (ret) {
12635                 error(
12636                 "block group[%llu %llu] did not find the related chunk item",
12637                         bg_key.objectid, bg_key.offset);
12638                 err |= REFERENCER_MISSING;
12639         } else {
12640                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12641                                         struct btrfs_chunk);
12642                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12643                                                 bg_key.offset) {
12644                         error(
12645         "block group[%llu %llu] related chunk item length does not match",
12646                                 bg_key.objectid, bg_key.offset);
12647                         err |= REFERENCER_MISMATCH;
12648                 }
12649         }
12650         btrfs_release_path(&path);
12651
12652         /* Search from the block group bytenr */
12653         extent_key.objectid = bg_key.objectid;
12654         extent_key.type = 0;
12655         extent_key.offset = 0;
12656
12657         btrfs_init_path(&path);
12658         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12659         if (ret < 0)
12660                 goto out;
12661
12662         /* Iterate extent tree to account used space */
12663         while (1) {
12664                 leaf = path.nodes[0];
12665
12666                 /* Search slot can point to the last item beyond leaf nritems */
12667                 if (path.slots[0] >= btrfs_header_nritems(leaf))
12668                         goto next;
12669
12670                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
12671                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12672                         break;
12673
12674                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12675                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12676                         goto next;
12677                 if (extent_key.objectid < bg_key.objectid)
12678                         goto next;
12679
12680                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12681                         total += nodesize;
12682                 else
12683                         total += extent_key.offset;
12684
12685                 ei = btrfs_item_ptr(leaf, path.slots[0],
12686                                     struct btrfs_extent_item);
12687                 flags = btrfs_extent_flags(leaf, ei);
12688                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12689                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12690                                 error(
12691                         "bad extent[%llu, %llu) type mismatch with chunk",
12692                                         extent_key.objectid,
12693                                         extent_key.objectid + extent_key.offset);
12694                                 err |= CHUNK_TYPE_MISMATCH;
12695                         }
12696                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12697                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12698                                     BTRFS_BLOCK_GROUP_METADATA))) {
12699                                 error(
12700                         "bad extent[%llu, %llu) type mismatch with chunk",
12701                                         extent_key.objectid,
12702                                         extent_key.objectid + nodesize);
12703                                 err |= CHUNK_TYPE_MISMATCH;
12704                         }
12705                 }
12706 next:
12707                 ret = btrfs_next_item(extent_root, &path);
12708                 if (ret)
12709                         break;
12710         }
12711
12712 out:
12713         btrfs_release_path(&path);
12714
12715         if (total != used) {
12716                 error(
12717                 "block group[%llu %llu] used %llu but extent items used %llu",
12718                         bg_key.objectid, bg_key.offset, used, total);
12719                 err |= BG_ACCOUNTING_ERROR;
12720         }
12721         return err;
12722 }
12723
12724 /*
12725  * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
12726  * FIXME: We still need to repair error of dev_item.
12727  *
12728  * Returns error after repair.
12729  */
12730 static int repair_chunk_item(struct btrfs_trans_handle *trans,
12731                              struct btrfs_root *chunk_root,
12732                              struct btrfs_path *path, int err)
12733 {
12734         struct btrfs_chunk *chunk;
12735         struct btrfs_key chunk_key;
12736         struct extent_buffer *eb = path->nodes[0];
12737         u64 length;
12738         int slot = path->slots[0];
12739         u64 type;
12740         int ret = 0;
12741
12742         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12743         if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
12744                 return err;
12745         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12746         type = btrfs_chunk_type(path->nodes[0], chunk);
12747         length = btrfs_chunk_length(eb, chunk);
12748
12749         if (err & REFERENCER_MISSING) {
12750                 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
12751                                              type, chunk_key.offset, length);
12752                 if (ret) {
12753                         error("fail to add block group item[%llu %llu]",
12754                               chunk_key.offset, length);
12755                         goto out;
12756                 } else {
12757                         err &= ~REFERENCER_MISSING;
12758                         printf("Added block group item[%llu %llu]\n",
12759                                chunk_key.offset, length);
12760                 }
12761         }
12762
12763 out:
12764         return err;
12765 }
12766
12767 /*
12768  * Check a chunk item.
12769  * Including checking all referred dev_extents and block group
12770  */
12771 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12772                             struct extent_buffer *eb, int slot)
12773 {
12774         struct btrfs_root *extent_root = fs_info->extent_root;
12775         struct btrfs_root *dev_root = fs_info->dev_root;
12776         struct btrfs_path path;
12777         struct btrfs_key chunk_key;
12778         struct btrfs_key bg_key;
12779         struct btrfs_key devext_key;
12780         struct btrfs_chunk *chunk;
12781         struct extent_buffer *leaf;
12782         struct btrfs_block_group_item *bi;
12783         struct btrfs_block_group_item bg_item;
12784         struct btrfs_dev_extent *ptr;
12785         u64 length;
12786         u64 chunk_end;
12787         u64 stripe_len;
12788         u64 type;
12789         int num_stripes;
12790         u64 offset;
12791         u64 objectid;
12792         int i;
12793         int ret;
12794         int err = 0;
12795
12796         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12797         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12798         length = btrfs_chunk_length(eb, chunk);
12799         chunk_end = chunk_key.offset + length;
12800         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12801                                       chunk_key.offset);
12802         if (ret < 0) {
12803                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12804                         chunk_end);
12805                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12806                 goto out;
12807         }
12808         type = btrfs_chunk_type(eb, chunk);
12809
12810         bg_key.objectid = chunk_key.offset;
12811         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12812         bg_key.offset = length;
12813
12814         btrfs_init_path(&path);
12815         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12816         if (ret) {
12817                 error(
12818                 "chunk[%llu %llu) did not find the related block group item",
12819                         chunk_key.offset, chunk_end);
12820                 err |= REFERENCER_MISSING;
12821         } else{
12822                 leaf = path.nodes[0];
12823                 bi = btrfs_item_ptr(leaf, path.slots[0],
12824                                     struct btrfs_block_group_item);
12825                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
12826                                    sizeof(bg_item));
12827                 if (btrfs_block_group_flags(&bg_item) != type) {
12828                         error(
12829 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12830                                 chunk_key.offset, chunk_end, type,
12831                                 btrfs_block_group_flags(&bg_item));
12832                         err |= REFERENCER_MISSING;
12833                 }
12834         }
12835
12836         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12837         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
12838         for (i = 0; i < num_stripes; i++) {
12839                 btrfs_release_path(&path);
12840                 btrfs_init_path(&path);
12841                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12842                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12843                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12844
12845                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12846                                         0, 0);
12847                 if (ret)
12848                         goto not_match_dev;
12849
12850                 leaf = path.nodes[0];
12851                 ptr = btrfs_item_ptr(leaf, path.slots[0],
12852                                      struct btrfs_dev_extent);
12853                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12854                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
12855                 if (objectid != chunk_key.objectid ||
12856                     offset != chunk_key.offset ||
12857                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12858                         goto not_match_dev;
12859                 continue;
12860 not_match_dev:
12861                 err |= BACKREF_MISSING;
12862                 error(
12863                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12864                         chunk_key.objectid, chunk_end, i);
12865                 continue;
12866         }
12867         btrfs_release_path(&path);
12868 out:
12869         return err;
12870 }
12871
12872 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
12873                                    struct btrfs_root *root,
12874                                    struct btrfs_path *path)
12875 {
12876         struct btrfs_key key;
12877         int ret = 0;
12878
12879         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
12880         btrfs_release_path(path);
12881         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
12882         if (ret) {
12883                 ret = -ENOENT;
12884                 goto out;
12885         }
12886
12887         ret = btrfs_del_item(trans, root, path);
12888         if (ret)
12889                 goto out;
12890
12891         if (path->slots[0] == 0)
12892                 btrfs_prev_leaf(root, path);
12893         else
12894                 path->slots[0]--;
12895 out:
12896         if (ret)
12897                 error("failed to delete root %llu item[%llu, %u, %llu]",
12898                       root->objectid, key.objectid, key.type, key.offset);
12899         else
12900                 printf("Deleted root %llu item[%llu, %u, %llu]\n",
12901                        root->objectid, key.objectid, key.type, key.offset);
12902         return ret;
12903 }
12904
12905 /*
12906  * Main entry function to check known items and update related accounting info
12907  */
12908 static int check_leaf_items(struct btrfs_trans_handle *trans,
12909                             struct btrfs_root *root, struct btrfs_path *path,
12910                             struct node_refs *nrefs, int account_bytes)
12911 {
12912         struct btrfs_fs_info *fs_info = root->fs_info;
12913         struct btrfs_key key;
12914         struct extent_buffer *eb;
12915         int slot;
12916         int type;
12917         struct btrfs_extent_data_ref *dref;
12918         int ret = 0;
12919         int err = 0;
12920
12921 again:
12922         eb = path->nodes[0];
12923         slot = path->slots[0];
12924         if (slot >= btrfs_header_nritems(eb)) {
12925                 if (slot == 0) {
12926                         error("empty leaf [%llu %u] root %llu", eb->start,
12927                                 root->fs_info->nodesize, root->objectid);
12928                         err |= EIO;
12929                 }
12930                 goto out;
12931         }
12932
12933         btrfs_item_key_to_cpu(eb, &key, slot);
12934         type = key.type;
12935
12936         switch (type) {
12937         case BTRFS_EXTENT_DATA_KEY:
12938                 ret = check_extent_data_item(root, path, nrefs, account_bytes);
12939                 if (repair && ret)
12940                         ret = repair_extent_data_item(trans, root, path, nrefs,
12941                                                       ret);
12942                 err |= ret;
12943                 break;
12944         case BTRFS_BLOCK_GROUP_ITEM_KEY:
12945                 ret = check_block_group_item(fs_info, eb, slot);
12946                 if (repair &&
12947                     ret & REFERENCER_MISSING)
12948                         ret = delete_extent_tree_item(trans, root, path);
12949                 err |= ret;
12950                 break;
12951         case BTRFS_DEV_ITEM_KEY:
12952                 ret = check_dev_item(fs_info, eb, slot);
12953                 err |= ret;
12954                 break;
12955         case BTRFS_CHUNK_ITEM_KEY:
12956                 ret = check_chunk_item(fs_info, eb, slot);
12957                 if (repair && ret)
12958                         ret = repair_chunk_item(trans, root, path, ret);
12959                 err |= ret;
12960                 break;
12961         case BTRFS_DEV_EXTENT_KEY:
12962                 ret = check_dev_extent_item(fs_info, eb, slot);
12963                 err |= ret;
12964                 break;
12965         case BTRFS_EXTENT_ITEM_KEY:
12966         case BTRFS_METADATA_ITEM_KEY:
12967                 ret = check_extent_item(trans, fs_info, path);
12968                 err |= ret;
12969                 break;
12970         case BTRFS_EXTENT_CSUM_KEY:
12971                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
12972                 err |= ret;
12973                 break;
12974         case BTRFS_TREE_BLOCK_REF_KEY:
12975                 ret = check_tree_block_backref(fs_info, key.offset,
12976                                                key.objectid, -1);
12977                 if (repair &&
12978                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12979                         ret = delete_extent_tree_item(trans, root, path);
12980                 err |= ret;
12981                 break;
12982         case BTRFS_EXTENT_DATA_REF_KEY:
12983                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
12984                 ret = check_extent_data_backref(fs_info,
12985                                 btrfs_extent_data_ref_root(eb, dref),
12986                                 btrfs_extent_data_ref_objectid(eb, dref),
12987                                 btrfs_extent_data_ref_offset(eb, dref),
12988                                 key.objectid, 0,
12989                                 btrfs_extent_data_ref_count(eb, dref));
12990                 if (repair &&
12991                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12992                         ret = delete_extent_tree_item(trans, root, path);
12993                 err |= ret;
12994                 break;
12995         case BTRFS_SHARED_BLOCK_REF_KEY:
12996                 ret = check_shared_block_backref(fs_info, key.offset,
12997                                                  key.objectid, -1);
12998                 if (repair &&
12999                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13000                         ret = delete_extent_tree_item(trans, root, path);
13001                 err |= ret;
13002                 break;
13003         case BTRFS_SHARED_DATA_REF_KEY:
13004                 ret = check_shared_data_backref(fs_info, key.offset,
13005                                                 key.objectid);
13006                 if (repair &&
13007                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13008                         ret = delete_extent_tree_item(trans, root, path);
13009                 err |= ret;
13010                 break;
13011         default:
13012                 break;
13013         }
13014
13015         ++path->slots[0];
13016         goto again;
13017 out:
13018         return err;
13019 }
13020
13021 /*
13022  * Low memory usage version check_chunks_and_extents.
13023  */
13024 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
13025 {
13026         struct btrfs_trans_handle *trans = NULL;
13027         struct btrfs_path path;
13028         struct btrfs_key old_key;
13029         struct btrfs_key key;
13030         struct btrfs_root *root1;
13031         struct btrfs_root *root;
13032         struct btrfs_root *cur_root;
13033         int err = 0;
13034         int ret;
13035
13036         root = fs_info->fs_root;
13037
13038         if (repair) {
13039                 trans = btrfs_start_transaction(fs_info->extent_root, 1);
13040                 if (IS_ERR(trans)) {
13041                         error("failed to start transaction before check");
13042                         return PTR_ERR(trans);
13043                 }
13044         }
13045
13046         root1 = root->fs_info->chunk_root;
13047         ret = check_btrfs_root(trans, root1, 0, 1);
13048         err |= ret;
13049
13050         root1 = root->fs_info->tree_root;
13051         ret = check_btrfs_root(trans, root1, 0, 1);
13052         err |= ret;
13053
13054         btrfs_init_path(&path);
13055         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
13056         key.offset = 0;
13057         key.type = BTRFS_ROOT_ITEM_KEY;
13058
13059         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
13060         if (ret) {
13061                 error("cannot find extent tree in tree_root");
13062                 goto out;
13063         }
13064
13065         while (1) {
13066                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13067                 if (key.type != BTRFS_ROOT_ITEM_KEY)
13068                         goto next;
13069                 old_key = key;
13070                 key.offset = (u64)-1;
13071
13072                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13073                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
13074                                         &key);
13075                 else
13076                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
13077                 if (IS_ERR(cur_root) || !cur_root) {
13078                         error("failed to read tree: %lld", key.objectid);
13079                         goto next;
13080                 }
13081
13082                 ret = check_btrfs_root(trans, cur_root, 0, 1);
13083                 err |= ret;
13084
13085                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13086                         btrfs_free_fs_root(cur_root);
13087
13088                 btrfs_release_path(&path);
13089                 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
13090                                         &old_key, &path, 0, 0);
13091                 if (ret)
13092                         goto out;
13093 next:
13094                 ret = btrfs_next_item(root1, &path);
13095                 if (ret)
13096                         goto out;
13097         }
13098 out:
13099
13100         /* if repair, update block accounting */
13101         if (repair) {
13102                 ret = btrfs_fix_block_accounting(trans, root);
13103                 if (ret)
13104                         err |= ret;
13105                 else
13106                         err &= ~BG_ACCOUNTING_ERROR;
13107         }
13108
13109         if (trans)
13110                 btrfs_commit_transaction(trans, root->fs_info->extent_root);
13111
13112         btrfs_release_path(&path);
13113
13114         return err;
13115 }
13116
13117 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
13118 {
13119         int ret;
13120
13121         if (!ctx.progress_enabled)
13122                 fprintf(stderr, "checking extents\n");
13123         if (check_mode == CHECK_MODE_LOWMEM)
13124                 ret = check_chunks_and_extents_v2(fs_info);
13125         else
13126                 ret = check_chunks_and_extents(fs_info);
13127
13128         /* Also repair device size related problems */
13129         if (repair && !ret) {
13130                 ret = btrfs_fix_device_and_super_size(fs_info);
13131                 if (ret > 0)
13132                         ret = 0;
13133         }
13134         return ret;
13135 }
13136
13137 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
13138                            struct btrfs_root *root, int overwrite)
13139 {
13140         struct extent_buffer *c;
13141         struct extent_buffer *old = root->node;
13142         int level;
13143         int ret;
13144         struct btrfs_disk_key disk_key = {0,0,0};
13145
13146         level = 0;
13147
13148         if (overwrite) {
13149                 c = old;
13150                 extent_buffer_get(c);
13151                 goto init;
13152         }
13153         c = btrfs_alloc_free_block(trans, root,
13154                                    root->fs_info->nodesize,
13155                                    root->root_key.objectid,
13156                                    &disk_key, level, 0, 0);
13157         if (IS_ERR(c)) {
13158                 c = old;
13159                 extent_buffer_get(c);
13160                 overwrite = 1;
13161         }
13162 init:
13163         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
13164         btrfs_set_header_level(c, level);
13165         btrfs_set_header_bytenr(c, c->start);
13166         btrfs_set_header_generation(c, trans->transid);
13167         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
13168         btrfs_set_header_owner(c, root->root_key.objectid);
13169
13170         write_extent_buffer(c, root->fs_info->fsid,
13171                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
13172
13173         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
13174                             btrfs_header_chunk_tree_uuid(c),
13175                             BTRFS_UUID_SIZE);
13176
13177         btrfs_mark_buffer_dirty(c);
13178         /*
13179          * this case can happen in the following case:
13180          *
13181          * 1.overwrite previous root.
13182          *
13183          * 2.reinit reloc data root, this is because we skip pin
13184          * down reloc data tree before which means we can allocate
13185          * same block bytenr here.
13186          */
13187         if (old->start == c->start) {
13188                 btrfs_set_root_generation(&root->root_item,
13189                                           trans->transid);
13190                 root->root_item.level = btrfs_header_level(root->node);
13191                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
13192                                         &root->root_key, &root->root_item);
13193                 if (ret) {
13194                         free_extent_buffer(c);
13195                         return ret;
13196                 }
13197         }
13198         free_extent_buffer(old);
13199         root->node = c;
13200         add_root_to_dirty_list(root);
13201         return 0;
13202 }
13203
13204 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
13205                                 struct extent_buffer *eb, int tree_root)
13206 {
13207         struct extent_buffer *tmp;
13208         struct btrfs_root_item *ri;
13209         struct btrfs_key key;
13210         u64 bytenr;
13211         int level = btrfs_header_level(eb);
13212         int nritems;
13213         int ret;
13214         int i;
13215
13216         /*
13217          * If we have pinned this block before, don't pin it again.
13218          * This can not only avoid forever loop with broken filesystem
13219          * but also give us some speedups.
13220          */
13221         if (test_range_bit(&fs_info->pinned_extents, eb->start,
13222                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
13223                 return 0;
13224
13225         btrfs_pin_extent(fs_info, eb->start, eb->len);
13226
13227         nritems = btrfs_header_nritems(eb);
13228         for (i = 0; i < nritems; i++) {
13229                 if (level == 0) {
13230                         btrfs_item_key_to_cpu(eb, &key, i);
13231                         if (key.type != BTRFS_ROOT_ITEM_KEY)
13232                                 continue;
13233                         /* Skip the extent root and reloc roots */
13234                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13235                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13236                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
13237                                 continue;
13238                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13239                         bytenr = btrfs_disk_root_bytenr(eb, ri);
13240
13241                         /*
13242                          * If at any point we start needing the real root we
13243                          * will have to build a stump root for the root we are
13244                          * in, but for now this doesn't actually use the root so
13245                          * just pass in extent_root.
13246                          */
13247                         tmp = read_tree_block(fs_info, bytenr, 0);
13248                         if (!extent_buffer_uptodate(tmp)) {
13249                                 fprintf(stderr, "Error reading root block\n");
13250                                 return -EIO;
13251                         }
13252                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
13253                         free_extent_buffer(tmp);
13254                         if (ret)
13255                                 return ret;
13256                 } else {
13257                         bytenr = btrfs_node_blockptr(eb, i);
13258
13259                         /* If we aren't the tree root don't read the block */
13260                         if (level == 1 && !tree_root) {
13261                                 btrfs_pin_extent(fs_info, bytenr,
13262                                                 fs_info->nodesize);
13263                                 continue;
13264                         }
13265
13266                         tmp = read_tree_block(fs_info, bytenr, 0);
13267                         if (!extent_buffer_uptodate(tmp)) {
13268                                 fprintf(stderr, "Error reading tree block\n");
13269                                 return -EIO;
13270                         }
13271                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13272                         free_extent_buffer(tmp);
13273                         if (ret)
13274                                 return ret;
13275                 }
13276         }
13277
13278         return 0;
13279 }
13280
13281 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13282 {
13283         int ret;
13284
13285         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13286         if (ret)
13287                 return ret;
13288
13289         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
13290 }
13291
13292 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13293 {
13294         struct btrfs_block_group_cache *cache;
13295         struct btrfs_path path;
13296         struct extent_buffer *leaf;
13297         struct btrfs_chunk *chunk;
13298         struct btrfs_key key;
13299         int ret;
13300         u64 start;
13301
13302         btrfs_init_path(&path);
13303         key.objectid = 0;
13304         key.type = BTRFS_CHUNK_ITEM_KEY;
13305         key.offset = 0;
13306         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13307         if (ret < 0) {
13308                 btrfs_release_path(&path);
13309                 return ret;
13310         }
13311
13312         /*
13313          * We do this in case the block groups were screwed up and had alloc
13314          * bits that aren't actually set on the chunks.  This happens with
13315          * restored images every time and could happen in real life I guess.
13316          */
13317         fs_info->avail_data_alloc_bits = 0;
13318         fs_info->avail_metadata_alloc_bits = 0;
13319         fs_info->avail_system_alloc_bits = 0;
13320
13321         /* First we need to create the in-memory block groups */
13322         while (1) {
13323                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13324                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13325                         if (ret < 0) {
13326                                 btrfs_release_path(&path);
13327                                 return ret;
13328                         }
13329                         if (ret) {
13330                                 ret = 0;
13331                                 break;
13332                         }
13333                 }
13334                 leaf = path.nodes[0];
13335                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13336                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13337                         path.slots[0]++;
13338                         continue;
13339                 }
13340
13341                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13342                 btrfs_add_block_group(fs_info, 0,
13343                                       btrfs_chunk_type(leaf, chunk), key.offset,
13344                                       btrfs_chunk_length(leaf, chunk));
13345                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13346                                  key.offset + btrfs_chunk_length(leaf, chunk));
13347                 path.slots[0]++;
13348         }
13349         start = 0;
13350         while (1) {
13351                 cache = btrfs_lookup_first_block_group(fs_info, start);
13352                 if (!cache)
13353                         break;
13354                 cache->cached = 1;
13355                 start = cache->key.objectid + cache->key.offset;
13356         }
13357
13358         btrfs_release_path(&path);
13359         return 0;
13360 }
13361
13362 static int reset_balance(struct btrfs_trans_handle *trans,
13363                          struct btrfs_fs_info *fs_info)
13364 {
13365         struct btrfs_root *root = fs_info->tree_root;
13366         struct btrfs_path path;
13367         struct extent_buffer *leaf;
13368         struct btrfs_key key;
13369         int del_slot, del_nr = 0;
13370         int ret;
13371         int found = 0;
13372
13373         btrfs_init_path(&path);
13374         key.objectid = BTRFS_BALANCE_OBJECTID;
13375         key.type = BTRFS_BALANCE_ITEM_KEY;
13376         key.offset = 0;
13377         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13378         if (ret) {
13379                 if (ret > 0)
13380                         ret = 0;
13381                 if (!ret)
13382                         goto reinit_data_reloc;
13383                 else
13384                         goto out;
13385         }
13386
13387         ret = btrfs_del_item(trans, root, &path);
13388         if (ret)
13389                 goto out;
13390         btrfs_release_path(&path);
13391
13392         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13393         key.type = BTRFS_ROOT_ITEM_KEY;
13394         key.offset = 0;
13395         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13396         if (ret < 0)
13397                 goto out;
13398         while (1) {
13399                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13400                         if (!found)
13401                                 break;
13402
13403                         if (del_nr) {
13404                                 ret = btrfs_del_items(trans, root, &path,
13405                                                       del_slot, del_nr);
13406                                 del_nr = 0;
13407                                 if (ret)
13408                                         goto out;
13409                         }
13410                         key.offset++;
13411                         btrfs_release_path(&path);
13412
13413                         found = 0;
13414                         ret = btrfs_search_slot(trans, root, &key, &path,
13415                                                 -1, 1);
13416                         if (ret < 0)
13417                                 goto out;
13418                         continue;
13419                 }
13420                 found = 1;
13421                 leaf = path.nodes[0];
13422                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13423                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13424                         break;
13425                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13426                         path.slots[0]++;
13427                         continue;
13428                 }
13429                 if (!del_nr) {
13430                         del_slot = path.slots[0];
13431                         del_nr = 1;
13432                 } else {
13433                         del_nr++;
13434                 }
13435                 path.slots[0]++;
13436         }
13437
13438         if (del_nr) {
13439                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13440                 if (ret)
13441                         goto out;
13442         }
13443         btrfs_release_path(&path);
13444
13445 reinit_data_reloc:
13446         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13447         key.type = BTRFS_ROOT_ITEM_KEY;
13448         key.offset = (u64)-1;
13449         root = btrfs_read_fs_root(fs_info, &key);
13450         if (IS_ERR(root)) {
13451                 fprintf(stderr, "Error reading data reloc tree\n");
13452                 ret = PTR_ERR(root);
13453                 goto out;
13454         }
13455         record_root_in_trans(trans, root);
13456         ret = btrfs_fsck_reinit_root(trans, root, 0);
13457         if (ret)
13458                 goto out;
13459         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13460 out:
13461         btrfs_release_path(&path);
13462         return ret;
13463 }
13464
13465 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13466                               struct btrfs_fs_info *fs_info)
13467 {
13468         u64 start = 0;
13469         int ret;
13470
13471         /*
13472          * The only reason we don't do this is because right now we're just
13473          * walking the trees we find and pinning down their bytes, we don't look
13474          * at any of the leaves.  In order to do mixed groups we'd have to check
13475          * the leaves of any fs roots and pin down the bytes for any file
13476          * extents we find.  Not hard but why do it if we don't have to?
13477          */
13478         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13479                 fprintf(stderr, "We don't support re-initing the extent tree "
13480                         "for mixed block groups yet, please notify a btrfs "
13481                         "developer you want to do this so they can add this "
13482                         "functionality.\n");
13483                 return -EINVAL;
13484         }
13485
13486         /*
13487          * first we need to walk all of the trees except the extent tree and pin
13488          * down the bytes that are in use so we don't overwrite any existing
13489          * metadata.
13490          */
13491         ret = pin_metadata_blocks(fs_info);
13492         if (ret) {
13493                 fprintf(stderr, "error pinning down used bytes\n");
13494                 return ret;
13495         }
13496
13497         /*
13498          * Need to drop all the block groups since we're going to recreate all
13499          * of them again.
13500          */
13501         btrfs_free_block_groups(fs_info);
13502         ret = reset_block_groups(fs_info);
13503         if (ret) {
13504                 fprintf(stderr, "error resetting the block groups\n");
13505                 return ret;
13506         }
13507
13508         /* Ok we can allocate now, reinit the extent root */
13509         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13510         if (ret) {
13511                 fprintf(stderr, "extent root initialization failed\n");
13512                 /*
13513                  * When the transaction code is updated we should end the
13514                  * transaction, but for now progs only knows about commit so
13515                  * just return an error.
13516                  */
13517                 return ret;
13518         }
13519
13520         /*
13521          * Now we have all the in-memory block groups setup so we can make
13522          * allocations properly, and the metadata we care about is safe since we
13523          * pinned all of it above.
13524          */
13525         while (1) {
13526                 struct btrfs_block_group_cache *cache;
13527
13528                 cache = btrfs_lookup_first_block_group(fs_info, start);
13529                 if (!cache)
13530                         break;
13531                 start = cache->key.objectid + cache->key.offset;
13532                 ret = btrfs_insert_item(trans, fs_info->extent_root,
13533                                         &cache->key, &cache->item,
13534                                         sizeof(cache->item));
13535                 if (ret) {
13536                         fprintf(stderr, "Error adding block group\n");
13537                         return ret;
13538                 }
13539                 btrfs_extent_post_op(trans, fs_info->extent_root);
13540         }
13541
13542         ret = reset_balance(trans, fs_info);
13543         if (ret)
13544                 fprintf(stderr, "error resetting the pending balance\n");
13545
13546         return ret;
13547 }
13548
13549 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13550 {
13551         struct btrfs_path path;
13552         struct btrfs_trans_handle *trans;
13553         struct btrfs_key key;
13554         int ret;
13555
13556         printf("Recowing metadata block %llu\n", eb->start);
13557         key.objectid = btrfs_header_owner(eb);
13558         key.type = BTRFS_ROOT_ITEM_KEY;
13559         key.offset = (u64)-1;
13560
13561         root = btrfs_read_fs_root(root->fs_info, &key);
13562         if (IS_ERR(root)) {
13563                 fprintf(stderr, "Couldn't find owner root %llu\n",
13564                         key.objectid);
13565                 return PTR_ERR(root);
13566         }
13567
13568         trans = btrfs_start_transaction(root, 1);
13569         if (IS_ERR(trans))
13570                 return PTR_ERR(trans);
13571
13572         btrfs_init_path(&path);
13573         path.lowest_level = btrfs_header_level(eb);
13574         if (path.lowest_level)
13575                 btrfs_node_key_to_cpu(eb, &key, 0);
13576         else
13577                 btrfs_item_key_to_cpu(eb, &key, 0);
13578
13579         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13580         btrfs_commit_transaction(trans, root);
13581         btrfs_release_path(&path);
13582         return ret;
13583 }
13584
13585 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13586 {
13587         struct btrfs_path path;
13588         struct btrfs_trans_handle *trans;
13589         struct btrfs_key key;
13590         int ret;
13591
13592         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13593                bad->key.type, bad->key.offset);
13594         key.objectid = bad->root_id;
13595         key.type = BTRFS_ROOT_ITEM_KEY;
13596         key.offset = (u64)-1;
13597
13598         root = btrfs_read_fs_root(root->fs_info, &key);
13599         if (IS_ERR(root)) {
13600                 fprintf(stderr, "Couldn't find owner root %llu\n",
13601                         key.objectid);
13602                 return PTR_ERR(root);
13603         }
13604
13605         trans = btrfs_start_transaction(root, 1);
13606         if (IS_ERR(trans))
13607                 return PTR_ERR(trans);
13608
13609         btrfs_init_path(&path);
13610         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13611         if (ret) {
13612                 if (ret > 0)
13613                         ret = 0;
13614                 goto out;
13615         }
13616         ret = btrfs_del_item(trans, root, &path);
13617 out:
13618         btrfs_commit_transaction(trans, root);
13619         btrfs_release_path(&path);
13620         return ret;
13621 }
13622
13623 static int zero_log_tree(struct btrfs_root *root)
13624 {
13625         struct btrfs_trans_handle *trans;
13626         int ret;
13627
13628         trans = btrfs_start_transaction(root, 1);
13629         if (IS_ERR(trans)) {
13630                 ret = PTR_ERR(trans);
13631                 return ret;
13632         }
13633         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13634         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13635         ret = btrfs_commit_transaction(trans, root);
13636         return ret;
13637 }
13638
13639 static int populate_csum(struct btrfs_trans_handle *trans,
13640                          struct btrfs_root *csum_root, char *buf, u64 start,
13641                          u64 len)
13642 {
13643         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13644         u64 offset = 0;
13645         u64 sectorsize;
13646         int ret = 0;
13647
13648         while (offset < len) {
13649                 sectorsize = fs_info->sectorsize;
13650                 ret = read_extent_data(fs_info, buf, start + offset,
13651                                        &sectorsize, 0);
13652                 if (ret)
13653                         break;
13654                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13655                                             start + offset, buf, sectorsize);
13656                 if (ret)
13657                         break;
13658                 offset += sectorsize;
13659         }
13660         return ret;
13661 }
13662
13663 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13664                                       struct btrfs_root *csum_root,
13665                                       struct btrfs_root *cur_root)
13666 {
13667         struct btrfs_path path;
13668         struct btrfs_key key;
13669         struct extent_buffer *node;
13670         struct btrfs_file_extent_item *fi;
13671         char *buf = NULL;
13672         u64 start = 0;
13673         u64 len = 0;
13674         int slot = 0;
13675         int ret = 0;
13676
13677         buf = malloc(cur_root->fs_info->sectorsize);
13678         if (!buf)
13679                 return -ENOMEM;
13680
13681         btrfs_init_path(&path);
13682         key.objectid = 0;
13683         key.offset = 0;
13684         key.type = 0;
13685         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13686         if (ret < 0)
13687                 goto out;
13688         /* Iterate all regular file extents and fill its csum */
13689         while (1) {
13690                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13691
13692                 if (key.type != BTRFS_EXTENT_DATA_KEY)
13693                         goto next;
13694                 node = path.nodes[0];
13695                 slot = path.slots[0];
13696                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13697                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
13698                         goto next;
13699                 start = btrfs_file_extent_disk_bytenr(node, fi);
13700                 len = btrfs_file_extent_disk_num_bytes(node, fi);
13701
13702                 ret = populate_csum(trans, csum_root, buf, start, len);
13703                 if (ret == -EEXIST)
13704                         ret = 0;
13705                 if (ret < 0)
13706                         goto out;
13707 next:
13708                 /*
13709                  * TODO: if next leaf is corrupted, jump to nearest next valid
13710                  * leaf.
13711                  */
13712                 ret = btrfs_next_item(cur_root, &path);
13713                 if (ret < 0)
13714                         goto out;
13715                 if (ret > 0) {
13716                         ret = 0;
13717                         goto out;
13718                 }
13719         }
13720
13721 out:
13722         btrfs_release_path(&path);
13723         free(buf);
13724         return ret;
13725 }
13726
13727 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13728                                   struct btrfs_root *csum_root)
13729 {
13730         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13731         struct btrfs_path path;
13732         struct btrfs_root *tree_root = fs_info->tree_root;
13733         struct btrfs_root *cur_root;
13734         struct extent_buffer *node;
13735         struct btrfs_key key;
13736         int slot = 0;
13737         int ret = 0;
13738
13739         btrfs_init_path(&path);
13740         key.objectid = BTRFS_FS_TREE_OBJECTID;
13741         key.offset = 0;
13742         key.type = BTRFS_ROOT_ITEM_KEY;
13743         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13744         if (ret < 0)
13745                 goto out;
13746         if (ret > 0) {
13747                 ret = -ENOENT;
13748                 goto out;
13749         }
13750
13751         while (1) {
13752                 node = path.nodes[0];
13753                 slot = path.slots[0];
13754                 btrfs_item_key_to_cpu(node, &key, slot);
13755                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13756                         goto out;
13757                 if (key.type != BTRFS_ROOT_ITEM_KEY)
13758                         goto next;
13759                 if (!is_fstree(key.objectid))
13760                         goto next;
13761                 key.offset = (u64)-1;
13762
13763                 cur_root = btrfs_read_fs_root(fs_info, &key);
13764                 if (IS_ERR(cur_root) || !cur_root) {
13765                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13766                                 key.objectid);
13767                         goto out;
13768                 }
13769                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13770                                 cur_root);
13771                 if (ret < 0)
13772                         goto out;
13773 next:
13774                 ret = btrfs_next_item(tree_root, &path);
13775                 if (ret > 0) {
13776                         ret = 0;
13777                         goto out;
13778                 }
13779                 if (ret < 0)
13780                         goto out;
13781         }
13782
13783 out:
13784         btrfs_release_path(&path);
13785         return ret;
13786 }
13787
13788 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13789                                       struct btrfs_root *csum_root)
13790 {
13791         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13792         struct btrfs_path path;
13793         struct btrfs_extent_item *ei;
13794         struct extent_buffer *leaf;
13795         char *buf;
13796         struct btrfs_key key;
13797         int ret;
13798
13799         btrfs_init_path(&path);
13800         key.objectid = 0;
13801         key.type = BTRFS_EXTENT_ITEM_KEY;
13802         key.offset = 0;
13803         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13804         if (ret < 0) {
13805                 btrfs_release_path(&path);
13806                 return ret;
13807         }
13808
13809         buf = malloc(csum_root->fs_info->sectorsize);
13810         if (!buf) {
13811                 btrfs_release_path(&path);
13812                 return -ENOMEM;
13813         }
13814
13815         while (1) {
13816                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13817                         ret = btrfs_next_leaf(extent_root, &path);
13818                         if (ret < 0)
13819                                 break;
13820                         if (ret) {
13821                                 ret = 0;
13822                                 break;
13823                         }
13824                 }
13825                 leaf = path.nodes[0];
13826
13827                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13828                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13829                         path.slots[0]++;
13830                         continue;
13831                 }
13832
13833                 ei = btrfs_item_ptr(leaf, path.slots[0],
13834                                     struct btrfs_extent_item);
13835                 if (!(btrfs_extent_flags(leaf, ei) &
13836                       BTRFS_EXTENT_FLAG_DATA)) {
13837                         path.slots[0]++;
13838                         continue;
13839                 }
13840
13841                 ret = populate_csum(trans, csum_root, buf, key.objectid,
13842                                     key.offset);
13843                 if (ret)
13844                         break;
13845                 path.slots[0]++;
13846         }
13847
13848         btrfs_release_path(&path);
13849         free(buf);
13850         return ret;
13851 }
13852
/*
 * Recompute data checksums and store them in the csum tree.
 *
 * After an extent tree re-initialisation the extent info is gone, so the
 * extent tree cannot be used as the source of data extents.  In that case the
 * caller sets @search_fs_tree and the fs/subvolume trees are scanned instead;
 * otherwise the extent tree is scanned.
 *
 * Returns whatever the selected scanner returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        if (!search_fs_tree)
                return fill_csum_tree_from_extent(trans, csum_root);

        return fill_csum_tree_from_fs(trans, csum_root);
}
13869
13870 static void free_roots_info_cache(void)
13871 {
13872         if (!roots_info_cache)
13873                 return;
13874
13875         while (!cache_tree_empty(roots_info_cache)) {
13876                 struct cache_extent *entry;
13877                 struct root_item_info *rii;
13878
13879                 entry = first_cache_extent(roots_info_cache);
13880                 if (!entry)
13881                         break;
13882                 remove_cache_extent(roots_info_cache, entry);
13883                 rii = container_of(entry, struct root_item_info, cache_extent);
13884                 free(rii);
13885         }
13886
13887         free(roots_info_cache);
13888         roots_info_cache = NULL;
13889 }
13890
13891 static int build_roots_info_cache(struct btrfs_fs_info *info)
13892 {
13893         int ret = 0;
13894         struct btrfs_key key;
13895         struct extent_buffer *leaf;
13896         struct btrfs_path path;
13897
13898         if (!roots_info_cache) {
13899                 roots_info_cache = malloc(sizeof(*roots_info_cache));
13900                 if (!roots_info_cache)
13901                         return -ENOMEM;
13902                 cache_tree_init(roots_info_cache);
13903         }
13904
13905         btrfs_init_path(&path);
13906         key.objectid = 0;
13907         key.type = BTRFS_EXTENT_ITEM_KEY;
13908         key.offset = 0;
13909         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13910         if (ret < 0)
13911                 goto out;
13912         leaf = path.nodes[0];
13913
13914         while (1) {
13915                 struct btrfs_key found_key;
13916                 struct btrfs_extent_item *ei;
13917                 struct btrfs_extent_inline_ref *iref;
13918                 int slot = path.slots[0];
13919                 int type;
13920                 u64 flags;
13921                 u64 root_id;
13922                 u8 level;
13923                 struct cache_extent *entry;
13924                 struct root_item_info *rii;
13925
13926                 if (slot >= btrfs_header_nritems(leaf)) {
13927                         ret = btrfs_next_leaf(info->extent_root, &path);
13928                         if (ret < 0) {
13929                                 break;
13930                         } else if (ret) {
13931                                 ret = 0;
13932                                 break;
13933                         }
13934                         leaf = path.nodes[0];
13935                         slot = path.slots[0];
13936                 }
13937
13938                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13939
13940                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13941                     found_key.type != BTRFS_METADATA_ITEM_KEY)
13942                         goto next;
13943
13944                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13945                 flags = btrfs_extent_flags(leaf, ei);
13946
13947                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13948                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
13949                         goto next;
13950
13951                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13952                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13953                         level = found_key.offset;
13954                 } else {
13955                         struct btrfs_tree_block_info *binfo;
13956
13957                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
13958                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13959                         level = btrfs_tree_block_level(leaf, binfo);
13960                 }
13961
13962                 /*
13963                  * For a root extent, it must be of the following type and the
13964                  * first (and only one) iref in the item.
13965                  */
13966                 type = btrfs_extent_inline_ref_type(leaf, iref);
13967                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
13968                         goto next;
13969
13970                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13971                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13972                 if (!entry) {
13973                         rii = malloc(sizeof(struct root_item_info));
13974                         if (!rii) {
13975                                 ret = -ENOMEM;
13976                                 goto out;
13977                         }
13978                         rii->cache_extent.start = root_id;
13979                         rii->cache_extent.size = 1;
13980                         rii->level = (u8)-1;
13981                         entry = &rii->cache_extent;
13982                         ret = insert_cache_extent(roots_info_cache, entry);
13983                         ASSERT(ret == 0);
13984                 } else {
13985                         rii = container_of(entry, struct root_item_info,
13986                                            cache_extent);
13987                 }
13988
13989                 ASSERT(rii->cache_extent.start == root_id);
13990                 ASSERT(rii->cache_extent.size == 1);
13991
13992                 if (level > rii->level || rii->level == (u8)-1) {
13993                         rii->level = level;
13994                         rii->bytenr = found_key.objectid;
13995                         rii->gen = btrfs_extent_generation(leaf, ei);
13996                         rii->node_count = 1;
13997                 } else if (level == rii->level) {
13998                         rii->node_count++;
13999                 }
14000 next:
14001                 path.slots[0]++;
14002         }
14003
14004 out:
14005         btrfs_release_path(&path);
14006
14007         return ret;
14008 }
14009
14010 static int maybe_repair_root_item(struct btrfs_path *path,
14011                                   const struct btrfs_key *root_key,
14012                                   const int read_only_mode)
14013 {
14014         const u64 root_id = root_key->objectid;
14015         struct cache_extent *entry;
14016         struct root_item_info *rii;
14017         struct btrfs_root_item ri;
14018         unsigned long offset;
14019
14020         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14021         if (!entry) {
14022                 fprintf(stderr,
14023                         "Error: could not find extent items for root %llu\n",
14024                         root_key->objectid);
14025                 return -ENOENT;
14026         }
14027
14028         rii = container_of(entry, struct root_item_info, cache_extent);
14029         ASSERT(rii->cache_extent.start == root_id);
14030         ASSERT(rii->cache_extent.size == 1);
14031
14032         if (rii->node_count != 1) {
14033                 fprintf(stderr,
14034                         "Error: could not find btree root extent for root %llu\n",
14035                         root_id);
14036                 return -ENOENT;
14037         }
14038
14039         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
14040         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
14041
14042         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
14043             btrfs_root_level(&ri) != rii->level ||
14044             btrfs_root_generation(&ri) != rii->gen) {
14045
14046                 /*
14047                  * If we're in repair mode but our caller told us to not update
14048                  * the root item, i.e. just check if it needs to be updated, don't
14049                  * print this message, since the caller will call us again shortly
14050                  * for the same root item without read only mode (the caller will
14051                  * open a transaction first).
14052                  */
14053                 if (!(read_only_mode && repair))
14054                         fprintf(stderr,
14055                                 "%sroot item for root %llu,"
14056                                 " current bytenr %llu, current gen %llu, current level %u,"
14057                                 " new bytenr %llu, new gen %llu, new level %u\n",
14058                                 (read_only_mode ? "" : "fixing "),
14059                                 root_id,
14060                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
14061                                 btrfs_root_level(&ri),
14062                                 rii->bytenr, rii->gen, rii->level);
14063
14064                 if (btrfs_root_generation(&ri) > rii->gen) {
14065                         fprintf(stderr,
14066                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
14067                                 root_id, btrfs_root_generation(&ri), rii->gen);
14068                         return -EINVAL;
14069                 }
14070
14071                 if (!read_only_mode) {
14072                         btrfs_set_root_bytenr(&ri, rii->bytenr);
14073                         btrfs_set_root_level(&ri, rii->level);
14074                         btrfs_set_root_generation(&ri, rii->gen);
14075                         write_extent_buffer(path->nodes[0], &ri,
14076                                             offset, sizeof(ri));
14077                 }
14078
14079                 return 1;
14080         }
14081
14082         return 0;
14083 }
14084
14085 /*
14086  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
14087  * caused read-only snapshots to be corrupted if they were created at a moment
14088  * when the source subvolume/snapshot had orphan items. The issue was that the
14089  * on-disk root items became incorrect, referring to the pre orphan cleanup root
14090  * node instead of the post orphan cleanup root node.
14091  * So this function, and its callees, just detects and fixes those cases. Even
14092  * though the regression was for read-only snapshots, this function applies to
14093  * any snapshot/subvolume root.
14094  * This must be run before any other repair code - not doing it so, makes other
14095  * repair code delete or modify backrefs in the extent tree for example, which
14096  * will result in an inconsistent fs after repairing the root items.
14097  */
14098 static int repair_root_items(struct btrfs_fs_info *info)
14099 {
14100         struct btrfs_path path;
14101         struct btrfs_key key;
14102         struct extent_buffer *leaf;
14103         struct btrfs_trans_handle *trans = NULL;
14104         int ret = 0;
14105         int bad_roots = 0;
14106         int need_trans = 0;
14107
14108         btrfs_init_path(&path);
14109
14110         ret = build_roots_info_cache(info);
14111         if (ret)
14112                 goto out;
14113
14114         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
14115         key.type = BTRFS_ROOT_ITEM_KEY;
14116         key.offset = 0;
14117
14118 again:
14119         /*
14120          * Avoid opening and committing transactions if a leaf doesn't have
14121          * any root items that need to be fixed, so that we avoid rotating
14122          * backup roots unnecessarily.
14123          */
14124         if (need_trans) {
14125                 trans = btrfs_start_transaction(info->tree_root, 1);
14126                 if (IS_ERR(trans)) {
14127                         ret = PTR_ERR(trans);
14128                         goto out;
14129                 }
14130         }
14131
14132         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
14133                                 0, trans ? 1 : 0);
14134         if (ret < 0)
14135                 goto out;
14136         leaf = path.nodes[0];
14137
14138         while (1) {
14139                 struct btrfs_key found_key;
14140
14141                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
14142                         int no_more_keys = find_next_key(&path, &key);
14143
14144                         btrfs_release_path(&path);
14145                         if (trans) {
14146                                 ret = btrfs_commit_transaction(trans,
14147                                                                info->tree_root);
14148                                 trans = NULL;
14149                                 if (ret < 0)
14150                                         goto out;
14151                         }
14152                         need_trans = 0;
14153                         if (no_more_keys)
14154                                 break;
14155                         goto again;
14156                 }
14157
14158                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14159
14160                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
14161                         goto next;
14162                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
14163                         goto next;
14164
14165                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
14166                 if (ret < 0)
14167                         goto out;
14168                 if (ret) {
14169                         if (!trans && repair) {
14170                                 need_trans = 1;
14171                                 key = found_key;
14172                                 btrfs_release_path(&path);
14173                                 goto again;
14174                         }
14175                         bad_roots++;
14176                 }
14177 next:
14178                 path.slots[0]++;
14179         }
14180         ret = 0;
14181 out:
14182         free_roots_info_cache();
14183         btrfs_release_path(&path);
14184         if (trans)
14185                 btrfs_commit_transaction(trans, info->tree_root);
14186         if (ret < 0)
14187                 return ret;
14188
14189         return bad_roots;
14190 }
14191
14192 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
14193 {
14194         struct btrfs_trans_handle *trans;
14195         struct btrfs_block_group_cache *bg_cache;
14196         u64 current = 0;
14197         int ret = 0;
14198
14199         /* Clear all free space cache inodes and its extent data */
14200         while (1) {
14201                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
14202                 if (!bg_cache)
14203                         break;
14204                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
14205                 if (ret < 0)
14206                         return ret;
14207                 current = bg_cache->key.objectid + bg_cache->key.offset;
14208         }
14209
14210         /* Don't forget to set cache_generation to -1 */
14211         trans = btrfs_start_transaction(fs_info->tree_root, 0);
14212         if (IS_ERR(trans)) {
14213                 error("failed to update super block cache generation");
14214                 return PTR_ERR(trans);
14215         }
14216         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
14217         btrfs_commit_transaction(trans, fs_info->tree_root);
14218
14219         return ret;
14220 }
14221
14222 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
14223                 int clear_version)
14224 {
14225         int ret = 0;
14226
14227         if (clear_version == 1) {
14228                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14229                         error(
14230                 "free space cache v2 detected, use --clear-space-cache v2");
14231                         ret = 1;
14232                         goto close_out;
14233                 }
14234                 printf("Clearing free space cache\n");
14235                 ret = clear_free_space_cache(fs_info);
14236                 if (ret) {
14237                         error("failed to clear free space cache");
14238                         ret = 1;
14239                 } else {
14240                         printf("Free space cache cleared\n");
14241                 }
14242         } else if (clear_version == 2) {
14243                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14244                         printf("no free space cache v2 to clear\n");
14245                         ret = 0;
14246                         goto close_out;
14247                 }
14248                 printf("Clear free space cache v2\n");
14249                 ret = btrfs_clear_free_space_tree(fs_info);
14250                 if (ret) {
14251                         error("failed to clear free space cache v2: %d", ret);
14252                         ret = 1;
14253                 } else {
14254                         printf("free space cache v2 cleared\n");
14255                 }
14256         }
14257 close_out:
14258         return ret;
14259 }
14260
/*
 * Help text for "btrfs check", as a NULL-terminated array of lines: the
 * synopsis, a one-line summary, the longer description, an empty separator
 * line, and then one or more lines per supported option.
 * NOTE(review): presumably consumed by the shared usage/help printer, which
 * would rely on this ordering - confirm against help.h.
 */
14261 const char * const cmd_check_usage[] = {
14262         "btrfs check [options] <device>",
14263         "Check structural integrity of a filesystem (unmounted).",
14264         "Check structural integrity of an unmounted filesystem. Verify internal",
14265         "trees' consistency and item connectivity. In the repair mode try to",
14266         "fix the problems found. ",
14267         "WARNING: the repair mode is considered dangerous",
14268         "",
14269         "-s|--super <superblock>     use this superblock copy",
14270         "-b|--backup                 use the first valid backup root copy",
14271         "--force                     skip mount checks, repair is not possible",
14272         "--repair                    try to repair the filesystem",
14273         "--readonly                  run in read-only mode (default)",
14274         "--init-csum-tree            create a new CRC tree",
14275         "--init-extent-tree          create a new extent tree",
14276         "--mode <MODE>               allows choice of memory/IO trade-offs",
14277         "                            where MODE is one of:",
14278         "                            original - read inodes and extents to memory (requires",
14279         "                                       more memory, does less IO)",
14280         "                            lowmem   - try to use less memory but read blocks again",
14281         "                                       when needed",
14282         "--check-data-csum           verify checksums of data blocks",
14283         "-Q|--qgroup-report          print a report on qgroup consistency",
14284         "-E|--subvol-extents <subvolid>",
14285         "                            print subvolume extents and sharing state",
14286         "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
14287         "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
14288         "-p|--progress               indicate progress",
14289         "--clear-space-cache v1|v2   clear space cache for v1 or v2",
14290         NULL
14291 };
14292
14293 int cmd_check(int argc, char **argv)
14294 {
14295         struct cache_tree root_cache;
14296         struct btrfs_root *root;
14297         struct btrfs_fs_info *info;
14298         u64 bytenr = 0;
14299         u64 subvolid = 0;
14300         u64 tree_root_bytenr = 0;
14301         u64 chunk_root_bytenr = 0;
14302         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14303         int ret = 0;
14304         int err = 0;
14305         u64 num;
14306         int init_csum_tree = 0;
14307         int readonly = 0;
14308         int clear_space_cache = 0;
14309         int qgroup_report = 0;
14310         int qgroups_repaired = 0;
14311         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
14312         int force = 0;
14313
14314         while(1) {
14315                 int c;
14316                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14317                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14318                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14319                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14320                         GETOPT_VAL_FORCE };
14321                 static const struct option long_options[] = {
14322                         { "super", required_argument, NULL, 's' },
14323                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14324                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14325                         { "init-csum-tree", no_argument, NULL,
14326                                 GETOPT_VAL_INIT_CSUM },
14327                         { "init-extent-tree", no_argument, NULL,
14328                                 GETOPT_VAL_INIT_EXTENT },
14329                         { "check-data-csum", no_argument, NULL,
14330                                 GETOPT_VAL_CHECK_CSUM },
14331                         { "backup", no_argument, NULL, 'b' },
14332                         { "subvol-extents", required_argument, NULL, 'E' },
14333                         { "qgroup-report", no_argument, NULL, 'Q' },
14334                         { "tree-root", required_argument, NULL, 'r' },
14335                         { "chunk-root", required_argument, NULL,
14336                                 GETOPT_VAL_CHUNK_TREE },
14337                         { "progress", no_argument, NULL, 'p' },
14338                         { "mode", required_argument, NULL,
14339                                 GETOPT_VAL_MODE },
14340                         { "clear-space-cache", required_argument, NULL,
14341                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
14342                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14343                         { NULL, 0, NULL, 0}
14344                 };
14345
14346                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14347                 if (c < 0)
14348                         break;
14349                 switch(c) {
14350                         case 'a': /* ignored */ break;
14351                         case 'b':
14352                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14353                                 break;
14354                         case 's':
14355                                 num = arg_strtou64(optarg);
14356                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14357                                         error(
14358                                         "super mirror should be less than %d",
14359                                                 BTRFS_SUPER_MIRROR_MAX);
14360                                         exit(1);
14361                                 }
14362                                 bytenr = btrfs_sb_offset(((int)num));
14363                                 printf("using SB copy %llu, bytenr %llu\n", num,
14364                                        (unsigned long long)bytenr);
14365                                 break;
14366                         case 'Q':
14367                                 qgroup_report = 1;
14368                                 break;
14369                         case 'E':
14370                                 subvolid = arg_strtou64(optarg);
14371                                 break;
14372                         case 'r':
14373                                 tree_root_bytenr = arg_strtou64(optarg);
14374                                 break;
14375                         case GETOPT_VAL_CHUNK_TREE:
14376                                 chunk_root_bytenr = arg_strtou64(optarg);
14377                                 break;
14378                         case 'p':
14379                                 ctx.progress_enabled = true;
14380                                 break;
14381                         case '?':
14382                         case 'h':
14383                                 usage(cmd_check_usage);
14384                         case GETOPT_VAL_REPAIR:
14385                                 printf("enabling repair mode\n");
14386                                 repair = 1;
14387                                 ctree_flags |= OPEN_CTREE_WRITES;
14388                                 break;
14389                         case GETOPT_VAL_READONLY:
14390                                 readonly = 1;
14391                                 break;
14392                         case GETOPT_VAL_INIT_CSUM:
14393                                 printf("Creating a new CRC tree\n");
14394                                 init_csum_tree = 1;
14395                                 repair = 1;
14396                                 ctree_flags |= OPEN_CTREE_WRITES;
14397                                 break;
14398                         case GETOPT_VAL_INIT_EXTENT:
14399                                 init_extent_tree = 1;
14400                                 ctree_flags |= (OPEN_CTREE_WRITES |
14401                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
14402                                 repair = 1;
14403                                 break;
14404                         case GETOPT_VAL_CHECK_CSUM:
14405                                 check_data_csum = 1;
14406                                 break;
14407                         case GETOPT_VAL_MODE:
14408                                 check_mode = parse_check_mode(optarg);
14409                                 if (check_mode == CHECK_MODE_UNKNOWN) {
14410                                         error("unknown mode: %s", optarg);
14411                                         exit(1);
14412                                 }
14413                                 break;
14414                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
14415                                 if (strcmp(optarg, "v1") == 0) {
14416                                         clear_space_cache = 1;
14417                                 } else if (strcmp(optarg, "v2") == 0) {
14418                                         clear_space_cache = 2;
14419                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14420                                 } else {
14421                                         error(
14422                 "invalid argument to --clear-space-cache, must be v1 or v2");
14423                                         exit(1);
14424                                 }
14425                                 ctree_flags |= OPEN_CTREE_WRITES;
14426                                 break;
14427                         case GETOPT_VAL_FORCE:
14428                                 force = 1;
14429                                 break;
14430                 }
14431         }
14432
14433         if (check_argc_exact(argc - optind, 1))
14434                 usage(cmd_check_usage);
14435
14436         if (ctx.progress_enabled) {
14437                 ctx.tp = TASK_NOTHING;
14438                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14439         }
14440
14441         /* This check is the only reason for --readonly to exist */
14442         if (readonly && repair) {
14443                 error("repair options are not compatible with --readonly");
14444                 exit(1);
14445         }
14446
14447         /*
14448          * experimental and dangerous
14449          */
14450         if (repair && check_mode == CHECK_MODE_LOWMEM)
14451                 warning("low-memory mode repair support is only partial");
14452
14453         radix_tree_init();
14454         cache_tree_init(&root_cache);
14455
14456         ret = check_mounted(argv[optind]);
14457         if (!force) {
14458                 if (ret < 0) {
14459                         error("could not check mount status: %s",
14460                                         strerror(-ret));
14461                         err |= !!ret;
14462                         goto err_out;
14463                 } else if (ret) {
14464                         error(
14465 "%s is currently mounted, use --force if you really intend to check the filesystem",
14466                                 argv[optind]);
14467                         ret = -EBUSY;
14468                         err |= !!ret;
14469                         goto err_out;
14470                 }
14471         } else {
14472                 if (repair) {
14473                         error("repair and --force is not yet supported");
14474                         ret = 1;
14475                         err |= !!ret;
14476                         goto err_out;
14477                 }
14478                 if (ret < 0) {
14479                         warning(
14480 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14481                                 argv[optind]);
14482                 } else if (ret) {
14483                         warning(
14484                         "filesystem mounted, continuing because of --force");
14485                 }
14486                 /* A block device is mounted in exclusive mode by kernel */
14487                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14488         }
14489
14490         /* only allow partial opening under repair mode */
14491         if (repair)
14492                 ctree_flags |= OPEN_CTREE_PARTIAL;
14493
14494         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14495                                   chunk_root_bytenr, ctree_flags);
14496         if (!info) {
14497                 error("cannot open file system");
14498                 ret = -EIO;
14499                 err |= !!ret;
14500                 goto err_out;
14501         }
14502
14503         global_info = info;
14504         root = info->fs_root;
14505         uuid_unparse(info->super_copy->fsid, uuidbuf);
14506
14507         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14508
14509         /*
14510          * Check the bare minimum before starting anything else that could rely
14511          * on it, namely the tree roots, any local consistency checks
14512          */
14513         if (!extent_buffer_uptodate(info->tree_root->node) ||
14514             !extent_buffer_uptodate(info->dev_root->node) ||
14515             !extent_buffer_uptodate(info->chunk_root->node)) {
14516                 error("critical roots corrupted, unable to check the filesystem");
14517                 err |= !!ret;
14518                 ret = -EIO;
14519                 goto close_out;
14520         }
14521
14522         if (clear_space_cache) {
14523                 ret = do_clear_free_space_cache(info, clear_space_cache);
14524                 err |= !!ret;
14525                 goto close_out;
14526         }
14527
14528         /*
14529          * repair mode will force us to commit transaction which
14530          * will make us fail to load log tree when mounting.
14531          */
14532         if (repair && btrfs_super_log_root(info->super_copy)) {
14533                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14534                 if (!ret) {
14535                         ret = 1;
14536                         err |= !!ret;
14537                         goto close_out;
14538                 }
14539                 ret = zero_log_tree(root);
14540                 err |= !!ret;
14541                 if (ret) {
14542                         error("failed to zero log tree: %d", ret);
14543                         goto close_out;
14544                 }
14545         }
14546
14547         if (qgroup_report) {
14548                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14549                        uuidbuf);
14550                 ret = qgroup_verify_all(info);
14551                 err |= !!ret;
14552                 if (ret == 0)
14553                         report_qgroups(1);
14554                 goto close_out;
14555         }
14556         if (subvolid) {
14557                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14558                        subvolid, argv[optind], uuidbuf);
14559                 ret = print_extent_state(info, subvolid);
14560                 err |= !!ret;
14561                 goto close_out;
14562         }
14563
14564         if (init_extent_tree || init_csum_tree) {
14565                 struct btrfs_trans_handle *trans;
14566
14567                 trans = btrfs_start_transaction(info->extent_root, 0);
14568                 if (IS_ERR(trans)) {
14569                         error("error starting transaction");
14570                         ret = PTR_ERR(trans);
14571                         err |= !!ret;
14572                         goto close_out;
14573                 }
14574
14575                 if (init_extent_tree) {
14576                         printf("Creating a new extent tree\n");
14577                         ret = reinit_extent_tree(trans, info);
14578                         err |= !!ret;
14579                         if (ret)
14580                                 goto close_out;
14581                 }
14582
14583                 if (init_csum_tree) {
14584                         printf("Reinitialize checksum tree\n");
14585                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14586                         if (ret) {
14587                                 error("checksum tree initialization failed: %d",
14588                                                 ret);
14589                                 ret = -EIO;
14590                                 err |= !!ret;
14591                                 goto close_out;
14592                         }
14593
14594                         ret = fill_csum_tree(trans, info->csum_root,
14595                                              init_extent_tree);
14596                         err |= !!ret;
14597                         if (ret) {
14598                                 error("checksum tree refilling failed: %d", ret);
14599                                 return -EIO;
14600                         }
14601                 }
14602                 /*
14603                  * Ok now we commit and run the normal fsck, which will add
14604                  * extent entries for all of the items it finds.
14605                  */
14606                 ret = btrfs_commit_transaction(trans, info->extent_root);
14607                 err |= !!ret;
14608                 if (ret)
14609                         goto close_out;
14610         }
14611         if (!extent_buffer_uptodate(info->extent_root->node)) {
14612                 error("critical: extent_root, unable to check the filesystem");
14613                 ret = -EIO;
14614                 err |= !!ret;
14615                 goto close_out;
14616         }
14617         if (!extent_buffer_uptodate(info->csum_root->node)) {
14618                 error("critical: csum_root, unable to check the filesystem");
14619                 ret = -EIO;
14620                 err |= !!ret;
14621                 goto close_out;
14622         }
14623
14624         if (!init_extent_tree) {
14625                 ret = repair_root_items(info);
14626                 if (ret < 0) {
14627                         err = !!ret;
14628                         error("failed to repair root items: %s", strerror(-ret));
14629                         goto close_out;
14630                 }
14631                 if (repair) {
14632                         fprintf(stderr, "Fixed %d roots.\n", ret);
14633                         ret = 0;
14634                 } else if (ret > 0) {
14635                         fprintf(stderr,
14636                                 "Found %d roots with an outdated root item.\n",
14637                                 ret);
14638                         fprintf(stderr,
14639         "Please run a filesystem check with the option --repair to fix them.\n");
14640                         ret = 1;
14641                         err |= ret;
14642                         goto close_out;
14643                 }
14644         }
14645
14646         ret = do_check_chunks_and_extents(info);
14647         err |= !!ret;
14648         if (ret)
14649                 error(
14650                 "errors found in extent allocation tree or chunk allocation");
14651
14652         /* Only re-check super size after we checked and repaired the fs */
14653         err |= !is_super_size_valid(info);
14654
14655         if (!ctx.progress_enabled) {
14656                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14657                         fprintf(stderr, "checking free space tree\n");
14658                 else
14659                         fprintf(stderr, "checking free space cache\n");
14660         }
14661         ret = check_space_cache(root);
14662         err |= !!ret;
14663         if (ret) {
14664                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14665                         error("errors found in free space tree");
14666                 else
14667                         error("errors found in free space cache");
14668                 goto out;
14669         }
14670
14671         /*
14672          * We used to have to have these hole extents in between our real
14673          * extents so if we don't have this flag set we need to make sure there
14674          * are no gaps in the file extents for inodes, otherwise we can just
14675          * ignore it when this happens.
14676          */
14677         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14678         ret = do_check_fs_roots(info, &root_cache);
14679         err |= !!ret;
14680         if (ret) {
14681                 error("errors found in fs roots");
14682                 goto out;
14683         }
14684
14685         fprintf(stderr, "checking csums\n");
14686         ret = check_csums(root);
14687         err |= !!ret;
14688         if (ret) {
14689                 error("errors found in csum tree");
14690                 goto out;
14691         }
14692
14693         fprintf(stderr, "checking root refs\n");
14694         /* For low memory mode, check_fs_roots_v2 handles root refs */
14695         if (check_mode != CHECK_MODE_LOWMEM) {
14696                 ret = check_root_refs(root, &root_cache);
14697                 err |= !!ret;
14698                 if (ret) {
14699                         error("errors found in root refs");
14700                         goto out;
14701                 }
14702         }
14703
14704         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14705                 struct extent_buffer *eb;
14706
14707                 eb = list_first_entry(&root->fs_info->recow_ebs,
14708                                       struct extent_buffer, recow);
14709                 list_del_init(&eb->recow);
14710                 ret = recow_extent_buffer(root, eb);
14711                 err |= !!ret;
14712                 if (ret) {
14713                         error("fails to fix transid errors");
14714                         break;
14715                 }
14716         }
14717
14718         while (!list_empty(&delete_items)) {
14719                 struct bad_item *bad;
14720
14721                 bad = list_first_entry(&delete_items, struct bad_item, list);
14722                 list_del_init(&bad->list);
14723                 if (repair) {
14724                         ret = delete_bad_item(root, bad);
14725                         err |= !!ret;
14726                 }
14727                 free(bad);
14728         }
14729
14730         if (info->quota_enabled) {
14731                 fprintf(stderr, "checking quota groups\n");
14732                 ret = qgroup_verify_all(info);
14733                 err |= !!ret;
14734                 if (ret) {
14735                         error("failed to check quota groups");
14736                         goto out;
14737                 }
14738                 report_qgroups(0);
14739                 ret = repair_qgroups(info, &qgroups_repaired);
14740                 err |= !!ret;
14741                 if (err) {
14742                         error("failed to repair quota groups");
14743                         goto out;
14744                 }
14745                 ret = 0;
14746         }
14747
14748         if (!list_empty(&root->fs_info->recow_ebs)) {
14749                 error("transid errors in file system");
14750                 ret = 1;
14751                 err |= !!ret;
14752         }
14753 out:
14754         printf("found %llu bytes used, ",
14755                (unsigned long long)bytes_used);
14756         if (err)
14757                 printf("error(s) found\n");
14758         else
14759                 printf("no error found\n");
14760         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14761         printf("total tree bytes: %llu\n",
14762                (unsigned long long)total_btree_bytes);
14763         printf("total fs tree bytes: %llu\n",
14764                (unsigned long long)total_fs_tree_bytes);
14765         printf("total extent tree bytes: %llu\n",
14766                (unsigned long long)total_extent_tree_bytes);
14767         printf("btree space waste bytes: %llu\n",
14768                (unsigned long long)btree_space_waste);
14769         printf("file data blocks allocated: %llu\n referenced %llu\n",
14770                 (unsigned long long)data_bytes_allocated,
14771                 (unsigned long long)data_bytes_referenced);
14772
14773         free_qgroup_counts();
14774         free_root_recs_tree(&root_cache);
14775 close_out:
14776         close_ctree(root);
14777 err_out:
14778         if (ctx.progress_enabled)
14779                 task_deinit(ctx.info);
14780
14781         return err;
14782 }