btrfs-progs: check: Move reset_cached_block_groups to check/common.c
[platform/upstream/btrfs-progs.git] / check / main.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46 #include "check/original.h"
47 #include "check/lowmem.h"
48 #include "check/common.h"
49
50 enum task_position {
51         TASK_EXTENTS,
52         TASK_FREE_SPACE,
53         TASK_FS_ROOTS,
54         TASK_NOTHING, /* have to be the last element */
55 };
56
57 struct task_ctx {
58         int progress_enabled;
59         enum task_position tp;
60
61         struct task_info *info;
62 };
63
64 u64 bytes_used = 0;
65 u64 total_csum_bytes = 0;
66 u64 total_btree_bytes = 0;
67 u64 total_fs_tree_bytes = 0;
68 u64 total_extent_tree_bytes = 0;
69 u64 btree_space_waste = 0;
70 u64 data_bytes_allocated = 0;
71 u64 data_bytes_referenced = 0;
72 LIST_HEAD(duplicate_extents);
73 LIST_HEAD(delete_items);
74 int no_holes = 0;
75 int init_extent_tree = 0;
76 int check_data_csum = 0;
77 struct btrfs_fs_info *global_info;
78 struct task_ctx ctx = { 0 };
79 struct cache_tree *roots_info_cache = NULL;
80
81 enum btrfs_check_mode {
82         CHECK_MODE_ORIGINAL,
83         CHECK_MODE_LOWMEM,
84         CHECK_MODE_UNKNOWN,
85         CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
86 };
87
88 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
89
90 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
91 {
92         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
93         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
94         struct data_backref *back1 = to_data_backref(ext1);
95         struct data_backref *back2 = to_data_backref(ext2);
96
97         WARN_ON(!ext1->is_data);
98         WARN_ON(!ext2->is_data);
99
100         /* parent and root are a union, so this covers both */
101         if (back1->parent > back2->parent)
102                 return 1;
103         if (back1->parent < back2->parent)
104                 return -1;
105
106         /* This is a full backref and the parents match. */
107         if (back1->node.full_backref)
108                 return 0;
109
110         if (back1->owner > back2->owner)
111                 return 1;
112         if (back1->owner < back2->owner)
113                 return -1;
114
115         if (back1->offset > back2->offset)
116                 return 1;
117         if (back1->offset < back2->offset)
118                 return -1;
119
120         if (back1->found_ref && back2->found_ref) {
121                 if (back1->disk_bytenr > back2->disk_bytenr)
122                         return 1;
123                 if (back1->disk_bytenr < back2->disk_bytenr)
124                         return -1;
125
126                 if (back1->bytes > back2->bytes)
127                         return 1;
128                 if (back1->bytes < back2->bytes)
129                         return -1;
130         }
131
132         return 0;
133 }
134
135 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
136 {
137         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
138         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
139         struct tree_backref *back1 = to_tree_backref(ext1);
140         struct tree_backref *back2 = to_tree_backref(ext2);
141
142         WARN_ON(ext1->is_data);
143         WARN_ON(ext2->is_data);
144
145         /* parent and root are a union, so this covers both */
146         if (back1->parent > back2->parent)
147                 return 1;
148         if (back1->parent < back2->parent)
149                 return -1;
150
151         return 0;
152 }
153
154 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
155 {
156         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
157         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
158
159         if (ext1->is_data > ext2->is_data)
160                 return 1;
161
162         if (ext1->is_data < ext2->is_data)
163                 return -1;
164
165         if (ext1->full_backref > ext2->full_backref)
166                 return 1;
167         if (ext1->full_backref < ext2->full_backref)
168                 return -1;
169
170         if (ext1->is_data)
171                 return compare_data_backref(node1, node2);
172         else
173                 return compare_tree_backref(node1, node2);
174 }
175
176
177 static void *print_status_check(void *p)
178 {
179         struct task_ctx *priv = p;
180         const char work_indicator[] = { '.', 'o', 'O', 'o' };
181         uint32_t count = 0;
182         static char *task_position_string[] = {
183                 "checking extents",
184                 "checking free space cache",
185                 "checking fs roots",
186         };
187
188         task_period_start(priv->info, 1000 /* 1s */);
189
190         if (priv->tp == TASK_NOTHING)
191                 return NULL;
192
193         while (1) {
194                 printf("%s [%c]\r", task_position_string[priv->tp],
195                                 work_indicator[count % 4]);
196                 count++;
197                 fflush(stdout);
198                 task_period_wait(priv->info);
199         }
200         return NULL;
201 }
202
203 static int print_status_return(void *p)
204 {
205         printf("\n");
206         fflush(stdout);
207
208         return 0;
209 }
210
211 static enum btrfs_check_mode parse_check_mode(const char *str)
212 {
213         if (strcmp(str, "lowmem") == 0)
214                 return CHECK_MODE_LOWMEM;
215         if (strcmp(str, "orig") == 0)
216                 return CHECK_MODE_ORIGINAL;
217         if (strcmp(str, "original") == 0)
218                 return CHECK_MODE_ORIGINAL;
219
220         return CHECK_MODE_UNKNOWN;
221 }
222
223 /* Compatible function to allow reuse of old codes */
224 static u64 first_extent_gap(struct rb_root *holes)
225 {
226         struct file_extent_hole *hole;
227
228         if (RB_EMPTY_ROOT(holes))
229                 return (u64)-1;
230
231         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
232         return hole->start;
233 }
234
235 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
236 {
237         struct file_extent_hole *hole1;
238         struct file_extent_hole *hole2;
239
240         hole1 = rb_entry(node1, struct file_extent_hole, node);
241         hole2 = rb_entry(node2, struct file_extent_hole, node);
242
243         if (hole1->start > hole2->start)
244                 return -1;
245         if (hole1->start < hole2->start)
246                 return 1;
247         /* Now hole1->start == hole2->start */
248         if (hole1->len >= hole2->len)
249                 /*
250                  * Hole 1 will be merge center
251                  * Same hole will be merged later
252                  */
253                 return -1;
254         /* Hole 2 will be merge center */
255         return 1;
256 }
257
258 /*
259  * Add a hole to the record
260  *
261  * This will do hole merge for copy_file_extent_holes(),
262  * which will ensure there won't be continuous holes.
263  */
264 static int add_file_extent_hole(struct rb_root *holes,
265                                 u64 start, u64 len)
266 {
267         struct file_extent_hole *hole;
268         struct file_extent_hole *prev = NULL;
269         struct file_extent_hole *next = NULL;
270
271         hole = malloc(sizeof(*hole));
272         if (!hole)
273                 return -ENOMEM;
274         hole->start = start;
275         hole->len = len;
276         /* Since compare will not return 0, no -EEXIST will happen */
277         rb_insert(holes, &hole->node, compare_hole);
278
279         /* simple merge with previous hole */
280         if (rb_prev(&hole->node))
281                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
282                                 node);
283         if (prev && prev->start + prev->len >= hole->start) {
284                 hole->len = hole->start + hole->len - prev->start;
285                 hole->start = prev->start;
286                 rb_erase(&prev->node, holes);
287                 free(prev);
288                 prev = NULL;
289         }
290
291         /* iterate merge with next holes */
292         while (1) {
293                 if (!rb_next(&hole->node))
294                         break;
295                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
296                                         node);
297                 if (hole->start + hole->len >= next->start) {
298                         if (hole->start + hole->len <= next->start + next->len)
299                                 hole->len = next->start + next->len -
300                                             hole->start;
301                         rb_erase(&next->node, holes);
302                         free(next);
303                         next = NULL;
304                 } else
305                         break;
306         }
307         return 0;
308 }
309
310 static int compare_hole_range(struct rb_node *node, void *data)
311 {
312         struct file_extent_hole *hole;
313         u64 start;
314
315         hole = (struct file_extent_hole *)data;
316         start = hole->start;
317
318         hole = rb_entry(node, struct file_extent_hole, node);
319         if (start < hole->start)
320                 return -1;
321         if (start >= hole->start && start < hole->start + hole->len)
322                 return 0;
323         return 1;
324 }
325
326 /*
327  * Delete a hole in the record
328  *
329  * This will do the hole split and is much restrict than add.
330  */
331 static int del_file_extent_hole(struct rb_root *holes,
332                                 u64 start, u64 len)
333 {
334         struct file_extent_hole *hole;
335         struct file_extent_hole tmp;
336         u64 prev_start = 0;
337         u64 prev_len = 0;
338         u64 next_start = 0;
339         u64 next_len = 0;
340         struct rb_node *node;
341         int have_prev = 0;
342         int have_next = 0;
343         int ret = 0;
344
345         tmp.start = start;
346         tmp.len = len;
347         node = rb_search(holes, &tmp, compare_hole_range, NULL);
348         if (!node)
349                 return -EEXIST;
350         hole = rb_entry(node, struct file_extent_hole, node);
351         if (start + len > hole->start + hole->len)
352                 return -EEXIST;
353
354         /*
355          * Now there will be no overlap, delete the hole and re-add the
356          * split(s) if they exists.
357          */
358         if (start > hole->start) {
359                 prev_start = hole->start;
360                 prev_len = start - hole->start;
361                 have_prev = 1;
362         }
363         if (hole->start + hole->len > start + len) {
364                 next_start = start + len;
365                 next_len = hole->start + hole->len - start - len;
366                 have_next = 1;
367         }
368         rb_erase(node, holes);
369         free(hole);
370         if (have_prev) {
371                 ret = add_file_extent_hole(holes, prev_start, prev_len);
372                 if (ret < 0)
373                         return ret;
374         }
375         if (have_next) {
376                 ret = add_file_extent_hole(holes, next_start, next_len);
377                 if (ret < 0)
378                         return ret;
379         }
380         return 0;
381 }
382
383 static int copy_file_extent_holes(struct rb_root *dst,
384                                   struct rb_root *src)
385 {
386         struct file_extent_hole *hole;
387         struct rb_node *node;
388         int ret = 0;
389
390         node = rb_first(src);
391         while (node) {
392                 hole = rb_entry(node, struct file_extent_hole, node);
393                 ret = add_file_extent_hole(dst, hole->start, hole->len);
394                 if (ret)
395                         break;
396                 node = rb_next(node);
397         }
398         return ret;
399 }
400
401 static void free_file_extent_holes(struct rb_root *holes)
402 {
403         struct rb_node *node;
404         struct file_extent_hole *hole;
405
406         node = rb_first(holes);
407         while (node) {
408                 hole = rb_entry(node, struct file_extent_hole, node);
409                 rb_erase(node, holes);
410                 free(hole);
411                 node = rb_first(holes);
412         }
413 }
414
415 static void record_root_in_trans(struct btrfs_trans_handle *trans,
416                                  struct btrfs_root *root)
417 {
418         if (root->last_trans != trans->transid) {
419                 root->track_dirty = 1;
420                 root->last_trans = trans->transid;
421                 root->commit_root = root->node;
422                 extent_buffer_get(root->node);
423         }
424 }
425
426 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
427 {
428         struct device_record *rec1;
429         struct device_record *rec2;
430
431         rec1 = rb_entry(node1, struct device_record, node);
432         rec2 = rb_entry(node2, struct device_record, node);
433         if (rec1->devid > rec2->devid)
434                 return -1;
435         else if (rec1->devid < rec2->devid)
436                 return 1;
437         else
438                 return 0;
439 }
440
441 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
442 {
443         struct inode_record *rec;
444         struct inode_backref *backref;
445         struct inode_backref *orig;
446         struct inode_backref *tmp;
447         struct orphan_data_extent *src_orphan;
448         struct orphan_data_extent *dst_orphan;
449         struct rb_node *rb;
450         size_t size;
451         int ret;
452
453         rec = malloc(sizeof(*rec));
454         if (!rec)
455                 return ERR_PTR(-ENOMEM);
456         memcpy(rec, orig_rec, sizeof(*rec));
457         rec->refs = 1;
458         INIT_LIST_HEAD(&rec->backrefs);
459         INIT_LIST_HEAD(&rec->orphan_extents);
460         rec->holes = RB_ROOT;
461
462         list_for_each_entry(orig, &orig_rec->backrefs, list) {
463                 size = sizeof(*orig) + orig->namelen + 1;
464                 backref = malloc(size);
465                 if (!backref) {
466                         ret = -ENOMEM;
467                         goto cleanup;
468                 }
469                 memcpy(backref, orig, size);
470                 list_add_tail(&backref->list, &rec->backrefs);
471         }
472         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
473                 dst_orphan = malloc(sizeof(*dst_orphan));
474                 if (!dst_orphan) {
475                         ret = -ENOMEM;
476                         goto cleanup;
477                 }
478                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
479                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
480         }
481         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
482         if (ret < 0)
483                 goto cleanup_rb;
484
485         return rec;
486
487 cleanup_rb:
488         rb = rb_first(&rec->holes);
489         while (rb) {
490                 struct file_extent_hole *hole;
491
492                 hole = rb_entry(rb, struct file_extent_hole, node);
493                 rb = rb_next(rb);
494                 free(hole);
495         }
496
497 cleanup:
498         if (!list_empty(&rec->backrefs))
499                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
500                         list_del(&orig->list);
501                         free(orig);
502                 }
503
504         if (!list_empty(&rec->orphan_extents))
505                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
506                         list_del(&orig->list);
507                         free(orig);
508                 }
509
510         free(rec);
511
512         return ERR_PTR(ret);
513 }
514
515 static void print_orphan_data_extents(struct list_head *orphan_extents,
516                                       u64 objectid)
517 {
518         struct orphan_data_extent *orphan;
519
520         if (list_empty(orphan_extents))
521                 return;
522         printf("The following data extent is lost in tree %llu:\n",
523                objectid);
524         list_for_each_entry(orphan, orphan_extents, list) {
525                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
526                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
527                        orphan->disk_len);
528         }
529 }
530
531 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
532 {
533         u64 root_objectid = root->root_key.objectid;
534         int errors = rec->errors;
535
536         if (!errors)
537                 return;
538         /* reloc root errors, we print its corresponding fs root objectid*/
539         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
540                 root_objectid = root->root_key.offset;
541                 fprintf(stderr, "reloc");
542         }
543         fprintf(stderr, "root %llu inode %llu errors %x",
544                 (unsigned long long) root_objectid,
545                 (unsigned long long) rec->ino, rec->errors);
546
547         if (errors & I_ERR_NO_INODE_ITEM)
548                 fprintf(stderr, ", no inode item");
549         if (errors & I_ERR_NO_ORPHAN_ITEM)
550                 fprintf(stderr, ", no orphan item");
551         if (errors & I_ERR_DUP_INODE_ITEM)
552                 fprintf(stderr, ", dup inode item");
553         if (errors & I_ERR_DUP_DIR_INDEX)
554                 fprintf(stderr, ", dup dir index");
555         if (errors & I_ERR_ODD_DIR_ITEM)
556                 fprintf(stderr, ", odd dir item");
557         if (errors & I_ERR_ODD_FILE_EXTENT)
558                 fprintf(stderr, ", odd file extent");
559         if (errors & I_ERR_BAD_FILE_EXTENT)
560                 fprintf(stderr, ", bad file extent");
561         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
562                 fprintf(stderr, ", file extent overlap");
563         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
564                 fprintf(stderr, ", file extent discount");
565         if (errors & I_ERR_DIR_ISIZE_WRONG)
566                 fprintf(stderr, ", dir isize wrong");
567         if (errors & I_ERR_FILE_NBYTES_WRONG)
568                 fprintf(stderr, ", nbytes wrong");
569         if (errors & I_ERR_ODD_CSUM_ITEM)
570                 fprintf(stderr, ", odd csum item");
571         if (errors & I_ERR_SOME_CSUM_MISSING)
572                 fprintf(stderr, ", some csum missing");
573         if (errors & I_ERR_LINK_COUNT_WRONG)
574                 fprintf(stderr, ", link count wrong");
575         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
576                 fprintf(stderr, ", orphan file extent");
577         fprintf(stderr, "\n");
578         /* Print the orphan extents if needed */
579         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
580                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
581
582         /* Print the holes if needed */
583         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
584                 struct file_extent_hole *hole;
585                 struct rb_node *node;
586                 int found = 0;
587
588                 node = rb_first(&rec->holes);
589                 fprintf(stderr, "Found file extent holes:\n");
590                 while (node) {
591                         found = 1;
592                         hole = rb_entry(node, struct file_extent_hole, node);
593                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
594                                 hole->start, hole->len);
595                         node = rb_next(node);
596                 }
597                 if (!found)
598                         fprintf(stderr, "\tstart: 0, len: %llu\n",
599                                 round_up(rec->isize,
600                                          root->fs_info->sectorsize));
601         }
602 }
603
604 static void print_ref_error(int errors)
605 {
606         if (errors & REF_ERR_NO_DIR_ITEM)
607                 fprintf(stderr, ", no dir item");
608         if (errors & REF_ERR_NO_DIR_INDEX)
609                 fprintf(stderr, ", no dir index");
610         if (errors & REF_ERR_NO_INODE_REF)
611                 fprintf(stderr, ", no inode ref");
612         if (errors & REF_ERR_DUP_DIR_ITEM)
613                 fprintf(stderr, ", dup dir item");
614         if (errors & REF_ERR_DUP_DIR_INDEX)
615                 fprintf(stderr, ", dup dir index");
616         if (errors & REF_ERR_DUP_INODE_REF)
617                 fprintf(stderr, ", dup inode ref");
618         if (errors & REF_ERR_INDEX_UNMATCH)
619                 fprintf(stderr, ", index mismatch");
620         if (errors & REF_ERR_FILETYPE_UNMATCH)
621                 fprintf(stderr, ", filetype mismatch");
622         if (errors & REF_ERR_NAME_TOO_LONG)
623                 fprintf(stderr, ", name too long");
624         if (errors & REF_ERR_NO_ROOT_REF)
625                 fprintf(stderr, ", no root ref");
626         if (errors & REF_ERR_NO_ROOT_BACKREF)
627                 fprintf(stderr, ", no root backref");
628         if (errors & REF_ERR_DUP_ROOT_REF)
629                 fprintf(stderr, ", dup root ref");
630         if (errors & REF_ERR_DUP_ROOT_BACKREF)
631                 fprintf(stderr, ", dup root backref");
632         fprintf(stderr, "\n");
633 }
634
635 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
636                                           u64 ino, int mod)
637 {
638         struct ptr_node *node;
639         struct cache_extent *cache;
640         struct inode_record *rec = NULL;
641         int ret;
642
643         cache = lookup_cache_extent(inode_cache, ino, 1);
644         if (cache) {
645                 node = container_of(cache, struct ptr_node, cache);
646                 rec = node->data;
647                 if (mod && rec->refs > 1) {
648                         node->data = clone_inode_rec(rec);
649                         if (IS_ERR(node->data))
650                                 return node->data;
651                         rec->refs--;
652                         rec = node->data;
653                 }
654         } else if (mod) {
655                 rec = calloc(1, sizeof(*rec));
656                 if (!rec)
657                         return ERR_PTR(-ENOMEM);
658                 rec->ino = ino;
659                 rec->extent_start = (u64)-1;
660                 rec->refs = 1;
661                 INIT_LIST_HEAD(&rec->backrefs);
662                 INIT_LIST_HEAD(&rec->orphan_extents);
663                 rec->holes = RB_ROOT;
664
665                 node = malloc(sizeof(*node));
666                 if (!node) {
667                         free(rec);
668                         return ERR_PTR(-ENOMEM);
669                 }
670                 node->cache.start = ino;
671                 node->cache.size = 1;
672                 node->data = rec;
673
674                 if (ino == BTRFS_FREE_INO_OBJECTID)
675                         rec->found_link = 1;
676
677                 ret = insert_cache_extent(inode_cache, &node->cache);
678                 if (ret)
679                         return ERR_PTR(-EEXIST);
680         }
681         return rec;
682 }
683
684 static void free_orphan_data_extents(struct list_head *orphan_extents)
685 {
686         struct orphan_data_extent *orphan;
687
688         while (!list_empty(orphan_extents)) {
689                 orphan = list_entry(orphan_extents->next,
690                                     struct orphan_data_extent, list);
691                 list_del(&orphan->list);
692                 free(orphan);
693         }
694 }
695
696 static void free_inode_rec(struct inode_record *rec)
697 {
698         struct inode_backref *backref;
699
700         if (--rec->refs > 0)
701                 return;
702
703         while (!list_empty(&rec->backrefs)) {
704                 backref = to_inode_backref(rec->backrefs.next);
705                 list_del(&backref->list);
706                 free(backref);
707         }
708         free_orphan_data_extents(&rec->orphan_extents);
709         free_file_extent_holes(&rec->holes);
710         free(rec);
711 }
712
713 static int can_free_inode_rec(struct inode_record *rec)
714 {
715         if (!rec->errors && rec->checked && rec->found_inode_item &&
716             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
717                 return 1;
718         return 0;
719 }
720
721 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
722                                  struct inode_record *rec)
723 {
724         struct cache_extent *cache;
725         struct inode_backref *tmp, *backref;
726         struct ptr_node *node;
727         u8 filetype;
728
729         if (!rec->found_inode_item)
730                 return;
731
732         filetype = imode_to_type(rec->imode);
733         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
734                 if (backref->found_dir_item && backref->found_dir_index) {
735                         if (backref->filetype != filetype)
736                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
737                         if (!backref->errors && backref->found_inode_ref &&
738                             rec->nlink == rec->found_link) {
739                                 list_del(&backref->list);
740                                 free(backref);
741                         }
742                 }
743         }
744
745         if (!rec->checked || rec->merging)
746                 return;
747
748         if (S_ISDIR(rec->imode)) {
749                 if (rec->found_size != rec->isize)
750                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
751                 if (rec->found_file_extent)
752                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
753         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
754                 if (rec->found_dir_item)
755                         rec->errors |= I_ERR_ODD_DIR_ITEM;
756                 if (rec->found_size != rec->nbytes)
757                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
758                 if (rec->nlink > 0 && !no_holes &&
759                     (rec->extent_end < rec->isize ||
760                      first_extent_gap(&rec->holes) < rec->isize))
761                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
762         }
763
764         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
765                 if (rec->found_csum_item && rec->nodatasum)
766                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
767                 if (rec->some_csum_missing && !rec->nodatasum)
768                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
769         }
770
771         BUG_ON(rec->refs != 1);
772         if (can_free_inode_rec(rec)) {
773                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
774                 node = container_of(cache, struct ptr_node, cache);
775                 BUG_ON(node->data != rec);
776                 remove_cache_extent(inode_cache, &node->cache);
777                 free(node);
778                 free_inode_rec(rec);
779         }
780 }
781
782 static int check_orphan_item(struct btrfs_root *root, u64 ino)
783 {
784         struct btrfs_path path;
785         struct btrfs_key key;
786         int ret;
787
788         key.objectid = BTRFS_ORPHAN_OBJECTID;
789         key.type = BTRFS_ORPHAN_ITEM_KEY;
790         key.offset = ino;
791
792         btrfs_init_path(&path);
793         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
794         btrfs_release_path(&path);
795         if (ret > 0)
796                 ret = -ENOENT;
797         return ret;
798 }
799
800 static int process_inode_item(struct extent_buffer *eb,
801                               int slot, struct btrfs_key *key,
802                               struct shared_node *active_node)
803 {
804         struct inode_record *rec;
805         struct btrfs_inode_item *item;
806
807         rec = active_node->current;
808         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
809         if (rec->found_inode_item) {
810                 rec->errors |= I_ERR_DUP_INODE_ITEM;
811                 return 1;
812         }
813         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
814         rec->nlink = btrfs_inode_nlink(eb, item);
815         rec->isize = btrfs_inode_size(eb, item);
816         rec->nbytes = btrfs_inode_nbytes(eb, item);
817         rec->imode = btrfs_inode_mode(eb, item);
818         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
819                 rec->nodatasum = 1;
820         rec->found_inode_item = 1;
821         if (rec->nlink == 0)
822                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
823         maybe_free_inode_rec(&active_node->inode_cache, rec);
824         return 0;
825 }
826
827 static struct inode_backref *get_inode_backref(struct inode_record *rec,
828                                                 const char *name,
829                                                 int namelen, u64 dir)
830 {
831         struct inode_backref *backref;
832
833         list_for_each_entry(backref, &rec->backrefs, list) {
834                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
835                         break;
836                 if (backref->dir != dir || backref->namelen != namelen)
837                         continue;
838                 if (memcmp(name, backref->name, namelen))
839                         continue;
840                 return backref;
841         }
842
843         backref = malloc(sizeof(*backref) + namelen + 1);
844         if (!backref)
845                 return NULL;
846         memset(backref, 0, sizeof(*backref));
847         backref->dir = dir;
848         backref->namelen = namelen;
849         memcpy(backref->name, name, namelen);
850         backref->name[namelen] = '\0';
851         list_add_tail(&backref->list, &rec->backrefs);
852         return backref;
853 }
854
855 static int add_inode_backref(struct cache_tree *inode_cache,
856                              u64 ino, u64 dir, u64 index,
857                              const char *name, int namelen,
858                              u8 filetype, u8 itemtype, int errors)
859 {
860         struct inode_record *rec;
861         struct inode_backref *backref;
862
863         rec = get_inode_rec(inode_cache, ino, 1);
864         BUG_ON(IS_ERR(rec));
865         backref = get_inode_backref(rec, name, namelen, dir);
866         BUG_ON(!backref);
867         if (errors)
868                 backref->errors |= errors;
869         if (itemtype == BTRFS_DIR_INDEX_KEY) {
870                 if (backref->found_dir_index)
871                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
872                 if (backref->found_inode_ref && backref->index != index)
873                         backref->errors |= REF_ERR_INDEX_UNMATCH;
874                 if (backref->found_dir_item && backref->filetype != filetype)
875                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
876
877                 backref->index = index;
878                 backref->filetype = filetype;
879                 backref->found_dir_index = 1;
880         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
881                 rec->found_link++;
882                 if (backref->found_dir_item)
883                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
884                 if (backref->found_dir_index && backref->filetype != filetype)
885                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
886
887                 backref->filetype = filetype;
888                 backref->found_dir_item = 1;
889         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
890                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
891                 if (backref->found_inode_ref)
892                         backref->errors |= REF_ERR_DUP_INODE_REF;
893                 if (backref->found_dir_index && backref->index != index)
894                         backref->errors |= REF_ERR_INDEX_UNMATCH;
895                 else
896                         backref->index = index;
897
898                 backref->ref_type = itemtype;
899                 backref->found_inode_ref = 1;
900         } else {
901                 BUG_ON(1);
902         }
903
904         maybe_free_inode_rec(inode_cache, rec);
905         return 0;
906 }
907
908 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
909                             struct cache_tree *dst_cache)
910 {
911         struct inode_backref *backref;
912         u32 dir_count = 0;
913         int ret = 0;
914
915         dst->merging = 1;
916         list_for_each_entry(backref, &src->backrefs, list) {
917                 if (backref->found_dir_index) {
918                         add_inode_backref(dst_cache, dst->ino, backref->dir,
919                                         backref->index, backref->name,
920                                         backref->namelen, backref->filetype,
921                                         BTRFS_DIR_INDEX_KEY, backref->errors);
922                 }
923                 if (backref->found_dir_item) {
924                         dir_count++;
925                         add_inode_backref(dst_cache, dst->ino,
926                                         backref->dir, 0, backref->name,
927                                         backref->namelen, backref->filetype,
928                                         BTRFS_DIR_ITEM_KEY, backref->errors);
929                 }
930                 if (backref->found_inode_ref) {
931                         add_inode_backref(dst_cache, dst->ino,
932                                         backref->dir, backref->index,
933                                         backref->name, backref->namelen, 0,
934                                         backref->ref_type, backref->errors);
935                 }
936         }
937
938         if (src->found_dir_item)
939                 dst->found_dir_item = 1;
940         if (src->found_file_extent)
941                 dst->found_file_extent = 1;
942         if (src->found_csum_item)
943                 dst->found_csum_item = 1;
944         if (src->some_csum_missing)
945                 dst->some_csum_missing = 1;
946         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
947                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
948                 if (ret < 0)
949                         return ret;
950         }
951
952         BUG_ON(src->found_link < dir_count);
953         dst->found_link += src->found_link - dir_count;
954         dst->found_size += src->found_size;
955         if (src->extent_start != (u64)-1) {
956                 if (dst->extent_start == (u64)-1) {
957                         dst->extent_start = src->extent_start;
958                         dst->extent_end = src->extent_end;
959                 } else {
960                         if (dst->extent_end > src->extent_start)
961                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
962                         else if (dst->extent_end < src->extent_start) {
963                                 ret = add_file_extent_hole(&dst->holes,
964                                         dst->extent_end,
965                                         src->extent_start - dst->extent_end);
966                         }
967                         if (dst->extent_end < src->extent_end)
968                                 dst->extent_end = src->extent_end;
969                 }
970         }
971
972         dst->errors |= src->errors;
973         if (src->found_inode_item) {
974                 if (!dst->found_inode_item) {
975                         dst->nlink = src->nlink;
976                         dst->isize = src->isize;
977                         dst->nbytes = src->nbytes;
978                         dst->imode = src->imode;
979                         dst->nodatasum = src->nodatasum;
980                         dst->found_inode_item = 1;
981                 } else {
982                         dst->errors |= I_ERR_DUP_INODE_ITEM;
983                 }
984         }
985         dst->merging = 0;
986
987         return 0;
988 }
989
990 static int splice_shared_node(struct shared_node *src_node,
991                               struct shared_node *dst_node)
992 {
993         struct cache_extent *cache;
994         struct ptr_node *node, *ins;
995         struct cache_tree *src, *dst;
996         struct inode_record *rec, *conflict;
997         u64 current_ino = 0;
998         int splice = 0;
999         int ret;
1000
1001         if (--src_node->refs == 0)
1002                 splice = 1;
1003         if (src_node->current)
1004                 current_ino = src_node->current->ino;
1005
1006         src = &src_node->root_cache;
1007         dst = &dst_node->root_cache;
1008 again:
1009         cache = search_cache_extent(src, 0);
1010         while (cache) {
1011                 node = container_of(cache, struct ptr_node, cache);
1012                 rec = node->data;
1013                 cache = next_cache_extent(cache);
1014
1015                 if (splice) {
1016                         remove_cache_extent(src, &node->cache);
1017                         ins = node;
1018                 } else {
1019                         ins = malloc(sizeof(*ins));
1020                         BUG_ON(!ins);
1021                         ins->cache.start = node->cache.start;
1022                         ins->cache.size = node->cache.size;
1023                         ins->data = rec;
1024                         rec->refs++;
1025                 }
1026                 ret = insert_cache_extent(dst, &ins->cache);
1027                 if (ret == -EEXIST) {
1028                         conflict = get_inode_rec(dst, rec->ino, 1);
1029                         BUG_ON(IS_ERR(conflict));
1030                         merge_inode_recs(rec, conflict, dst);
1031                         if (rec->checked) {
1032                                 conflict->checked = 1;
1033                                 if (dst_node->current == conflict)
1034                                         dst_node->current = NULL;
1035                         }
1036                         maybe_free_inode_rec(dst, conflict);
1037                         free_inode_rec(rec);
1038                         free(ins);
1039                 } else {
1040                         BUG_ON(ret);
1041                 }
1042         }
1043
1044         if (src == &src_node->root_cache) {
1045                 src = &src_node->inode_cache;
1046                 dst = &dst_node->inode_cache;
1047                 goto again;
1048         }
1049
1050         if (current_ino > 0 && (!dst_node->current ||
1051             current_ino > dst_node->current->ino)) {
1052                 if (dst_node->current) {
1053                         dst_node->current->checked = 1;
1054                         maybe_free_inode_rec(dst, dst_node->current);
1055                 }
1056                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1057                 BUG_ON(IS_ERR(dst_node->current));
1058         }
1059         return 0;
1060 }
1061
1062 static void free_inode_ptr(struct cache_extent *cache)
1063 {
1064         struct ptr_node *node;
1065         struct inode_record *rec;
1066
1067         node = container_of(cache, struct ptr_node, cache);
1068         rec = node->data;
1069         free_inode_rec(rec);
1070         free(node);
1071 }
1072
1073 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1074
1075 static struct shared_node *find_shared_node(struct cache_tree *shared,
1076                                             u64 bytenr)
1077 {
1078         struct cache_extent *cache;
1079         struct shared_node *node;
1080
1081         cache = lookup_cache_extent(shared, bytenr, 1);
1082         if (cache) {
1083                 node = container_of(cache, struct shared_node, cache);
1084                 return node;
1085         }
1086         return NULL;
1087 }
1088
1089 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1090 {
1091         int ret;
1092         struct shared_node *node;
1093
1094         node = calloc(1, sizeof(*node));
1095         if (!node)
1096                 return -ENOMEM;
1097         node->cache.start = bytenr;
1098         node->cache.size = 1;
1099         cache_tree_init(&node->root_cache);
1100         cache_tree_init(&node->inode_cache);
1101         node->refs = refs;
1102
1103         ret = insert_cache_extent(shared, &node->cache);
1104
1105         return ret;
1106 }
1107
1108 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1109                              struct walk_control *wc, int level)
1110 {
1111         struct shared_node *node;
1112         struct shared_node *dest;
1113         int ret;
1114
1115         if (level == wc->active_node)
1116                 return 0;
1117
1118         BUG_ON(wc->active_node <= level);
1119         node = find_shared_node(&wc->shared, bytenr);
1120         if (!node) {
1121                 ret = add_shared_node(&wc->shared, bytenr, refs);
1122                 BUG_ON(ret);
1123                 node = find_shared_node(&wc->shared, bytenr);
1124                 wc->nodes[level] = node;
1125                 wc->active_node = level;
1126                 return 0;
1127         }
1128
1129         if (wc->root_level == wc->active_node &&
1130             btrfs_root_refs(&root->root_item) == 0) {
1131                 if (--node->refs == 0) {
1132                         free_inode_recs_tree(&node->root_cache);
1133                         free_inode_recs_tree(&node->inode_cache);
1134                         remove_cache_extent(&wc->shared, &node->cache);
1135                         free(node);
1136                 }
1137                 return 1;
1138         }
1139
1140         dest = wc->nodes[wc->active_node];
1141         splice_shared_node(node, dest);
1142         if (node->refs == 0) {
1143                 remove_cache_extent(&wc->shared, &node->cache);
1144                 free(node);
1145         }
1146         return 1;
1147 }
1148
1149 static int leave_shared_node(struct btrfs_root *root,
1150                              struct walk_control *wc, int level)
1151 {
1152         struct shared_node *node;
1153         struct shared_node *dest;
1154         int i;
1155
1156         if (level == wc->root_level)
1157                 return 0;
1158
1159         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1160                 if (wc->nodes[i])
1161                         break;
1162         }
1163         BUG_ON(i >= BTRFS_MAX_LEVEL);
1164
1165         node = wc->nodes[wc->active_node];
1166         wc->nodes[wc->active_node] = NULL;
1167         wc->active_node = i;
1168
1169         dest = wc->nodes[wc->active_node];
1170         if (wc->active_node < wc->root_level ||
1171             btrfs_root_refs(&root->root_item) > 0) {
1172                 BUG_ON(node->refs <= 1);
1173                 splice_shared_node(node, dest);
1174         } else {
1175                 BUG_ON(node->refs < 2);
1176                 node->refs--;
1177         }
1178         return 0;
1179 }
1180
1181 /*
1182  * Returns:
1183  * < 0 - on error
1184  * 1   - if the root with id child_root_id is a child of root parent_root_id
1185  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1186  *       has other root(s) as parent(s)
1187  * 2   - if the root child_root_id doesn't have any parent roots
1188  */
1189 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1190                          u64 child_root_id)
1191 {
1192         struct btrfs_path path;
1193         struct btrfs_key key;
1194         struct extent_buffer *leaf;
1195         int has_parent = 0;
1196         int ret;
1197
1198         btrfs_init_path(&path);
1199
1200         key.objectid = parent_root_id;
1201         key.type = BTRFS_ROOT_REF_KEY;
1202         key.offset = child_root_id;
1203         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1204                                 0, 0);
1205         if (ret < 0)
1206                 return ret;
1207         btrfs_release_path(&path);
1208         if (!ret)
1209                 return 1;
1210
1211         key.objectid = child_root_id;
1212         key.type = BTRFS_ROOT_BACKREF_KEY;
1213         key.offset = 0;
1214         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1215                                 0, 0);
1216         if (ret < 0)
1217                 goto out;
1218
1219         while (1) {
1220                 leaf = path.nodes[0];
1221                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1222                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1223                         if (ret)
1224                                 break;
1225                         leaf = path.nodes[0];
1226                 }
1227
1228                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1229                 if (key.objectid != child_root_id ||
1230                     key.type != BTRFS_ROOT_BACKREF_KEY)
1231                         break;
1232
1233                 has_parent = 1;
1234
1235                 if (key.offset == parent_root_id) {
1236                         btrfs_release_path(&path);
1237                         return 1;
1238                 }
1239
1240                 path.slots[0]++;
1241         }
1242 out:
1243         btrfs_release_path(&path);
1244         if (ret < 0)
1245                 return ret;
1246         return has_parent ? 0 : 2;
1247 }
1248
1249 static int process_dir_item(struct extent_buffer *eb,
1250                             int slot, struct btrfs_key *key,
1251                             struct shared_node *active_node)
1252 {
1253         u32 total;
1254         u32 cur = 0;
1255         u32 len;
1256         u32 name_len;
1257         u32 data_len;
1258         int error;
1259         int nritems = 0;
1260         u8 filetype;
1261         struct btrfs_dir_item *di;
1262         struct inode_record *rec;
1263         struct cache_tree *root_cache;
1264         struct cache_tree *inode_cache;
1265         struct btrfs_key location;
1266         char namebuf[BTRFS_NAME_LEN];
1267
1268         root_cache = &active_node->root_cache;
1269         inode_cache = &active_node->inode_cache;
1270         rec = active_node->current;
1271         rec->found_dir_item = 1;
1272
1273         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1274         total = btrfs_item_size_nr(eb, slot);
1275         while (cur < total) {
1276                 nritems++;
1277                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1278                 name_len = btrfs_dir_name_len(eb, di);
1279                 data_len = btrfs_dir_data_len(eb, di);
1280                 filetype = btrfs_dir_type(eb, di);
1281
1282                 rec->found_size += name_len;
1283                 if (cur + sizeof(*di) + name_len > total ||
1284                     name_len > BTRFS_NAME_LEN) {
1285                         error = REF_ERR_NAME_TOO_LONG;
1286
1287                         if (cur + sizeof(*di) > total)
1288                                 break;
1289                         len = min_t(u32, total - cur - sizeof(*di),
1290                                     BTRFS_NAME_LEN);
1291                 } else {
1292                         len = name_len;
1293                         error = 0;
1294                 }
1295
1296                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1297
1298                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1299                     key->offset != btrfs_name_hash(namebuf, len)) {
1300                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1301                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1302                         key->objectid, key->offset, namebuf, len, filetype,
1303                         key->offset, btrfs_name_hash(namebuf, len));
1304                 }
1305
1306                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1307                         add_inode_backref(inode_cache, location.objectid,
1308                                           key->objectid, key->offset, namebuf,
1309                                           len, filetype, key->type, error);
1310                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1311                         add_inode_backref(root_cache, location.objectid,
1312                                           key->objectid, key->offset,
1313                                           namebuf, len, filetype,
1314                                           key->type, error);
1315                 } else {
1316                         fprintf(stderr,
1317                                 "unknown location type %d in DIR_ITEM[%llu %llu]\n",
1318                                 location.type, key->objectid, key->offset);
1319                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1320                                           key->objectid, key->offset, namebuf,
1321                                           len, filetype, key->type, error);
1322                 }
1323
1324                 len = sizeof(*di) + name_len + data_len;
1325                 di = (struct btrfs_dir_item *)((char *)di + len);
1326                 cur += len;
1327         }
1328         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1329                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1330
1331         return 0;
1332 }
1333
1334 static int process_inode_ref(struct extent_buffer *eb,
1335                              int slot, struct btrfs_key *key,
1336                              struct shared_node *active_node)
1337 {
1338         u32 total;
1339         u32 cur = 0;
1340         u32 len;
1341         u32 name_len;
1342         u64 index;
1343         int error;
1344         struct cache_tree *inode_cache;
1345         struct btrfs_inode_ref *ref;
1346         char namebuf[BTRFS_NAME_LEN];
1347
1348         inode_cache = &active_node->inode_cache;
1349
1350         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1351         total = btrfs_item_size_nr(eb, slot);
1352         while (cur < total) {
1353                 name_len = btrfs_inode_ref_name_len(eb, ref);
1354                 index = btrfs_inode_ref_index(eb, ref);
1355
1356                 /* inode_ref + namelen should not cross item boundary */
1357                 if (cur + sizeof(*ref) + name_len > total ||
1358                     name_len > BTRFS_NAME_LEN) {
1359                         if (total < cur + sizeof(*ref))
1360                                 break;
1361
1362                         /* Still try to read out the remaining part */
1363                         len = min_t(u32, total - cur - sizeof(*ref),
1364                                     BTRFS_NAME_LEN);
1365                         error = REF_ERR_NAME_TOO_LONG;
1366                 } else {
1367                         len = name_len;
1368                         error = 0;
1369                 }
1370
1371                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1372                 add_inode_backref(inode_cache, key->objectid, key->offset,
1373                                   index, namebuf, len, 0, key->type, error);
1374
1375                 len = sizeof(*ref) + name_len;
1376                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1377                 cur += len;
1378         }
1379         return 0;
1380 }
1381
1382 static int process_inode_extref(struct extent_buffer *eb,
1383                                 int slot, struct btrfs_key *key,
1384                                 struct shared_node *active_node)
1385 {
1386         u32 total;
1387         u32 cur = 0;
1388         u32 len;
1389         u32 name_len;
1390         u64 index;
1391         u64 parent;
1392         int error;
1393         struct cache_tree *inode_cache;
1394         struct btrfs_inode_extref *extref;
1395         char namebuf[BTRFS_NAME_LEN];
1396
1397         inode_cache = &active_node->inode_cache;
1398
1399         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1400         total = btrfs_item_size_nr(eb, slot);
1401         while (cur < total) {
1402                 name_len = btrfs_inode_extref_name_len(eb, extref);
1403                 index = btrfs_inode_extref_index(eb, extref);
1404                 parent = btrfs_inode_extref_parent(eb, extref);
1405                 if (name_len <= BTRFS_NAME_LEN) {
1406                         len = name_len;
1407                         error = 0;
1408                 } else {
1409                         len = BTRFS_NAME_LEN;
1410                         error = REF_ERR_NAME_TOO_LONG;
1411                 }
1412                 read_extent_buffer(eb, namebuf,
1413                                    (unsigned long)(extref + 1), len);
1414                 add_inode_backref(inode_cache, key->objectid, parent,
1415                                   index, namebuf, len, 0, key->type, error);
1416
1417                 len = sizeof(*extref) + name_len;
1418                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1419                 cur += len;
1420         }
1421         return 0;
1422
1423 }
1424
1425 static int process_file_extent(struct btrfs_root *root,
1426                                 struct extent_buffer *eb,
1427                                 int slot, struct btrfs_key *key,
1428                                 struct shared_node *active_node)
1429 {
1430         struct inode_record *rec;
1431         struct btrfs_file_extent_item *fi;
1432         u64 num_bytes = 0;
1433         u64 disk_bytenr = 0;
1434         u64 extent_offset = 0;
1435         u64 mask = root->fs_info->sectorsize - 1;
1436         int extent_type;
1437         int ret;
1438
1439         rec = active_node->current;
1440         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1441         rec->found_file_extent = 1;
1442
1443         if (rec->extent_start == (u64)-1) {
1444                 rec->extent_start = key->offset;
1445                 rec->extent_end = key->offset;
1446         }
1447
1448         if (rec->extent_end > key->offset)
1449                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1450         else if (rec->extent_end < key->offset) {
1451                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1452                                            key->offset - rec->extent_end);
1453                 if (ret < 0)
1454                         return ret;
1455         }
1456
1457         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1458         extent_type = btrfs_file_extent_type(eb, fi);
1459
1460         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1461                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1462                 if (num_bytes == 0)
1463                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1464                 rec->found_size += num_bytes;
1465                 num_bytes = (num_bytes + mask) & ~mask;
1466         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1467                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1468                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1469                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1470                 extent_offset = btrfs_file_extent_offset(eb, fi);
1471                 if (num_bytes == 0 || (num_bytes & mask))
1472                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1473                 if (num_bytes + extent_offset >
1474                     btrfs_file_extent_ram_bytes(eb, fi))
1475                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1476                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1477                     (btrfs_file_extent_compression(eb, fi) ||
1478                      btrfs_file_extent_encryption(eb, fi) ||
1479                      btrfs_file_extent_other_encoding(eb, fi)))
1480                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1481                 if (disk_bytenr > 0)
1482                         rec->found_size += num_bytes;
1483         } else {
1484                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1485         }
1486         rec->extent_end = key->offset + num_bytes;
1487
1488         /*
1489          * The data reloc tree will copy full extents into its inode and then
1490          * copy the corresponding csums.  Because the extent it copied could be
1491          * a preallocated extent that hasn't been written to yet there may be no
1492          * csums to copy, ergo we won't have csums for our file extent.  This is
1493          * ok so just don't bother checking csums if the inode belongs to the
1494          * data reloc tree.
1495          */
1496         if (disk_bytenr > 0 &&
1497             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1498                 u64 found;
1499                 if (btrfs_file_extent_compression(eb, fi))
1500                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1501                 else
1502                         disk_bytenr += extent_offset;
1503
1504                 ret = count_csum_range(root->fs_info, disk_bytenr, num_bytes,
1505                                        &found);
1506                 if (ret < 0)
1507                         return ret;
1508                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1509                         if (found > 0)
1510                                 rec->found_csum_item = 1;
1511                         if (found < num_bytes)
1512                                 rec->some_csum_missing = 1;
1513                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1514                         if (found > 0)
1515                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1516                 }
1517         }
1518         return 0;
1519 }
1520
1521 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1522                             struct walk_control *wc)
1523 {
1524         struct btrfs_key key;
1525         u32 nritems;
1526         int i;
1527         int ret = 0;
1528         struct cache_tree *inode_cache;
1529         struct shared_node *active_node;
1530
1531         if (wc->root_level == wc->active_node &&
1532             btrfs_root_refs(&root->root_item) == 0)
1533                 return 0;
1534
1535         active_node = wc->nodes[wc->active_node];
1536         inode_cache = &active_node->inode_cache;
1537         nritems = btrfs_header_nritems(eb);
1538         for (i = 0; i < nritems; i++) {
1539                 btrfs_item_key_to_cpu(eb, &key, i);
1540
1541                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1542                         continue;
1543                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1544                         continue;
1545
1546                 if (active_node->current == NULL ||
1547                     active_node->current->ino < key.objectid) {
1548                         if (active_node->current) {
1549                                 active_node->current->checked = 1;
1550                                 maybe_free_inode_rec(inode_cache,
1551                                                      active_node->current);
1552                         }
1553                         active_node->current = get_inode_rec(inode_cache,
1554                                                              key.objectid, 1);
1555                         BUG_ON(IS_ERR(active_node->current));
1556                 }
1557                 switch (key.type) {
1558                 case BTRFS_DIR_ITEM_KEY:
1559                 case BTRFS_DIR_INDEX_KEY:
1560                         ret = process_dir_item(eb, i, &key, active_node);
1561                         break;
1562                 case BTRFS_INODE_REF_KEY:
1563                         ret = process_inode_ref(eb, i, &key, active_node);
1564                         break;
1565                 case BTRFS_INODE_EXTREF_KEY:
1566                         ret = process_inode_extref(eb, i, &key, active_node);
1567                         break;
1568                 case BTRFS_INODE_ITEM_KEY:
1569                         ret = process_inode_item(eb, i, &key, active_node);
1570                         break;
1571                 case BTRFS_EXTENT_DATA_KEY:
1572                         ret = process_file_extent(root, eb, i, &key,
1573                                                   active_node);
1574                         break;
1575                 default:
1576                         break;
1577                 };
1578         }
1579         return ret;
1580 }
1581
1582 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1583                              struct extent_buffer *eb, struct node_refs *nrefs,
1584                              u64 level, int check_all);
1585 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1586                             unsigned int ext_ref);
1587
1588 /*
1589  * Returns >0  Found error, not fatal, should continue
1590  * Returns <0  Fatal error, must exit the whole check
1591  * Returns 0   No errors found
1592  */
1593 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1594                                struct node_refs *nrefs, int *level, int ext_ref)
1595 {
1596         struct extent_buffer *cur = path->nodes[0];
1597         struct btrfs_key key;
1598         u64 cur_bytenr;
1599         u32 nritems;
1600         u64 first_ino = 0;
1601         int root_level = btrfs_header_level(root->node);
1602         int i;
1603         int ret = 0; /* Final return value */
1604         int err = 0; /* Positive error bitmap */
1605
1606         cur_bytenr = cur->start;
1607
1608         /* skip to first inode item or the first inode number change */
1609         nritems = btrfs_header_nritems(cur);
1610         for (i = 0; i < nritems; i++) {
1611                 btrfs_item_key_to_cpu(cur, &key, i);
1612                 if (i == 0)
1613                         first_ino = key.objectid;
1614                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1615                     (first_ino && first_ino != key.objectid))
1616                         break;
1617         }
1618         if (i == nritems) {
1619                 path->slots[0] = nritems;
1620                 return 0;
1621         }
1622         path->slots[0] = i;
1623
1624 again:
1625         err |= check_inode_item(root, path, ext_ref);
1626
1627         /* modify cur since check_inode_item may change path */
1628         cur = path->nodes[0];
1629
1630         if (err & LAST_ITEM)
1631                 goto out;
1632
1633         /* still have inode items in thie leaf */
1634         if (cur->start == cur_bytenr)
1635                 goto again;
1636
1637         /*
1638          * we have switched to another leaf, above nodes may
1639          * have changed, here walk down the path, if a node
1640          * or leaf is shared, check whether we can skip this
1641          * node or leaf.
1642          */
1643         for (i = root_level; i >= 0; i--) {
1644                 if (path->nodes[i]->start == nrefs->bytenr[i])
1645                         continue;
1646
1647                 ret = update_nodes_refs(root, path->nodes[i]->start,
1648                                 path->nodes[i], nrefs, i, 0);
1649                 if (ret)
1650                         goto out;
1651
1652                 if (!nrefs->need_check[i]) {
1653                         *level += 1;
1654                         break;
1655                 }
1656         }
1657
1658         for (i = 0; i < *level; i++) {
1659                 free_extent_buffer(path->nodes[i]);
1660                 path->nodes[i] = NULL;
1661         }
1662 out:
1663         err &= ~LAST_ITEM;
1664         if (err && !ret)
1665                 ret = err;
1666         return ret;
1667 }
1668
1669 /*
1670  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
1671  * in every fs or file tree check. Here we find its all root ids, and only check
1672  * it in the fs or file tree which has the smallest root id.
1673  */
1674 static int need_check(struct btrfs_root *root, struct ulist *roots)
1675 {
1676         struct rb_node *node;
1677         struct ulist_node *u;
1678
1679         /*
1680          * @roots can be empty if it belongs to tree reloc tree
1681          * In that case, we should always check the leaf, as we can't use
1682          * the tree owner to ensure some other root will check it.
1683          */
1684         if (roots->nnodes == 1 || roots->nnodes == 0)
1685                 return 1;
1686
1687         node = rb_first(&roots->root);
1688         u = rb_entry(node, struct ulist_node, rb_node);
1689         /*
1690          * current root id is not smallest, we skip it and let it be checked
1691          * in the fs or file tree who hash the smallest root id.
1692          */
1693         if (root->objectid != u->val)
1694                 return 0;
1695
1696         return 1;
1697 }
1698
1699 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
1700                                u64 *flags_ret)
1701 {
1702         struct btrfs_root *extent_root = root->fs_info->extent_root;
1703         struct btrfs_root_item *ri = &root->root_item;
1704         struct btrfs_extent_inline_ref *iref;
1705         struct btrfs_extent_item *ei;
1706         struct btrfs_key key;
1707         struct btrfs_path *path = NULL;
1708         unsigned long ptr;
1709         unsigned long end;
1710         u64 flags;
1711         u64 owner = 0;
1712         u64 offset;
1713         int slot;
1714         int type;
1715         int ret = 0;
1716
1717         /*
1718          * Except file/reloc tree, we can not have FULL BACKREF MODE
1719          */
1720         if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
1721                 goto normal;
1722
1723         /* root node */
1724         if (eb->start == btrfs_root_bytenr(ri))
1725                 goto normal;
1726
1727         if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
1728                 goto full_backref;
1729
1730         owner = btrfs_header_owner(eb);
1731         if (owner == root->objectid)
1732                 goto normal;
1733
1734         path = btrfs_alloc_path();
1735         if (!path)
1736                 return -ENOMEM;
1737
1738         key.objectid = btrfs_header_bytenr(eb);
1739         key.type = (u8)-1;
1740         key.offset = (u64)-1;
1741
1742         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
1743         if (ret <= 0) {
1744                 ret = -EIO;
1745                 goto out;
1746         }
1747
1748         if (ret > 0) {
1749                 ret = btrfs_previous_extent_item(extent_root, path,
1750                                                  key.objectid);
1751                 if (ret)
1752                         goto full_backref;
1753
1754         }
1755         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1756
1757         eb = path->nodes[0];
1758         slot = path->slots[0];
1759         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
1760
1761         flags = btrfs_extent_flags(eb, ei);
1762         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
1763                 goto full_backref;
1764
1765         ptr = (unsigned long)(ei + 1);
1766         end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
1767
1768         if (key.type == BTRFS_EXTENT_ITEM_KEY)
1769                 ptr += sizeof(struct btrfs_tree_block_info);
1770
1771 next:
1772         /* Reached extent item ends normally */
1773         if (ptr == end)
1774                 goto full_backref;
1775
1776         /* Beyond extent item end, wrong item size */
1777         if (ptr > end) {
1778                 error("extent item at bytenr %llu slot %d has wrong size",
1779                         eb->start, slot);
1780                 goto full_backref;
1781         }
1782
1783         iref = (struct btrfs_extent_inline_ref *)ptr;
1784         offset = btrfs_extent_inline_ref_offset(eb, iref);
1785         type = btrfs_extent_inline_ref_type(eb, iref);
1786
1787         if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
1788                 goto normal;
1789         ptr += btrfs_extent_inline_ref_size(type);
1790         goto next;
1791
1792 normal:
1793         *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
1794         goto out;
1795
1796 full_backref:
1797         *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
1798 out:
1799         btrfs_free_path(path);
1800         return ret;
1801 }
1802
1803 /*
1804  * for a tree node or leaf, we record its reference count, so later if we still
1805  * process this node or leaf, don't need to compute its reference count again.
1806  *
1807  * @bytenr  if @bytenr == (u64)-1, only update nrefs->full_backref[level]
1808  */
1809 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1810                              struct extent_buffer *eb, struct node_refs *nrefs,
1811                              u64 level, int check_all)
1812 {
1813         struct ulist *roots;
1814         u64 refs = 0;
1815         u64 flags = 0;
1816         int root_level = btrfs_header_level(root->node);
1817         int check;
1818         int ret;
1819
1820         if (nrefs->bytenr[level] == bytenr)
1821                 return 0;
1822
1823         if (bytenr != (u64)-1) {
1824                 /* the return value of this function seems a mistake */
1825                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1826                                        level, 1, &refs, &flags);
1827                 /* temporary fix */
1828                 if (ret < 0 && !check_all)
1829                         return ret;
1830
1831                 nrefs->bytenr[level] = bytenr;
1832                 nrefs->refs[level] = refs;
1833                 nrefs->full_backref[level] = 0;
1834                 nrefs->checked[level] = 0;
1835
1836                 if (refs > 1) {
1837                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
1838                                                    0, &roots);
1839                         if (ret)
1840                                 return -EIO;
1841
1842                         check = need_check(root, roots);
1843                         ulist_free(roots);
1844                         nrefs->need_check[level] = check;
1845                 } else {
1846                         if (!check_all) {
1847                                 nrefs->need_check[level] = 1;
1848                         } else {
1849                                 if (level == root_level) {
1850                                         nrefs->need_check[level] = 1;
1851                                 } else {
1852                                         /*
1853                                          * The node refs may have not been
1854                                          * updated if upper needs checking (the
1855                                          * lowest root_objectid) the node can
1856                                          * be checked.
1857                                          */
1858                                         nrefs->need_check[level] =
1859                                                 nrefs->need_check[level + 1];
1860                                 }
1861                         }
1862                 }
1863         }
1864
1865         if (check_all && eb) {
1866                 calc_extent_flag_v2(root, eb, &flags);
1867                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
1868                         nrefs->full_backref[level] = 1;
1869         }
1870
1871         return 0;
1872 }
1873
1874 /*
1875  * @level           if @level == -1 means extent data item
1876  *                  else normal treeblocl.
1877  */
1878 static int should_check_extent_strictly(struct btrfs_root *root,
1879                                         struct node_refs *nrefs, int level)
1880 {
1881         int root_level = btrfs_header_level(root->node);
1882
1883         if (level > root_level || level < -1)
1884                 return 1;
1885         if (level == root_level)
1886                 return 1;
1887         /*
1888          * if the upper node is marked full backref, it should contain shared
1889          * backref of the parent (except owner == root->objectid).
1890          */
1891         while (++level <= root_level)
1892                 if (nrefs->refs[level] > 1)
1893                         return 0;
1894
1895         return 1;
1896 }
1897
1898 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1899                           struct walk_control *wc, int *level,
1900                           struct node_refs *nrefs)
1901 {
1902         enum btrfs_tree_block_status status;
1903         u64 bytenr;
1904         u64 ptr_gen;
1905         struct btrfs_fs_info *fs_info = root->fs_info;
1906         struct extent_buffer *next;
1907         struct extent_buffer *cur;
1908         int ret, err = 0;
1909         u64 refs;
1910
1911         WARN_ON(*level < 0);
1912         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1913
1914         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1915                 refs = nrefs->refs[*level];
1916                 ret = 0;
1917         } else {
1918                 ret = btrfs_lookup_extent_info(NULL, root,
1919                                        path->nodes[*level]->start,
1920                                        *level, 1, &refs, NULL);
1921                 if (ret < 0) {
1922                         err = ret;
1923                         goto out;
1924                 }
1925                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1926                 nrefs->refs[*level] = refs;
1927         }
1928
1929         if (refs > 1) {
1930                 ret = enter_shared_node(root, path->nodes[*level]->start,
1931                                         refs, wc, *level);
1932                 if (ret > 0) {
1933                         err = ret;
1934                         goto out;
1935                 }
1936         }
1937
1938         while (*level >= 0) {
1939                 WARN_ON(*level < 0);
1940                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1941                 cur = path->nodes[*level];
1942
1943                 if (btrfs_header_level(cur) != *level)
1944                         WARN_ON(1);
1945
1946                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1947                         break;
1948                 if (*level == 0) {
1949                         ret = process_one_leaf(root, cur, wc);
1950                         if (ret < 0)
1951                                 err = ret;
1952                         break;
1953                 }
1954                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1955                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1956
1957                 if (bytenr == nrefs->bytenr[*level - 1]) {
1958                         refs = nrefs->refs[*level - 1];
1959                 } else {
1960                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1961                                         *level - 1, 1, &refs, NULL);
1962                         if (ret < 0) {
1963                                 refs = 0;
1964                         } else {
1965                                 nrefs->bytenr[*level - 1] = bytenr;
1966                                 nrefs->refs[*level - 1] = refs;
1967                         }
1968                 }
1969
1970                 if (refs > 1) {
1971                         ret = enter_shared_node(root, bytenr, refs,
1972                                                 wc, *level - 1);
1973                         if (ret > 0) {
1974                                 path->slots[*level]++;
1975                                 continue;
1976                         }
1977                 }
1978
1979                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
1980                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1981                         free_extent_buffer(next);
1982                         reada_walk_down(root, cur, path->slots[*level]);
1983                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
1984                         if (!extent_buffer_uptodate(next)) {
1985                                 struct btrfs_key node_key;
1986
1987                                 btrfs_node_key_to_cpu(path->nodes[*level],
1988                                                       &node_key,
1989                                                       path->slots[*level]);
1990                                 btrfs_add_corrupt_extent_record(root->fs_info,
1991                                                 &node_key,
1992                                                 path->nodes[*level]->start,
1993                                                 root->fs_info->nodesize,
1994                                                 *level);
1995                                 err = -EIO;
1996                                 goto out;
1997                         }
1998                 }
1999
2000                 ret = check_child_node(cur, path->slots[*level], next);
2001                 if (ret) {
2002                         free_extent_buffer(next);
2003                         err = ret;
2004                         goto out;
2005                 }
2006
2007                 if (btrfs_is_leaf(next))
2008                         status = btrfs_check_leaf(root, NULL, next);
2009                 else
2010                         status = btrfs_check_node(root, NULL, next);
2011                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2012                         free_extent_buffer(next);
2013                         err = -EIO;
2014                         goto out;
2015                 }
2016
2017                 *level = *level - 1;
2018                 free_extent_buffer(path->nodes[*level]);
2019                 path->nodes[*level] = next;
2020                 path->slots[*level] = 0;
2021         }
2022 out:
2023         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2024         return err;
2025 }
2026
2027 /*
2028  * Update global fs information.
2029  */
2030 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2031                          int level)
2032 {
2033         u32 free_nrs;
2034         struct extent_buffer *eb = path->nodes[level];
2035
2036         total_btree_bytes += eb->len;
2037         if (fs_root_objectid(root->objectid))
2038                 total_fs_tree_bytes += eb->len;
2039         if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2040                 total_extent_tree_bytes += eb->len;
2041
2042         if (level == 0) {
2043                 btree_space_waste += btrfs_leaf_free_space(root, eb);
2044         } else {
2045                 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root->fs_info) -
2046                             btrfs_header_nritems(eb));
2047                 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2048         }
2049 }
2050
2051 /*
2052  * This function only handles BACKREF_MISSING,
2053  * If corresponding extent item exists, increase the ref, else insert an extent
2054  * item and backref.
2055  *
2056  * Returns error bits after repair.
2057  */
2058 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2059                                  struct btrfs_root *root,
2060                                  struct extent_buffer *node,
2061                                  struct node_refs *nrefs, int level, int err)
2062 {
2063         struct btrfs_fs_info *fs_info = root->fs_info;
2064         struct btrfs_root *extent_root = fs_info->extent_root;
2065         struct btrfs_path path;
2066         struct btrfs_extent_item *ei;
2067         struct btrfs_tree_block_info *bi;
2068         struct btrfs_key key;
2069         struct extent_buffer *eb;
2070         u32 size = sizeof(*ei);
2071         u32 node_size = root->fs_info->nodesize;
2072         int insert_extent = 0;
2073         int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2074         int root_level = btrfs_header_level(root->node);
2075         int generation;
2076         int ret;
2077         u64 owner;
2078         u64 bytenr;
2079         u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
2080         u64 parent = 0;
2081
2082         if ((err & BACKREF_MISSING) == 0)
2083                 return err;
2084
2085         WARN_ON(level > BTRFS_MAX_LEVEL);
2086         WARN_ON(level < 0);
2087
2088         btrfs_init_path(&path);
2089         bytenr = btrfs_header_bytenr(node);
2090         owner = btrfs_header_owner(node);
2091         generation = btrfs_header_generation(node);
2092
2093         key.objectid = bytenr;
2094         key.type = (u8)-1;
2095         key.offset = (u64)-1;
2096
2097         /* Search for the extent item */
2098         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2099         if (ret <= 0) {
2100                 ret = -EIO;
2101                 goto out;
2102         }
2103
2104         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2105         if (ret)
2106                 insert_extent = 1;
2107
2108         /* calculate if the extent item flag is full backref or not */
2109         if (nrefs->full_backref[level] != 0)
2110                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2111
2112         /* insert an extent item */
2113         if (insert_extent) {
2114                 struct btrfs_disk_key copy_key;
2115
2116                 generation = btrfs_header_generation(node);
2117
2118                 if (level < root_level && nrefs->full_backref[level + 1] &&
2119                     owner != root->objectid) {
2120                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2121                 }
2122
2123                 key.objectid = bytenr;
2124                 if (!skinny_metadata) {
2125                         key.type = BTRFS_EXTENT_ITEM_KEY;
2126                         key.offset = node_size;
2127                         size += sizeof(*bi);
2128                 } else {
2129                         key.type = BTRFS_METADATA_ITEM_KEY;
2130                         key.offset = level;
2131                 }
2132
2133                 btrfs_release_path(&path);
2134                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2135                                               size);
2136                 if (ret)
2137                         goto out;
2138
2139                 eb = path.nodes[0];
2140                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
2141
2142                 btrfs_set_extent_refs(eb, ei, 0);
2143                 btrfs_set_extent_generation(eb, ei, generation);
2144                 btrfs_set_extent_flags(eb, ei, flags);
2145
2146                 if (!skinny_metadata) {
2147                         bi = (struct btrfs_tree_block_info *)(ei + 1);
2148                         memset_extent_buffer(eb, 0, (unsigned long)bi,
2149                                              sizeof(*bi));
2150                         btrfs_set_disk_key_objectid(&copy_key, root->objectid);
2151                         btrfs_set_disk_key_type(&copy_key, 0);
2152                         btrfs_set_disk_key_offset(&copy_key, 0);
2153
2154                         btrfs_set_tree_block_level(eb, bi, level);
2155                         btrfs_set_tree_block_key(eb, bi, &copy_key);
2156                 }
2157                 btrfs_mark_buffer_dirty(eb);
2158                 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2159                 btrfs_update_block_group(extent_root, bytenr, node_size, 1, 0);
2160
2161                 nrefs->refs[level] = 0;
2162                 nrefs->full_backref[level] =
2163                         flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2164                 btrfs_release_path(&path);
2165         }
2166
2167         if (level < root_level && nrefs->full_backref[level + 1] &&
2168             owner != root->objectid)
2169                 parent = nrefs->bytenr[level + 1];
2170
2171         /* increase the ref */
2172         ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2173                         parent, root->objectid, level, 0);
2174
2175         nrefs->refs[level]++;
2176 out:
2177         btrfs_release_path(&path);
2178         if (ret) {
2179                 error(
2180         "failed to repair tree block ref start %llu root %llu due to %s",
2181                       bytenr, root->objectid, strerror(-ret));
2182         } else {
2183                 printf("Added one tree block ref start %llu %s %llu\n",
2184                        bytenr, parent ? "parent" : "root",
2185                        parent ? parent : root->objectid);
2186                 err &= ~BACKREF_MISSING;
2187         }
2188
2189         return err;
2190 }
2191
2192 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2193                             unsigned int ext_ref);
2194 static int check_tree_block_ref(struct btrfs_root *root,
2195                                 struct extent_buffer *eb, u64 bytenr,
2196                                 int level, u64 owner, struct node_refs *nrefs);
2197 static int check_leaf_items(struct btrfs_trans_handle *trans,
2198                             struct btrfs_root *root, struct btrfs_path *path,
2199                             struct node_refs *nrefs, int account_bytes);
2200
2201 /*
2202  * @trans      just for lowmem repair mode
2203  * @check all  if not 0 then check all tree block backrefs and items
2204  *             0 then just check relationship of items in fs tree(s)
2205  *
2206  * Returns >0  Found error, should continue
2207  * Returns <0  Fatal error, must exit the whole check
2208  * Returns 0   No errors found
2209  */
2210 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2211                              struct btrfs_root *root, struct btrfs_path *path,
2212                              int *level, struct node_refs *nrefs, int ext_ref,
2213                              int check_all)
2214
2215 {
2216         enum btrfs_tree_block_status status;
2217         u64 bytenr;
2218         u64 ptr_gen;
2219         struct btrfs_fs_info *fs_info = root->fs_info;
2220         struct extent_buffer *next;
2221         struct extent_buffer *cur;
2222         int ret;
2223         int err = 0;
2224         int check;
2225         int account_file_data = 0;
2226
2227         WARN_ON(*level < 0);
2228         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2229
2230         ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2231                                 path->nodes[*level], nrefs, *level, check_all);
2232         if (ret < 0)
2233                 return ret;
2234
2235         while (*level >= 0) {
2236                 WARN_ON(*level < 0);
2237                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2238                 cur = path->nodes[*level];
2239                 bytenr = btrfs_header_bytenr(cur);
2240                 check = nrefs->need_check[*level];
2241
2242                 if (btrfs_header_level(cur) != *level)
2243                         WARN_ON(1);
2244                /*
2245                 * Update bytes accounting and check tree block ref
2246                 * NOTE: Doing accounting and check before checking nritems
2247                 * is necessary because of empty node/leaf.
2248                 */
2249                 if ((check_all && !nrefs->checked[*level]) ||
2250                     (!check_all && nrefs->need_check[*level])) {
2251                         ret = check_tree_block_ref(root, cur,
2252                            btrfs_header_bytenr(cur), btrfs_header_level(cur),
2253                            btrfs_header_owner(cur), nrefs);
2254
2255                         if (repair && ret)
2256                                 ret = repair_tree_block_ref(trans, root,
2257                                     path->nodes[*level], nrefs, *level, ret);
2258                         err |= ret;
2259
2260                         if (check_all && nrefs->need_check[*level] &&
2261                                 nrefs->refs[*level]) {
2262                                 account_bytes(root, path, *level);
2263                                 account_file_data = 1;
2264                         }
2265                         nrefs->checked[*level] = 1;
2266                 }
2267
2268                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2269                         break;
2270
2271                 /* Don't forgot to check leaf/node validation */
2272                 if (*level == 0) {
2273                         /* skip duplicate check */
2274                         if (check || !check_all) {
2275                                 ret = btrfs_check_leaf(root, NULL, cur);
2276                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2277                                         err |= -EIO;
2278                                         break;
2279                                 }
2280                         }
2281
2282                         ret = 0;
2283                         if (!check_all)
2284                                 ret = process_one_leaf_v2(root, path, nrefs,
2285                                                           level, ext_ref);
2286                         else
2287                                 ret = check_leaf_items(trans, root, path,
2288                                                nrefs, account_file_data);
2289                         err |= ret;
2290                         break;
2291                 } else {
2292                         if (check || !check_all) {
2293                                 ret = btrfs_check_node(root, NULL, cur);
2294                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2295                                         err |= -EIO;
2296                                         break;
2297                                 }
2298                         }
2299                 }
2300
2301                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2302                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2303
2304                 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2305                                         check_all);
2306                 if (ret < 0)
2307                         break;
2308                 /*
2309                  * check all trees in check_chunks_and_extent_v2
2310                  * check shared node once in check_fs_roots
2311                  */
2312                 if (!check_all && !nrefs->need_check[*level - 1]) {
2313                         path->slots[*level]++;
2314                         continue;
2315                 }
2316
2317                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2318                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2319                         free_extent_buffer(next);
2320                         reada_walk_down(root, cur, path->slots[*level]);
2321                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2322                         if (!extent_buffer_uptodate(next)) {
2323                                 struct btrfs_key node_key;
2324
2325                                 btrfs_node_key_to_cpu(path->nodes[*level],
2326                                                       &node_key,
2327                                                       path->slots[*level]);
2328                                 btrfs_add_corrupt_extent_record(fs_info,
2329                                         &node_key, path->nodes[*level]->start,
2330                                         fs_info->nodesize, *level);
2331                                 err |= -EIO;
2332                                 break;
2333                         }
2334                 }
2335
2336                 ret = check_child_node(cur, path->slots[*level], next);
2337                 err |= ret;
2338                 if (ret < 0) 
2339                         break;
2340
2341                 if (btrfs_is_leaf(next))
2342                         status = btrfs_check_leaf(root, NULL, next);
2343                 else
2344                         status = btrfs_check_node(root, NULL, next);
2345                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2346                         free_extent_buffer(next);
2347                         err |= -EIO;
2348                         break;
2349                 }
2350
2351                 *level = *level - 1;
2352                 free_extent_buffer(path->nodes[*level]);
2353                 path->nodes[*level] = next;
2354                 path->slots[*level] = 0;
2355                 account_file_data = 0;
2356
2357                 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
2358         }
2359         return err;
2360 }
2361
2362 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2363                         struct walk_control *wc, int *level)
2364 {
2365         int i;
2366         struct extent_buffer *leaf;
2367
2368         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2369                 leaf = path->nodes[i];
2370                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2371                         path->slots[i]++;
2372                         *level = i;
2373                         return 0;
2374                 } else {
2375                         free_extent_buffer(path->nodes[*level]);
2376                         path->nodes[*level] = NULL;
2377                         BUG_ON(*level > wc->active_node);
2378                         if (*level == wc->active_node)
2379                                 leave_shared_node(root, wc, *level);
2380                         *level = i + 1;
2381                 }
2382         }
2383         return 1;
2384 }
2385
2386 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2387                            int *level)
2388 {
2389         int i;
2390         struct extent_buffer *leaf;
2391
2392         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2393                 leaf = path->nodes[i];
2394                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2395                         path->slots[i]++;
2396                         *level = i;
2397                         return 0;
2398                 } else {
2399                         free_extent_buffer(path->nodes[*level]);
2400                         path->nodes[*level] = NULL;
2401                         *level = i + 1;
2402                 }
2403         }
2404         return 1;
2405 }
2406
2407 static int check_root_dir(struct inode_record *rec)
2408 {
2409         struct inode_backref *backref;
2410         int ret = -1;
2411
2412         if (!rec->found_inode_item || rec->errors)
2413                 goto out;
2414         if (rec->nlink != 1 || rec->found_link != 0)
2415                 goto out;
2416         if (list_empty(&rec->backrefs))
2417                 goto out;
2418         backref = to_inode_backref(rec->backrefs.next);
2419         if (!backref->found_inode_ref)
2420                 goto out;
2421         if (backref->index != 0 || backref->namelen != 2 ||
2422             memcmp(backref->name, "..", 2))
2423                 goto out;
2424         if (backref->found_dir_index || backref->found_dir_item)
2425                 goto out;
2426         ret = 0;
2427 out:
2428         return ret;
2429 }
2430
2431 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2432                               struct btrfs_root *root, struct btrfs_path *path,
2433                               struct inode_record *rec)
2434 {
2435         struct btrfs_inode_item *ei;
2436         struct btrfs_key key;
2437         int ret;
2438
2439         key.objectid = rec->ino;
2440         key.type = BTRFS_INODE_ITEM_KEY;
2441         key.offset = (u64)-1;
2442
2443         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2444         if (ret < 0)
2445                 goto out;
2446         if (ret) {
2447                 if (!path->slots[0]) {
2448                         ret = -ENOENT;
2449                         goto out;
2450                 }
2451                 path->slots[0]--;
2452                 ret = 0;
2453         }
2454         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2455         if (key.objectid != rec->ino) {
2456                 ret = -ENOENT;
2457                 goto out;
2458         }
2459
2460         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2461                             struct btrfs_inode_item);
2462         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2463         btrfs_mark_buffer_dirty(path->nodes[0]);
2464         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2465         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2466                root->root_key.objectid);
2467 out:
2468         btrfs_release_path(path);
2469         return ret;
2470 }
2471
2472 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2473                                     struct btrfs_root *root,
2474                                     struct btrfs_path *path,
2475                                     struct inode_record *rec)
2476 {
2477         int ret;
2478
2479         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2480         btrfs_release_path(path);
2481         if (!ret)
2482                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2483         return ret;
2484 }
2485
2486 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2487                                struct btrfs_root *root,
2488                                struct btrfs_path *path,
2489                                struct inode_record *rec)
2490 {
2491         struct btrfs_inode_item *ei;
2492         struct btrfs_key key;
2493         int ret = 0;
2494
2495         key.objectid = rec->ino;
2496         key.type = BTRFS_INODE_ITEM_KEY;
2497         key.offset = 0;
2498
2499         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2500         if (ret) {
2501                 if (ret > 0)
2502                         ret = -ENOENT;
2503                 goto out;
2504         }
2505
2506         /* Since ret == 0, no need to check anything */
2507         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2508                             struct btrfs_inode_item);
2509         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2510         btrfs_mark_buffer_dirty(path->nodes[0]);
2511         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2512         printf("reset nbytes for ino %llu root %llu\n",
2513                rec->ino, root->root_key.objectid);
2514 out:
2515         btrfs_release_path(path);
2516         return ret;
2517 }
2518
2519 static int add_missing_dir_index(struct btrfs_root *root,
2520                                  struct cache_tree *inode_cache,
2521                                  struct inode_record *rec,
2522                                  struct inode_backref *backref)
2523 {
2524         struct btrfs_path path;
2525         struct btrfs_trans_handle *trans;
2526         struct btrfs_dir_item *dir_item;
2527         struct extent_buffer *leaf;
2528         struct btrfs_key key;
2529         struct btrfs_disk_key disk_key;
2530         struct inode_record *dir_rec;
2531         unsigned long name_ptr;
2532         u32 data_size = sizeof(*dir_item) + backref->namelen;
2533         int ret;
2534
2535         trans = btrfs_start_transaction(root, 1);
2536         if (IS_ERR(trans))
2537                 return PTR_ERR(trans);
2538
2539         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2540                 (unsigned long long)rec->ino);
2541
2542         btrfs_init_path(&path);
2543         key.objectid = backref->dir;
2544         key.type = BTRFS_DIR_INDEX_KEY;
2545         key.offset = backref->index;
2546         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2547         BUG_ON(ret);
2548
2549         leaf = path.nodes[0];
2550         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2551
2552         disk_key.objectid = cpu_to_le64(rec->ino);
2553         disk_key.type = BTRFS_INODE_ITEM_KEY;
2554         disk_key.offset = 0;
2555
2556         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2557         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2558         btrfs_set_dir_data_len(leaf, dir_item, 0);
2559         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2560         name_ptr = (unsigned long)(dir_item + 1);
2561         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2562         btrfs_mark_buffer_dirty(leaf);
2563         btrfs_release_path(&path);
2564         btrfs_commit_transaction(trans, root);
2565
2566         backref->found_dir_index = 1;
2567         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2568         BUG_ON(IS_ERR(dir_rec));
2569         if (!dir_rec)
2570                 return 0;
2571         dir_rec->found_size += backref->namelen;
2572         if (dir_rec->found_size == dir_rec->isize &&
2573             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2574                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2575         if (dir_rec->found_size != dir_rec->isize)
2576                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2577
2578         return 0;
2579 }
2580
2581 static int delete_dir_index(struct btrfs_root *root,
2582                             struct inode_backref *backref)
2583 {
2584         struct btrfs_trans_handle *trans;
2585         struct btrfs_dir_item *di;
2586         struct btrfs_path path;
2587         int ret = 0;
2588
2589         trans = btrfs_start_transaction(root, 1);
2590         if (IS_ERR(trans))
2591                 return PTR_ERR(trans);
2592
2593         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2594                 (unsigned long long)backref->dir,
2595                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2596                 (unsigned long long)root->objectid);
2597
2598         btrfs_init_path(&path);
2599         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2600                                     backref->name, backref->namelen,
2601                                     backref->index, -1);
2602         if (IS_ERR(di)) {
2603                 ret = PTR_ERR(di);
2604                 btrfs_release_path(&path);
2605                 btrfs_commit_transaction(trans, root);
2606                 if (ret == -ENOENT)
2607                         return 0;
2608                 return ret;
2609         }
2610
2611         if (!di)
2612                 ret = btrfs_del_item(trans, root, &path);
2613         else
2614                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2615         BUG_ON(ret);
2616         btrfs_release_path(&path);
2617         btrfs_commit_transaction(trans, root);
2618         return ret;
2619 }
2620
2621 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2622                                     struct btrfs_root *root, u64 ino,
2623                                     u8 filetype)
2624 {
2625         u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2626
2627         return insert_inode_item(trans, root, ino, 0, 0, 0, mode);
2628 }
2629
2630 static int create_inode_item(struct btrfs_root *root,
2631                              struct inode_record *rec, int root_dir)
2632 {
2633         struct btrfs_trans_handle *trans;
2634         u64 nlink = 0;
2635         u32 mode = 0;
2636         u64 size = 0;
2637         int ret;
2638
2639         trans = btrfs_start_transaction(root, 1);
2640         if (IS_ERR(trans)) {
2641                 ret = PTR_ERR(trans);
2642                 return ret;
2643         }
2644
2645         nlink = root_dir ? 1 : rec->found_link;
2646         if (rec->found_dir_item) {
2647                 if (rec->found_file_extent)
2648                         fprintf(stderr, "root %llu inode %llu has both a dir "
2649                                 "item and extents, unsure if it is a dir or a "
2650                                 "regular file so setting it as a directory\n",
2651                                 (unsigned long long)root->objectid,
2652                                 (unsigned long long)rec->ino);
2653                 mode = S_IFDIR | 0755;
2654                 size = rec->found_size;
2655         } else if (!rec->found_dir_item) {
2656                 size = rec->extent_end;
2657                 mode =  S_IFREG | 0755;
2658         }
2659
2660         ret = insert_inode_item(trans, root, rec->ino, size, rec->nbytes,
2661                                   nlink, mode);
2662         btrfs_commit_transaction(trans, root);
2663         return 0;
2664 }
2665
2666 static int repair_inode_backrefs(struct btrfs_root *root,
2667                                  struct inode_record *rec,
2668                                  struct cache_tree *inode_cache,
2669                                  int delete)
2670 {
2671         struct inode_backref *tmp, *backref;
2672         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2673         int ret = 0;
2674         int repaired = 0;
2675
2676         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2677                 if (!delete && rec->ino == root_dirid) {
2678                         if (!rec->found_inode_item) {
2679                                 ret = create_inode_item(root, rec, 1);
2680                                 if (ret)
2681                                         break;
2682                                 repaired++;
2683                         }
2684                 }
2685
2686                 /* Index 0 for root dir's are special, don't mess with it */
2687                 if (rec->ino == root_dirid && backref->index == 0)
2688                         continue;
2689
2690                 if (delete &&
2691                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2692                      (backref->found_dir_index && backref->found_inode_ref &&
2693                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2694                         ret = delete_dir_index(root, backref);
2695                         if (ret)
2696                                 break;
2697                         repaired++;
2698                         list_del(&backref->list);
2699                         free(backref);
2700                         continue;
2701                 }
2702
2703                 if (!delete && !backref->found_dir_index &&
2704                     backref->found_dir_item && backref->found_inode_ref) {
2705                         ret = add_missing_dir_index(root, inode_cache, rec,
2706                                                     backref);
2707                         if (ret)
2708                                 break;
2709                         repaired++;
2710                         if (backref->found_dir_item &&
2711                             backref->found_dir_index) {
2712                                 if (!backref->errors &&
2713                                     backref->found_inode_ref) {
2714                                         list_del(&backref->list);
2715                                         free(backref);
2716                                         continue;
2717                                 }
2718                         }
2719                 }
2720
2721                 if (!delete && (!backref->found_dir_index &&
2722                                 !backref->found_dir_item &&
2723                                 backref->found_inode_ref)) {
2724                         struct btrfs_trans_handle *trans;
2725                         struct btrfs_key location;
2726
2727                         ret = check_dir_conflict(root, backref->name,
2728                                                  backref->namelen,
2729                                                  backref->dir,
2730                                                  backref->index);
2731                         if (ret) {
2732                                 /*
2733                                  * let nlink fixing routine to handle it,
2734                                  * which can do it better.
2735                                  */
2736                                 ret = 0;
2737                                 break;
2738                         }
2739                         location.objectid = rec->ino;
2740                         location.type = BTRFS_INODE_ITEM_KEY;
2741                         location.offset = 0;
2742
2743                         trans = btrfs_start_transaction(root, 1);
2744                         if (IS_ERR(trans)) {
2745                                 ret = PTR_ERR(trans);
2746                                 break;
2747                         }
2748                         fprintf(stderr, "adding missing dir index/item pair "
2749                                 "for inode %llu\n",
2750                                 (unsigned long long)rec->ino);
2751                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2752                                                     backref->namelen,
2753                                                     backref->dir, &location,
2754                                                     imode_to_type(rec->imode),
2755                                                     backref->index);
2756                         BUG_ON(ret);
2757                         btrfs_commit_transaction(trans, root);
2758                         repaired++;
2759                 }
2760
2761                 if (!delete && (backref->found_inode_ref &&
2762                                 backref->found_dir_index &&
2763                                 backref->found_dir_item &&
2764                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2765                                 !rec->found_inode_item)) {
2766                         ret = create_inode_item(root, rec, 0);
2767                         if (ret)
2768                                 break;
2769                         repaired++;
2770                 }
2771
2772         }
2773         return ret ? ret : repaired;
2774 }
2775
2776 /*
2777  * To determine the file type for nlink/inode_item repair
2778  *
2779  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2780  * Return -ENOENT if file type is not found.
2781  */
2782 static int find_file_type(struct inode_record *rec, u8 *type)
2783 {
2784         struct inode_backref *backref;
2785
2786         /* For inode item recovered case */
2787         if (rec->found_inode_item) {
2788                 *type = imode_to_type(rec->imode);
2789                 return 0;
2790         }
2791
2792         list_for_each_entry(backref, &rec->backrefs, list) {
2793                 if (backref->found_dir_index || backref->found_dir_item) {
2794                         *type = backref->filetype;
2795                         return 0;
2796                 }
2797         }
2798         return -ENOENT;
2799 }
2800
2801 /*
2802  * To determine the file name for nlink repair
2803  *
2804  * Return 0 if file name is found, set name and namelen.
2805  * Return -ENOENT if file name is not found.
2806  */
2807 static int find_file_name(struct inode_record *rec,
2808                           char *name, int *namelen)
2809 {
2810         struct inode_backref *backref;
2811
2812         list_for_each_entry(backref, &rec->backrefs, list) {
2813                 if (backref->found_dir_index || backref->found_dir_item ||
2814                     backref->found_inode_ref) {
2815                         memcpy(name, backref->name, backref->namelen);
2816                         *namelen = backref->namelen;
2817                         return 0;
2818                 }
2819         }
2820         return -ENOENT;
2821 }
2822
2823 /* Reset the nlink of the inode to the correct one */
2824 static int reset_nlink(struct btrfs_trans_handle *trans,
2825                        struct btrfs_root *root,
2826                        struct btrfs_path *path,
2827                        struct inode_record *rec)
2828 {
2829         struct inode_backref *backref;
2830         struct inode_backref *tmp;
2831         struct btrfs_key key;
2832         struct btrfs_inode_item *inode_item;
2833         int ret = 0;
2834
2835         /* We don't believe this either, reset it and iterate backref */
2836         rec->found_link = 0;
2837
2838         /* Remove all backref including the valid ones */
2839         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2840                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2841                                    backref->index, backref->name,
2842                                    backref->namelen, 0);
2843                 if (ret < 0)
2844                         goto out;
2845
2846                 /* remove invalid backref, so it won't be added back */
2847                 if (!(backref->found_dir_index &&
2848                       backref->found_dir_item &&
2849                       backref->found_inode_ref)) {
2850                         list_del(&backref->list);
2851                         free(backref);
2852                 } else {
2853                         rec->found_link++;
2854                 }
2855         }
2856
2857         /* Set nlink to 0 */
2858         key.objectid = rec->ino;
2859         key.type = BTRFS_INODE_ITEM_KEY;
2860         key.offset = 0;
2861         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2862         if (ret < 0)
2863                 goto out;
2864         if (ret > 0) {
2865                 ret = -ENOENT;
2866                 goto out;
2867         }
2868         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2869                                     struct btrfs_inode_item);
2870         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2871         btrfs_mark_buffer_dirty(path->nodes[0]);
2872         btrfs_release_path(path);
2873
2874         /*
2875          * Add back valid inode_ref/dir_item/dir_index,
2876          * add_link() will handle the nlink inc, so new nlink must be correct
2877          */
2878         list_for_each_entry(backref, &rec->backrefs, list) {
2879                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2880                                      backref->name, backref->namelen,
2881                                      backref->filetype, &backref->index, 1, 0);
2882                 if (ret < 0)
2883                         goto out;
2884         }
2885 out:
2886         btrfs_release_path(path);
2887         return ret;
2888 }
2889
2890 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2891                                struct btrfs_root *root,
2892                                struct btrfs_path *path,
2893                                struct inode_record *rec)
2894 {
2895         char namebuf[BTRFS_NAME_LEN] = {0};
2896         u8 type = 0;
2897         int namelen = 0;
2898         int name_recovered = 0;
2899         int type_recovered = 0;
2900         int ret = 0;
2901
2902         /*
2903          * Get file name and type first before these invalid inode ref
2904          * are deleted by remove_all_invalid_backref()
2905          */
2906         name_recovered = !find_file_name(rec, namebuf, &namelen);
2907         type_recovered = !find_file_type(rec, &type);
2908
2909         if (!name_recovered) {
2910                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2911                        rec->ino, rec->ino);
2912                 namelen = count_digits(rec->ino);
2913                 sprintf(namebuf, "%llu", rec->ino);
2914                 name_recovered = 1;
2915         }
2916         if (!type_recovered) {
2917                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2918                        rec->ino);
2919                 type = BTRFS_FT_REG_FILE;
2920                 type_recovered = 1;
2921         }
2922
2923         ret = reset_nlink(trans, root, path, rec);
2924         if (ret < 0) {
2925                 fprintf(stderr,
2926                         "Failed to reset nlink for inode %llu: %s\n",
2927                         rec->ino, strerror(-ret));
2928                 goto out;
2929         }
2930
2931         if (rec->found_link == 0) {
2932                 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
2933                                               namebuf, namelen, type,
2934                                               (u64 *)&rec->found_link);
2935                 if (ret)
2936                         goto out;
2937         }
2938         printf("Fixed the nlink of inode %llu\n", rec->ino);
2939 out:
2940         /*
2941          * Clear the flag anyway, or we will loop forever for the same inode
2942          * as it will not be removed from the bad inode list and the dead loop
2943          * happens.
2944          */
2945         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2946         btrfs_release_path(path);
2947         return ret;
2948 }
2949
2950 /*
2951  * Check if there is any normal(reg or prealloc) file extent for given
2952  * ino.
2953  * This is used to determine the file type when neither its dir_index/item or
2954  * inode_item exists.
2955  *
2956  * This will *NOT* report error, if any error happens, just consider it does
2957  * not have any normal file extent.
2958  */
2959 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2960 {
2961         struct btrfs_path path;
2962         struct btrfs_key key;
2963         struct btrfs_key found_key;
2964         struct btrfs_file_extent_item *fi;
2965         u8 type;
2966         int ret = 0;
2967
2968         btrfs_init_path(&path);
2969         key.objectid = ino;
2970         key.type = BTRFS_EXTENT_DATA_KEY;
2971         key.offset = 0;
2972
2973         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2974         if (ret < 0) {
2975                 ret = 0;
2976                 goto out;
2977         }
2978         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2979                 ret = btrfs_next_leaf(root, &path);
2980                 if (ret) {
2981                         ret = 0;
2982                         goto out;
2983                 }
2984         }
2985         while (1) {
2986                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2987                                       path.slots[0]);
2988                 if (found_key.objectid != ino ||
2989                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2990                         break;
2991                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2992                                     struct btrfs_file_extent_item);
2993                 type = btrfs_file_extent_type(path.nodes[0], fi);
2994                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2995                         ret = 1;
2996                         goto out;
2997                 }
2998         }
2999 out:
3000         btrfs_release_path(&path);
3001         return ret;
3002 }
3003
3004 static u32 btrfs_type_to_imode(u8 type)
3005 {
3006         static u32 imode_by_btrfs_type[] = {
3007                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3008                 [BTRFS_FT_DIR]          = S_IFDIR,
3009                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3010                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3011                 [BTRFS_FT_FIFO]         = S_IFIFO,
3012                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3013                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3014         };
3015
3016         return imode_by_btrfs_type[(type)];
3017 }
3018
3019 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3020                                 struct btrfs_root *root,
3021                                 struct btrfs_path *path,
3022                                 struct inode_record *rec)
3023 {
3024         u8 filetype;
3025         u32 mode = 0700;
3026         int type_recovered = 0;
3027         int ret = 0;
3028
3029         printf("Trying to rebuild inode:%llu\n", rec->ino);
3030
3031         type_recovered = !find_file_type(rec, &filetype);
3032
3033         /*
3034          * Try to determine inode type if type not found.
3035          *
3036          * For found regular file extent, it must be FILE.
3037          * For found dir_item/index, it must be DIR.
3038          *
3039          * For undetermined one, use FILE as fallback.
3040          *
3041          * TODO:
3042          * 1. If found backref(inode_index/item is already handled) to it,
3043          *    it must be DIR.
3044          *    Need new inode-inode ref structure to allow search for that.
3045          */
3046         if (!type_recovered) {
3047                 if (rec->found_file_extent &&
3048                     find_normal_file_extent(root, rec->ino)) {
3049                         type_recovered = 1;
3050                         filetype = BTRFS_FT_REG_FILE;
3051                 } else if (rec->found_dir_item) {
3052                         type_recovered = 1;
3053                         filetype = BTRFS_FT_DIR;
3054                 } else if (!list_empty(&rec->orphan_extents)) {
3055                         type_recovered = 1;
3056                         filetype = BTRFS_FT_REG_FILE;
3057                 } else{
3058                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3059                                rec->ino);
3060                         type_recovered = 1;
3061                         filetype = BTRFS_FT_REG_FILE;
3062                 }
3063         }
3064
3065         ret = btrfs_new_inode(trans, root, rec->ino,
3066                               mode | btrfs_type_to_imode(filetype));
3067         if (ret < 0)
3068                 goto out;
3069
3070         /*
3071          * Here inode rebuild is done, we only rebuild the inode item,
3072          * don't repair the nlink(like move to lost+found).
3073          * That is the job of nlink repair.
3074          *
3075          * We just fill the record and return
3076          */
3077         rec->found_dir_item = 1;
3078         rec->imode = mode | btrfs_type_to_imode(filetype);
3079         rec->nlink = 0;
3080         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3081         /* Ensure the inode_nlinks repair function will be called */
3082         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3083 out:
3084         return ret;
3085 }
3086
3087 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3088                                       struct btrfs_root *root,
3089                                       struct btrfs_path *path,
3090                                       struct inode_record *rec)
3091 {
3092         struct orphan_data_extent *orphan;
3093         struct orphan_data_extent *tmp;
3094         int ret = 0;
3095
3096         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3097                 /*
3098                  * Check for conflicting file extents
3099                  *
3100                  * Here we don't know whether the extents is compressed or not,
3101                  * so we can only assume it not compressed nor data offset,
3102                  * and use its disk_len as extent length.
3103                  */
3104                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3105                                        orphan->offset, orphan->disk_len, 0);
3106                 btrfs_release_path(path);
3107                 if (ret < 0)
3108                         goto out;
3109                 if (!ret) {
3110                         fprintf(stderr,
3111                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3112                                 orphan->disk_bytenr, orphan->disk_len);
3113                         ret = btrfs_free_extent(trans,
3114                                         root->fs_info->extent_root,
3115                                         orphan->disk_bytenr, orphan->disk_len,
3116                                         0, root->objectid, orphan->objectid,
3117                                         orphan->offset);
3118                         if (ret < 0)
3119                                 goto out;
3120                 }
3121                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3122                                 orphan->offset, orphan->disk_bytenr,
3123                                 orphan->disk_len, orphan->disk_len);
3124                 if (ret < 0)
3125                         goto out;
3126
3127                 /* Update file size info */
3128                 rec->found_size += orphan->disk_len;
3129                 if (rec->found_size == rec->nbytes)
3130                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3131
3132                 /* Update the file extent hole info too */
3133                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3134                                            orphan->disk_len);
3135                 if (ret < 0)
3136                         goto out;
3137                 if (RB_EMPTY_ROOT(&rec->holes))
3138                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3139
3140                 list_del(&orphan->list);
3141                 free(orphan);
3142         }
3143         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3144 out:
3145         return ret;
3146 }
3147
3148 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3149                                         struct btrfs_root *root,
3150                                         struct btrfs_path *path,
3151                                         struct inode_record *rec)
3152 {
3153         struct rb_node *node;
3154         struct file_extent_hole *hole;
3155         int found = 0;
3156         int ret = 0;
3157
3158         node = rb_first(&rec->holes);
3159
3160         while (node) {
3161                 found = 1;
3162                 hole = rb_entry(node, struct file_extent_hole, node);
3163                 ret = btrfs_punch_hole(trans, root, rec->ino,
3164                                        hole->start, hole->len);
3165                 if (ret < 0)
3166                         goto out;
3167                 ret = del_file_extent_hole(&rec->holes, hole->start,
3168                                            hole->len);
3169                 if (ret < 0)
3170                         goto out;
3171                 if (RB_EMPTY_ROOT(&rec->holes))
3172                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3173                 node = rb_first(&rec->holes);
3174         }
3175         /* special case for a file losing all its file extent */
3176         if (!found) {
3177                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3178                                        round_up(rec->isize,
3179                                                 root->fs_info->sectorsize));
3180                 if (ret < 0)
3181                         goto out;
3182         }
3183         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3184                rec->ino, root->objectid);
3185 out:
3186         return ret;
3187 }
3188
3189 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3190 {
3191         struct btrfs_trans_handle *trans;
3192         struct btrfs_path path;
3193         int ret = 0;
3194
3195         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3196                              I_ERR_NO_ORPHAN_ITEM |
3197                              I_ERR_LINK_COUNT_WRONG |
3198                              I_ERR_NO_INODE_ITEM |
3199                              I_ERR_FILE_EXTENT_ORPHAN |
3200                              I_ERR_FILE_EXTENT_DISCOUNT|
3201                              I_ERR_FILE_NBYTES_WRONG)))
3202                 return rec->errors;
3203
3204         /*
3205          * For nlink repair, it may create a dir and add link, so
3206          * 2 for parent(256)'s dir_index and dir_item
3207          * 2 for lost+found dir's inode_item and inode_ref
3208          * 1 for the new inode_ref of the file
3209          * 2 for lost+found dir's dir_index and dir_item for the file
3210          */
3211         trans = btrfs_start_transaction(root, 7);
3212         if (IS_ERR(trans))
3213                 return PTR_ERR(trans);
3214
3215         btrfs_init_path(&path);
3216         if (rec->errors & I_ERR_NO_INODE_ITEM)
3217                 ret = repair_inode_no_item(trans, root, &path, rec);
3218         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3219                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3220         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3221                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3222         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3223                 ret = repair_inode_isize(trans, root, &path, rec);
3224         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3225                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3226         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3227                 ret = repair_inode_nlinks(trans, root, &path, rec);
3228         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3229                 ret = repair_inode_nbytes(trans, root, &path, rec);
3230         btrfs_commit_transaction(trans, root);
3231         btrfs_release_path(&path);
3232         return ret;
3233 }
3234
3235 static int check_inode_recs(struct btrfs_root *root,
3236                             struct cache_tree *inode_cache)
3237 {
3238         struct cache_extent *cache;
3239         struct ptr_node *node;
3240         struct inode_record *rec;
3241         struct inode_backref *backref;
3242         int stage = 0;
3243         int ret = 0;
3244         int err = 0;
3245         u64 error = 0;
3246         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3247
3248         if (btrfs_root_refs(&root->root_item) == 0) {
3249                 if (!cache_tree_empty(inode_cache))
3250                         fprintf(stderr, "warning line %d\n", __LINE__);
3251                 return 0;
3252         }
3253
3254         /*
3255          * We need to repair backrefs first because we could change some of the
3256          * errors in the inode recs.
3257          *
3258          * We also need to go through and delete invalid backrefs first and then
3259          * add the correct ones second.  We do this because we may get EEXIST
3260          * when adding back the correct index because we hadn't yet deleted the
3261          * invalid index.
3262          *
3263          * For example, if we were missing a dir index then the directories
3264          * isize would be wrong, so if we fixed the isize to what we thought it
3265          * would be and then fixed the backref we'd still have a invalid fs, so
3266          * we need to add back the dir index and then check to see if the isize
3267          * is still wrong.
3268          */
3269         while (stage < 3) {
3270                 stage++;
3271                 if (stage == 3 && !err)
3272                         break;
3273
3274                 cache = search_cache_extent(inode_cache, 0);
3275                 while (repair && cache) {
3276                         node = container_of(cache, struct ptr_node, cache);
3277                         rec = node->data;
3278                         cache = next_cache_extent(cache);
3279
3280                         /* Need to free everything up and rescan */
3281                         if (stage == 3) {
3282                                 remove_cache_extent(inode_cache, &node->cache);
3283                                 free(node);
3284                                 free_inode_rec(rec);
3285                                 continue;
3286                         }
3287
3288                         if (list_empty(&rec->backrefs))
3289                                 continue;
3290
3291                         ret = repair_inode_backrefs(root, rec, inode_cache,
3292                                                     stage == 1);
3293                         if (ret < 0) {
3294                                 err = ret;
3295                                 stage = 2;
3296                                 break;
3297                         } if (ret > 0) {
3298                                 err = -EAGAIN;
3299                         }
3300                 }
3301         }
3302         if (err)
3303                 return err;
3304
3305         rec = get_inode_rec(inode_cache, root_dirid, 0);
3306         BUG_ON(IS_ERR(rec));
3307         if (rec) {
3308                 ret = check_root_dir(rec);
3309                 if (ret) {
3310                         fprintf(stderr, "root %llu root dir %llu error\n",
3311                                 (unsigned long long)root->root_key.objectid,
3312                                 (unsigned long long)root_dirid);
3313                         print_inode_error(root, rec);
3314                         error++;
3315                 }
3316         } else {
3317                 if (repair) {
3318                         struct btrfs_trans_handle *trans;
3319
3320                         trans = btrfs_start_transaction(root, 1);
3321                         if (IS_ERR(trans)) {
3322                                 err = PTR_ERR(trans);
3323                                 return err;
3324                         }
3325
3326                         fprintf(stderr,
3327                                 "root %llu missing its root dir, recreating\n",
3328                                 (unsigned long long)root->objectid);
3329
3330                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3331                         BUG_ON(ret);
3332
3333                         btrfs_commit_transaction(trans, root);
3334                         return -EAGAIN;
3335                 }
3336
3337                 fprintf(stderr, "root %llu root dir %llu not found\n",
3338                         (unsigned long long)root->root_key.objectid,
3339                         (unsigned long long)root_dirid);
3340         }
3341
3342         while (1) {
3343                 cache = search_cache_extent(inode_cache, 0);
3344                 if (!cache)
3345                         break;
3346                 node = container_of(cache, struct ptr_node, cache);
3347                 rec = node->data;
3348                 remove_cache_extent(inode_cache, &node->cache);
3349                 free(node);
3350                 if (rec->ino == root_dirid ||
3351                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3352                         free_inode_rec(rec);
3353                         continue;
3354                 }
3355
3356                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3357                         ret = check_orphan_item(root, rec->ino);
3358                         if (ret == 0)
3359                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3360                         if (can_free_inode_rec(rec)) {
3361                                 free_inode_rec(rec);
3362                                 continue;
3363                         }
3364                 }
3365
3366                 if (!rec->found_inode_item)
3367                         rec->errors |= I_ERR_NO_INODE_ITEM;
3368                 if (rec->found_link != rec->nlink)
3369                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3370                 if (repair) {
3371                         ret = try_repair_inode(root, rec);
3372                         if (ret == 0 && can_free_inode_rec(rec)) {
3373                                 free_inode_rec(rec);
3374                                 continue;
3375                         }
3376                         ret = 0;
3377                 }
3378
3379                 if (!(repair && ret == 0))
3380                         error++;
3381                 print_inode_error(root, rec);
3382                 list_for_each_entry(backref, &rec->backrefs, list) {
3383                         if (!backref->found_dir_item)
3384                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3385                         if (!backref->found_dir_index)
3386                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3387                         if (!backref->found_inode_ref)
3388                                 backref->errors |= REF_ERR_NO_INODE_REF;
3389                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3390                                 " namelen %u name %s filetype %d errors %x",
3391                                 (unsigned long long)backref->dir,
3392                                 (unsigned long long)backref->index,
3393                                 backref->namelen, backref->name,
3394                                 backref->filetype, backref->errors);
3395                         print_ref_error(backref->errors);
3396                 }
3397                 free_inode_rec(rec);
3398         }
3399         return (error > 0) ? -1 : 0;
3400 }
3401
3402 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3403                                         u64 objectid)
3404 {
3405         struct cache_extent *cache;
3406         struct root_record *rec = NULL;
3407         int ret;
3408
3409         cache = lookup_cache_extent(root_cache, objectid, 1);
3410         if (cache) {
3411                 rec = container_of(cache, struct root_record, cache);
3412         } else {
3413                 rec = calloc(1, sizeof(*rec));
3414                 if (!rec)
3415                         return ERR_PTR(-ENOMEM);
3416                 rec->objectid = objectid;
3417                 INIT_LIST_HEAD(&rec->backrefs);
3418                 rec->cache.start = objectid;
3419                 rec->cache.size = 1;
3420
3421                 ret = insert_cache_extent(root_cache, &rec->cache);
3422                 if (ret)
3423                         return ERR_PTR(-EEXIST);
3424         }
3425         return rec;
3426 }
3427
3428 static struct root_backref *get_root_backref(struct root_record *rec,
3429                                              u64 ref_root, u64 dir, u64 index,
3430                                              const char *name, int namelen)
3431 {
3432         struct root_backref *backref;
3433
3434         list_for_each_entry(backref, &rec->backrefs, list) {
3435                 if (backref->ref_root != ref_root || backref->dir != dir ||
3436                     backref->namelen != namelen)
3437                         continue;
3438                 if (memcmp(name, backref->name, namelen))
3439                         continue;
3440                 return backref;
3441         }
3442
3443         backref = calloc(1, sizeof(*backref) + namelen + 1);
3444         if (!backref)
3445                 return NULL;
3446         backref->ref_root = ref_root;
3447         backref->dir = dir;
3448         backref->index = index;
3449         backref->namelen = namelen;
3450         memcpy(backref->name, name, namelen);
3451         backref->name[namelen] = '\0';
3452         list_add_tail(&backref->list, &rec->backrefs);
3453         return backref;
3454 }
3455
3456 static void free_root_record(struct cache_extent *cache)
3457 {
3458         struct root_record *rec;
3459         struct root_backref *backref;
3460
3461         rec = container_of(cache, struct root_record, cache);
3462         while (!list_empty(&rec->backrefs)) {
3463                 backref = to_root_backref(rec->backrefs.next);
3464                 list_del(&backref->list);
3465                 free(backref);
3466         }
3467
3468         free(rec);
3469 }
3470
3471 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3472
3473 static int add_root_backref(struct cache_tree *root_cache,
3474                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3475                             const char *name, int namelen,
3476                             int item_type, int errors)
3477 {
3478         struct root_record *rec;
3479         struct root_backref *backref;
3480
3481         rec = get_root_rec(root_cache, root_id);
3482         BUG_ON(IS_ERR(rec));
3483         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3484         BUG_ON(!backref);
3485
3486         backref->errors |= errors;
3487
3488         if (item_type != BTRFS_DIR_ITEM_KEY) {
3489                 if (backref->found_dir_index || backref->found_back_ref ||
3490                     backref->found_forward_ref) {
3491                         if (backref->index != index)
3492                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3493                 } else {
3494                         backref->index = index;
3495                 }
3496         }
3497
3498         if (item_type == BTRFS_DIR_ITEM_KEY) {
3499                 if (backref->found_forward_ref)
3500                         rec->found_ref++;
3501                 backref->found_dir_item = 1;
3502         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3503                 backref->found_dir_index = 1;
3504         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3505                 if (backref->found_forward_ref)
3506                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3507                 else if (backref->found_dir_item)
3508                         rec->found_ref++;
3509                 backref->found_forward_ref = 1;
3510         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3511                 if (backref->found_back_ref)
3512                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3513                 backref->found_back_ref = 1;
3514         } else {
3515                 BUG_ON(1);
3516         }
3517
3518         if (backref->found_forward_ref && backref->found_dir_item)
3519                 backref->reachable = 1;
3520         return 0;
3521 }
3522
3523 static int merge_root_recs(struct btrfs_root *root,
3524                            struct cache_tree *src_cache,
3525                            struct cache_tree *dst_cache)
3526 {
3527         struct cache_extent *cache;
3528         struct ptr_node *node;
3529         struct inode_record *rec;
3530         struct inode_backref *backref;
3531         int ret = 0;
3532
3533         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3534                 free_inode_recs_tree(src_cache);
3535                 return 0;
3536         }
3537
3538         while (1) {
3539                 cache = search_cache_extent(src_cache, 0);
3540                 if (!cache)
3541                         break;
3542                 node = container_of(cache, struct ptr_node, cache);
3543                 rec = node->data;
3544                 remove_cache_extent(src_cache, &node->cache);
3545                 free(node);
3546
3547                 ret = is_child_root(root, root->objectid, rec->ino);
3548                 if (ret < 0)
3549                         break;
3550                 else if (ret == 0)
3551                         goto skip;
3552
3553                 list_for_each_entry(backref, &rec->backrefs, list) {
3554                         BUG_ON(backref->found_inode_ref);
3555                         if (backref->found_dir_item)
3556                                 add_root_backref(dst_cache, rec->ino,
3557                                         root->root_key.objectid, backref->dir,
3558                                         backref->index, backref->name,
3559                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3560                                         backref->errors);
3561                         if (backref->found_dir_index)
3562                                 add_root_backref(dst_cache, rec->ino,
3563                                         root->root_key.objectid, backref->dir,
3564                                         backref->index, backref->name,
3565                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3566                                         backref->errors);
3567                 }
3568 skip:
3569                 free_inode_rec(rec);
3570         }
3571         if (ret < 0)
3572                 return ret;
3573         return 0;
3574 }
3575
3576 static int check_root_refs(struct btrfs_root *root,
3577                            struct cache_tree *root_cache)
3578 {
3579         struct root_record *rec;
3580         struct root_record *ref_root;
3581         struct root_backref *backref;
3582         struct cache_extent *cache;
3583         int loop = 1;
3584         int ret;
3585         int error;
3586         int errors = 0;
3587
3588         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3589         BUG_ON(IS_ERR(rec));
3590         rec->found_ref = 1;
3591
3592         /* fixme: this can not detect circular references */
3593         while (loop) {
3594                 loop = 0;
3595                 cache = search_cache_extent(root_cache, 0);
3596                 while (1) {
3597                         if (!cache)
3598                                 break;
3599                         rec = container_of(cache, struct root_record, cache);
3600                         cache = next_cache_extent(cache);
3601
3602                         if (rec->found_ref == 0)
3603                                 continue;
3604
3605                         list_for_each_entry(backref, &rec->backrefs, list) {
3606                                 if (!backref->reachable)
3607                                         continue;
3608
3609                                 ref_root = get_root_rec(root_cache,
3610                                                         backref->ref_root);
3611                                 BUG_ON(IS_ERR(ref_root));
3612                                 if (ref_root->found_ref > 0)
3613                                         continue;
3614
3615                                 backref->reachable = 0;
3616                                 rec->found_ref--;
3617                                 if (rec->found_ref == 0)
3618                                         loop = 1;
3619                         }
3620                 }
3621         }
3622
3623         cache = search_cache_extent(root_cache, 0);
3624         while (1) {
3625                 if (!cache)
3626                         break;
3627                 rec = container_of(cache, struct root_record, cache);
3628                 cache = next_cache_extent(cache);
3629
3630                 if (rec->found_ref == 0 &&
3631                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3632                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3633                         ret = check_orphan_item(root->fs_info->tree_root,
3634                                                 rec->objectid);
3635                         if (ret == 0)
3636                                 continue;
3637
3638                         /*
3639                          * If we don't have a root item then we likely just have
3640                          * a dir item in a snapshot for this root but no actual
3641                          * ref key or anything so it's meaningless.
3642                          */
3643                         if (!rec->found_root_item)
3644                                 continue;
3645                         errors++;
3646                         fprintf(stderr, "fs tree %llu not referenced\n",
3647                                 (unsigned long long)rec->objectid);
3648                 }
3649
3650                 error = 0;
3651                 if (rec->found_ref > 0 && !rec->found_root_item)
3652                         error = 1;
3653                 list_for_each_entry(backref, &rec->backrefs, list) {
3654                         if (!backref->found_dir_item)
3655                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3656                         if (!backref->found_dir_index)
3657                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3658                         if (!backref->found_back_ref)
3659                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3660                         if (!backref->found_forward_ref)
3661                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3662                         if (backref->reachable && backref->errors)
3663                                 error = 1;
3664                 }
3665                 if (!error)
3666                         continue;
3667
3668                 errors++;
3669                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3670                         (unsigned long long)rec->objectid, rec->found_ref,
3671                          rec->found_root_item ? "" : "not found");
3672
3673                 list_for_each_entry(backref, &rec->backrefs, list) {
3674                         if (!backref->reachable)
3675                                 continue;
3676                         if (!backref->errors && rec->found_root_item)
3677                                 continue;
3678                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3679                                 " index %llu namelen %u name %s errors %x\n",
3680                                 (unsigned long long)backref->ref_root,
3681                                 (unsigned long long)backref->dir,
3682                                 (unsigned long long)backref->index,
3683                                 backref->namelen, backref->name,
3684                                 backref->errors);
3685                         print_ref_error(backref->errors);
3686                 }
3687         }
3688         return errors > 0 ? 1 : 0;
3689 }
3690
3691 static int process_root_ref(struct extent_buffer *eb, int slot,
3692                             struct btrfs_key *key,
3693                             struct cache_tree *root_cache)
3694 {
3695         u64 dirid;
3696         u64 index;
3697         u32 len;
3698         u32 name_len;
3699         struct btrfs_root_ref *ref;
3700         char namebuf[BTRFS_NAME_LEN];
3701         int error;
3702
3703         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3704
3705         dirid = btrfs_root_ref_dirid(eb, ref);
3706         index = btrfs_root_ref_sequence(eb, ref);
3707         name_len = btrfs_root_ref_name_len(eb, ref);
3708
3709         if (name_len <= BTRFS_NAME_LEN) {
3710                 len = name_len;
3711                 error = 0;
3712         } else {
3713                 len = BTRFS_NAME_LEN;
3714                 error = REF_ERR_NAME_TOO_LONG;
3715         }
3716         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3717
3718         if (key->type == BTRFS_ROOT_REF_KEY) {
3719                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3720                                  index, namebuf, len, key->type, error);
3721         } else {
3722                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3723                                  index, namebuf, len, key->type, error);
3724         }
3725         return 0;
3726 }
3727
3728 static void free_corrupt_block(struct cache_extent *cache)
3729 {
3730         struct btrfs_corrupt_block *corrupt;
3731
3732         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3733         free(corrupt);
3734 }
3735
3736 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3737
3738 /*
3739  * Repair the btree of the given root.
3740  *
3741  * The fix is to remove the node key in corrupt_blocks cache_tree.
3742  * and rebalance the tree.
3743  * After the fix, the btree should be writeable.
3744  */
3745 static int repair_btree(struct btrfs_root *root,
3746                         struct cache_tree *corrupt_blocks)
3747 {
3748         struct btrfs_trans_handle *trans;
3749         struct btrfs_path path;
3750         struct btrfs_corrupt_block *corrupt;
3751         struct cache_extent *cache;
3752         struct btrfs_key key;
3753         u64 offset;
3754         int level;
3755         int ret = 0;
3756
3757         if (cache_tree_empty(corrupt_blocks))
3758                 return 0;
3759
3760         trans = btrfs_start_transaction(root, 1);
3761         if (IS_ERR(trans)) {
3762                 ret = PTR_ERR(trans);
3763                 fprintf(stderr, "Error starting transaction: %s\n",
3764                         strerror(-ret));
3765                 return ret;
3766         }
3767         btrfs_init_path(&path);
3768         cache = first_cache_extent(corrupt_blocks);
3769         while (cache) {
3770                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3771                                        cache);
3772                 level = corrupt->level;
3773                 path.lowest_level = level;
3774                 key.objectid = corrupt->key.objectid;
3775                 key.type = corrupt->key.type;
3776                 key.offset = corrupt->key.offset;
3777
3778                 /*
3779                  * Here we don't want to do any tree balance, since it may
3780                  * cause a balance with corrupted brother leaf/node,
3781                  * so ins_len set to 0 here.
3782                  * Balance will be done after all corrupt node/leaf is deleted.
3783                  */
3784                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3785                 if (ret < 0)
3786                         goto out;
3787                 offset = btrfs_node_blockptr(path.nodes[level],
3788                                              path.slots[level]);
3789
3790                 /* Remove the ptr */
3791                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3792                 if (ret < 0)
3793                         goto out;
3794                 /*
3795                  * Remove the corresponding extent
3796                  * return value is not concerned.
3797                  */
3798                 btrfs_release_path(&path);
3799                 ret = btrfs_free_extent(trans, root, offset,
3800                                 root->fs_info->nodesize, 0,
3801                                 root->root_key.objectid, level - 1, 0);
3802                 cache = next_cache_extent(cache);
3803         }
3804
3805         /* Balance the btree using btrfs_search_slot() */
3806         cache = first_cache_extent(corrupt_blocks);
3807         while (cache) {
3808                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3809                                        cache);
3810                 memcpy(&key, &corrupt->key, sizeof(key));
3811                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3812                 if (ret < 0)
3813                         goto out;
3814                 /* return will always >0 since it won't find the item */
3815                 ret = 0;
3816                 btrfs_release_path(&path);
3817                 cache = next_cache_extent(cache);
3818         }
3819 out:
3820         btrfs_commit_transaction(trans, root);
3821         btrfs_release_path(&path);
3822         return ret;
3823 }
3824
3825 static int check_fs_root(struct btrfs_root *root,
3826                          struct cache_tree *root_cache,
3827                          struct walk_control *wc)
3828 {
3829         int ret = 0;
3830         int err = 0;
3831         int wret;
3832         int level;
3833         struct btrfs_path path;
3834         struct shared_node root_node;
3835         struct root_record *rec;
3836         struct btrfs_root_item *root_item = &root->root_item;
3837         struct cache_tree corrupt_blocks;
3838         struct orphan_data_extent *orphan;
3839         struct orphan_data_extent *tmp;
3840         enum btrfs_tree_block_status status;
3841         struct node_refs nrefs;
3842
3843         /*
3844          * Reuse the corrupt_block cache tree to record corrupted tree block
3845          *
3846          * Unlike the usage in extent tree check, here we do it in a per
3847          * fs/subvol tree base.
3848          */
3849         cache_tree_init(&corrupt_blocks);
3850         root->fs_info->corrupt_blocks = &corrupt_blocks;
3851
3852         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3853                 rec = get_root_rec(root_cache, root->root_key.objectid);
3854                 BUG_ON(IS_ERR(rec));
3855                 if (btrfs_root_refs(root_item) > 0)
3856                         rec->found_root_item = 1;
3857         }
3858
3859         btrfs_init_path(&path);
3860         memset(&root_node, 0, sizeof(root_node));
3861         cache_tree_init(&root_node.root_cache);
3862         cache_tree_init(&root_node.inode_cache);
3863         memset(&nrefs, 0, sizeof(nrefs));
3864
3865         /* Move the orphan extent record to corresponding inode_record */
3866         list_for_each_entry_safe(orphan, tmp,
3867                                  &root->orphan_data_extents, list) {
3868                 struct inode_record *inode;
3869
3870                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3871                                       1);
3872                 BUG_ON(IS_ERR(inode));
3873                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3874                 list_move(&orphan->list, &inode->orphan_extents);
3875         }
3876
3877         level = btrfs_header_level(root->node);
3878         memset(wc->nodes, 0, sizeof(wc->nodes));
3879         wc->nodes[level] = &root_node;
3880         wc->active_node = level;
3881         wc->root_level = level;
3882
3883         /* We may not have checked the root block, lets do that now */
3884         if (btrfs_is_leaf(root->node))
3885                 status = btrfs_check_leaf(root, NULL, root->node);
3886         else
3887                 status = btrfs_check_node(root, NULL, root->node);
3888         if (status != BTRFS_TREE_BLOCK_CLEAN)
3889                 return -EIO;
3890
3891         if (btrfs_root_refs(root_item) > 0 ||
3892             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3893                 path.nodes[level] = root->node;
3894                 extent_buffer_get(root->node);
3895                 path.slots[level] = 0;
3896         } else {
3897                 struct btrfs_key key;
3898                 struct btrfs_disk_key found_key;
3899
3900                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3901                 level = root_item->drop_level;
3902                 path.lowest_level = level;
3903                 if (level > btrfs_header_level(root->node) ||
3904                     level >= BTRFS_MAX_LEVEL) {
3905                         error("ignoring invalid drop level: %u", level);
3906                         goto skip_walking;
3907                 }
3908                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3909                 if (wret < 0)
3910                         goto skip_walking;
3911                 btrfs_node_key(path.nodes[level], &found_key,
3912                                 path.slots[level]);
3913                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3914                                         sizeof(found_key)));
3915         }
3916
3917         while (1) {
3918                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3919                 if (wret < 0)
3920                         ret = wret;
3921                 if (wret != 0)
3922                         break;
3923
3924                 wret = walk_up_tree(root, &path, wc, &level);
3925                 if (wret < 0)
3926                         ret = wret;
3927                 if (wret != 0)
3928                         break;
3929         }
3930 skip_walking:
3931         btrfs_release_path(&path);
3932
3933         if (!cache_tree_empty(&corrupt_blocks)) {
3934                 struct cache_extent *cache;
3935                 struct btrfs_corrupt_block *corrupt;
3936
3937                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3938                        root->root_key.objectid);
3939                 cache = first_cache_extent(&corrupt_blocks);
3940                 while (cache) {
3941                         corrupt = container_of(cache,
3942                                                struct btrfs_corrupt_block,
3943                                                cache);
3944                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3945                                cache->start, corrupt->level,
3946                                corrupt->key.objectid, corrupt->key.type,
3947                                corrupt->key.offset);
3948                         cache = next_cache_extent(cache);
3949                 }
3950                 if (repair) {
3951                         printf("Try to repair the btree for root %llu\n",
3952                                root->root_key.objectid);
3953                         ret = repair_btree(root, &corrupt_blocks);
3954                         if (ret < 0)
3955                                 fprintf(stderr, "Failed to repair btree: %s\n",
3956                                         strerror(-ret));
3957                         if (!ret)
3958                                 printf("Btree for root %llu is fixed\n",
3959                                        root->root_key.objectid);
3960                 }
3961         }
3962
3963         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3964         if (err < 0)
3965                 ret = err;
3966
3967         if (root_node.current) {
3968                 root_node.current->checked = 1;
3969                 maybe_free_inode_rec(&root_node.inode_cache,
3970                                 root_node.current);
3971         }
3972
3973         err = check_inode_recs(root, &root_node.inode_cache);
3974         if (!ret)
3975                 ret = err;
3976
3977         free_corrupt_blocks_tree(&corrupt_blocks);
3978         root->fs_info->corrupt_blocks = NULL;
3979         free_orphan_data_extents(&root->orphan_data_extents);
3980         return ret;
3981 }
3982
3983 static int check_fs_roots(struct btrfs_fs_info *fs_info,
3984                           struct cache_tree *root_cache)
3985 {
3986         struct btrfs_path path;
3987         struct btrfs_key key;
3988         struct walk_control wc;
3989         struct extent_buffer *leaf, *tree_node;
3990         struct btrfs_root *tmp_root;
3991         struct btrfs_root *tree_root = fs_info->tree_root;
3992         int ret;
3993         int err = 0;
3994
3995         if (ctx.progress_enabled) {
3996                 ctx.tp = TASK_FS_ROOTS;
3997                 task_start(ctx.info);
3998         }
3999
4000         /*
4001          * Just in case we made any changes to the extent tree that weren't
4002          * reflected into the free space cache yet.
4003          */
4004         if (repair)
4005                 reset_cached_block_groups(fs_info);
4006         memset(&wc, 0, sizeof(wc));
4007         cache_tree_init(&wc.shared);
4008         btrfs_init_path(&path);
4009
4010 again:
4011         key.offset = 0;
4012         key.objectid = 0;
4013         key.type = BTRFS_ROOT_ITEM_KEY;
4014         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4015         if (ret < 0) {
4016                 err = 1;
4017                 goto out;
4018         }
4019         tree_node = tree_root->node;
4020         while (1) {
4021                 if (tree_node != tree_root->node) {
4022                         free_root_recs_tree(root_cache);
4023                         btrfs_release_path(&path);
4024                         goto again;
4025                 }
4026                 leaf = path.nodes[0];
4027                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4028                         ret = btrfs_next_leaf(tree_root, &path);
4029                         if (ret) {
4030                                 if (ret < 0)
4031                                         err = 1;
4032                                 break;
4033                         }
4034                         leaf = path.nodes[0];
4035                 }
4036                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4037                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4038                     fs_root_objectid(key.objectid)) {
4039                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4040                                 tmp_root = btrfs_read_fs_root_no_cache(
4041                                                 fs_info, &key);
4042                         } else {
4043                                 key.offset = (u64)-1;
4044                                 tmp_root = btrfs_read_fs_root(
4045                                                 fs_info, &key);
4046                         }
4047                         if (IS_ERR(tmp_root)) {
4048                                 err = 1;
4049                                 goto next;
4050                         }
4051                         ret = check_fs_root(tmp_root, root_cache, &wc);
4052                         if (ret == -EAGAIN) {
4053                                 free_root_recs_tree(root_cache);
4054                                 btrfs_release_path(&path);
4055                                 goto again;
4056                         }
4057                         if (ret)
4058                                 err = 1;
4059                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4060                                 btrfs_free_fs_root(tmp_root);
4061                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4062                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4063                         process_root_ref(leaf, path.slots[0], &key,
4064                                          root_cache);
4065                 }
4066 next:
4067                 path.slots[0]++;
4068         }
4069 out:
4070         btrfs_release_path(&path);
4071         if (err)
4072                 free_extent_cache_tree(&wc.shared);
4073         if (!cache_tree_empty(&wc.shared))
4074                 fprintf(stderr, "warning line %d\n", __LINE__);
4075
4076         task_stop(ctx.info);
4077
4078         return err;
4079 }
4080
4081 /*
4082  * Find the @index according by @ino and name.
4083  * Notice:time efficiency is O(N)
4084  *
4085  * @root:       the root of the fs/file tree
4086  * @index_ret:  the index as return value
4087  * @namebuf:    the name to match
4088  * @name_len:   the length of name to match
4089  * @file_type:  the file_type of INODE_ITEM to match
4090  *
4091  * Returns 0 if found and *@index_ret will be modified with right value
4092  * Returns< 0 not found and *@index_ret will be (u64)-1
4093  */
4094 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4095                           u64 *index_ret, char *namebuf, u32 name_len,
4096                           u8 file_type)
4097 {
4098         struct btrfs_path path;
4099         struct extent_buffer *node;
4100         struct btrfs_dir_item *di;
4101         struct btrfs_key key;
4102         struct btrfs_key location;
4103         char name[BTRFS_NAME_LEN] = {0};
4104
4105         u32 total;
4106         u32 cur = 0;
4107         u32 len;
4108         u32 data_len;
4109         u8 filetype;
4110         int slot;
4111         int ret;
4112
4113         ASSERT(index_ret);
4114
4115         /* search from the last index */
4116         key.objectid = dirid;
4117         key.offset = (u64)-1;
4118         key.type = BTRFS_DIR_INDEX_KEY;
4119
4120         btrfs_init_path(&path);
4121         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4122         if (ret < 0)
4123                 return ret;
4124
4125 loop:
4126         ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4127         if (ret) {
4128                 ret = -ENOENT;
4129                 *index_ret = (64)-1;
4130                 goto out;
4131         }
4132         /* Check whether inode_id/filetype/name match */
4133         node = path.nodes[0];
4134         slot = path.slots[0];
4135         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4136         total = btrfs_item_size_nr(node, slot);
4137         while (cur < total) {
4138                 ret = -ENOENT;
4139                 len = btrfs_dir_name_len(node, di);
4140                 data_len = btrfs_dir_data_len(node, di);
4141
4142                 btrfs_dir_item_key_to_cpu(node, di, &location);
4143                 if (location.objectid != location_id ||
4144                     location.type != BTRFS_INODE_ITEM_KEY ||
4145                     location.offset != 0)
4146                         goto next;
4147
4148                 filetype = btrfs_dir_type(node, di);
4149                 if (file_type != filetype)
4150                         goto next;
4151
4152                 if (len > BTRFS_NAME_LEN)
4153                         len = BTRFS_NAME_LEN;
4154
4155                 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4156                 if (len != name_len || strncmp(namebuf, name, len))
4157                         goto next;
4158
4159                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4160                 *index_ret = key.offset;
4161                 ret = 0;
4162                 goto out;
4163 next:
4164                 len += sizeof(*di) + data_len;
4165                 di = (struct btrfs_dir_item *)((char *)di + len);
4166                 cur += len;
4167         }
4168         goto loop;
4169
4170 out:
4171         btrfs_release_path(&path);
4172         return ret;
4173 }
4174
4175 /*
4176  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4177  * INODE_REF/INODE_EXTREF match.
4178  *
4179  * @root:       the root of the fs/file tree
4180  * @key:        the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4181  *              value while find index
4182  * @location_key: location key of the struct btrfs_dir_item to match
4183  * @name:       the name to match
4184  * @namelen:    the length of name
4185  * @file_type:  the type of file to math
4186  *
4187  * Return 0 if no error occurred.
4188  * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4189  * DIR_ITEM/DIR_INDEX
4190  * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4191  * and DIR_ITEM/DIR_INDEX mismatch
4192  */
4193 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4194                          struct btrfs_key *location_key, char *name,
4195                          u32 namelen, u8 file_type)
4196 {
4197         struct btrfs_path path;
4198         struct extent_buffer *node;
4199         struct btrfs_dir_item *di;
4200         struct btrfs_key location;
4201         char namebuf[BTRFS_NAME_LEN] = {0};
4202         u32 total;
4203         u32 cur = 0;
4204         u32 len;
4205         u32 data_len;
4206         u8 filetype;
4207         int slot;
4208         int ret;
4209
4210         /* get the index by traversing all index */
4211         if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4212                 ret = find_dir_index(root, key->objectid,
4213                                      location_key->objectid, &key->offset,
4214                                      name, namelen, file_type);
4215                 if (ret)
4216                         ret = DIR_INDEX_MISSING;
4217                 return ret;
4218         }
4219
4220         btrfs_init_path(&path);
4221         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4222         if (ret) {
4223                 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4224                         DIR_INDEX_MISSING;
4225                 goto out;
4226         }
4227
4228         /* Check whether inode_id/filetype/name match */
4229         node = path.nodes[0];
4230         slot = path.slots[0];
4231         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4232         total = btrfs_item_size_nr(node, slot);
4233         while (cur < total) {
4234                 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4235                         DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4236
4237                 len = btrfs_dir_name_len(node, di);
4238                 data_len = btrfs_dir_data_len(node, di);
4239
4240                 btrfs_dir_item_key_to_cpu(node, di, &location);
4241                 if (location.objectid != location_key->objectid ||
4242                     location.type != location_key->type ||
4243                     location.offset != location_key->offset)
4244                         goto next;
4245
4246                 filetype = btrfs_dir_type(node, di);
4247                 if (file_type != filetype)
4248                         goto next;
4249
4250                 if (len > BTRFS_NAME_LEN) {
4251                         len = BTRFS_NAME_LEN;
4252                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4253                         root->objectid,
4254                         key->type == BTRFS_DIR_ITEM_KEY ?
4255                         "DIR_ITEM" : "DIR_INDEX",
4256                         key->objectid, key->offset, len);
4257                 }
4258                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4259                                    len);
4260                 if (len != namelen || strncmp(namebuf, name, len))
4261                         goto next;
4262
4263                 ret = 0;
4264                 goto out;
4265 next:
4266                 len += sizeof(*di) + data_len;
4267                 di = (struct btrfs_dir_item *)((char *)di + len);
4268                 cur += len;
4269         }
4270
4271 out:
4272         btrfs_release_path(&path);
4273         return ret;
4274 }
4275
4276 /*
4277  * Prints inode ref error message
4278  */
4279 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4280                                 u64 index, const char *namebuf, int name_len,
4281                                 u8 filetype, int err)
4282 {
4283         if (!err)
4284                 return;
4285
4286         /* root dir error */
4287         if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4288                 error(
4289         "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4290                       root->objectid, key->objectid, key->offset, namebuf);
4291                 return;
4292         }
4293
4294         /* normal error */
4295         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4296                 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4297                       root->objectid, key->offset,
4298                       btrfs_name_hash(namebuf, name_len),
4299                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4300                       namebuf, filetype);
4301         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4302                 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4303                       root->objectid, key->offset, index,
4304                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4305                       namebuf, filetype);
4306 }
4307
4308 /*
4309  * Insert the missing inode item.
4310  *
4311  * Returns 0 means success.
4312  * Returns <0 means error.
4313  */
4314 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4315                                      u8 filetype)
4316 {
4317         struct btrfs_key key;
4318         struct btrfs_trans_handle *trans;
4319         struct btrfs_path path;
4320         int ret;
4321
4322         key.objectid = ino;
4323         key.type = BTRFS_INODE_ITEM_KEY;
4324         key.offset = 0;
4325
4326         btrfs_init_path(&path);
4327         trans = btrfs_start_transaction(root, 1);
4328         if (IS_ERR(trans)) {
4329                 ret = -EIO;
4330                 goto out;
4331         }
4332
4333         ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4334         if (ret < 0 || !ret)
4335                 goto fail;
4336
4337         /* insert inode item */
4338         create_inode_item_lowmem(trans, root, ino, filetype);
4339         ret = 0;
4340 fail:
4341         btrfs_commit_transaction(trans, root);
4342 out:
4343         if (ret)
4344                 error("failed to repair root %llu INODE ITEM[%llu] missing",
4345                       root->objectid, ino);
4346         btrfs_release_path(&path);
4347         return ret;
4348 }
4349
4350 /*
4351  * The ternary means dir item, dir index and relative inode ref.
4352  * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4353  * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4354  * strategy:
4355  * If two of three is missing or mismatched, delete the existing one.
4356  * If one of three is missing or mismatched, add the missing one.
4357  *
4358  * returns 0 means success.
4359  * returns not 0 means on error;
4360  */
4361 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4362                           u64 index, char *name, int name_len, u8 filetype,
4363                           int err)
4364 {
4365         struct btrfs_trans_handle *trans;
4366         int stage = 0;
4367         int ret = 0;
4368
4369         /*
4370          * stage shall be one of following valild values:
4371          *      0: Fine, nothing to do.
4372          *      1: One of three is wrong, so add missing one.
4373          *      2: Two of three is wrong, so delete existed one.
4374          */
4375         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4376                 stage++;
4377         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4378                 stage++;
4379         if (err & (INODE_REF_MISSING))
4380                 stage++;
4381
4382         /* stage must be smllarer than 3 */
4383         ASSERT(stage < 3);
4384
4385         trans = btrfs_start_transaction(root, 1);
4386         if (stage == 2) {
4387                 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4388                                    name_len, 0);
4389                 goto out;
4390         }
4391         if (stage == 1) {
4392                 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4393                                filetype, &index, 1, 1);
4394                 goto out;
4395         }
4396 out:
4397         btrfs_commit_transaction(trans, root);
4398
4399         if (ret)
4400                 error("fail to repair inode %llu name %s filetype %u",
4401                       ino, name, filetype);
4402         else
4403                 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4404                        stage == 2 ? "Delete" : "Add",
4405                        ino, name, filetype);
4406
4407         return ret;
4408 }
4409
4410 /*
4411  * Traverse the given INODE_REF and call find_dir_item() to find related
4412  * DIR_ITEM/DIR_INDEX.
4413  *
4414  * @root:       the root of the fs/file tree
4415  * @ref_key:    the key of the INODE_REF
4416  * @path        the path provides node and slot
4417  * @refs:       the count of INODE_REF
4418  * @mode:       the st_mode of INODE_ITEM
4419  * @name_ret:   returns with the first ref's name
4420  * @name_len_ret:    len of the name_ret
4421  *
4422  * Return 0 if no error occurred.
4423  */
4424 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4425                            struct btrfs_path *path, char *name_ret,
4426                            u32 *namelen_ret, u64 *refs_ret, int mode)
4427 {
4428         struct btrfs_key key;
4429         struct btrfs_key location;
4430         struct btrfs_inode_ref *ref;
4431         struct extent_buffer *node;
4432         char namebuf[BTRFS_NAME_LEN] = {0};
4433         u32 total;
4434         u32 cur = 0;
4435         u32 len;
4436         u32 name_len;
4437         u64 index;
4438         int ret;
4439         int err = 0;
4440         int tmp_err;
4441         int slot;
4442         int need_research = 0;
4443         u64 refs;
4444
4445 begin:
4446         err = 0;
4447         cur = 0;
4448         refs = *refs_ret;
4449
4450         /* since after repair, path and the dir item may be changed */
4451         if (need_research) {
4452                 need_research = 0;
4453                 btrfs_release_path(path);
4454                 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4455                 /* the item was deleted, let path point to the last checked item */
4456                 if (ret > 0) {
4457                         if (path->slots[0] == 0)
4458                                 btrfs_prev_leaf(root, path);
4459                         else
4460                                 path->slots[0]--;
4461                 }
4462                 if (ret)
4463                         goto out;
4464         }
4465
4466         location.objectid = ref_key->objectid;
4467         location.type = BTRFS_INODE_ITEM_KEY;
4468         location.offset = 0;
4469         node = path->nodes[0];
4470         slot = path->slots[0];
4471
4472         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4473         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4474         total = btrfs_item_size_nr(node, slot);
4475
4476 next:
4477         /* Update inode ref count */
4478         refs++;
4479         tmp_err = 0;
4480         index = btrfs_inode_ref_index(node, ref);
4481         name_len = btrfs_inode_ref_name_len(node, ref);
4482
4483         if (name_len <= BTRFS_NAME_LEN) {
4484                 len = name_len;
4485         } else {
4486                 len = BTRFS_NAME_LEN;
4487                 warning("root %llu INODE_REF[%llu %llu] name too long",
4488                         root->objectid, ref_key->objectid, ref_key->offset);
4489         }
4490
4491         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4492
4493         /* copy the first name found to name_ret */
4494         if (refs == 1 && name_ret) {
4495                 memcpy(name_ret, namebuf, len);
4496                 *namelen_ret = len;
4497         }
4498
4499         /* Check root dir ref */
4500         if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4501                 if (index != 0 || len != strlen("..") ||
4502                     strncmp("..", namebuf, len) ||
4503                     ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4504                         /* set err bits then repair will delete the ref */
4505                         err |= DIR_INDEX_MISSING;
4506                         err |= DIR_ITEM_MISSING;
4507                 }
4508                 goto end;
4509         }
4510
4511         /* Find related DIR_INDEX */
4512         key.objectid = ref_key->offset;
4513         key.type = BTRFS_DIR_INDEX_KEY;
4514         key.offset = index;
4515         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4516                             imode_to_type(mode));
4517
4518         /* Find related dir_item */
4519         key.objectid = ref_key->offset;
4520         key.type = BTRFS_DIR_ITEM_KEY;
4521         key.offset = btrfs_name_hash(namebuf, len);
4522         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4523                             imode_to_type(mode));
4524 end:
4525         if (tmp_err && repair) {
4526                 ret = repair_ternary_lowmem(root, ref_key->offset,
4527                                             ref_key->objectid, index, namebuf,
4528                                             name_len, imode_to_type(mode),
4529                                             tmp_err);
4530                 if (!ret) {
4531                         need_research = 1;
4532                         goto begin;
4533                 }
4534         }
4535         print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4536                             imode_to_type(mode), tmp_err);
4537         err |= tmp_err;
4538         len = sizeof(*ref) + name_len;
4539         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4540         cur += len;
4541         if (cur < total)
4542                 goto next;
4543
4544 out:
4545         *refs_ret = refs;
4546         return err;
4547 }
4548
4549 /*
4550  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4551  * DIR_ITEM/DIR_INDEX.
4552  *
4553  * @root:       the root of the fs/file tree
4554  * @ref_key:    the key of the INODE_EXTREF
4555  * @refs:       the count of INODE_EXTREF
4556  * @mode:       the st_mode of INODE_ITEM
4557  *
4558  * Return 0 if no error occurred.
4559  */
4560 static int check_inode_extref(struct btrfs_root *root,
4561                               struct btrfs_key *ref_key,
4562                               struct extent_buffer *node, int slot, u64 *refs,
4563                               int mode)
4564 {
4565         struct btrfs_key key;
4566         struct btrfs_key location;
4567         struct btrfs_inode_extref *extref;
4568         char namebuf[BTRFS_NAME_LEN] = {0};
4569         u32 total;
4570         u32 cur = 0;
4571         u32 len;
4572         u32 name_len;
4573         u64 index;
4574         u64 parent;
4575         int ret;
4576         int err = 0;
4577
4578         location.objectid = ref_key->objectid;
4579         location.type = BTRFS_INODE_ITEM_KEY;
4580         location.offset = 0;
4581
4582         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4583         total = btrfs_item_size_nr(node, slot);
4584
4585 next:
4586         /* update inode ref count */
4587         (*refs)++;
4588         name_len = btrfs_inode_extref_name_len(node, extref);
4589         index = btrfs_inode_extref_index(node, extref);
4590         parent = btrfs_inode_extref_parent(node, extref);
4591         if (name_len <= BTRFS_NAME_LEN) {
4592                 len = name_len;
4593         } else {
4594                 len = BTRFS_NAME_LEN;
4595                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4596                         root->objectid, ref_key->objectid, ref_key->offset);
4597         }
4598         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4599
4600         /* Check root dir ref name */
4601         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4602                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4603                       root->objectid, ref_key->objectid, ref_key->offset,
4604                       namebuf);
4605                 err |= ROOT_DIR_ERROR;
4606         }
4607
4608         /* find related dir_index */
4609         key.objectid = parent;
4610         key.type = BTRFS_DIR_INDEX_KEY;
4611         key.offset = index;
4612         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4613         err |= ret;
4614
4615         /* find related dir_item */
4616         key.objectid = parent;
4617         key.type = BTRFS_DIR_ITEM_KEY;
4618         key.offset = btrfs_name_hash(namebuf, len);
4619         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4620         err |= ret;
4621
4622         len = sizeof(*extref) + name_len;
4623         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4624         cur += len;
4625
4626         if (cur < total)
4627                 goto next;
4628
4629         return err;
4630 }
4631
4632 /*
4633  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4634  * DIR_ITEM/DIR_INDEX match.
4635  * Return with @index_ret.
4636  *
4637  * @root:       the root of the fs/file tree
4638  * @key:        the key of the INODE_REF/INODE_EXTREF
4639  * @name:       the name in the INODE_REF/INODE_EXTREF
4640  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4641  * @index_ret:  the index in the INODE_REF/INODE_EXTREF,
4642  *              value (u64)-1 means do not check index
4643  * @ext_ref:    the EXTENDED_IREF feature
4644  *
4645  * Return 0 if no error occurred.
4646  * Return >0 for error bitmap
4647  */
4648 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4649                           char *name, int namelen, u64 *index_ret,
4650                           unsigned int ext_ref)
4651 {
4652         struct btrfs_path path;
4653         struct btrfs_inode_ref *ref;
4654         struct btrfs_inode_extref *extref;
4655         struct extent_buffer *node;
4656         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4657         u32 total;
4658         u32 cur = 0;
4659         u32 len;
4660         u32 ref_namelen;
4661         u64 ref_index;
4662         u64 parent;
4663         u64 dir_id;
4664         int slot;
4665         int ret;
4666
4667         ASSERT(index_ret);
4668
4669         btrfs_init_path(&path);
4670         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4671         if (ret) {
4672                 ret = INODE_REF_MISSING;
4673                 goto extref;
4674         }
4675
4676         node = path.nodes[0];
4677         slot = path.slots[0];
4678
4679         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4680         total = btrfs_item_size_nr(node, slot);
4681
4682         /* Iterate all entry of INODE_REF */
4683         while (cur < total) {
4684                 ret = INODE_REF_MISSING;
4685
4686                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4687                 ref_index = btrfs_inode_ref_index(node, ref);
4688                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4689                         goto next_ref;
4690
4691                 if (cur + sizeof(*ref) + ref_namelen > total ||
4692                     ref_namelen > BTRFS_NAME_LEN) {
4693                         warning("root %llu INODE %s[%llu %llu] name too long",
4694                                 root->objectid,
4695                                 key->type == BTRFS_INODE_REF_KEY ?
4696                                         "REF" : "EXTREF",
4697                                 key->objectid, key->offset);
4698
4699                         if (cur + sizeof(*ref) > total)
4700                                 break;
4701                         len = min_t(u32, total - cur - sizeof(*ref),
4702                                     BTRFS_NAME_LEN);
4703                 } else {
4704                         len = ref_namelen;
4705                 }
4706
4707                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4708                                    len);
4709
4710                 if (len != namelen || strncmp(ref_namebuf, name, len))
4711                         goto next_ref;
4712
4713                 *index_ret = ref_index;
4714                 ret = 0;
4715                 goto out;
4716 next_ref:
4717                 len = sizeof(*ref) + ref_namelen;
4718                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4719                 cur += len;
4720         }
4721
4722 extref:
4723         /* Skip if not support EXTENDED_IREF feature */
4724         if (!ext_ref)
4725                 goto out;
4726
4727         btrfs_release_path(&path);
4728         btrfs_init_path(&path);
4729
4730         dir_id = key->offset;
4731         key->type = BTRFS_INODE_EXTREF_KEY;
4732         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4733
4734         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4735         if (ret) {
4736                 ret = INODE_REF_MISSING;
4737                 goto out;
4738         }
4739
4740         node = path.nodes[0];
4741         slot = path.slots[0];
4742
4743         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4744         cur = 0;
4745         total = btrfs_item_size_nr(node, slot);
4746
4747         /* Iterate all entry of INODE_EXTREF */
4748         while (cur < total) {
4749                 ret = INODE_REF_MISSING;
4750
4751                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4752                 ref_index = btrfs_inode_extref_index(node, extref);
4753                 parent = btrfs_inode_extref_parent(node, extref);
4754                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4755                         goto next_extref;
4756
4757                 if (parent != dir_id)
4758                         goto next_extref;
4759
4760                 if (ref_namelen <= BTRFS_NAME_LEN) {
4761                         len = ref_namelen;
4762                 } else {
4763                         len = BTRFS_NAME_LEN;
4764                         warning("root %llu INODE %s[%llu %llu] name too long",
4765                                 root->objectid,
4766                                 key->type == BTRFS_INODE_REF_KEY ?
4767                                         "REF" : "EXTREF",
4768                                 key->objectid, key->offset);
4769                 }
4770                 read_extent_buffer(node, ref_namebuf,
4771                                    (unsigned long)(extref + 1), len);
4772
4773                 if (len != namelen || strncmp(ref_namebuf, name, len))
4774                         goto next_extref;
4775
4776                 *index_ret = ref_index;
4777                 ret = 0;
4778                 goto out;
4779
4780 next_extref:
4781                 len = sizeof(*extref) + ref_namelen;
4782                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4783                 cur += len;
4784
4785         }
4786 out:
4787         btrfs_release_path(&path);
4788         return ret;
4789 }
4790
4791 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
4792                                u64 ino, u64 index, const char *namebuf,
4793                                int name_len, u8 filetype, int err)
4794 {
4795         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
4796                 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
4797                       root->objectid, key->objectid, key->offset, namebuf,
4798                       filetype,
4799                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
4800         }
4801
4802         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
4803                 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
4804                       root->objectid, key->objectid, index, namebuf, filetype,
4805                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
4806         }
4807
4808         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
4809                 error(
4810                 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
4811                       root->objectid, ino, index, namebuf, filetype,
4812                       err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
4813         }
4814
4815         if (err & INODE_REF_MISSING)
4816                 error(
4817                 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
4818                       root->objectid, ino, key->objectid, namebuf, filetype);
4819
4820 }
4821
4822 /*
4823  * Call repair_inode_item_missing and repair_ternary_lowmem to repair
4824  *
4825  * Returns error after repair
4826  */
4827 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
4828                            u64 index, u8 filetype, char *namebuf, u32 name_len,
4829                            int err)
4830 {
4831         int ret;
4832
4833         if (err & INODE_ITEM_MISSING) {
4834                 ret = repair_inode_item_missing(root, ino, filetype);
4835                 if (!ret)
4836                         err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
4837         }
4838
4839         if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
4840                 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
4841                                             name_len, filetype, err);
4842                 if (!ret) {
4843                         err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
4844                         err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
4845                         err &= ~(INODE_REF_MISSING);
4846                 }
4847         }
4848         return err;
4849 }
4850
4851 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
4852                 u64 *size_ret)
4853 {
4854         struct btrfs_key key;
4855         struct btrfs_path path;
4856         u32 len;
4857         struct btrfs_dir_item *di;
4858         int ret;
4859         int cur = 0;
4860         int total = 0;
4861
4862         ASSERT(size_ret);
4863         *size_ret = 0;
4864
4865         key.objectid = ino;
4866         key.type = type;
4867         key.offset = (u64)-1;
4868
4869         btrfs_init_path(&path);
4870         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4871         if (ret < 0) {
4872                 ret = -EIO;
4873                 goto out;
4874         }
4875         /* if found, go to spacial case */
4876         if (ret == 0)
4877                 goto special_case;
4878
4879 loop:
4880         ret = btrfs_previous_item(root, &path, ino, type);
4881
4882         if (ret) {
4883                 ret = 0;
4884                 goto out;
4885         }
4886
4887 special_case:
4888         di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
4889         cur = 0;
4890         total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
4891
4892         while (cur < total) {
4893                 len = btrfs_dir_name_len(path.nodes[0], di);
4894                 if (len > BTRFS_NAME_LEN)
4895                         len = BTRFS_NAME_LEN;
4896                 *size_ret += len;
4897
4898                 len += btrfs_dir_data_len(path.nodes[0], di);
4899                 len += sizeof(*di);
4900                 di = (struct btrfs_dir_item *)((char *)di + len);
4901                 cur += len;
4902         }
4903         goto loop;
4904
4905 out:
4906         btrfs_release_path(&path);
4907         return ret;
4908 }
4909
4910 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
4911 {
4912         u64 item_size;
4913         u64 index_size;
4914         int ret;
4915
4916         ASSERT(size);
4917         ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
4918         if (ret)
4919                 goto out;
4920
4921         ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
4922         if (ret)
4923                 goto out;
4924
4925         *size = item_size + index_size;
4926
4927 out:
4928         if (ret)
4929                 error("failed to count root %llu INODE[%llu] root size",
4930                       root->objectid, ino);
4931         return ret;
4932 }
4933
4934 /*
4935  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4936  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4937  *
4938  * @root:       the root of the fs/file tree
4939  * @key:        the key of the INODE_REF/INODE_EXTREF
4940  * @path:       the path
4941  * @size:       the st_size of the INODE_ITEM
4942  * @ext_ref:    the EXTENDED_IREF feature
4943  *
4944  * Return 0 if no error occurred.
4945  * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
4946  */
4947 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
4948                           struct btrfs_path *path, u64 *size,
4949                           unsigned int ext_ref)
4950 {
4951         struct btrfs_dir_item *di;
4952         struct btrfs_inode_item *ii;
4953         struct btrfs_key key;
4954         struct btrfs_key location;
4955         struct extent_buffer *node;
4956         int slot;
4957         char namebuf[BTRFS_NAME_LEN] = {0};
4958         u32 total;
4959         u32 cur = 0;
4960         u32 len;
4961         u32 name_len;
4962         u32 data_len;
4963         u8 filetype;
4964         u32 mode = 0;
4965         u64 index;
4966         int ret;
4967         int err;
4968         int tmp_err;
4969         int need_research = 0;
4970
4971         /*
4972          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4973          * ignore index check.
4974          */
4975         if (di_key->type == BTRFS_DIR_INDEX_KEY)
4976                 index = di_key->offset;
4977         else
4978                 index = (u64)-1;
4979 begin:
4980         err = 0;
4981         cur = 0;
4982
4983         /* since after repair, path and the dir item may be changed */
4984         if (need_research) {
4985                 need_research = 0;
4986                 err |= DIR_COUNT_AGAIN;
4987                 btrfs_release_path(path);
4988                 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
4989                 /* the item was deleted, let path point the last checked item */
4990                 if (ret > 0) {
4991                         if (path->slots[0] == 0)
4992                                 btrfs_prev_leaf(root, path);
4993                         else
4994                                 path->slots[0]--;
4995                 }
4996                 if (ret)
4997                         goto out;
4998         }
4999
5000         node = path->nodes[0];
5001         slot = path->slots[0];
5002
5003         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5004         total = btrfs_item_size_nr(node, slot);
5005         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5006
5007         while (cur < total) {
5008                 data_len = btrfs_dir_data_len(node, di);
5009                 tmp_err = 0;
5010                 if (data_len)
5011                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5012                               root->objectid,
5013               di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5014                               di_key->objectid, di_key->offset, data_len);
5015
5016                 name_len = btrfs_dir_name_len(node, di);
5017                 if (name_len <= BTRFS_NAME_LEN) {
5018                         len = name_len;
5019                 } else {
5020                         len = BTRFS_NAME_LEN;
5021                         warning("root %llu %s[%llu %llu] name too long",
5022                                 root->objectid,
5023                 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5024                                 di_key->objectid, di_key->offset);
5025                 }
5026                 (*size) += name_len;
5027                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5028                                    len);
5029                 filetype = btrfs_dir_type(node, di);
5030
5031                 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5032                     di_key->offset != btrfs_name_hash(namebuf, len)) {
5033                         err |= -EIO;
5034                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5035                         root->objectid, di_key->objectid, di_key->offset,
5036                         namebuf, len, filetype, di_key->offset,
5037                         btrfs_name_hash(namebuf, len));
5038                 }
5039
5040                 btrfs_dir_item_key_to_cpu(node, di, &location);
5041                 /* Ignore related ROOT_ITEM check */
5042                 if (location.type == BTRFS_ROOT_ITEM_KEY)
5043                         goto next;
5044
5045                 btrfs_release_path(path);
5046                 /* Check relative INODE_ITEM(existence/filetype) */
5047                 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5048                 if (ret) {
5049                         tmp_err |= INODE_ITEM_MISSING;
5050                         goto next;
5051                 }
5052
5053                 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5054                                     struct btrfs_inode_item);
5055                 mode = btrfs_inode_mode(path->nodes[0], ii);
5056                 if (imode_to_type(mode) != filetype) {
5057                         tmp_err |= INODE_ITEM_MISMATCH;
5058                         goto next;
5059                 }
5060
5061                 /* Check relative INODE_REF/INODE_EXTREF */
5062                 key.objectid = location.objectid;
5063                 key.type = BTRFS_INODE_REF_KEY;
5064                 key.offset = di_key->objectid;
5065                 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5066                                           &index, ext_ref);
5067
5068                 /* check relative INDEX/ITEM */
5069                 key.objectid = di_key->objectid;
5070                 if (key.type == BTRFS_DIR_ITEM_KEY) {
5071                         key.type = BTRFS_DIR_INDEX_KEY;
5072                         key.offset = index;
5073                 } else {
5074                         key.type = BTRFS_DIR_ITEM_KEY;
5075                         key.offset = btrfs_name_hash(namebuf, name_len);
5076                 }
5077
5078                 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5079                                          name_len, filetype);
5080                 /* find_dir_item may find index */
5081                 if (key.type == BTRFS_DIR_INDEX_KEY)
5082                         index = key.offset;
5083 next:
5084
5085                 if (tmp_err && repair) {
5086                         ret = repair_dir_item(root, di_key->objectid,
5087                                               location.objectid, index,
5088                                               imode_to_type(mode), namebuf,
5089                                               name_len, tmp_err);
5090                         if (ret != tmp_err) {
5091                                 need_research = 1;
5092                                 goto begin;
5093                         }
5094                 }
5095                 btrfs_release_path(path);
5096                 print_dir_item_err(root, di_key, location.objectid, index,
5097                                    namebuf, name_len, filetype, tmp_err);
5098                 err |= tmp_err;
5099                 len = sizeof(*di) + name_len + data_len;
5100                 di = (struct btrfs_dir_item *)((char *)di + len);
5101                 cur += len;
5102
5103                 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5104                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5105                               root->objectid, di_key->objectid,
5106                               di_key->offset);
5107                         break;
5108                 }
5109         }
5110 out:
5111         /* research path */
5112         btrfs_release_path(path);
5113         ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5114         if (ret)
5115                 err |= ret > 0 ? -ENOENT : ret;
5116         return err;
5117 }
5118
5119 /*
5120  * Wrapper function of btrfs_punch_hole.
5121  *
5122  * Returns 0 means success.
5123  * Returns not 0 means error.
5124  */
5125 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5126                              u64 len)
5127 {
5128         struct btrfs_trans_handle *trans;
5129         int ret = 0;
5130
5131         trans = btrfs_start_transaction(root, 1);
5132         if (IS_ERR(trans))
5133                 return PTR_ERR(trans);
5134
5135         ret = btrfs_punch_hole(trans, root, ino, start, len);
5136         if (ret)
5137                 error("failed to add hole [%llu, %llu] in inode [%llu]",
5138                       start, len, ino);
5139         else
5140                 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5141                        ino);
5142
5143         btrfs_commit_transaction(trans, root);
5144         return ret;
5145 }
5146
5147 /*
5148  * Check file extent datasum/hole, update the size of the file extents,
5149  * check and update the last offset of the file extent.
5150  *
5151  * @root:       the root of fs/file tree.
5152  * @fkey:       the key of the file extent.
5153  * @nodatasum:  INODE_NODATASUM feature.
5154  * @size:       the sum of all EXTENT_DATA items size for this inode.
5155  * @end:        the offset of the last extent.
5156  *
5157  * Return 0 if no error occurred.
5158  */
5159 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5160                              struct extent_buffer *node, int slot,
5161                              unsigned int nodatasum, u64 *size, u64 *end)
5162 {
5163         struct btrfs_file_extent_item *fi;
5164         u64 disk_bytenr;
5165         u64 disk_num_bytes;
5166         u64 extent_num_bytes;
5167         u64 extent_offset;
5168         u64 csum_found;         /* In byte size, sectorsize aligned */
5169         u64 search_start;       /* Logical range start we search for csum */
5170         u64 search_len;         /* Logical range len we search for csum */
5171         unsigned int extent_type;
5172         unsigned int is_hole;
5173         int compressed = 0;
5174         int ret;
5175         int err = 0;
5176
5177         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5178
5179         /* Check inline extent */
5180         extent_type = btrfs_file_extent_type(node, fi);
5181         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5182                 struct btrfs_item *e = btrfs_item_nr(slot);
5183                 u32 item_inline_len;
5184
5185                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5186                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5187                 compressed = btrfs_file_extent_compression(node, fi);
5188                 if (extent_num_bytes == 0) {
5189                         error(
5190                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5191                                 root->objectid, fkey->objectid, fkey->offset);
5192                         err |= FILE_EXTENT_ERROR;
5193                 }
5194                 if (!compressed && extent_num_bytes != item_inline_len) {
5195                         error(
5196                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5197                                 root->objectid, fkey->objectid, fkey->offset,
5198                                 extent_num_bytes, item_inline_len);
5199                         err |= FILE_EXTENT_ERROR;
5200                 }
5201                 *end += extent_num_bytes;
5202                 *size += extent_num_bytes;
5203                 return err;
5204         }
5205
5206         /* Check extent type */
5207         if (extent_type != BTRFS_FILE_EXTENT_REG &&
5208                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5209                 err |= FILE_EXTENT_ERROR;
5210                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5211                       root->objectid, fkey->objectid, fkey->offset);
5212                 return err;
5213         }
5214
5215         /* Check REG_EXTENT/PREALLOC_EXTENT */
5216         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5217         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5218         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5219         extent_offset = btrfs_file_extent_offset(node, fi);
5220         compressed = btrfs_file_extent_compression(node, fi);
5221         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5222
5223         /*
5224          * Check EXTENT_DATA csum
5225          *
5226          * For plain (uncompressed) extent, we should only check the range
5227          * we're referring to, as it's possible that part of prealloc extent
5228          * has been written, and has csum:
5229          *
5230          * |<--- Original large preallocated extent A ---->|
5231          * |<- Prealloc File Extent ->|<- Regular Extent ->|
5232          *      No csum                         Has csum
5233          *
5234          * For compressed extent, we should check the whole range.
5235          */
5236         if (!compressed) {
5237                 search_start = disk_bytenr + extent_offset;
5238                 search_len = extent_num_bytes;
5239         } else {
5240                 search_start = disk_bytenr;
5241                 search_len = disk_num_bytes;
5242         }
5243         ret = count_csum_range(root->fs_info, search_start, search_len, &csum_found);
5244         if (csum_found > 0 && nodatasum) {
5245                 err |= ODD_CSUM_ITEM;
5246                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5247                       root->objectid, fkey->objectid, fkey->offset);
5248         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5249                    !is_hole && (ret < 0 || csum_found < search_len)) {
5250                 err |= CSUM_ITEM_MISSING;
5251                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5252                       root->objectid, fkey->objectid, fkey->offset,
5253                       csum_found, search_len);
5254         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5255                 err |= ODD_CSUM_ITEM;
5256                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5257                       root->objectid, fkey->objectid, fkey->offset, csum_found);
5258         }
5259
5260         /* Check EXTENT_DATA hole */
5261         if (!no_holes && *end != fkey->offset) {
5262                 if (repair)
5263                         ret = punch_extent_hole(root, fkey->objectid,
5264                                                 *end, fkey->offset - *end);
5265                 if (!repair || ret) {
5266                         err |= FILE_EXTENT_ERROR;
5267                         error(
5268 "root %llu EXTENT_DATA[%llu %llu] gap exists, expected: EXTENT_DATA[%llu %llu]",
5269                                 root->objectid, fkey->objectid, fkey->offset,
5270                                 fkey->objectid, *end);
5271                 }
5272         }
5273
5274         *end += extent_num_bytes;
5275         if (!is_hole)
5276                 *size += extent_num_bytes;
5277
5278         return err;
5279 }
5280
5281 /*
5282  * Set inode item nbytes to @nbytes
5283  *
5284  * Returns  0     on success
5285  * Returns  != 0  on error
5286  */
5287 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5288                                       struct btrfs_path *path,
5289                                       u64 ino, u64 nbytes)
5290 {
5291         struct btrfs_trans_handle *trans;
5292         struct btrfs_inode_item *ii;
5293         struct btrfs_key key;
5294         struct btrfs_key research_key;
5295         int err = 0;
5296         int ret;
5297
5298         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5299
5300         key.objectid = ino;
5301         key.type = BTRFS_INODE_ITEM_KEY;
5302         key.offset = 0;
5303
5304         trans = btrfs_start_transaction(root, 1);
5305         if (IS_ERR(trans)) {
5306                 ret = PTR_ERR(trans);
5307                 err |= ret;
5308                 goto out;
5309         }
5310
5311         btrfs_release_path(path);
5312         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5313         if (ret > 0)
5314                 ret = -ENOENT;
5315         if (ret) {
5316                 err |= ret;
5317                 goto fail;
5318         }
5319
5320         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5321                             struct btrfs_inode_item);
5322         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5323         btrfs_mark_buffer_dirty(path->nodes[0]);
5324 fail:
5325         btrfs_commit_transaction(trans, root);
5326 out:
5327         if (ret)
5328                 error("failed to set nbytes in inode %llu root %llu",
5329                       ino, root->root_key.objectid);
5330         else
5331                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5332                        root->root_key.objectid, nbytes);
5333
5334         /* research path */
5335         btrfs_release_path(path);
5336         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5337         err |= ret;
5338
5339         return err;
5340 }
5341
5342 /*
5343  * Set directory inode isize to @isize.
5344  *
5345  * Returns 0     on success.
5346  * Returns != 0  on error.
5347  */
5348 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5349                                    struct btrfs_path *path,
5350                                    u64 ino, u64 isize)
5351 {
5352         struct btrfs_trans_handle *trans;
5353         struct btrfs_inode_item *ii;
5354         struct btrfs_key key;
5355         struct btrfs_key research_key;
5356         int ret;
5357         int err = 0;
5358
5359         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5360
5361         key.objectid = ino;
5362         key.type = BTRFS_INODE_ITEM_KEY;
5363         key.offset = 0;
5364
5365         trans = btrfs_start_transaction(root, 1);
5366         if (IS_ERR(trans)) {
5367                 ret = PTR_ERR(trans);
5368                 err |= ret;
5369                 goto out;
5370         }
5371
5372         btrfs_release_path(path);
5373         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5374         if (ret > 0)
5375                 ret = -ENOENT;
5376         if (ret) {
5377                 err |= ret;
5378                 goto fail;
5379         }
5380
5381         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5382                             struct btrfs_inode_item);
5383         btrfs_set_inode_size(path->nodes[0], ii, isize);
5384         btrfs_mark_buffer_dirty(path->nodes[0]);
5385 fail:
5386         btrfs_commit_transaction(trans, root);
5387 out:
5388         if (ret)
5389                 error("failed to set isize in inode %llu root %llu",
5390                       ino, root->root_key.objectid);
5391         else
5392                 printf("Set isize in inode %llu root %llu to %llu\n",
5393                        ino, root->root_key.objectid, isize);
5394
5395         btrfs_release_path(path);
5396         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5397         err |= ret;
5398
5399         return err;
5400 }
5401
5402 /*
5403  * Wrapper function for btrfs_add_orphan_item().
5404  *
5405  * Returns 0     on success.
5406  * Returns != 0  on error.
5407  */
5408 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5409                                            struct btrfs_path *path, u64 ino)
5410 {
5411         struct btrfs_trans_handle *trans;
5412         struct btrfs_key research_key;
5413         int ret;
5414         int err = 0;
5415
5416         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5417
5418         trans = btrfs_start_transaction(root, 1);
5419         if (IS_ERR(trans)) {
5420                 ret = PTR_ERR(trans);
5421                 err |= ret;
5422                 goto out;
5423         }
5424
5425         btrfs_release_path(path);
5426         ret = btrfs_add_orphan_item(trans, root, path, ino);
5427         err |= ret;
5428         btrfs_commit_transaction(trans, root);
5429 out:
5430         if (ret)
5431                 error("failed to add inode %llu as orphan item root %llu",
5432                       ino, root->root_key.objectid);
5433         else
5434                 printf("Added inode %llu as orphan item root %llu\n",
5435                        ino, root->root_key.objectid);
5436
5437         btrfs_release_path(path);
5438         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5439         err |= ret;
5440
5441         return err;
5442 }
5443
5444 /* Set inode_item nlink to @ref_count.
5445  * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
5446  *
5447  * Returns 0 on success
5448  */
5449 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
5450                                       struct btrfs_path *path, u64 ino,
5451                                       const char *name, u32 namelen,
5452                                       u64 ref_count, u8 filetype, u64 *nlink)
5453 {
5454         struct btrfs_trans_handle *trans;
5455         struct btrfs_inode_item *ii;
5456         struct btrfs_key key;
5457         struct btrfs_key old_key;
5458         char namebuf[BTRFS_NAME_LEN] = {0};
5459         int name_len;
5460         int ret;
5461         int ret2;
5462
5463         /* save the key */
5464         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
5465
5466         if (name && namelen) {
5467                 ASSERT(namelen <= BTRFS_NAME_LEN);
5468                 memcpy(namebuf, name, namelen);
5469                 name_len = namelen;
5470         } else {
5471                 sprintf(namebuf, "%llu", ino);
5472                 name_len = count_digits(ino);
5473                 printf("Can't find file name for inode %llu, use %s instead\n",
5474                        ino, namebuf);
5475         }
5476
5477         trans = btrfs_start_transaction(root, 1);
5478         if (IS_ERR(trans)) {
5479                 ret = PTR_ERR(trans);
5480                 goto out;
5481         }
5482
5483         btrfs_release_path(path);
5484         /* if refs is 0, put it into lostfound */
5485         if (ref_count == 0) {
5486                 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
5487                                               name_len, filetype, &ref_count);
5488                 if (ret)
5489                         goto fail;
5490         }
5491
5492         /* reset inode_item's nlink to ref_count */
5493         key.objectid = ino;
5494         key.type = BTRFS_INODE_ITEM_KEY;
5495         key.offset = 0;
5496
5497         btrfs_release_path(path);
5498         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5499         if (ret > 0)
5500                 ret = -ENOENT;
5501         if (ret)
5502                 goto fail;
5503
5504         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5505                             struct btrfs_inode_item);
5506         btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
5507         btrfs_mark_buffer_dirty(path->nodes[0]);
5508
5509         if (nlink)
5510                 *nlink = ref_count;
5511 fail:
5512         btrfs_commit_transaction(trans, root);
5513 out:
5514         if (ret)
5515                 error(
5516         "fail to repair nlink of inode %llu root %llu name %s filetype %u",
5517                        root->objectid, ino, namebuf, filetype);
5518         else
5519                 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
5520                        root->objectid, ino, namebuf, filetype);
5521
5522         /* research */
5523         btrfs_release_path(path);
5524         ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
5525         if (ret2 < 0)
5526                 return ret |= ret2;
5527         return ret;
5528 }
5529
5530 /*
5531  * Check INODE_ITEM and related ITEMs (the same inode number)
5532  * 1. check link count
5533  * 2. check inode ref/extref
5534  * 3. check dir item/index
5535  *
5536  * @ext_ref:    the EXTENDED_IREF feature
5537  *
5538  * Return 0 if no error occurred.
5539  * Return >0 for error or hit the traversal is done(by error bitmap)
5540  */
5541 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5542                             unsigned int ext_ref)
5543 {
5544         struct extent_buffer *node;
5545         struct btrfs_inode_item *ii;
5546         struct btrfs_key key;
5547         struct btrfs_key last_key;
5548         u64 inode_id;
5549         u32 mode;
5550         u64 nlink;
5551         u64 nbytes;
5552         u64 isize;
5553         u64 size = 0;
5554         u64 refs = 0;
5555         u64 extent_end = 0;
5556         u64 extent_size = 0;
5557         unsigned int dir;
5558         unsigned int nodatasum;
5559         int slot;
5560         int ret;
5561         int err = 0;
5562         char namebuf[BTRFS_NAME_LEN] = {0};
5563         u32 name_len = 0;
5564
5565         node = path->nodes[0];
5566         slot = path->slots[0];
5567
5568         btrfs_item_key_to_cpu(node, &key, slot);
5569         inode_id = key.objectid;
5570
5571         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5572                 ret = btrfs_next_item(root, path);
5573                 if (ret > 0)
5574                         err |= LAST_ITEM;
5575                 return err;
5576         }
5577
5578         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5579         isize = btrfs_inode_size(node, ii);
5580         nbytes = btrfs_inode_nbytes(node, ii);
5581         mode = btrfs_inode_mode(node, ii);
5582         dir = imode_to_type(mode) == BTRFS_FT_DIR;
5583         nlink = btrfs_inode_nlink(node, ii);
5584         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5585
5586         while (1) {
5587                 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
5588                 ret = btrfs_next_item(root, path);
5589                 if (ret < 0) {
5590                         /* out will fill 'err' rusing current statistics */
5591                         goto out;
5592                 } else if (ret > 0) {
5593                         err |= LAST_ITEM;
5594                         goto out;
5595                 }
5596
5597                 node = path->nodes[0];
5598                 slot = path->slots[0];
5599                 btrfs_item_key_to_cpu(node, &key, slot);
5600                 if (key.objectid != inode_id)
5601                         goto out;
5602
5603                 switch (key.type) {
5604                 case BTRFS_INODE_REF_KEY:
5605                         ret = check_inode_ref(root, &key, path, namebuf,
5606                                               &name_len, &refs, mode);
5607                         err |= ret;
5608                         break;
5609                 case BTRFS_INODE_EXTREF_KEY:
5610                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5611                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
5612                                         root->objectid, key.objectid,
5613                                         key.offset);
5614                         ret = check_inode_extref(root, &key, node, slot, &refs,
5615                                                  mode);
5616                         err |= ret;
5617                         break;
5618                 case BTRFS_DIR_ITEM_KEY:
5619                 case BTRFS_DIR_INDEX_KEY:
5620                         if (!dir) {
5621                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5622                                         root->objectid, inode_id,
5623                                         imode_to_type(mode), key.objectid,
5624                                         key.offset);
5625                         }
5626                         ret = check_dir_item(root, &key, path, &size, ext_ref);
5627                         err |= ret;
5628                         break;
5629                 case BTRFS_EXTENT_DATA_KEY:
5630                         if (dir) {
5631                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5632                                         root->objectid, inode_id, key.objectid,
5633                                         key.offset);
5634                         }
5635                         ret = check_file_extent(root, &key, node, slot,
5636                                                 nodatasum, &extent_size,
5637                                                 &extent_end);
5638                         err |= ret;
5639                         break;
5640                 case BTRFS_XATTR_ITEM_KEY:
5641                         break;
5642                 default:
5643                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5644                               key.objectid, key.type, key.offset);
5645                 }
5646         }
5647
5648 out:
5649         if (err & LAST_ITEM) {
5650                 btrfs_release_path(path);
5651                 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
5652                 if (ret)
5653                         return err;
5654         }
5655
5656         /* verify INODE_ITEM nlink/isize/nbytes */
5657         if (dir) {
5658                 if (repair && (err & DIR_COUNT_AGAIN)) {
5659                         err &= ~DIR_COUNT_AGAIN;
5660                         count_dir_isize(root, inode_id, &size);
5661                 }
5662
5663                 if ((nlink != 1 || refs != 1) && repair) {
5664                         ret = repair_inode_nlinks_lowmem(root, path, inode_id,
5665                                 namebuf, name_len, refs, imode_to_type(mode),
5666                                 &nlink);
5667                 }
5668
5669                 if (nlink != 1) {
5670                         err |= LINK_COUNT_ERROR;
5671                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5672                               root->objectid, inode_id, nlink);
5673                 }
5674
5675                 /*
5676                  * Just a warning, as dir inode nbytes is just an
5677                  * instructive value.
5678                  */
5679                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5680                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5681                                 root->objectid, inode_id,
5682                                 root->fs_info->nodesize);
5683                 }
5684
5685                 if (isize != size) {
5686                         if (repair)
5687                                 ret = repair_dir_isize_lowmem(root, path,
5688                                                               inode_id, size);
5689                         if (!repair || ret) {
5690                                 err |= ISIZE_ERROR;
5691                                 error(
5692                 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5693                                       root->objectid, inode_id, isize, size);
5694                         }
5695                 }
5696         } else {
5697                 if (nlink != refs) {
5698                         if (repair)
5699                                 ret = repair_inode_nlinks_lowmem(root, path,
5700                                          inode_id, namebuf, name_len, refs,
5701                                          imode_to_type(mode), &nlink);
5702                         if (!repair || ret) {
5703                                 err |= LINK_COUNT_ERROR;
5704                                 error(
5705                 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5706                                       root->objectid, inode_id, nlink, refs);
5707                         }
5708                 } else if (!nlink) {
5709                         if (repair)
5710                                 ret = repair_inode_orphan_item_lowmem(root,
5711                                                               path, inode_id);
5712                         if (!repair || ret) {
5713                                 err |= ORPHAN_ITEM;
5714                                 error("root %llu INODE[%llu] is orphan item",
5715                                       root->objectid, inode_id);
5716                         }
5717                 }
5718
5719                 if (!nbytes && !no_holes && extent_end < isize) {
5720                         if (repair)
5721                                 ret = punch_extent_hole(root, inode_id,
5722                                                 extent_end, isize - extent_end);
5723                         if (!repair || ret) {
5724                                 err |= NBYTES_ERROR;
5725                                 error(
5726         "root %llu INODE[%llu] size %llu should have a file extent hole",
5727                                       root->objectid, inode_id, isize);
5728                         }
5729                 }
5730
5731                 if (nbytes != extent_size) {
5732                         if (repair)
5733                                 ret = repair_inode_nbytes_lowmem(root, path,
5734                                                          inode_id, extent_size);
5735                         if (!repair || ret) {
5736                                 err |= NBYTES_ERROR;
5737                                 error(
5738         "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5739                                       root->objectid, inode_id, nbytes,
5740                                       extent_size);
5741                         }
5742                 }
5743         }
5744
5745         if (err & LAST_ITEM)
5746                 btrfs_next_item(root, path);
5747         return err;
5748 }
5749
5750 /*
5751  * Insert the missing inode item and inode ref.
5752  *
5753  * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
5754  * Root dir should be handled specially because root dir is the root of fs.
5755  *
5756  * returns err (>0 or 0) after repair
5757  */
5758 static int repair_fs_first_inode(struct btrfs_root *root, int err)
5759 {
5760         struct btrfs_trans_handle *trans;
5761         struct btrfs_key key;
5762         struct btrfs_path path;
5763         int filetype = BTRFS_FT_DIR;
5764         int ret = 0;
5765
5766         btrfs_init_path(&path);
5767
5768         if (err & INODE_REF_MISSING) {
5769                 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5770                 key.type = BTRFS_INODE_REF_KEY;
5771                 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5772
5773                 trans = btrfs_start_transaction(root, 1);
5774                 if (IS_ERR(trans)) {
5775                         ret = PTR_ERR(trans);
5776                         goto out;
5777                 }
5778
5779                 btrfs_release_path(&path);
5780                 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
5781                 if (ret)
5782                         goto trans_fail;
5783
5784                 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
5785                                              BTRFS_FIRST_FREE_OBJECTID,
5786                                              BTRFS_FIRST_FREE_OBJECTID, 0);
5787                 if (ret)
5788                         goto trans_fail;
5789
5790                 printf("Add INODE_REF[%llu %llu] name %s\n",
5791                        BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
5792                        "..");
5793                 err &= ~INODE_REF_MISSING;
5794 trans_fail:
5795                 if (ret)
5796                         error("fail to insert first inode's ref");
5797                 btrfs_commit_transaction(trans, root);
5798         }
5799
5800         if (err & INODE_ITEM_MISSING) {
5801                 ret = repair_inode_item_missing(root,
5802                                         BTRFS_FIRST_FREE_OBJECTID, filetype);
5803                 if (ret)
5804                         goto out;
5805                 err &= ~INODE_ITEM_MISSING;
5806         }
5807 out:
5808         if (ret)
5809                 error("fail to repair first inode");
5810         btrfs_release_path(&path);
5811         return err;
5812 }
5813
5814 /*
5815  * check first root dir's inode_item and inode_ref
5816  *
5817  * returns 0 means no error
5818  * returns >0 means error
5819  * returns <0 means fatal error
5820  */
5821 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5822 {
5823         struct btrfs_path path;
5824         struct btrfs_key key;
5825         struct btrfs_inode_item *ii;
5826         u64 index;
5827         u32 mode;
5828         int err = 0;
5829         int ret;
5830
5831         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5832         key.type = BTRFS_INODE_ITEM_KEY;
5833         key.offset = 0;
5834
5835         /* For root being dropped, we don't need to check first inode */
5836         if (btrfs_root_refs(&root->root_item) == 0 &&
5837             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5838             BTRFS_FIRST_FREE_OBJECTID)
5839                 return 0;
5840
5841         btrfs_init_path(&path);
5842         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5843         if (ret < 0)
5844                 goto out;
5845         if (ret > 0) {
5846                 ret = 0;
5847                 err |= INODE_ITEM_MISSING;
5848         } else {
5849                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
5850                                     struct btrfs_inode_item);
5851                 mode = btrfs_inode_mode(path.nodes[0], ii);
5852                 if (imode_to_type(mode) != BTRFS_FT_DIR)
5853                         err |= INODE_ITEM_MISMATCH;
5854         }
5855
5856         /* lookup first inode ref */
5857         key.offset = BTRFS_FIRST_FREE_OBJECTID;
5858         key.type = BTRFS_INODE_REF_KEY;
5859         /* special index value */
5860         index = 0;
5861
5862         ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
5863         if (ret < 0)
5864                 goto out;
5865         err |= ret;
5866
5867 out:
5868         btrfs_release_path(&path);
5869
5870         if (err && repair)
5871                 err = repair_fs_first_inode(root, err);
5872
5873         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
5874                 error("root dir INODE_ITEM is %s",
5875                       err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5876         if (err & INODE_REF_MISSING)
5877                 error("root dir INODE_REF is missing");
5878
5879         return ret < 0 ? ret : err;
5880 }
5881
5882 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5883                                                 u64 parent, u64 root)
5884 {
5885         struct rb_node *node;
5886         struct tree_backref *back = NULL;
5887         struct tree_backref match = {
5888                 .node = {
5889                         .is_data = 0,
5890                 },
5891         };
5892
5893         if (parent) {
5894                 match.parent = parent;
5895                 match.node.full_backref = 1;
5896         } else {
5897                 match.root = root;
5898         }
5899
5900         node = rb_search(&rec->backref_tree, &match.node.node,
5901                          (rb_compare_keys)compare_extent_backref, NULL);
5902         if (node)
5903                 back = to_tree_backref(rb_node_to_extent_backref(node));
5904
5905         return back;
5906 }
5907
5908 static struct data_backref *find_data_backref(struct extent_record *rec,
5909                                                 u64 parent, u64 root,
5910                                                 u64 owner, u64 offset,
5911                                                 int found_ref,
5912                                                 u64 disk_bytenr, u64 bytes)
5913 {
5914         struct rb_node *node;
5915         struct data_backref *back = NULL;
5916         struct data_backref match = {
5917                 .node = {
5918                         .is_data = 1,
5919                 },
5920                 .owner = owner,
5921                 .offset = offset,
5922                 .bytes = bytes,
5923                 .found_ref = found_ref,
5924                 .disk_bytenr = disk_bytenr,
5925         };
5926
5927         if (parent) {
5928                 match.parent = parent;
5929                 match.node.full_backref = 1;
5930         } else {
5931                 match.root = root;
5932         }
5933
5934         node = rb_search(&rec->backref_tree, &match.node.node,
5935                          (rb_compare_keys)compare_extent_backref, NULL);
5936         if (node)
5937                 back = to_data_backref(rb_node_to_extent_backref(node));
5938
5939         return back;
5940 }
5941 /*
5942  * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
5943  * blocks and integrity of fs tree items.
5944  *
5945  * @root:         the root of the tree to be checked.
5946  * @ext_ref       feature EXTENDED_IREF is enable or not.
5947  * @account       if NOT 0 means check the tree (including tree)'s treeblocks.
5948  *                otherwise means check fs tree(s) items relationship and
5949  *                @root MUST be a fs tree root.
5950  * Returns 0      represents OK.
5951  * Returns not 0  represents error.
5952  */
5953 static int check_btrfs_root(struct btrfs_trans_handle *trans,
5954                             struct btrfs_root *root, unsigned int ext_ref,
5955                             int check_all)
5956
5957 {
5958         struct btrfs_path path;
5959         struct node_refs nrefs;
5960         struct btrfs_root_item *root_item = &root->root_item;
5961         int ret;
5962         int level;
5963         int err = 0;
5964
5965         memset(&nrefs, 0, sizeof(nrefs));
5966         if (!check_all) {
5967                 /*
5968                  * We need to manually check the first inode item (256)
5969                  * As the following traversal function will only start from
5970                  * the first inode item in the leaf, if inode item (256) is
5971                  * missing we will skip it forever.
5972                  */
5973                 ret = check_fs_first_inode(root, ext_ref);
5974                 if (ret < 0)
5975                         return ret;
5976         }
5977
5978
5979         level = btrfs_header_level(root->node);
5980         btrfs_init_path(&path);
5981
5982         if (btrfs_root_refs(root_item) > 0 ||
5983             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5984                 path.nodes[level] = root->node;
5985                 path.slots[level] = 0;
5986                 extent_buffer_get(root->node);
5987         } else {
5988                 struct btrfs_key key;
5989
5990                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5991                 level = root_item->drop_level;
5992                 path.lowest_level = level;
5993                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5994                 if (ret < 0)
5995                         goto out;
5996                 ret = 0;
5997         }
5998
5999         while (1) {
6000                 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6001                                         ext_ref, check_all);
6002
6003                 err |= !!ret;
6004
6005                 /* if ret is negative, walk shall stop */
6006                 if (ret < 0) {
6007                         ret = err;
6008                         break;
6009                 }
6010
6011                 ret = walk_up_tree_v2(root, &path, &level);
6012                 if (ret != 0) {
6013                         /* Normal exit, reset ret to err */
6014                         ret = err;
6015                         break;
6016                 }
6017         }
6018
6019 out:
6020         btrfs_release_path(&path);
6021         return ret;
6022 }
6023
6024 /*
6025  * Iterate all items in the tree and call check_inode_item() to check.
6026  *
6027  * @root:       the root of the tree to be checked.
6028  * @ext_ref:    the EXTENDED_IREF feature
6029  *
6030  * Return 0 if no error found.
6031  * Return <0 for error.
6032  */
6033 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6034 {
6035         reset_cached_block_groups(root->fs_info);
6036         return check_btrfs_root(NULL, root, ext_ref, 0);
6037 }
6038
6039 /*
6040  * Find the relative ref for root_ref and root_backref.
6041  *
6042  * @root:       the root of the root tree.
6043  * @ref_key:    the key of the root ref.
6044  *
6045  * Return 0 if no error occurred.
6046  */
6047 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6048                           struct extent_buffer *node, int slot)
6049 {
6050         struct btrfs_path path;
6051         struct btrfs_key key;
6052         struct btrfs_root_ref *ref;
6053         struct btrfs_root_ref *backref;
6054         char ref_name[BTRFS_NAME_LEN] = {0};
6055         char backref_name[BTRFS_NAME_LEN] = {0};
6056         u64 ref_dirid;
6057         u64 ref_seq;
6058         u32 ref_namelen;
6059         u64 backref_dirid;
6060         u64 backref_seq;
6061         u32 backref_namelen;
6062         u32 len;
6063         int ret;
6064         int err = 0;
6065
6066         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6067         ref_dirid = btrfs_root_ref_dirid(node, ref);
6068         ref_seq = btrfs_root_ref_sequence(node, ref);
6069         ref_namelen = btrfs_root_ref_name_len(node, ref);
6070
6071         if (ref_namelen <= BTRFS_NAME_LEN) {
6072                 len = ref_namelen;
6073         } else {
6074                 len = BTRFS_NAME_LEN;
6075                 warning("%s[%llu %llu] ref_name too long",
6076                         ref_key->type == BTRFS_ROOT_REF_KEY ?
6077                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
6078                         ref_key->offset);
6079         }
6080         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6081
6082         /* Find relative root_ref */
6083         key.objectid = ref_key->offset;
6084         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6085         key.offset = ref_key->objectid;
6086
6087         btrfs_init_path(&path);
6088         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6089         if (ret) {
6090                 err |= ROOT_REF_MISSING;
6091                 error("%s[%llu %llu] couldn't find relative ref",
6092                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6093                       "ROOT_REF" : "ROOT_BACKREF",
6094                       ref_key->objectid, ref_key->offset);
6095                 goto out;
6096         }
6097
6098         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6099                                  struct btrfs_root_ref);
6100         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6101         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6102         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6103
6104         if (backref_namelen <= BTRFS_NAME_LEN) {
6105                 len = backref_namelen;
6106         } else {
6107                 len = BTRFS_NAME_LEN;
6108                 warning("%s[%llu %llu] ref_name too long",
6109                         key.type == BTRFS_ROOT_REF_KEY ?
6110                         "ROOT_REF" : "ROOT_BACKREF",
6111                         key.objectid, key.offset);
6112         }
6113         read_extent_buffer(path.nodes[0], backref_name,
6114                            (unsigned long)(backref + 1), len);
6115
6116         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6117             ref_namelen != backref_namelen ||
6118             strncmp(ref_name, backref_name, len)) {
6119                 err |= ROOT_REF_MISMATCH;
6120                 error("%s[%llu %llu] mismatch relative ref",
6121                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6122                       "ROOT_REF" : "ROOT_BACKREF",
6123                       ref_key->objectid, ref_key->offset);
6124         }
6125 out:
6126         btrfs_release_path(&path);
6127         return err;
6128 }
6129
6130 /*
6131  * Check all fs/file tree in low_memory mode.
6132  *
6133  * 1. for fs tree root item, call check_fs_root_v2()
6134  * 2. for fs tree root ref/backref, call check_root_ref()
6135  *
6136  * Return 0 if no error occurred.
6137  */
6138 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6139 {
6140         struct btrfs_root *tree_root = fs_info->tree_root;
6141         struct btrfs_root *cur_root = NULL;
6142         struct btrfs_path path;
6143         struct btrfs_key key;
6144         struct extent_buffer *node;
6145         unsigned int ext_ref;
6146         int slot;
6147         int ret;
6148         int err = 0;
6149
6150         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6151
6152         btrfs_init_path(&path);
6153         key.objectid = BTRFS_FS_TREE_OBJECTID;
6154         key.offset = 0;
6155         key.type = BTRFS_ROOT_ITEM_KEY;
6156
6157         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6158         if (ret < 0) {
6159                 err = ret;
6160                 goto out;
6161         } else if (ret > 0) {
6162                 err = -ENOENT;
6163                 goto out;
6164         }
6165
6166         while (1) {
6167                 node = path.nodes[0];
6168                 slot = path.slots[0];
6169                 btrfs_item_key_to_cpu(node, &key, slot);
6170                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6171                         goto out;
6172                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6173                     fs_root_objectid(key.objectid)) {
6174                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6175                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6176                                                                        &key);
6177                         } else {
6178                                 key.offset = (u64)-1;
6179                                 cur_root = btrfs_read_fs_root(fs_info, &key);
6180                         }
6181
6182                         if (IS_ERR(cur_root)) {
6183                                 error("Fail to read fs/subvol tree: %lld",
6184                                       key.objectid);
6185                                 err = -EIO;
6186                                 goto next;
6187                         }
6188
6189                         ret = check_fs_root_v2(cur_root, ext_ref);
6190                         err |= ret;
6191
6192                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6193                                 btrfs_free_fs_root(cur_root);
6194                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6195                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
6196                         ret = check_root_ref(tree_root, &key, node, slot);
6197                         err |= ret;
6198                 }
6199 next:
6200                 ret = btrfs_next_item(tree_root, &path);
6201                 if (ret > 0)
6202                         goto out;
6203                 if (ret < 0) {
6204                         err = ret;
6205                         goto out;
6206                 }
6207         }
6208
6209 out:
6210         btrfs_release_path(&path);
6211         return err;
6212 }
6213
6214 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6215                           struct cache_tree *root_cache)
6216 {
6217         int ret;
6218
6219         if (!ctx.progress_enabled)
6220                 fprintf(stderr, "checking fs roots\n");
6221         if (check_mode == CHECK_MODE_LOWMEM)
6222                 ret = check_fs_roots_v2(fs_info);
6223         else
6224                 ret = check_fs_roots(fs_info, root_cache);
6225
6226         return ret;
6227 }
6228
6229 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6230 {
6231         struct extent_backref *back, *tmp;
6232         struct tree_backref *tback;
6233         struct data_backref *dback;
6234         u64 found = 0;
6235         int err = 0;
6236
6237         rbtree_postorder_for_each_entry_safe(back, tmp,
6238                                              &rec->backref_tree, node) {
6239                 if (!back->found_extent_tree) {
6240                         err = 1;
6241                         if (!print_errs)
6242                                 goto out;
6243                         if (back->is_data) {
6244                                 dback = to_data_backref(back);
6245                                 fprintf(stderr, "Data backref %llu %s %llu"
6246                                         " owner %llu offset %llu num_refs %lu"
6247                                         " not found in extent tree\n",
6248                                         (unsigned long long)rec->start,
6249                                         back->full_backref ?
6250                                         "parent" : "root",
6251                                         back->full_backref ?
6252                                         (unsigned long long)dback->parent:
6253                                         (unsigned long long)dback->root,
6254                                         (unsigned long long)dback->owner,
6255                                         (unsigned long long)dback->offset,
6256                                         (unsigned long)dback->num_refs);
6257                         } else {
6258                                 tback = to_tree_backref(back);
6259                                 fprintf(stderr, "Tree backref %llu parent %llu"
6260                                         " root %llu not found in extent tree\n",
6261                                         (unsigned long long)rec->start,
6262                                         (unsigned long long)tback->parent,
6263                                         (unsigned long long)tback->root);
6264                         }
6265                 }
6266                 if (!back->is_data && !back->found_ref) {
6267                         err = 1;
6268                         if (!print_errs)
6269                                 goto out;
6270                         tback = to_tree_backref(back);
6271                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6272                                 (unsigned long long)rec->start,
6273                                 back->full_backref ? "parent" : "root",
6274                                 back->full_backref ?
6275                                 (unsigned long long)tback->parent :
6276                                 (unsigned long long)tback->root, back);
6277                 }
6278                 if (back->is_data) {
6279                         dback = to_data_backref(back);
6280                         if (dback->found_ref != dback->num_refs) {
6281                                 err = 1;
6282                                 if (!print_errs)
6283                                         goto out;
6284                                 fprintf(stderr, "Incorrect local backref count"
6285                                         " on %llu %s %llu owner %llu"
6286                                         " offset %llu found %u wanted %u back %p\n",
6287                                         (unsigned long long)rec->start,
6288                                         back->full_backref ?
6289                                         "parent" : "root",
6290                                         back->full_backref ?
6291                                         (unsigned long long)dback->parent:
6292                                         (unsigned long long)dback->root,
6293                                         (unsigned long long)dback->owner,
6294                                         (unsigned long long)dback->offset,
6295                                         dback->found_ref, dback->num_refs, back);
6296                         }
6297                         if (dback->disk_bytenr != rec->start) {
6298                                 err = 1;
6299                                 if (!print_errs)
6300                                         goto out;
6301                                 fprintf(stderr, "Backref disk bytenr does not"
6302                                         " match extent record, bytenr=%llu, "
6303                                         "ref bytenr=%llu\n",
6304                                         (unsigned long long)rec->start,
6305                                         (unsigned long long)dback->disk_bytenr);
6306                         }
6307
6308                         if (dback->bytes != rec->nr) {
6309                                 err = 1;
6310                                 if (!print_errs)
6311                                         goto out;
6312                                 fprintf(stderr, "Backref bytes do not match "
6313                                         "extent backref, bytenr=%llu, ref "
6314                                         "bytes=%llu, backref bytes=%llu\n",
6315                                         (unsigned long long)rec->start,
6316                                         (unsigned long long)rec->nr,
6317                                         (unsigned long long)dback->bytes);
6318                         }
6319                 }
6320                 if (!back->is_data) {
6321                         found += 1;
6322                 } else {
6323                         dback = to_data_backref(back);
6324                         found += dback->found_ref;
6325                 }
6326         }
6327         if (found != rec->refs) {
6328                 err = 1;
6329                 if (!print_errs)
6330                         goto out;
6331                 fprintf(stderr, "Incorrect global backref count "
6332                         "on %llu found %llu wanted %llu\n",
6333                         (unsigned long long)rec->start,
6334                         (unsigned long long)found,
6335                         (unsigned long long)rec->refs);
6336         }
6337 out:
6338         return err;
6339 }
6340
/* rb_free_nodes() callback: free the extent_backref that embeds @node. */
static void __free_one_backref(struct rb_node *node)
{
	free(rb_node_to_extent_backref(node));
}
6347
6348 static void free_all_extent_backrefs(struct extent_record *rec)
6349 {
6350         rb_free_nodes(&rec->backref_tree, __free_one_backref);
6351 }
6352
6353 static void free_extent_record_cache(struct cache_tree *extent_cache)
6354 {
6355         struct cache_extent *cache;
6356         struct extent_record *rec;
6357
6358         while (1) {
6359                 cache = first_cache_extent(extent_cache);
6360                 if (!cache)
6361                         break;
6362                 rec = container_of(cache, struct extent_record, cache);
6363                 remove_cache_extent(extent_cache, cache);
6364                 free_all_extent_backrefs(rec);
6365                 free(rec);
6366         }
6367 }
6368
6369 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6370                                  struct extent_record *rec)
6371 {
6372         if (rec->content_checked && rec->owner_ref_checked &&
6373             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6374             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6375             !rec->bad_full_backref && !rec->crossing_stripes &&
6376             !rec->wrong_chunk_type) {
6377                 remove_cache_extent(extent_cache, &rec->cache);
6378                 free_all_extent_backrefs(rec);
6379                 list_del_init(&rec->list);
6380                 free(rec);
6381         }
6382         return 0;
6383 }
6384
6385 static int check_owner_ref(struct btrfs_root *root,
6386                             struct extent_record *rec,
6387                             struct extent_buffer *buf)
6388 {
6389         struct extent_backref *node, *tmp;
6390         struct tree_backref *back;
6391         struct btrfs_root *ref_root;
6392         struct btrfs_key key;
6393         struct btrfs_path path;
6394         struct extent_buffer *parent;
6395         int level;
6396         int found = 0;
6397         int ret;
6398
6399         rbtree_postorder_for_each_entry_safe(node, tmp,
6400                                              &rec->backref_tree, node) {
6401                 if (node->is_data)
6402                         continue;
6403                 if (!node->found_ref)
6404                         continue;
6405                 if (node->full_backref)
6406                         continue;
6407                 back = to_tree_backref(node);
6408                 if (btrfs_header_owner(buf) == back->root)
6409                         return 0;
6410         }
6411         BUG_ON(rec->is_root);
6412
6413         /* try to find the block by search corresponding fs tree */
6414         key.objectid = btrfs_header_owner(buf);
6415         key.type = BTRFS_ROOT_ITEM_KEY;
6416         key.offset = (u64)-1;
6417
6418         ref_root = btrfs_read_fs_root(root->fs_info, &key);
6419         if (IS_ERR(ref_root))
6420                 return 1;
6421
6422         level = btrfs_header_level(buf);
6423         if (level == 0)
6424                 btrfs_item_key_to_cpu(buf, &key, 0);
6425         else
6426                 btrfs_node_key_to_cpu(buf, &key, 0);
6427
6428         btrfs_init_path(&path);
6429         path.lowest_level = level + 1;
6430         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6431         if (ret < 0)
6432                 return 0;
6433
6434         parent = path.nodes[level + 1];
6435         if (parent && buf->start == btrfs_node_blockptr(parent,
6436                                                         path.slots[level + 1]))
6437                 found = 1;
6438
6439         btrfs_release_path(&path);
6440         return found ? 0 : 1;
6441 }
6442
6443 static int is_extent_tree_record(struct extent_record *rec)
6444 {
6445         struct extent_backref *node, *tmp;
6446         struct tree_backref *back;
6447         int is_extent = 0;
6448
6449         rbtree_postorder_for_each_entry_safe(node, tmp,
6450                                              &rec->backref_tree, node) {
6451                 if (node->is_data)
6452                         return 0;
6453                 back = to_tree_backref(node);
6454                 if (node->full_backref)
6455                         return 0;
6456                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
6457                         is_extent = 1;
6458         }
6459         return is_extent;
6460 }
6461
6462
6463 static int record_bad_block_io(struct btrfs_fs_info *info,
6464                                struct cache_tree *extent_cache,
6465                                u64 start, u64 len)
6466 {
6467         struct extent_record *rec;
6468         struct cache_extent *cache;
6469         struct btrfs_key key;
6470
6471         cache = lookup_cache_extent(extent_cache, start, len);
6472         if (!cache)
6473                 return 0;
6474
6475         rec = container_of(cache, struct extent_record, cache);
6476         if (!is_extent_tree_record(rec))
6477                 return 0;
6478
6479         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6480         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
6481 }
6482
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * For a node (non-zero header level) the two btrfs_key_ptr entries are
 * swapped; when @slot is 0 the block's new first key is passed to
 * btrfs_fixup_low_keys() for the level above.  For a leaf the item data, the
 * item offset/size fields and the item keys of the two slots are exchanged.
 *
 * Returns 0 on success or -ENOMEM if a temporary data buffer cannot be
 * allocated.
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
		       struct extent_buffer *buf, int slot)
{
	if (btrfs_header_level(buf)) {
		struct btrfs_key_ptr ptr1, ptr2;

		/* Node: swap the two key pointers through stack copies. */
		read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
				   sizeof(struct btrfs_key_ptr));
		read_extent_buffer(buf, &ptr2,
				   btrfs_node_key_ptr_offset(slot + 1),
				   sizeof(struct btrfs_key_ptr));
		write_extent_buffer(buf, &ptr1,
				    btrfs_node_key_ptr_offset(slot + 1),
				    sizeof(struct btrfs_key_ptr));
		write_extent_buffer(buf, &ptr2,
				    btrfs_node_key_ptr_offset(slot),
				    sizeof(struct btrfs_key_ptr));
		if (slot == 0) {
			struct btrfs_disk_key key;
			/* The first key changed, so update the level above. */
			btrfs_node_key(buf, &key, 0);
			btrfs_fixup_low_keys(root, path, &key,
					     btrfs_header_level(buf) + 1);
		}
	} else {
		struct btrfs_item *item1, *item2;
		struct btrfs_key k1, k2;
		char *item1_data, *item2_data;
		u32 item1_offset, item2_offset, item1_size, item2_size;

		item1 = btrfs_item_nr(slot);
		item2 = btrfs_item_nr(slot + 1);
		btrfs_item_key_to_cpu(buf, &k1, slot);
		btrfs_item_key_to_cpu(buf, &k2, slot + 1);
		item1_offset = btrfs_item_offset(buf, item1);
		item2_offset = btrfs_item_offset(buf, item2);
		item1_size = btrfs_item_size(buf, item1);
		item2_size = btrfs_item_size(buf, item2);

		item1_data = malloc(item1_size);
		if (!item1_data)
			return -ENOMEM;
		item2_data = malloc(item2_size);
		if (!item2_data) {
			free(item1_data);
			return -ENOMEM;
		}

		/*
		 * NOTE(review): the raw item offsets are passed directly to
		 * read/write_extent_buffer() here, whereas fix_item_offset()
		 * adds btrfs_leaf_data(buf) to item offsets -- confirm which
		 * base these calls expect.
		 */
		read_extent_buffer(buf, item1_data, item1_offset, item1_size);
		read_extent_buffer(buf, item2_data, item2_offset, item2_size);

		/*
		 * NOTE(review): item1_data holds item1_size bytes but is
		 * written with length item2_size (and item2_data with
		 * item1_size).  This looks safe only when both items have the
		 * same size -- confirm.
		 */
		write_extent_buffer(buf, item1_data, item2_offset, item2_size);
		write_extent_buffer(buf, item2_data, item1_offset, item1_size);
		free(item1_data);
		free(item2_data);

		/* Exchange the offset/size fields of the two item headers. */
		btrfs_set_item_offset(buf, item1, item2_offset);
		btrfs_set_item_offset(buf, item2, item1_offset);
		btrfs_set_item_size(buf, item1, item2_size);
		btrfs_set_item_size(buf, item2, item1_size);

		/* Exchange the keys; the path slot selects the item to set. */
		path->slots[0] = slot;
		btrfs_set_item_key_unsafe(root, path, &k2);
		path->slots[0] = slot + 1;
		btrfs_set_item_key_unsafe(root, path, &k1);
	}
	return 0;
}
6550
6551 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6552 {
6553         struct extent_buffer *buf;
6554         struct btrfs_key k1, k2;
6555         int i;
6556         int level = path->lowest_level;
6557         int ret = -EIO;
6558
6559         buf = path->nodes[level];
6560         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6561                 if (level) {
6562                         btrfs_node_key_to_cpu(buf, &k1, i);
6563                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
6564                 } else {
6565                         btrfs_item_key_to_cpu(buf, &k1, i);
6566                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
6567                 }
6568                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6569                         continue;
6570                 ret = swap_values(root, path, buf, i);
6571                 if (ret)
6572                         break;
6573                 btrfs_mark_buffer_dirty(buf);
6574                 i = 0;
6575         }
6576         return ret;
6577 }
6578
6579 static int delete_bogus_item(struct btrfs_root *root,
6580                              struct btrfs_path *path,
6581                              struct extent_buffer *buf, int slot)
6582 {
6583         struct btrfs_key key;
6584         int nritems = btrfs_header_nritems(buf);
6585
6586         btrfs_item_key_to_cpu(buf, &key, slot);
6587
6588         /* These are all the keys we can deal with missing. */
6589         if (key.type != BTRFS_DIR_INDEX_KEY &&
6590             key.type != BTRFS_EXTENT_ITEM_KEY &&
6591             key.type != BTRFS_METADATA_ITEM_KEY &&
6592             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6593             key.type != BTRFS_EXTENT_DATA_REF_KEY)
6594                 return -1;
6595
6596         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6597                (unsigned long long)key.objectid, key.type,
6598                (unsigned long long)key.offset, slot, buf->start);
6599         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6600                               btrfs_item_nr_offset(slot + 1),
6601                               sizeof(struct btrfs_item) *
6602                               (nritems - slot - 1));
6603         btrfs_set_header_nritems(buf, nritems - 1);
6604         if (slot == 0) {
6605                 struct btrfs_disk_key disk_key;
6606
6607                 btrfs_item_key(buf, &disk_key, 0);
6608                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6609         }
6610         btrfs_mark_buffer_dirty(buf);
6611         return 0;
6612 }
6613
6614 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6615 {
6616         struct extent_buffer *buf;
6617         int i;
6618         int ret = 0;
6619
6620         /* We should only get this for leaves */
6621         BUG_ON(path->lowest_level);
6622         buf = path->nodes[0];
6623 again:
6624         for (i = 0; i < btrfs_header_nritems(buf); i++) {
6625                 unsigned int shift = 0, offset;
6626
6627                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6628                     BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
6629                         if (btrfs_item_end_nr(buf, i) >
6630                             BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
6631                                 ret = delete_bogus_item(root, path, buf, i);
6632                                 if (!ret)
6633                                         goto again;
6634                                 fprintf(stderr, "item is off the end of the "
6635                                         "leaf, can't fix\n");
6636                                 ret = -EIO;
6637                                 break;
6638                         }
6639                         shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
6640                                 btrfs_item_end_nr(buf, i);
6641                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6642                            btrfs_item_offset_nr(buf, i - 1)) {
6643                         if (btrfs_item_end_nr(buf, i) >
6644                             btrfs_item_offset_nr(buf, i - 1)) {
6645                                 ret = delete_bogus_item(root, path, buf, i);
6646                                 if (!ret)
6647                                         goto again;
6648                                 fprintf(stderr, "items overlap, can't fix\n");
6649                                 ret = -EIO;
6650                                 break;
6651                         }
6652                         shift = btrfs_item_offset_nr(buf, i - 1) -
6653                                 btrfs_item_end_nr(buf, i);
6654                 }
6655                 if (!shift)
6656                         continue;
6657
6658                 printf("Shifting item nr %d by %u bytes in block %llu\n",
6659                        i, shift, (unsigned long long)buf->start);
6660                 offset = btrfs_item_offset_nr(buf, i);
6661                 memmove_extent_buffer(buf,
6662                                       btrfs_leaf_data(buf) + offset + shift,
6663                                       btrfs_leaf_data(buf) + offset,
6664                                       btrfs_item_size_nr(buf, i));
6665                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6666                                       offset + shift);
6667                 btrfs_mark_buffer_dirty(buf);
6668         }
6669
6670         /*
6671          * We may have moved things, in which case we want to exit so we don't
6672          * write those changes out.  Once we have proper abort functionality in
6673          * progs this can be changed to something nicer.
6674          */
6675         BUG_ON(ret);
6676         return ret;
6677 }
6678
6679 /*
6680  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
6681  * then just return -EIO.
6682  */
6683 static int try_to_fix_bad_block(struct btrfs_root *root,
6684                                 struct extent_buffer *buf,
6685                                 enum btrfs_tree_block_status status)
6686 {
6687         struct btrfs_trans_handle *trans;
6688         struct ulist *roots;
6689         struct ulist_node *node;
6690         struct btrfs_root *search_root;
6691         struct btrfs_path path;
6692         struct ulist_iterator iter;
6693         struct btrfs_key root_key, key;
6694         int ret;
6695
6696         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6697             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6698                 return -EIO;
6699
6700         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6701         if (ret)
6702                 return -EIO;
6703
6704         btrfs_init_path(&path);
6705         ULIST_ITER_INIT(&iter);
6706         while ((node = ulist_next(roots, &iter))) {
6707                 root_key.objectid = node->val;
6708                 root_key.type = BTRFS_ROOT_ITEM_KEY;
6709                 root_key.offset = (u64)-1;
6710
6711                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6712                 if (IS_ERR(root)) {
6713                         ret = -EIO;
6714                         break;
6715                 }
6716
6717
6718                 trans = btrfs_start_transaction(search_root, 0);
6719                 if (IS_ERR(trans)) {
6720                         ret = PTR_ERR(trans);
6721                         break;
6722                 }
6723
6724                 path.lowest_level = btrfs_header_level(buf);
6725                 path.skip_check_block = 1;
6726                 if (path.lowest_level)
6727                         btrfs_node_key_to_cpu(buf, &key, 0);
6728                 else
6729                         btrfs_item_key_to_cpu(buf, &key, 0);
6730                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6731                 if (ret) {
6732                         ret = -EIO;
6733                         btrfs_commit_transaction(trans, search_root);
6734                         break;
6735                 }
6736                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6737                         ret = fix_key_order(search_root, &path);
6738                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6739                         ret = fix_item_offset(search_root, &path);
6740                 if (ret) {
6741                         btrfs_commit_transaction(trans, search_root);
6742                         break;
6743                 }
6744                 btrfs_release_path(&path);
6745                 btrfs_commit_transaction(trans, search_root);
6746         }
6747         ulist_free(roots);
6748         btrfs_release_path(&path);
6749         return ret;
6750 }
6751
6752 static int check_block(struct btrfs_root *root,
6753                        struct cache_tree *extent_cache,
6754                        struct extent_buffer *buf, u64 flags)
6755 {
6756         struct extent_record *rec;
6757         struct cache_extent *cache;
6758         struct btrfs_key key;
6759         enum btrfs_tree_block_status status;
6760         int ret = 0;
6761         int level;
6762
6763         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6764         if (!cache)
6765                 return 1;
6766         rec = container_of(cache, struct extent_record, cache);
6767         rec->generation = btrfs_header_generation(buf);
6768
6769         level = btrfs_header_level(buf);
6770         if (btrfs_header_nritems(buf) > 0) {
6771
6772                 if (level == 0)
6773                         btrfs_item_key_to_cpu(buf, &key, 0);
6774                 else
6775                         btrfs_node_key_to_cpu(buf, &key, 0);
6776
6777                 rec->info_objectid = key.objectid;
6778         }
6779         rec->info_level = level;
6780
6781         if (btrfs_is_leaf(buf))
6782                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6783         else
6784                 status = btrfs_check_node(root, &rec->parent_key, buf);
6785
6786         if (status != BTRFS_TREE_BLOCK_CLEAN) {
6787                 if (repair)
6788                         status = try_to_fix_bad_block(root, buf, status);
6789                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6790                         ret = -EIO;
6791                         fprintf(stderr, "bad block %llu\n",
6792                                 (unsigned long long)buf->start);
6793                 } else {
6794                         /*
6795                          * Signal to callers we need to start the scan over
6796                          * again since we'll have cowed blocks.
6797                          */
6798                         ret = -EAGAIN;
6799                 }
6800         } else {
6801                 rec->content_checked = 1;
6802                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6803                         rec->owner_ref_checked = 1;
6804                 else {
6805                         ret = check_owner_ref(root, rec, buf);
6806                         if (!ret)
6807                                 rec->owner_ref_checked = 1;
6808                 }
6809         }
6810         if (!ret)
6811                 maybe_free_extent_rec(extent_cache, rec);
6812         return ret;
6813 }
6814
/*
 * Compiled out by #if 0.  List-based lookup of a tree backref on @rec: walks
 * rec->backrefs, skips data backrefs, and matches a full backref by @parent
 * when @parent is non-zero or a non-full backref by @root otherwise.
 * Returns the matching backref or NULL.
 */
#if 0
static struct tree_backref *find_tree_backref(struct extent_record *rec,
						u64 parent, u64 root)
{
	struct list_head *cur = rec->backrefs.next;
	struct extent_backref *node;
	struct tree_backref *back;

	while(cur != &rec->backrefs) {
		node = to_extent_backref(cur);
		cur = cur->next;
		if (node->is_data)
			continue;
		back = to_tree_backref(node);
		if (parent > 0) {
			if (!node->full_backref)
				continue;
			if (parent == back->parent)
				return back;
		} else {
			if (node->full_backref)
				continue;
			if (back->root == root)
				return back;
		}
	}
	return NULL;
}
#endif
6844
6845 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6846                                                 u64 parent, u64 root)
6847 {
6848         struct tree_backref *ref = malloc(sizeof(*ref));
6849
6850         if (!ref)
6851                 return NULL;
6852         memset(&ref->node, 0, sizeof(ref->node));
6853         if (parent > 0) {
6854                 ref->parent = parent;
6855                 ref->node.full_backref = 1;
6856         } else {
6857                 ref->root = root;
6858                 ref->node.full_backref = 0;
6859         }
6860
6861         return ref;
6862 }
6863
/*
 * Compiled out by #if 0.  List-based lookup of a data backref on @rec: walks
 * rec->backrefs, skips tree backrefs, and matches a full backref by @parent
 * when @parent is non-zero, or a non-full backref by @root/@owner/@offset
 * otherwise.  In the latter case a candidate is skipped when both @found_ref
 * and its own found_ref are set but its bytes or disk_bytenr differ from
 * @bytes/@disk_bytenr.  Returns the matching backref or NULL.
 */
#if 0
static struct data_backref *find_data_backref(struct extent_record *rec,
						u64 parent, u64 root,
						u64 owner, u64 offset,
						int found_ref,
						u64 disk_bytenr, u64 bytes)
{
	struct list_head *cur = rec->backrefs.next;
	struct extent_backref *node;
	struct data_backref *back;

	while(cur != &rec->backrefs) {
		node = to_extent_backref(cur);
		cur = cur->next;
		if (!node->is_data)
			continue;
		back = to_data_backref(node);
		if (parent > 0) {
			if (!node->full_backref)
				continue;
			if (parent == back->parent)
				return back;
		} else {
			if (node->full_backref)
				continue;
			if (back->root == root && back->owner == owner &&
			    back->offset == offset) {
				if (found_ref && node->found_ref &&
				    (back->bytes != bytes ||
				    back->disk_bytenr != disk_bytenr))
					continue;
				return back;
			}
		}
	}
	return NULL;
}
#endif
6902
6903 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6904                                                 u64 parent, u64 root,
6905                                                 u64 owner, u64 offset,
6906                                                 u64 max_size)
6907 {
6908         struct data_backref *ref = malloc(sizeof(*ref));
6909
6910         if (!ref)
6911                 return NULL;
6912         memset(&ref->node, 0, sizeof(ref->node));
6913         ref->node.is_data = 1;
6914
6915         if (parent > 0) {
6916                 ref->parent = parent;
6917                 ref->owner = 0;
6918                 ref->offset = 0;
6919                 ref->node.full_backref = 1;
6920         } else {
6921                 ref->root = root;
6922                 ref->owner = owner;
6923                 ref->offset = offset;
6924                 ref->node.full_backref = 0;
6925         }
6926         ref->bytes = max_size;
6927         ref->found_ref = 0;
6928         ref->num_refs = 0;
6929         if (max_size > rec->max_size)
6930                 rec->max_size = max_size;
6931         return ref;
6932 }
6933
6934 /* Check if the type of extent matches with its chunk */
6935 static void check_extent_type(struct extent_record *rec)
6936 {
6937         struct btrfs_block_group_cache *bg_cache;
6938
6939         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6940         if (!bg_cache)
6941                 return;
6942
6943         /* data extent, check chunk directly*/
6944         if (!rec->metadata) {
6945                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6946                         rec->wrong_chunk_type = 1;
6947                 return;
6948         }
6949
6950         /* metadata extent, check the obvious case first */
6951         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6952                                  BTRFS_BLOCK_GROUP_METADATA))) {
6953                 rec->wrong_chunk_type = 1;
6954                 return;
6955         }
6956
6957         /*
6958          * Check SYSTEM extent, as it's also marked as metadata, we can only
6959          * make sure it's a SYSTEM extent by its backref
6960          */
6961         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
6962                 struct extent_backref *node;
6963                 struct tree_backref *tback;
6964                 u64 bg_type;
6965
6966                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
6967                 if (node->is_data) {
6968                         /* tree block shouldn't have data backref */
6969                         rec->wrong_chunk_type = 1;
6970                         return;
6971                 }
6972                 tback = container_of(node, struct tree_backref, node);
6973
6974                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6975                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6976                 else
6977                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6978                 if (!(bg_cache->flags & bg_type))
6979                         rec->wrong_chunk_type = 1;
6980         }
6981 }
6982
6983 /*
6984  * Allocate a new extent record, fill default values from @tmpl and insert int
6985  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6986  * the cache, otherwise it fails.
6987  */
6988 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6989                 struct extent_record *tmpl)
6990 {
6991         struct extent_record *rec;
6992         int ret = 0;
6993
6994         BUG_ON(tmpl->max_size == 0);
6995         rec = malloc(sizeof(*rec));
6996         if (!rec)
6997                 return -ENOMEM;
6998         rec->start = tmpl->start;
6999         rec->max_size = tmpl->max_size;
7000         rec->nr = max(tmpl->nr, tmpl->max_size);
7001         rec->found_rec = tmpl->found_rec;
7002         rec->content_checked = tmpl->content_checked;
7003         rec->owner_ref_checked = tmpl->owner_ref_checked;
7004         rec->num_duplicates = 0;
7005         rec->metadata = tmpl->metadata;
7006         rec->flag_block_full_backref = FLAG_UNSET;
7007         rec->bad_full_backref = 0;
7008         rec->crossing_stripes = 0;
7009         rec->wrong_chunk_type = 0;
7010         rec->is_root = tmpl->is_root;
7011         rec->refs = tmpl->refs;
7012         rec->extent_item_refs = tmpl->extent_item_refs;
7013         rec->parent_generation = tmpl->parent_generation;
7014         INIT_LIST_HEAD(&rec->backrefs);
7015         INIT_LIST_HEAD(&rec->dups);
7016         INIT_LIST_HEAD(&rec->list);
7017         rec->backref_tree = RB_ROOT;
7018         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7019         rec->cache.start = tmpl->start;
7020         rec->cache.size = tmpl->nr;
7021         ret = insert_cache_extent(extent_cache, &rec->cache);
7022         if (ret) {
7023                 free(rec);
7024                 return ret;
7025         }
7026         bytes_used += rec->nr;
7027
7028         if (tmpl->metadata)
7029                 rec->crossing_stripes = check_crossing_stripes(global_info,
7030                                 rec->start, global_info->nodesize);
7031         check_extent_type(rec);
7032         return ret;
7033 }
7034
7035 /*
7036  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7037  * some are hints:
7038  * - refs              - if found, increase refs
7039  * - is_root           - if found, set
7040  * - content_checked   - if found, set
7041  * - owner_ref_checked - if found, set
7042  *
7043  * If not found, create a new one, initialize and insert.
7044  */
7045 static int add_extent_rec(struct cache_tree *extent_cache,
7046                 struct extent_record *tmpl)
7047 {
7048         struct extent_record *rec;
7049         struct cache_extent *cache;
7050         int ret = 0;
7051         int dup = 0;
7052
7053         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7054         if (cache) {
7055                 rec = container_of(cache, struct extent_record, cache);
7056                 if (tmpl->refs)
7057                         rec->refs++;
7058                 if (rec->nr == 1)
7059                         rec->nr = max(tmpl->nr, tmpl->max_size);
7060
7061                 /*
7062                  * We need to make sure to reset nr to whatever the extent
7063                  * record says was the real size, this way we can compare it to
7064                  * the backrefs.
7065                  */
7066                 if (tmpl->found_rec) {
7067                         if (tmpl->start != rec->start || rec->found_rec) {
7068                                 struct extent_record *tmp;
7069
7070                                 dup = 1;
7071                                 if (list_empty(&rec->list))
7072                                         list_add_tail(&rec->list,
7073                                                       &duplicate_extents);
7074
7075                                 /*
7076                                  * We have to do this song and dance in case we
7077                                  * find an extent record that falls inside of
7078                                  * our current extent record but does not have
7079                                  * the same objectid.
7080                                  */
7081                                 tmp = malloc(sizeof(*tmp));
7082                                 if (!tmp)
7083                                         return -ENOMEM;
7084                                 tmp->start = tmpl->start;
7085                                 tmp->max_size = tmpl->max_size;
7086                                 tmp->nr = tmpl->nr;
7087                                 tmp->found_rec = 1;
7088                                 tmp->metadata = tmpl->metadata;
7089                                 tmp->extent_item_refs = tmpl->extent_item_refs;
7090                                 INIT_LIST_HEAD(&tmp->list);
7091                                 list_add_tail(&tmp->list, &rec->dups);
7092                                 rec->num_duplicates++;
7093                         } else {
7094                                 rec->nr = tmpl->nr;
7095                                 rec->found_rec = 1;
7096                         }
7097                 }
7098
7099                 if (tmpl->extent_item_refs && !dup) {
7100                         if (rec->extent_item_refs) {
7101                                 fprintf(stderr, "block %llu rec "
7102                                         "extent_item_refs %llu, passed %llu\n",
7103                                         (unsigned long long)tmpl->start,
7104                                         (unsigned long long)
7105                                                         rec->extent_item_refs,
7106                                         (unsigned long long)tmpl->extent_item_refs);
7107                         }
7108                         rec->extent_item_refs = tmpl->extent_item_refs;
7109                 }
7110                 if (tmpl->is_root)
7111                         rec->is_root = 1;
7112                 if (tmpl->content_checked)
7113                         rec->content_checked = 1;
7114                 if (tmpl->owner_ref_checked)
7115                         rec->owner_ref_checked = 1;
7116                 memcpy(&rec->parent_key, &tmpl->parent_key,
7117                                 sizeof(tmpl->parent_key));
7118                 if (tmpl->parent_generation)
7119                         rec->parent_generation = tmpl->parent_generation;
7120                 if (rec->max_size < tmpl->max_size)
7121                         rec->max_size = tmpl->max_size;
7122
7123                 /*
7124                  * A metadata extent can't cross stripe_len boundary, otherwise
7125                  * kernel scrub won't be able to handle it.
7126                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7127                  * it.
7128                  */
7129                 if (tmpl->metadata)
7130                         rec->crossing_stripes = check_crossing_stripes(
7131                                         global_info, rec->start,
7132                                         global_info->nodesize);
7133                 check_extent_type(rec);
7134                 maybe_free_extent_rec(extent_cache, rec);
7135                 return ret;
7136         }
7137
7138         ret = add_extent_rec_nolookup(extent_cache, tmpl);
7139
7140         return ret;
7141 }
7142
7143 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7144                             u64 parent, u64 root, int found_ref)
7145 {
7146         struct extent_record *rec;
7147         struct tree_backref *back;
7148         struct cache_extent *cache;
7149         int ret;
7150         bool insert = false;
7151
7152         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7153         if (!cache) {
7154                 struct extent_record tmpl;
7155
7156                 memset(&tmpl, 0, sizeof(tmpl));
7157                 tmpl.start = bytenr;
7158                 tmpl.nr = 1;
7159                 tmpl.metadata = 1;
7160                 tmpl.max_size = 1;
7161
7162                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7163                 if (ret)
7164                         return ret;
7165
7166                 /* really a bug in cache_extent implement now */
7167                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7168                 if (!cache)
7169                         return -ENOENT;
7170         }
7171
7172         rec = container_of(cache, struct extent_record, cache);
7173         if (rec->start != bytenr) {
7174                 /*
7175                  * Several cause, from unaligned bytenr to over lapping extents
7176                  */
7177                 return -EEXIST;
7178         }
7179
7180         back = find_tree_backref(rec, parent, root);
7181         if (!back) {
7182                 back = alloc_tree_backref(rec, parent, root);
7183                 if (!back)
7184                         return -ENOMEM;
7185                 insert = true;
7186         }
7187
7188         if (found_ref) {
7189                 if (back->node.found_ref) {
7190                         fprintf(stderr, "Extent back ref already exists "
7191                                 "for %llu parent %llu root %llu \n",
7192                                 (unsigned long long)bytenr,
7193                                 (unsigned long long)parent,
7194                                 (unsigned long long)root);
7195                 }
7196                 back->node.found_ref = 1;
7197         } else {
7198                 if (back->node.found_extent_tree) {
7199                         fprintf(stderr, "Extent back ref already exists "
7200                                 "for %llu parent %llu root %llu \n",
7201                                 (unsigned long long)bytenr,
7202                                 (unsigned long long)parent,
7203                                 (unsigned long long)root);
7204                 }
7205                 back->node.found_extent_tree = 1;
7206         }
7207         if (insert)
7208                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7209                         compare_extent_backref));
7210         check_extent_type(rec);
7211         maybe_free_extent_rec(extent_cache, rec);
7212         return 0;
7213 }
7214
7215 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7216                             u64 parent, u64 root, u64 owner, u64 offset,
7217                             u32 num_refs, int found_ref, u64 max_size)
7218 {
7219         struct extent_record *rec;
7220         struct data_backref *back;
7221         struct cache_extent *cache;
7222         int ret;
7223         bool insert = false;
7224
7225         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7226         if (!cache) {
7227                 struct extent_record tmpl;
7228
7229                 memset(&tmpl, 0, sizeof(tmpl));
7230                 tmpl.start = bytenr;
7231                 tmpl.nr = 1;
7232                 tmpl.max_size = max_size;
7233
7234                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7235                 if (ret)
7236                         return ret;
7237
7238                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7239                 if (!cache)
7240                         abort();
7241         }
7242
7243         rec = container_of(cache, struct extent_record, cache);
7244         if (rec->max_size < max_size)
7245                 rec->max_size = max_size;
7246
7247         /*
7248          * If found_ref is set then max_size is the real size and must match the
7249          * existing refs.  So if we have already found a ref then we need to
7250          * make sure that this ref matches the existing one, otherwise we need
7251          * to add a new backref so we can notice that the backrefs don't match
7252          * and we need to figure out who is telling the truth.  This is to
7253          * account for that awful fsync bug I introduced where we'd end up with
7254          * a btrfs_file_extent_item that would have its length include multiple
7255          * prealloc extents or point inside of a prealloc extent.
7256          */
7257         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7258                                  bytenr, max_size);
7259         if (!back) {
7260                 back = alloc_data_backref(rec, parent, root, owner, offset,
7261                                           max_size);
7262                 BUG_ON(!back);
7263                 insert = true;
7264         }
7265
7266         if (found_ref) {
7267                 BUG_ON(num_refs != 1);
7268                 if (back->node.found_ref)
7269                         BUG_ON(back->bytes != max_size);
7270                 back->node.found_ref = 1;
7271                 back->found_ref += 1;
7272                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7273                         back->bytes = max_size;
7274                         back->disk_bytenr = bytenr;
7275
7276                         /* Need to reinsert if not already in the tree */
7277                         if (!insert) {
7278                                 rb_erase(&back->node.node, &rec->backref_tree);
7279                                 insert = true;
7280                         }
7281                 }
7282                 rec->refs += 1;
7283                 rec->content_checked = 1;
7284                 rec->owner_ref_checked = 1;
7285         } else {
7286                 if (back->node.found_extent_tree) {
7287                         fprintf(stderr, "Extent back ref already exists "
7288                                 "for %llu parent %llu root %llu "
7289                                 "owner %llu offset %llu num_refs %lu\n",
7290                                 (unsigned long long)bytenr,
7291                                 (unsigned long long)parent,
7292                                 (unsigned long long)root,
7293                                 (unsigned long long)owner,
7294                                 (unsigned long long)offset,
7295                                 (unsigned long)num_refs);
7296                 }
7297                 back->num_refs = num_refs;
7298                 back->node.found_extent_tree = 1;
7299         }
7300         if (insert)
7301                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7302                         compare_extent_backref));
7303
7304         maybe_free_extent_rec(extent_cache, rec);
7305         return 0;
7306 }
7307
7308 static int add_pending(struct cache_tree *pending,
7309                        struct cache_tree *seen, u64 bytenr, u32 size)
7310 {
7311         int ret;
7312         ret = add_cache_extent(seen, bytenr, size);
7313         if (ret)
7314                 return ret;
7315         add_cache_extent(pending, bytenr, size);
7316         return 0;
7317 }
7318
7319 static int pick_next_pending(struct cache_tree *pending,
7320                         struct cache_tree *reada,
7321                         struct cache_tree *nodes,
7322                         u64 last, struct block_info *bits, int bits_nr,
7323                         int *reada_bits)
7324 {
7325         unsigned long node_start = last;
7326         struct cache_extent *cache;
7327         int ret;
7328
7329         cache = search_cache_extent(reada, 0);
7330         if (cache) {
7331                 bits[0].start = cache->start;
7332                 bits[0].size = cache->size;
7333                 *reada_bits = 1;
7334                 return 1;
7335         }
7336         *reada_bits = 0;
7337         if (node_start > 32768)
7338                 node_start -= 32768;
7339
7340         cache = search_cache_extent(nodes, node_start);
7341         if (!cache)
7342                 cache = search_cache_extent(nodes, 0);
7343
7344         if (!cache) {
7345                  cache = search_cache_extent(pending, 0);
7346                  if (!cache)
7347                          return 0;
7348                  ret = 0;
7349                  do {
7350                          bits[ret].start = cache->start;
7351                          bits[ret].size = cache->size;
7352                          cache = next_cache_extent(cache);
7353                          ret++;
7354                  } while (cache && ret < bits_nr);
7355                  return ret;
7356         }
7357
7358         ret = 0;
7359         do {
7360                 bits[ret].start = cache->start;
7361                 bits[ret].size = cache->size;
7362                 cache = next_cache_extent(cache);
7363                 ret++;
7364         } while (cache && ret < bits_nr);
7365
7366         if (bits_nr - ret > 8) {
7367                 u64 lookup = bits[0].start + bits[0].size;
7368                 struct cache_extent *next;
7369                 next = search_cache_extent(pending, lookup);
7370                 while(next) {
7371                         if (next->start - lookup > 32768)
7372                                 break;
7373                         bits[ret].start = next->start;
7374                         bits[ret].size = next->size;
7375                         lookup = next->start + next->size;
7376                         ret++;
7377                         if (ret == bits_nr)
7378                                 break;
7379                         next = next_cache_extent(next);
7380                         if (!next)
7381                                 break;
7382                 }
7383         }
7384         return ret;
7385 }
7386
7387 static void free_chunk_record(struct cache_extent *cache)
7388 {
7389         struct chunk_record *rec;
7390
7391         rec = container_of(cache, struct chunk_record, cache);
7392         list_del_init(&rec->list);
7393         list_del_init(&rec->dextents);
7394         free(rec);
7395 }
7396
/*
 * Free the chunk records held in @chunk_cache by passing free_chunk_record()
 * to cache_tree_free_extents().
 */
void free_chunk_cache_tree(struct cache_tree *chunk_cache)
{
        cache_tree_free_extents(chunk_cache, free_chunk_record);
}
7401
7402 static void free_device_record(struct rb_node *node)
7403 {
7404         struct device_record *rec;
7405
7406         rec = container_of(node, struct device_record, node);
7407         free(rec);
7408 }
7409
7410 FREE_RB_BASED_TREE(device_cache, free_device_record);
7411
7412 int insert_block_group_record(struct block_group_tree *tree,
7413                               struct block_group_record *bg_rec)
7414 {
7415         int ret;
7416
7417         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7418         if (ret)
7419                 return ret;
7420
7421         list_add_tail(&bg_rec->list, &tree->block_groups);
7422         return 0;
7423 }
7424
7425 static void free_block_group_record(struct cache_extent *cache)
7426 {
7427         struct block_group_record *rec;
7428
7429         rec = container_of(cache, struct block_group_record, cache);
7430         list_del_init(&rec->list);
7431         free(rec);
7432 }
7433
/*
 * Free the block group records held in @tree->tree by passing
 * free_block_group_record() to cache_tree_free_extents().
 */
void free_block_group_tree(struct block_group_tree *tree)
{
        cache_tree_free_extents(&tree->tree, free_block_group_record);
}
7438
7439 int insert_device_extent_record(struct device_extent_tree *tree,
7440                                 struct device_extent_record *de_rec)
7441 {
7442         int ret;
7443
7444         /*
7445          * Device extent is a bit different from the other extents, because
7446          * the extents which belong to the different devices may have the
7447          * same start and size, so we need use the special extent cache
7448          * search/insert functions.
7449          */
7450         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7451         if (ret)
7452                 return ret;
7453
7454         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7455         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
7456         return 0;
7457 }
7458
7459 static void free_device_extent_record(struct cache_extent *cache)
7460 {
7461         struct device_extent_record *rec;
7462
7463         rec = container_of(cache, struct device_extent_record, cache);
7464         if (!list_empty(&rec->chunk_list))
7465                 list_del_init(&rec->chunk_list);
7466         if (!list_empty(&rec->device_list))
7467                 list_del_init(&rec->device_list);
7468         free(rec);
7469 }
7470
/*
 * Free the device extent records held in @tree->tree by passing
 * free_device_extent_record() to cache_tree_free_extents().
 */
void free_device_extent_tree(struct device_extent_tree *tree)
{
        cache_tree_free_extents(&tree->tree, free_device_extent_record);
}
7475
7476 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7477 static int process_extent_ref_v0(struct cache_tree *extent_cache,
7478                                  struct extent_buffer *leaf, int slot)
7479 {
7480         struct btrfs_extent_ref_v0 *ref0;
7481         struct btrfs_key key;
7482         int ret;
7483
7484         btrfs_item_key_to_cpu(leaf, &key, slot);
7485         ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
7486         if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
7487                 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
7488                                 0, 0);
7489         } else {
7490                 ret = add_data_backref(extent_cache, key.objectid, key.offset,
7491                                 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
7492         }
7493         return ret;
7494 }
7495 #endif
7496
7497 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7498                                             struct btrfs_key *key,
7499                                             int slot)
7500 {
7501         struct btrfs_chunk *ptr;
7502         struct chunk_record *rec;
7503         int num_stripes, i;
7504
7505         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7506         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7507
7508         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7509         if (!rec) {
7510                 fprintf(stderr, "memory allocation failed\n");
7511                 exit(-1);
7512         }
7513
7514         INIT_LIST_HEAD(&rec->list);
7515         INIT_LIST_HEAD(&rec->dextents);
7516         rec->bg_rec = NULL;
7517
7518         rec->cache.start = key->offset;
7519         rec->cache.size = btrfs_chunk_length(leaf, ptr);
7520
7521         rec->generation = btrfs_header_generation(leaf);
7522
7523         rec->objectid = key->objectid;
7524         rec->type = key->type;
7525         rec->offset = key->offset;
7526
7527         rec->length = rec->cache.size;
7528         rec->owner = btrfs_chunk_owner(leaf, ptr);
7529         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7530         rec->type_flags = btrfs_chunk_type(leaf, ptr);
7531         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7532         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7533         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7534         rec->num_stripes = num_stripes;
7535         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
7536
7537         for (i = 0; i < rec->num_stripes; ++i) {
7538                 rec->stripes[i].devid =
7539                         btrfs_stripe_devid_nr(leaf, ptr, i);
7540                 rec->stripes[i].offset =
7541                         btrfs_stripe_offset_nr(leaf, ptr, i);
7542                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7543                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
7544                                 BTRFS_UUID_SIZE);
7545         }
7546
7547         return rec;
7548 }
7549
7550 static int process_chunk_item(struct cache_tree *chunk_cache,
7551                               struct btrfs_key *key, struct extent_buffer *eb,
7552                               int slot)
7553 {
7554         struct chunk_record *rec;
7555         struct btrfs_chunk *chunk;
7556         int ret = 0;
7557
7558         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7559         /*
7560          * Do extra check for this chunk item,
7561          *
7562          * It's still possible one can craft a leaf with CHUNK_ITEM, with
7563          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7564          * and owner<->key_type check.
7565          */
7566         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7567                                       key->offset);
7568         if (ret < 0) {
7569                 error("chunk(%llu, %llu) is not valid, ignore it",
7570                       key->offset, btrfs_chunk_length(eb, chunk));
7571                 return 0;
7572         }
7573         rec = btrfs_new_chunk_record(eb, key, slot);
7574         ret = insert_cache_extent(chunk_cache, &rec->cache);
7575         if (ret) {
7576                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7577                         rec->offset, rec->length);
7578                 free(rec);
7579         }
7580
7581         return ret;
7582 }
7583
7584 static int process_device_item(struct rb_root *dev_cache,
7585                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7586 {
7587         struct btrfs_dev_item *ptr;
7588         struct device_record *rec;
7589         int ret = 0;
7590
7591         ptr = btrfs_item_ptr(eb,
7592                 slot, struct btrfs_dev_item);
7593
7594         rec = malloc(sizeof(*rec));
7595         if (!rec) {
7596                 fprintf(stderr, "memory allocation failed\n");
7597                 return -ENOMEM;
7598         }
7599
7600         rec->devid = key->offset;
7601         rec->generation = btrfs_header_generation(eb);
7602
7603         rec->objectid = key->objectid;
7604         rec->type = key->type;
7605         rec->offset = key->offset;
7606
7607         rec->devid = btrfs_device_id(eb, ptr);
7608         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7609         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7610
7611         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7612         if (ret) {
7613                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
7614                 free(rec);
7615         }
7616
7617         return ret;
7618 }
7619
7620 struct block_group_record *
7621 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7622                              int slot)
7623 {
7624         struct btrfs_block_group_item *ptr;
7625         struct block_group_record *rec;
7626
7627         rec = calloc(1, sizeof(*rec));
7628         if (!rec) {
7629                 fprintf(stderr, "memory allocation failed\n");
7630                 exit(-1);
7631         }
7632
7633         rec->cache.start = key->objectid;
7634         rec->cache.size = key->offset;
7635
7636         rec->generation = btrfs_header_generation(leaf);
7637
7638         rec->objectid = key->objectid;
7639         rec->type = key->type;
7640         rec->offset = key->offset;
7641
7642         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7643         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7644
7645         INIT_LIST_HEAD(&rec->list);
7646
7647         return rec;
7648 }
7649
7650 static int process_block_group_item(struct block_group_tree *block_group_cache,
7651                                     struct btrfs_key *key,
7652                                     struct extent_buffer *eb, int slot)
7653 {
7654         struct block_group_record *rec;
7655         int ret = 0;
7656
7657         rec = btrfs_new_block_group_record(eb, key, slot);
7658         ret = insert_block_group_record(block_group_cache, rec);
7659         if (ret) {
7660                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7661                         rec->objectid, rec->offset);
7662                 free(rec);
7663         }
7664
7665         return ret;
7666 }
7667
7668 struct device_extent_record *
7669 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7670                                struct btrfs_key *key, int slot)
7671 {
7672         struct device_extent_record *rec;
7673         struct btrfs_dev_extent *ptr;
7674
7675         rec = calloc(1, sizeof(*rec));
7676         if (!rec) {
7677                 fprintf(stderr, "memory allocation failed\n");
7678                 exit(-1);
7679         }
7680
7681         rec->cache.objectid = key->objectid;
7682         rec->cache.start = key->offset;
7683
7684         rec->generation = btrfs_header_generation(leaf);
7685
7686         rec->objectid = key->objectid;
7687         rec->type = key->type;
7688         rec->offset = key->offset;
7689
7690         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7691         rec->chunk_objecteid =
7692                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7693         rec->chunk_offset =
7694                 btrfs_dev_extent_chunk_offset(leaf, ptr);
7695         rec->length = btrfs_dev_extent_length(leaf, ptr);
7696         rec->cache.size = rec->length;
7697
7698         INIT_LIST_HEAD(&rec->chunk_list);
7699         INIT_LIST_HEAD(&rec->device_list);
7700
7701         return rec;
7702 }
7703
7704 static int
7705 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7706                            struct btrfs_key *key, struct extent_buffer *eb,
7707                            int slot)
7708 {
7709         struct device_extent_record *rec;
7710         int ret;
7711
7712         rec = btrfs_new_device_extent_record(eb, key, slot);
7713         ret = insert_device_extent_record(dev_extent_cache, rec);
7714         if (ret) {
7715                 fprintf(stderr,
7716                         "Device extent[%llu, %llu, %llu] existed.\n",
7717                         rec->objectid, rec->offset, rec->length);
7718                 free(rec);
7719         }
7720
7721         return ret;
7722 }
7723
7724 static int process_extent_item(struct btrfs_root *root,
7725                                struct cache_tree *extent_cache,
7726                                struct extent_buffer *eb, int slot)
7727 {
7728         struct btrfs_extent_item *ei;
7729         struct btrfs_extent_inline_ref *iref;
7730         struct btrfs_extent_data_ref *dref;
7731         struct btrfs_shared_data_ref *sref;
7732         struct btrfs_key key;
7733         struct extent_record tmpl;
7734         unsigned long end;
7735         unsigned long ptr;
7736         int ret;
7737         int type;
7738         u32 item_size = btrfs_item_size_nr(eb, slot);
7739         u64 refs = 0;
7740         u64 offset;
7741         u64 num_bytes;
7742         int metadata = 0;
7743
7744         btrfs_item_key_to_cpu(eb, &key, slot);
7745
7746         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7747                 metadata = 1;
7748                 num_bytes = root->fs_info->nodesize;
7749         } else {
7750                 num_bytes = key.offset;
7751         }
7752
7753         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7754                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7755                       key.objectid, root->fs_info->sectorsize);
7756                 return -EIO;
7757         }
7758         if (item_size < sizeof(*ei)) {
7759 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7760                 struct btrfs_extent_item_v0 *ei0;
7761                 if (item_size != sizeof(*ei0)) {
7762                         error(
7763         "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
7764                                 key.objectid, key.type, key.offset,
7765                                 btrfs_header_bytenr(eb), slot);
7766                         BUG();
7767                 }
7768                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7769                 refs = btrfs_extent_refs_v0(eb, ei0);
7770 #else
7771                 BUG();
7772 #endif
7773                 memset(&tmpl, 0, sizeof(tmpl));
7774                 tmpl.start = key.objectid;
7775                 tmpl.nr = num_bytes;
7776                 tmpl.extent_item_refs = refs;
7777                 tmpl.metadata = metadata;
7778                 tmpl.found_rec = 1;
7779                 tmpl.max_size = num_bytes;
7780
7781                 return add_extent_rec(extent_cache, &tmpl);
7782         }
7783
7784         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7785         refs = btrfs_extent_refs(eb, ei);
7786         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7787                 metadata = 1;
7788         else
7789                 metadata = 0;
7790         if (metadata && num_bytes != root->fs_info->nodesize) {
7791                 error("ignore invalid metadata extent, length %llu does not equal to %u",
7792                       num_bytes, root->fs_info->nodesize);
7793                 return -EIO;
7794         }
7795         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7796                 error("ignore invalid data extent, length %llu is not aligned to %u",
7797                       num_bytes, root->fs_info->sectorsize);
7798                 return -EIO;
7799         }
7800
7801         memset(&tmpl, 0, sizeof(tmpl));
7802         tmpl.start = key.objectid;
7803         tmpl.nr = num_bytes;
7804         tmpl.extent_item_refs = refs;
7805         tmpl.metadata = metadata;
7806         tmpl.found_rec = 1;
7807         tmpl.max_size = num_bytes;
7808         add_extent_rec(extent_cache, &tmpl);
7809
7810         ptr = (unsigned long)(ei + 1);
7811         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7812             key.type == BTRFS_EXTENT_ITEM_KEY)
7813                 ptr += sizeof(struct btrfs_tree_block_info);
7814
7815         end = (unsigned long)ei + item_size;
7816         while (ptr < end) {
7817                 iref = (struct btrfs_extent_inline_ref *)ptr;
7818                 type = btrfs_extent_inline_ref_type(eb, iref);
7819                 offset = btrfs_extent_inline_ref_offset(eb, iref);
7820                 switch (type) {
7821                 case BTRFS_TREE_BLOCK_REF_KEY:
7822                         ret = add_tree_backref(extent_cache, key.objectid,
7823                                         0, offset, 0);
7824                         if (ret < 0)
7825                                 error(
7826                         "add_tree_backref failed (extent items tree block): %s",
7827                                       strerror(-ret));
7828                         break;
7829                 case BTRFS_SHARED_BLOCK_REF_KEY:
7830                         ret = add_tree_backref(extent_cache, key.objectid,
7831                                         offset, 0, 0);
7832                         if (ret < 0)
7833                                 error(
7834                         "add_tree_backref failed (extent items shared block): %s",
7835                                       strerror(-ret));
7836                         break;
7837                 case BTRFS_EXTENT_DATA_REF_KEY:
7838                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7839                         add_data_backref(extent_cache, key.objectid, 0,
7840                                         btrfs_extent_data_ref_root(eb, dref),
7841                                         btrfs_extent_data_ref_objectid(eb,
7842                                                                        dref),
7843                                         btrfs_extent_data_ref_offset(eb, dref),
7844                                         btrfs_extent_data_ref_count(eb, dref),
7845                                         0, num_bytes);
7846                         break;
7847                 case BTRFS_SHARED_DATA_REF_KEY:
7848                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
7849                         add_data_backref(extent_cache, key.objectid, offset,
7850                                         0, 0, 0,
7851                                         btrfs_shared_data_ref_count(eb, sref),
7852                                         0, num_bytes);
7853                         break;
7854                 default:
7855                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7856                                 key.objectid, key.type, num_bytes);
7857                         goto out;
7858                 }
7859                 ptr += btrfs_extent_inline_ref_size(type);
7860         }
7861         WARN_ON(ptr > end);
7862 out:
7863         return 0;
7864 }
7865
7866 static int check_cache_range(struct btrfs_root *root,
7867                              struct btrfs_block_group_cache *cache,
7868                              u64 offset, u64 bytes)
7869 {
7870         struct btrfs_free_space *entry;
7871         u64 *logical;
7872         u64 bytenr;
7873         int stripe_len;
7874         int i, nr, ret;
7875
7876         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7877                 bytenr = btrfs_sb_offset(i);
7878                 ret = btrfs_rmap_block(root->fs_info,
7879                                        cache->key.objectid, bytenr, 0,
7880                                        &logical, &nr, &stripe_len);
7881                 if (ret)
7882                         return ret;
7883
7884                 while (nr--) {
7885                         if (logical[nr] + stripe_len <= offset)
7886                                 continue;
7887                         if (offset + bytes <= logical[nr])
7888                                 continue;
7889                         if (logical[nr] == offset) {
7890                                 if (stripe_len >= bytes) {
7891                                         free(logical);
7892                                         return 0;
7893                                 }
7894                                 bytes -= stripe_len;
7895                                 offset += stripe_len;
7896                         } else if (logical[nr] < offset) {
7897                                 if (logical[nr] + stripe_len >=
7898                                     offset + bytes) {
7899                                         free(logical);
7900                                         return 0;
7901                                 }
7902                                 bytes = (offset + bytes) -
7903                                         (logical[nr] + stripe_len);
7904                                 offset = logical[nr] + stripe_len;
7905                         } else {
7906                                 /*
7907                                  * Could be tricky, the super may land in the
7908                                  * middle of the area we're checking.  First
7909                                  * check the easiest case, it's at the end.
7910                                  */
7911                                 if (logical[nr] + stripe_len >=
7912                                     bytes + offset) {
7913                                         bytes = logical[nr] - offset;
7914                                         continue;
7915                                 }
7916
7917                                 /* Check the left side */
7918                                 ret = check_cache_range(root, cache,
7919                                                         offset,
7920                                                         logical[nr] - offset);
7921                                 if (ret) {
7922                                         free(logical);
7923                                         return ret;
7924                                 }
7925
7926                                 /* Now we continue with the right side */
7927                                 bytes = (offset + bytes) -
7928                                         (logical[nr] + stripe_len);
7929                                 offset = logical[nr] + stripe_len;
7930                         }
7931                 }
7932
7933                 free(logical);
7934         }
7935
7936         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7937         if (!entry) {
7938                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7939                         offset, offset+bytes);
7940                 return -EINVAL;
7941         }
7942
7943         if (entry->offset != offset) {
7944                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7945                         entry->offset);
7946                 return -EINVAL;
7947         }
7948
7949         if (entry->bytes != bytes) {
7950                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7951                         bytes, entry->bytes, offset);
7952                 return -EINVAL;
7953         }
7954
7955         unlink_free_space(cache->free_space_ctl, entry);
7956         free(entry);
7957         return 0;
7958 }
7959
7960 static int verify_space_cache(struct btrfs_root *root,
7961                               struct btrfs_block_group_cache *cache)
7962 {
7963         struct btrfs_path path;
7964         struct extent_buffer *leaf;
7965         struct btrfs_key key;
7966         u64 last;
7967         int ret = 0;
7968
7969         root = root->fs_info->extent_root;
7970
7971         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7972
7973         btrfs_init_path(&path);
7974         key.objectid = last;
7975         key.offset = 0;
7976         key.type = BTRFS_EXTENT_ITEM_KEY;
7977         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7978         if (ret < 0)
7979                 goto out;
7980         ret = 0;
7981         while (1) {
7982                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7983                         ret = btrfs_next_leaf(root, &path);
7984                         if (ret < 0)
7985                                 goto out;
7986                         if (ret > 0) {
7987                                 ret = 0;
7988                                 break;
7989                         }
7990                 }
7991                 leaf = path.nodes[0];
7992                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7993                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7994                         break;
7995                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7996                     key.type != BTRFS_METADATA_ITEM_KEY) {
7997                         path.slots[0]++;
7998                         continue;
7999                 }
8000
8001                 if (last == key.objectid) {
8002                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
8003                                 last = key.objectid + key.offset;
8004                         else
8005                                 last = key.objectid + root->fs_info->nodesize;
8006                         path.slots[0]++;
8007                         continue;
8008                 }
8009
8010                 ret = check_cache_range(root, cache, last,
8011                                         key.objectid - last);
8012                 if (ret)
8013                         break;
8014                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8015                         last = key.objectid + key.offset;
8016                 else
8017                         last = key.objectid + root->fs_info->nodesize;
8018                 path.slots[0]++;
8019         }
8020
8021         if (last < cache->key.objectid + cache->key.offset)
8022                 ret = check_cache_range(root, cache, last,
8023                                         cache->key.objectid +
8024                                         cache->key.offset - last);
8025
8026 out:
8027         btrfs_release_path(&path);
8028
8029         if (!ret &&
8030             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8031                 fprintf(stderr, "There are still entries left in the space "
8032                         "cache\n");
8033                 ret = -EINVAL;
8034         }
8035
8036         return ret;
8037 }
8038
8039 static int check_space_cache(struct btrfs_root *root)
8040 {
8041         struct btrfs_block_group_cache *cache;
8042         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8043         int ret;
8044         int error = 0;
8045
8046         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8047             btrfs_super_generation(root->fs_info->super_copy) !=
8048             btrfs_super_cache_generation(root->fs_info->super_copy)) {
8049                 printf("cache and super generation don't match, space cache "
8050                        "will be invalidated\n");
8051                 return 0;
8052         }
8053
8054         if (ctx.progress_enabled) {
8055                 ctx.tp = TASK_FREE_SPACE;
8056                 task_start(ctx.info);
8057         }
8058
8059         while (1) {
8060                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
8061                 if (!cache)
8062                         break;
8063
8064                 start = cache->key.objectid + cache->key.offset;
8065                 if (!cache->free_space_ctl) {
8066                         if (btrfs_init_free_space_ctl(cache,
8067                                                 root->fs_info->sectorsize)) {
8068                                 ret = -ENOMEM;
8069                                 break;
8070                         }
8071                 } else {
8072                         btrfs_remove_free_space_cache(cache);
8073                 }
8074
8075                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8076                         ret = exclude_super_stripes(root, cache);
8077                         if (ret) {
8078                                 fprintf(stderr, "could not exclude super stripes: %s\n",
8079                                         strerror(-ret));
8080                                 error++;
8081                                 continue;
8082                         }
8083                         ret = load_free_space_tree(root->fs_info, cache);
8084                         free_excluded_extents(root, cache);
8085                         if (ret < 0) {
8086                                 fprintf(stderr, "could not load free space tree: %s\n",
8087                                         strerror(-ret));
8088                                 error++;
8089                                 continue;
8090                         }
8091                         error += ret;
8092                 } else {
8093                         ret = load_free_space_cache(root->fs_info, cache);
8094                         if (!ret)
8095                                 continue;
8096                 }
8097
8098                 ret = verify_space_cache(root, cache);
8099                 if (ret) {
8100                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
8101                                 cache->key.objectid);
8102                         error++;
8103                 }
8104         }
8105
8106         task_stop(ctx.info);
8107
8108         return error ? -EINVAL : 0;
8109 }
8110
8111 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8112                         u64 num_bytes, unsigned long leaf_offset,
8113                         struct extent_buffer *eb) {
8114
8115         struct btrfs_fs_info *fs_info = root->fs_info;
8116         u64 offset = 0;
8117         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8118         char *data;
8119         unsigned long csum_offset;
8120         u32 csum;
8121         u32 csum_expected;
8122         u64 read_len;
8123         u64 data_checked = 0;
8124         u64 tmp;
8125         int ret = 0;
8126         int mirror;
8127         int num_copies;
8128
8129         if (num_bytes % fs_info->sectorsize)
8130                 return -EINVAL;
8131
8132         data = malloc(num_bytes);
8133         if (!data)
8134                 return -ENOMEM;
8135
8136         while (offset < num_bytes) {
8137                 mirror = 0;
8138 again:
8139                 read_len = num_bytes - offset;
8140                 /* read as much space once a time */
8141                 ret = read_extent_data(fs_info, data + offset,
8142                                 bytenr + offset, &read_len, mirror);
8143                 if (ret)
8144                         goto out;
8145                 data_checked = 0;
8146                 /* verify every 4k data's checksum */
8147                 while (data_checked < read_len) {
8148                         csum = ~(u32)0;
8149                         tmp = offset + data_checked;
8150
8151                         csum = btrfs_csum_data((char *)data + tmp,
8152                                                csum, fs_info->sectorsize);
8153                         btrfs_csum_final(csum, (u8 *)&csum);
8154
8155                         csum_offset = leaf_offset +
8156                                  tmp / fs_info->sectorsize * csum_size;
8157                         read_extent_buffer(eb, (char *)&csum_expected,
8158                                            csum_offset, csum_size);
8159                         /* try another mirror */
8160                         if (csum != csum_expected) {
8161                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8162                                                 mirror, bytenr + tmp,
8163                                                 csum, csum_expected);
8164                                 num_copies = btrfs_num_copies(root->fs_info,
8165                                                 bytenr, num_bytes);
8166                                 if (mirror < num_copies - 1) {
8167                                         mirror += 1;
8168                                         goto again;
8169                                 }
8170                         }
8171                         data_checked += fs_info->sectorsize;
8172                 }
8173                 offset += read_len;
8174         }
8175 out:
8176         free(data);
8177         return ret;
8178 }
8179
8180 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8181                                u64 num_bytes)
8182 {
8183         struct btrfs_path path;
8184         struct extent_buffer *leaf;
8185         struct btrfs_key key;
8186         int ret;
8187
8188         btrfs_init_path(&path);
8189         key.objectid = bytenr;
8190         key.type = BTRFS_EXTENT_ITEM_KEY;
8191         key.offset = (u64)-1;
8192
8193 again:
8194         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8195                                 0, 0);
8196         if (ret < 0) {
8197                 fprintf(stderr, "Error looking up extent record %d\n", ret);
8198                 btrfs_release_path(&path);
8199                 return ret;
8200         } else if (ret) {
8201                 if (path.slots[0] > 0) {
8202                         path.slots[0]--;
8203                 } else {
8204                         ret = btrfs_prev_leaf(root, &path);
8205                         if (ret < 0) {
8206                                 goto out;
8207                         } else if (ret > 0) {
8208                                 ret = 0;
8209                                 goto out;
8210                         }
8211                 }
8212         }
8213
8214         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8215
8216         /*
8217          * Block group items come before extent items if they have the same
8218          * bytenr, so walk back one more just in case.  Dear future traveller,
8219          * first congrats on mastering time travel.  Now if it's not too much
8220          * trouble could you go back to 2006 and tell Chris to make the
8221          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8222          * EXTENT_ITEM_KEY please?
8223          */
8224         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8225                 if (path.slots[0] > 0) {
8226                         path.slots[0]--;
8227                 } else {
8228                         ret = btrfs_prev_leaf(root, &path);
8229                         if (ret < 0) {
8230                                 goto out;
8231                         } else if (ret > 0) {
8232                                 ret = 0;
8233                                 goto out;
8234                         }
8235                 }
8236                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8237         }
8238
8239         while (num_bytes) {
8240                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8241                         ret = btrfs_next_leaf(root, &path);
8242                         if (ret < 0) {
8243                                 fprintf(stderr, "Error going to next leaf "
8244                                         "%d\n", ret);
8245                                 btrfs_release_path(&path);
8246                                 return ret;
8247                         } else if (ret) {
8248                                 break;
8249                         }
8250                 }
8251                 leaf = path.nodes[0];
8252                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8253                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8254                         path.slots[0]++;
8255                         continue;
8256                 }
8257                 if (key.objectid + key.offset < bytenr) {
8258                         path.slots[0]++;
8259                         continue;
8260                 }
8261                 if (key.objectid > bytenr + num_bytes)
8262                         break;
8263
8264                 if (key.objectid == bytenr) {
8265                         if (key.offset >= num_bytes) {
8266                                 num_bytes = 0;
8267                                 break;
8268                         }
8269                         num_bytes -= key.offset;
8270                         bytenr += key.offset;
8271                 } else if (key.objectid < bytenr) {
8272                         if (key.objectid + key.offset >= bytenr + num_bytes) {
8273                                 num_bytes = 0;
8274                                 break;
8275                         }
8276                         num_bytes = (bytenr + num_bytes) -
8277                                 (key.objectid + key.offset);
8278                         bytenr = key.objectid + key.offset;
8279                 } else {
8280                         if (key.objectid + key.offset < bytenr + num_bytes) {
8281                                 u64 new_start = key.objectid + key.offset;
8282                                 u64 new_bytes = bytenr + num_bytes - new_start;
8283
8284                                 /*
8285                                  * Weird case, the extent is in the middle of
8286                                  * our range, we'll have to search one side
8287                                  * and then the other.  Not sure if this happens
8288                                  * in real life, but no harm in coding it up
8289                                  * anyway just in case.
8290                                  */
8291                                 btrfs_release_path(&path);
8292                                 ret = check_extent_exists(root, new_start,
8293                                                           new_bytes);
8294                                 if (ret) {
8295                                         fprintf(stderr, "Right section didn't "
8296                                                 "have a record\n");
8297                                         break;
8298                                 }
8299                                 num_bytes = key.objectid - bytenr;
8300                                 goto again;
8301                         }
8302                         num_bytes = key.objectid - bytenr;
8303                 }
8304                 path.slots[0]++;
8305         }
8306         ret = 0;
8307
8308 out:
8309         if (num_bytes && !ret) {
8310                 fprintf(stderr, "There are no extents for csum range "
8311                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8312                 ret = 1;
8313         }
8314
8315         btrfs_release_path(&path);
8316         return ret;
8317 }
8318
8319 static int check_csums(struct btrfs_root *root)
8320 {
8321         struct btrfs_path path;
8322         struct extent_buffer *leaf;
8323         struct btrfs_key key;
8324         u64 offset = 0, num_bytes = 0;
8325         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8326         int errors = 0;
8327         int ret;
8328         u64 data_len;
8329         unsigned long leaf_offset;
8330
8331         root = root->fs_info->csum_root;
8332         if (!extent_buffer_uptodate(root->node)) {
8333                 fprintf(stderr, "No valid csum tree found\n");
8334                 return -ENOENT;
8335         }
8336
8337         btrfs_init_path(&path);
8338         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8339         key.type = BTRFS_EXTENT_CSUM_KEY;
8340         key.offset = 0;
8341         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8342         if (ret < 0) {
8343                 fprintf(stderr, "Error searching csum tree %d\n", ret);
8344                 btrfs_release_path(&path);
8345                 return ret;
8346         }
8347
8348         if (ret > 0 && path.slots[0])
8349                 path.slots[0]--;
8350         ret = 0;
8351
8352         while (1) {
8353                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8354                         ret = btrfs_next_leaf(root, &path);
8355                         if (ret < 0) {
8356                                 fprintf(stderr, "Error going to next leaf "
8357                                         "%d\n", ret);
8358                                 break;
8359                         }
8360                         if (ret)
8361                                 break;
8362                 }
8363                 leaf = path.nodes[0];
8364
8365                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8366                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
8367                         path.slots[0]++;
8368                         continue;
8369                 }
8370
8371                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8372                               csum_size) * root->fs_info->sectorsize;
8373                 if (!check_data_csum)
8374                         goto skip_csum_check;
8375                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8376                 ret = check_extent_csums(root, key.offset, data_len,
8377                                          leaf_offset, leaf);
8378                 if (ret)
8379                         break;
8380 skip_csum_check:
8381                 if (!num_bytes) {
8382                         offset = key.offset;
8383                 } else if (key.offset != offset + num_bytes) {
8384                         ret = check_extent_exists(root, offset, num_bytes);
8385                         if (ret) {
8386                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8387                                         "there is no extent record\n",
8388                                         offset, offset+num_bytes);
8389                                 errors++;
8390                         }
8391                         offset = key.offset;
8392                         num_bytes = 0;
8393                 }
8394                 num_bytes += data_len;
8395                 path.slots[0]++;
8396         }
8397
8398         btrfs_release_path(&path);
8399         return errors;
8400 }
8401
8402 static int is_dropped_key(struct btrfs_key *key,
8403                           struct btrfs_key *drop_key) {
8404         if (key->objectid < drop_key->objectid)
8405                 return 1;
8406         else if (key->objectid == drop_key->objectid) {
8407                 if (key->type < drop_key->type)
8408                         return 1;
8409                 else if (key->type == drop_key->type) {
8410                         if (key->offset < drop_key->offset)
8411                                 return 1;
8412                 }
8413         }
8414         return 0;
8415 }
8416
8417 /*
8418  * Here are the rules for FULL_BACKREF.
8419  *
8420  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8421  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8422  *      FULL_BACKREF set.
8423  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
8424  *    if it happened after the relocation occurred since we'll have dropped the
8425  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8426  *    have no real way to know for sure.
8427  *
8428  * We process the blocks one root at a time, and we start from the lowest root
8429  * objectid and go to the highest.  So we can just lookup the owner backref for
8430  * the record and if we don't find it then we know it doesn't exist and we have
8431  * a FULL BACKREF.
8432  *
8433  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
8434  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8435  * be set or not and then we can check later once we've gathered all the refs.
8436  */
8437 static int calc_extent_flag(struct cache_tree *extent_cache,
8438                            struct extent_buffer *buf,
8439                            struct root_item_record *ri,
8440                            u64 *flags)
8441 {
8442         struct extent_record *rec;
8443         struct cache_extent *cache;
8444         struct tree_backref *tback;
8445         u64 owner = 0;
8446
8447         cache = lookup_cache_extent(extent_cache, buf->start, 1);
8448         /* we have added this extent before */
8449         if (!cache)
8450                 return -ENOENT;
8451
8452         rec = container_of(cache, struct extent_record, cache);
8453
8454         /*
8455          * Except file/reloc tree, we can not have
8456          * FULL BACKREF MODE
8457          */
8458         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8459                 goto normal;
8460         /*
8461          * root node
8462          */
8463         if (buf->start == ri->bytenr)
8464                 goto normal;
8465
8466         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8467                 goto full_backref;
8468
8469         owner = btrfs_header_owner(buf);
8470         if (owner == ri->objectid)
8471                 goto normal;
8472
8473         tback = find_tree_backref(rec, 0, owner);
8474         if (!tback)
8475                 goto full_backref;
8476 normal:
8477         *flags = 0;
8478         if (rec->flag_block_full_backref != FLAG_UNSET &&
8479             rec->flag_block_full_backref != 0)
8480                 rec->bad_full_backref = 1;
8481         return 0;
8482 full_backref:
8483         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8484         if (rec->flag_block_full_backref != FLAG_UNSET &&
8485             rec->flag_block_full_backref != 1)
8486                 rec->bad_full_backref = 1;
8487         return 0;
8488 }
8489
8490 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8491 {
8492         fprintf(stderr, "Invalid key type(");
8493         print_key_type(stderr, 0, key_type);
8494         fprintf(stderr, ") found in root(");
8495         print_objectid(stderr, rootid, 0);
8496         fprintf(stderr, ")\n");
8497 }
8498
8499 /*
8500  * Check if the key is valid with its extent buffer.
8501  *
8502  * This is a early check in case invalid key exists in a extent buffer
8503  * This is not comprehensive yet, but should prevent wrong key/item passed
8504  * further
8505  */
8506 static int check_type_with_root(u64 rootid, u8 key_type)
8507 {
8508         switch (key_type) {
8509         /* Only valid in chunk tree */
8510         case BTRFS_DEV_ITEM_KEY:
8511         case BTRFS_CHUNK_ITEM_KEY:
8512                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8513                         goto err;
8514                 break;
8515         /* valid in csum and log tree */
8516         case BTRFS_CSUM_TREE_OBJECTID:
8517                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8518                       is_fstree(rootid)))
8519                         goto err;
8520                 break;
8521         case BTRFS_EXTENT_ITEM_KEY:
8522         case BTRFS_METADATA_ITEM_KEY:
8523         case BTRFS_BLOCK_GROUP_ITEM_KEY:
8524                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8525                         goto err;
8526                 break;
8527         case BTRFS_ROOT_ITEM_KEY:
8528                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8529                         goto err;
8530                 break;
8531         case BTRFS_DEV_EXTENT_KEY:
8532                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8533                         goto err;
8534                 break;
8535         }
8536         return 0;
8537 err:
8538         report_mismatch_key_root(key_type, rootid);
8539         return -EINVAL;
8540 }
8541
8542 static int run_next_block(struct btrfs_root *root,
8543                           struct block_info *bits,
8544                           int bits_nr,
8545                           u64 *last,
8546                           struct cache_tree *pending,
8547                           struct cache_tree *seen,
8548                           struct cache_tree *reada,
8549                           struct cache_tree *nodes,
8550                           struct cache_tree *extent_cache,
8551                           struct cache_tree *chunk_cache,
8552                           struct rb_root *dev_cache,
8553                           struct block_group_tree *block_group_cache,
8554                           struct device_extent_tree *dev_extent_cache,
8555                           struct root_item_record *ri)
8556 {
8557         struct btrfs_fs_info *fs_info = root->fs_info;
8558         struct extent_buffer *buf;
8559         struct extent_record *rec = NULL;
8560         u64 bytenr;
8561         u32 size;
8562         u64 parent;
8563         u64 owner;
8564         u64 flags;
8565         u64 ptr;
8566         u64 gen = 0;
8567         int ret = 0;
8568         int i;
8569         int nritems;
8570         struct btrfs_key key;
8571         struct cache_extent *cache;
8572         int reada_bits;
8573
8574         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8575                                     bits_nr, &reada_bits);
8576         if (nritems == 0)
8577                 return 1;
8578
8579         if (!reada_bits) {
8580                 for(i = 0; i < nritems; i++) {
8581                         ret = add_cache_extent(reada, bits[i].start,
8582                                                bits[i].size);
8583                         if (ret == -EEXIST)
8584                                 continue;
8585
8586                         /* fixme, get the parent transid */
8587                         readahead_tree_block(fs_info, bits[i].start, 0);
8588                 }
8589         }
8590         *last = bits[0].start;
8591         bytenr = bits[0].start;
8592         size = bits[0].size;
8593
8594         cache = lookup_cache_extent(pending, bytenr, size);
8595         if (cache) {
8596                 remove_cache_extent(pending, cache);
8597                 free(cache);
8598         }
8599         cache = lookup_cache_extent(reada, bytenr, size);
8600         if (cache) {
8601                 remove_cache_extent(reada, cache);
8602                 free(cache);
8603         }
8604         cache = lookup_cache_extent(nodes, bytenr, size);
8605         if (cache) {
8606                 remove_cache_extent(nodes, cache);
8607                 free(cache);
8608         }
8609         cache = lookup_cache_extent(extent_cache, bytenr, size);
8610         if (cache) {
8611                 rec = container_of(cache, struct extent_record, cache);
8612                 gen = rec->parent_generation;
8613         }
8614
8615         /* fixme, get the real parent transid */
8616         buf = read_tree_block(root->fs_info, bytenr, gen);
8617         if (!extent_buffer_uptodate(buf)) {
8618                 record_bad_block_io(root->fs_info,
8619                                     extent_cache, bytenr, size);
8620                 goto out;
8621         }
8622
8623         nritems = btrfs_header_nritems(buf);
8624
8625         flags = 0;
8626         if (!init_extent_tree) {
8627                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8628                                        btrfs_header_level(buf), 1, NULL,
8629                                        &flags);
8630                 if (ret < 0) {
8631                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8632                         if (ret < 0) {
8633                                 fprintf(stderr, "Couldn't calc extent flags\n");
8634                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8635                         }
8636                 }
8637         } else {
8638                 flags = 0;
8639                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8640                 if (ret < 0) {
8641                         fprintf(stderr, "Couldn't calc extent flags\n");
8642                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8643                 }
8644         }
8645
8646         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8647                 if (ri != NULL &&
8648                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8649                     ri->objectid == btrfs_header_owner(buf)) {
8650                         /*
8651                          * Ok we got to this block from it's original owner and
8652                          * we have FULL_BACKREF set.  Relocation can leave
8653                          * converted blocks over so this is altogether possible,
8654                          * however it's not possible if the generation > the
8655                          * last snapshot, so check for this case.
8656                          */
8657                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8658                             btrfs_header_generation(buf) > ri->last_snapshot) {
8659                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8660                                 rec->bad_full_backref = 1;
8661                         }
8662                 }
8663         } else {
8664                 if (ri != NULL &&
8665                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8666                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8667                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8668                         rec->bad_full_backref = 1;
8669                 }
8670         }
8671
8672         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8673                 rec->flag_block_full_backref = 1;
8674                 parent = bytenr;
8675                 owner = 0;
8676         } else {
8677                 rec->flag_block_full_backref = 0;
8678                 parent = 0;
8679                 owner = btrfs_header_owner(buf);
8680         }
8681
8682         ret = check_block(root, extent_cache, buf, flags);
8683         if (ret)
8684                 goto out;
8685
8686         if (btrfs_is_leaf(buf)) {
8687                 btree_space_waste += btrfs_leaf_free_space(root, buf);
8688                 for (i = 0; i < nritems; i++) {
8689                         struct btrfs_file_extent_item *fi;
8690                         btrfs_item_key_to_cpu(buf, &key, i);
8691                         /*
8692                          * Check key type against the leaf owner.
8693                          * Could filter quite a lot of early error if
8694                          * owner is correct
8695                          */
8696                         if (check_type_with_root(btrfs_header_owner(buf),
8697                                                  key.type)) {
8698                                 fprintf(stderr, "ignoring invalid key\n");
8699                                 continue;
8700                         }
8701                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8702                                 process_extent_item(root, extent_cache, buf,
8703                                                     i);
8704                                 continue;
8705                         }
8706                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8707                                 process_extent_item(root, extent_cache, buf,
8708                                                     i);
8709                                 continue;
8710                         }
8711                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8712                                 total_csum_bytes +=
8713                                         btrfs_item_size_nr(buf, i);
8714                                 continue;
8715                         }
8716                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8717                                 process_chunk_item(chunk_cache, &key, buf, i);
8718                                 continue;
8719                         }
8720                         if (key.type == BTRFS_DEV_ITEM_KEY) {
8721                                 process_device_item(dev_cache, &key, buf, i);
8722                                 continue;
8723                         }
8724                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8725                                 process_block_group_item(block_group_cache,
8726                                         &key, buf, i);
8727                                 continue;
8728                         }
8729                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
8730                                 process_device_extent_item(dev_extent_cache,
8731                                         &key, buf, i);
8732                                 continue;
8733
8734                         }
8735                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8736 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8737                                 process_extent_ref_v0(extent_cache, buf, i);
8738 #else
8739                                 BUG();
8740 #endif
8741                                 continue;
8742                         }
8743
8744                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8745                                 ret = add_tree_backref(extent_cache,
8746                                                 key.objectid, 0, key.offset, 0);
8747                                 if (ret < 0)
8748                                         error(
8749                                 "add_tree_backref failed (leaf tree block): %s",
8750                                               strerror(-ret));
8751                                 continue;
8752                         }
8753                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8754                                 ret = add_tree_backref(extent_cache,
8755                                                 key.objectid, key.offset, 0, 0);
8756                                 if (ret < 0)
8757                                         error(
8758                                 "add_tree_backref failed (leaf shared block): %s",
8759                                               strerror(-ret));
8760                                 continue;
8761                         }
8762                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8763                                 struct btrfs_extent_data_ref *ref;
8764                                 ref = btrfs_item_ptr(buf, i,
8765                                                 struct btrfs_extent_data_ref);
8766                                 add_data_backref(extent_cache,
8767                                         key.objectid, 0,
8768                                         btrfs_extent_data_ref_root(buf, ref),
8769                                         btrfs_extent_data_ref_objectid(buf,
8770                                                                        ref),
8771                                         btrfs_extent_data_ref_offset(buf, ref),
8772                                         btrfs_extent_data_ref_count(buf, ref),
8773                                         0, root->fs_info->sectorsize);
8774                                 continue;
8775                         }
8776                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8777                                 struct btrfs_shared_data_ref *ref;
8778                                 ref = btrfs_item_ptr(buf, i,
8779                                                 struct btrfs_shared_data_ref);
8780                                 add_data_backref(extent_cache,
8781                                         key.objectid, key.offset, 0, 0, 0,
8782                                         btrfs_shared_data_ref_count(buf, ref),
8783                                         0, root->fs_info->sectorsize);
8784                                 continue;
8785                         }
8786                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8787                                 struct bad_item *bad;
8788
8789                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8790                                         continue;
8791                                 if (!owner)
8792                                         continue;
8793                                 bad = malloc(sizeof(struct bad_item));
8794                                 if (!bad)
8795                                         continue;
8796                                 INIT_LIST_HEAD(&bad->list);
8797                                 memcpy(&bad->key, &key,
8798                                        sizeof(struct btrfs_key));
8799                                 bad->root_id = owner;
8800                                 list_add_tail(&bad->list, &delete_items);
8801                                 continue;
8802                         }
8803                         if (key.type != BTRFS_EXTENT_DATA_KEY)
8804                                 continue;
8805                         fi = btrfs_item_ptr(buf, i,
8806                                             struct btrfs_file_extent_item);
8807                         if (btrfs_file_extent_type(buf, fi) ==
8808                             BTRFS_FILE_EXTENT_INLINE)
8809                                 continue;
8810                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8811                                 continue;
8812
8813                         data_bytes_allocated +=
8814                                 btrfs_file_extent_disk_num_bytes(buf, fi);
8815                         if (data_bytes_allocated < root->fs_info->sectorsize) {
8816                                 abort();
8817                         }
8818                         data_bytes_referenced +=
8819                                 btrfs_file_extent_num_bytes(buf, fi);
8820                         add_data_backref(extent_cache,
8821                                 btrfs_file_extent_disk_bytenr(buf, fi),
8822                                 parent, owner, key.objectid, key.offset -
8823                                 btrfs_file_extent_offset(buf, fi), 1, 1,
8824                                 btrfs_file_extent_disk_num_bytes(buf, fi));
8825                 }
8826         } else {
8827                 int level;
8828                 struct btrfs_key first_key;
8829
8830                 first_key.objectid = 0;
8831
8832                 if (nritems > 0)
8833                         btrfs_item_key_to_cpu(buf, &first_key, 0);
8834                 level = btrfs_header_level(buf);
8835                 for (i = 0; i < nritems; i++) {
8836                         struct extent_record tmpl;
8837
8838                         ptr = btrfs_node_blockptr(buf, i);
8839                         size = root->fs_info->nodesize;
8840                         btrfs_node_key_to_cpu(buf, &key, i);
8841                         if (ri != NULL) {
8842                                 if ((level == ri->drop_level)
8843                                     && is_dropped_key(&key, &ri->drop_key)) {
8844                                         continue;
8845                                 }
8846                         }
8847
8848                         memset(&tmpl, 0, sizeof(tmpl));
8849                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8850                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8851                         tmpl.start = ptr;
8852                         tmpl.nr = size;
8853                         tmpl.refs = 1;
8854                         tmpl.metadata = 1;
8855                         tmpl.max_size = size;
8856                         ret = add_extent_rec(extent_cache, &tmpl);
8857                         if (ret < 0)
8858                                 goto out;
8859
8860                         ret = add_tree_backref(extent_cache, ptr, parent,
8861                                         owner, 1);
8862                         if (ret < 0) {
8863                                 error(
8864                                 "add_tree_backref failed (non-leaf block): %s",
8865                                       strerror(-ret));
8866                                 continue;
8867                         }
8868
8869                         if (level > 1) {
8870                                 add_pending(nodes, seen, ptr, size);
8871                         } else {
8872                                 add_pending(pending, seen, ptr, size);
8873                         }
8874                 }
8875                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
8876                                       nritems) * sizeof(struct btrfs_key_ptr);
8877         }
8878         total_btree_bytes += buf->len;
8879         if (fs_root_objectid(btrfs_header_owner(buf)))
8880                 total_fs_tree_bytes += buf->len;
8881         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8882                 total_extent_tree_bytes += buf->len;
8883 out:
8884         free_extent_buffer(buf);
8885         return ret;
8886 }
8887
8888 static int add_root_to_pending(struct extent_buffer *buf,
8889                                struct cache_tree *extent_cache,
8890                                struct cache_tree *pending,
8891                                struct cache_tree *seen,
8892                                struct cache_tree *nodes,
8893                                u64 objectid)
8894 {
8895         struct extent_record tmpl;
8896         int ret;
8897
8898         if (btrfs_header_level(buf) > 0)
8899                 add_pending(nodes, seen, buf->start, buf->len);
8900         else
8901                 add_pending(pending, seen, buf->start, buf->len);
8902
8903         memset(&tmpl, 0, sizeof(tmpl));
8904         tmpl.start = buf->start;
8905         tmpl.nr = buf->len;
8906         tmpl.is_root = 1;
8907         tmpl.refs = 1;
8908         tmpl.metadata = 1;
8909         tmpl.max_size = buf->len;
8910         add_extent_rec(extent_cache, &tmpl);
8911
8912         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8913             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8914                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8915                                 0, 1);
8916         else
8917                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8918                                 1);
8919         return ret;
8920 }
8921
8922 /* as we fix the tree, we might be deleting blocks that
8923  * we're tracking for repair.  This hook makes sure we
8924  * remove any backrefs for blocks as we are fixing them.
8925  */
8926 static int free_extent_hook(struct btrfs_trans_handle *trans,
8927                             struct btrfs_root *root,
8928                             u64 bytenr, u64 num_bytes, u64 parent,
8929                             u64 root_objectid, u64 owner, u64 offset,
8930                             int refs_to_drop)
8931 {
8932         struct extent_record *rec;
8933         struct cache_extent *cache;
8934         int is_data;
8935         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8936
8937         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8938         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8939         if (!cache)
8940                 return 0;
8941
8942         rec = container_of(cache, struct extent_record, cache);
8943         if (is_data) {
8944                 struct data_backref *back;
8945                 back = find_data_backref(rec, parent, root_objectid, owner,
8946                                          offset, 1, bytenr, num_bytes);
8947                 if (!back)
8948                         goto out;
8949                 if (back->node.found_ref) {
8950                         back->found_ref -= refs_to_drop;
8951                         if (rec->refs)
8952                                 rec->refs -= refs_to_drop;
8953                 }
8954                 if (back->node.found_extent_tree) {
8955                         back->num_refs -= refs_to_drop;
8956                         if (rec->extent_item_refs)
8957                                 rec->extent_item_refs -= refs_to_drop;
8958                 }
8959                 if (back->found_ref == 0)
8960                         back->node.found_ref = 0;
8961                 if (back->num_refs == 0)
8962                         back->node.found_extent_tree = 0;
8963
8964                 if (!back->node.found_extent_tree && back->node.found_ref) {
8965                         rb_erase(&back->node.node, &rec->backref_tree);
8966                         free(back);
8967                 }
8968         } else {
8969                 struct tree_backref *back;
8970                 back = find_tree_backref(rec, parent, root_objectid);
8971                 if (!back)
8972                         goto out;
8973                 if (back->node.found_ref) {
8974                         if (rec->refs)
8975                                 rec->refs--;
8976                         back->node.found_ref = 0;
8977                 }
8978                 if (back->node.found_extent_tree) {
8979                         if (rec->extent_item_refs)
8980                                 rec->extent_item_refs--;
8981                         back->node.found_extent_tree = 0;
8982                 }
8983                 if (!back->node.found_extent_tree && back->node.found_ref) {
8984                         rb_erase(&back->node.node, &rec->backref_tree);
8985                         free(back);
8986                 }
8987         }
8988         maybe_free_extent_rec(extent_cache, rec);
8989 out:
8990         return 0;
8991 }
8992
8993 static int delete_extent_records(struct btrfs_trans_handle *trans,
8994                                  struct btrfs_root *root,
8995                                  struct btrfs_path *path,
8996                                  u64 bytenr)
8997 {
8998         struct btrfs_key key;
8999         struct btrfs_key found_key;
9000         struct extent_buffer *leaf;
9001         int ret;
9002         int slot;
9003
9004
9005         key.objectid = bytenr;
9006         key.type = (u8)-1;
9007         key.offset = (u64)-1;
9008
9009         while(1) {
9010                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9011                                         &key, path, 0, 1);
9012                 if (ret < 0)
9013                         break;
9014
9015                 if (ret > 0) {
9016                         ret = 0;
9017                         if (path->slots[0] == 0)
9018                                 break;
9019                         path->slots[0]--;
9020                 }
9021                 ret = 0;
9022
9023                 leaf = path->nodes[0];
9024                 slot = path->slots[0];
9025
9026                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9027                 if (found_key.objectid != bytenr)
9028                         break;
9029
9030                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9031                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
9032                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9033                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9034                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9035                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9036                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9037                         btrfs_release_path(path);
9038                         if (found_key.type == 0) {
9039                                 if (found_key.offset == 0)
9040                                         break;
9041                                 key.offset = found_key.offset - 1;
9042                                 key.type = found_key.type;
9043                         }
9044                         key.type = found_key.type - 1;
9045                         key.offset = (u64)-1;
9046                         continue;
9047                 }
9048
9049                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9050                         found_key.objectid, found_key.type, found_key.offset);
9051
9052                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9053                 if (ret)
9054                         break;
9055                 btrfs_release_path(path);
9056
9057                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9058                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
9059                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9060                                 found_key.offset : root->fs_info->nodesize;
9061
9062                         ret = btrfs_update_block_group(root, bytenr,
9063                                                        bytes, 0, 0);
9064                         if (ret)
9065                                 break;
9066                 }
9067         }
9068
9069         btrfs_release_path(path);
9070         return ret;
9071 }
9072
9073 /*
9074  * for a single backref, this will allocate a new extent
9075  * and add the backref to it.
9076  */
9077 static int record_extent(struct btrfs_trans_handle *trans,
9078                          struct btrfs_fs_info *info,
9079                          struct btrfs_path *path,
9080                          struct extent_record *rec,
9081                          struct extent_backref *back,
9082                          int allocated, u64 flags)
9083 {
9084         int ret = 0;
9085         struct btrfs_root *extent_root = info->extent_root;
9086         struct extent_buffer *leaf;
9087         struct btrfs_key ins_key;
9088         struct btrfs_extent_item *ei;
9089         struct data_backref *dback;
9090         struct btrfs_tree_block_info *bi;
9091
9092         if (!back->is_data)
9093                 rec->max_size = max_t(u64, rec->max_size,
9094                                     info->nodesize);
9095
9096         if (!allocated) {
9097                 u32 item_size = sizeof(*ei);
9098
9099                 if (!back->is_data)
9100                         item_size += sizeof(*bi);
9101
9102                 ins_key.objectid = rec->start;
9103                 ins_key.offset = rec->max_size;
9104                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9105
9106                 ret = btrfs_insert_empty_item(trans, extent_root, path,
9107                                         &ins_key, item_size);
9108                 if (ret)
9109                         goto fail;
9110
9111                 leaf = path->nodes[0];
9112                 ei = btrfs_item_ptr(leaf, path->slots[0],
9113                                     struct btrfs_extent_item);
9114
9115                 btrfs_set_extent_refs(leaf, ei, 0);
9116                 btrfs_set_extent_generation(leaf, ei, rec->generation);
9117
9118                 if (back->is_data) {
9119                         btrfs_set_extent_flags(leaf, ei,
9120                                                BTRFS_EXTENT_FLAG_DATA);
9121                 } else {
9122                         struct btrfs_disk_key copy_key;;
9123
9124                         bi = (struct btrfs_tree_block_info *)(ei + 1);
9125                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
9126                                              sizeof(*bi));
9127
9128                         btrfs_set_disk_key_objectid(&copy_key,
9129                                                     rec->info_objectid);
9130                         btrfs_set_disk_key_type(&copy_key, 0);
9131                         btrfs_set_disk_key_offset(&copy_key, 0);
9132
9133                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9134                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
9135
9136                         btrfs_set_extent_flags(leaf, ei,
9137                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9138                 }
9139
9140                 btrfs_mark_buffer_dirty(leaf);
9141                 ret = btrfs_update_block_group(extent_root, rec->start,
9142                                                rec->max_size, 1, 0);
9143                 if (ret)
9144                         goto fail;
9145                 btrfs_release_path(path);
9146         }
9147
9148         if (back->is_data) {
9149                 u64 parent;
9150                 int i;
9151
9152                 dback = to_data_backref(back);
9153                 if (back->full_backref)
9154                         parent = dback->parent;
9155                 else
9156                         parent = 0;
9157
9158                 for (i = 0; i < dback->found_ref; i++) {
9159                         /* if parent != 0, we're doing a full backref
9160                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9161                          * just makes the backref allocator create a data
9162                          * backref
9163                          */
9164                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
9165                                                    rec->start, rec->max_size,
9166                                                    parent,
9167                                                    dback->root,
9168                                                    parent ?
9169                                                    BTRFS_FIRST_FREE_OBJECTID :
9170                                                    dback->owner,
9171                                                    dback->offset);
9172                         if (ret)
9173                                 break;
9174                 }
9175                 fprintf(stderr, "adding new data backref"
9176                                 " on %llu %s %llu owner %llu"
9177                                 " offset %llu found %d\n",
9178                                 (unsigned long long)rec->start,
9179                                 back->full_backref ?
9180                                 "parent" : "root",
9181                                 back->full_backref ?
9182                                 (unsigned long long)parent :
9183                                 (unsigned long long)dback->root,
9184                                 (unsigned long long)dback->owner,
9185                                 (unsigned long long)dback->offset,
9186                                 dback->found_ref);
9187         } else {
9188                 u64 parent;
9189                 struct tree_backref *tback;
9190
9191                 tback = to_tree_backref(back);
9192                 if (back->full_backref)
9193                         parent = tback->parent;
9194                 else
9195                         parent = 0;
9196
9197                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9198                                            rec->start, rec->max_size,
9199                                            parent, tback->root, 0, 0);
9200                 fprintf(stderr, "adding new tree backref on "
9201                         "start %llu len %llu parent %llu root %llu\n",
9202                         rec->start, rec->max_size, parent, tback->root);
9203         }
9204 fail:
9205         btrfs_release_path(path);
9206         return ret;
9207 }
9208
9209 static struct extent_entry *find_entry(struct list_head *entries,
9210                                        u64 bytenr, u64 bytes)
9211 {
9212         struct extent_entry *entry = NULL;
9213
9214         list_for_each_entry(entry, entries, list) {
9215                 if (entry->bytenr == bytenr && entry->bytes == bytes)
9216                         return entry;
9217         }
9218
9219         return NULL;
9220 }
9221
9222 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9223 {
9224         struct extent_entry *entry, *best = NULL, *prev = NULL;
9225
9226         list_for_each_entry(entry, entries, list) {
9227                 /*
9228                  * If there are as many broken entries as entries then we know
9229                  * not to trust this particular entry.
9230                  */
9231                 if (entry->broken == entry->count)
9232                         continue;
9233
9234                 /*
9235                  * Special case, when there are only two entries and 'best' is
9236                  * the first one
9237                  */
9238                 if (!prev) {
9239                         best = entry;
9240                         prev = entry;
9241                         continue;
9242                 }
9243
9244                 /*
9245                  * If our current entry == best then we can't be sure our best
9246                  * is really the best, so we need to keep searching.
9247                  */
9248                 if (best && best->count == entry->count) {
9249                         prev = entry;
9250                         best = NULL;
9251                         continue;
9252                 }
9253
9254                 /* Prev == entry, not good enough, have to keep searching */
9255                 if (!prev->broken && prev->count == entry->count)
9256                         continue;
9257
9258                 if (!best)
9259                         best = (prev->count > entry->count) ? prev : entry;
9260                 else if (best->count < entry->count)
9261                         best = entry;
9262                 prev = entry;
9263         }
9264
9265         return best;
9266 }
9267
9268 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9269                       struct data_backref *dback, struct extent_entry *entry)
9270 {
9271         struct btrfs_trans_handle *trans;
9272         struct btrfs_root *root;
9273         struct btrfs_file_extent_item *fi;
9274         struct extent_buffer *leaf;
9275         struct btrfs_key key;
9276         u64 bytenr, bytes;
9277         int ret, err;
9278
9279         key.objectid = dback->root;
9280         key.type = BTRFS_ROOT_ITEM_KEY;
9281         key.offset = (u64)-1;
9282         root = btrfs_read_fs_root(info, &key);
9283         if (IS_ERR(root)) {
9284                 fprintf(stderr, "Couldn't find root for our ref\n");
9285                 return -EINVAL;
9286         }
9287
9288         /*
9289          * The backref points to the original offset of the extent if it was
9290          * split, so we need to search down to the offset we have and then walk
9291          * forward until we find the backref we're looking for.
9292          */
9293         key.objectid = dback->owner;
9294         key.type = BTRFS_EXTENT_DATA_KEY;
9295         key.offset = dback->offset;
9296         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9297         if (ret < 0) {
9298                 fprintf(stderr, "Error looking up ref %d\n", ret);
9299                 return ret;
9300         }
9301
9302         while (1) {
9303                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9304                         ret = btrfs_next_leaf(root, path);
9305                         if (ret) {
9306                                 fprintf(stderr, "Couldn't find our ref, next\n");
9307                                 return -EINVAL;
9308                         }
9309                 }
9310                 leaf = path->nodes[0];
9311                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9312                 if (key.objectid != dback->owner ||
9313                     key.type != BTRFS_EXTENT_DATA_KEY) {
9314                         fprintf(stderr, "Couldn't find our ref, search\n");
9315                         return -EINVAL;
9316                 }
9317                 fi = btrfs_item_ptr(leaf, path->slots[0],
9318                                     struct btrfs_file_extent_item);
9319                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9320                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9321
9322                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9323                         break;
9324                 path->slots[0]++;
9325         }
9326
9327         btrfs_release_path(path);
9328
9329         trans = btrfs_start_transaction(root, 1);
9330         if (IS_ERR(trans))
9331                 return PTR_ERR(trans);
9332
9333         /*
9334          * Ok we have the key of the file extent we want to fix, now we can cow
9335          * down to the thing and fix it.
9336          */
9337         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9338         if (ret < 0) {
9339                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9340                         key.objectid, key.type, key.offset, ret);
9341                 goto out;
9342         }
9343         if (ret > 0) {
9344                 fprintf(stderr, "Well that's odd, we just found this key "
9345                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9346                         key.offset);
9347                 ret = -EINVAL;
9348                 goto out;
9349         }
9350         leaf = path->nodes[0];
9351         fi = btrfs_item_ptr(leaf, path->slots[0],
9352                             struct btrfs_file_extent_item);
9353
9354         if (btrfs_file_extent_compression(leaf, fi) &&
9355             dback->disk_bytenr != entry->bytenr) {
9356                 fprintf(stderr, "Ref doesn't match the record start and is "
9357                         "compressed, please take a btrfs-image of this file "
9358                         "system and send it to a btrfs developer so they can "
9359                         "complete this functionality for bytenr %Lu\n",
9360                         dback->disk_bytenr);
9361                 ret = -EINVAL;
9362                 goto out;
9363         }
9364
9365         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9366                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9367         } else if (dback->disk_bytenr > entry->bytenr) {
9368                 u64 off_diff, offset;
9369
9370                 off_diff = dback->disk_bytenr - entry->bytenr;
9371                 offset = btrfs_file_extent_offset(leaf, fi);
9372                 if (dback->disk_bytenr + offset +
9373                     btrfs_file_extent_num_bytes(leaf, fi) >
9374                     entry->bytenr + entry->bytes) {
9375                         fprintf(stderr, "Ref is past the entry end, please "
9376                                 "take a btrfs-image of this file system and "
9377                                 "send it to a btrfs developer, ref %Lu\n",
9378                                 dback->disk_bytenr);
9379                         ret = -EINVAL;
9380                         goto out;
9381                 }
9382                 offset += off_diff;
9383                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9384                 btrfs_set_file_extent_offset(leaf, fi, offset);
9385         } else if (dback->disk_bytenr < entry->bytenr) {
9386                 u64 offset;
9387
9388                 offset = btrfs_file_extent_offset(leaf, fi);
9389                 if (dback->disk_bytenr + offset < entry->bytenr) {
9390                         fprintf(stderr, "Ref is before the entry start, please"
9391                                 " take a btrfs-image of this file system and "
9392                                 "send it to a btrfs developer, ref %Lu\n",
9393                                 dback->disk_bytenr);
9394                         ret = -EINVAL;
9395                         goto out;
9396                 }
9397
9398                 offset += dback->disk_bytenr;
9399                 offset -= entry->bytenr;
9400                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9401                 btrfs_set_file_extent_offset(leaf, fi, offset);
9402         }
9403
9404         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9405
9406         /*
9407          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9408          * only do this if we aren't using compression, otherwise it's a
9409          * trickier case.
9410          */
9411         if (!btrfs_file_extent_compression(leaf, fi))
9412                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9413         else
9414                 printf("ram bytes may be wrong?\n");
9415         btrfs_mark_buffer_dirty(leaf);
9416 out:
9417         err = btrfs_commit_transaction(trans, root);
9418         btrfs_release_path(path);
9419         return ret ? ret : err;
9420 }
9421
9422 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9423                            struct extent_record *rec)
9424 {
9425         struct extent_backref *back, *tmp;
9426         struct data_backref *dback;
9427         struct extent_entry *entry, *best = NULL;
9428         LIST_HEAD(entries);
9429         int nr_entries = 0;
9430         int broken_entries = 0;
9431         int ret = 0;
9432         short mismatch = 0;
9433
9434         /*
9435          * Metadata is easy and the backrefs should always agree on bytenr and
9436          * size, if not we've got bigger issues.
9437          */
9438         if (rec->metadata)
9439                 return 0;
9440
9441         rbtree_postorder_for_each_entry_safe(back, tmp,
9442                                              &rec->backref_tree, node) {
9443                 if (back->full_backref || !back->is_data)
9444                         continue;
9445
9446                 dback = to_data_backref(back);
9447
9448                 /*
9449                  * We only pay attention to backrefs that we found a real
9450                  * backref for.
9451                  */
9452                 if (dback->found_ref == 0)
9453                         continue;
9454
9455                 /*
9456                  * For now we only catch when the bytes don't match, not the
9457                  * bytenr.  We can easily do this at the same time, but I want
9458                  * to have a fs image to test on before we just add repair
9459                  * functionality willy-nilly so we know we won't screw up the
9460                  * repair.
9461                  */
9462
9463                 entry = find_entry(&entries, dback->disk_bytenr,
9464                                    dback->bytes);
9465                 if (!entry) {
9466                         entry = malloc(sizeof(struct extent_entry));
9467                         if (!entry) {
9468                                 ret = -ENOMEM;
9469                                 goto out;
9470                         }
9471                         memset(entry, 0, sizeof(*entry));
9472                         entry->bytenr = dback->disk_bytenr;
9473                         entry->bytes = dback->bytes;
9474                         list_add_tail(&entry->list, &entries);
9475                         nr_entries++;
9476                 }
9477
9478                 /*
9479                  * If we only have on entry we may think the entries agree when
9480                  * in reality they don't so we have to do some extra checking.
9481                  */
9482                 if (dback->disk_bytenr != rec->start ||
9483                     dback->bytes != rec->nr || back->broken)
9484                         mismatch = 1;
9485
9486                 if (back->broken) {
9487                         entry->broken++;
9488                         broken_entries++;
9489                 }
9490
9491                 entry->count++;
9492         }
9493
9494         /* Yay all the backrefs agree, carry on good sir */
9495         if (nr_entries <= 1 && !mismatch)
9496                 goto out;
9497
9498         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9499                 "%Lu\n", rec->start);
9500
9501         /*
9502          * First we want to see if the backrefs can agree amongst themselves who
9503          * is right, so figure out which one of the entries has the highest
9504          * count.
9505          */
9506         best = find_most_right_entry(&entries);
9507
9508         /*
9509          * Ok so we may have an even split between what the backrefs think, so
9510          * this is where we use the extent ref to see what it thinks.
9511          */
9512         if (!best) {
9513                 entry = find_entry(&entries, rec->start, rec->nr);
9514                 if (!entry && (!broken_entries || !rec->found_rec)) {
9515                         fprintf(stderr, "Backrefs don't agree with each other "
9516                                 "and extent record doesn't agree with anybody,"
9517                                 " so we can't fix bytenr %Lu bytes %Lu\n",
9518                                 rec->start, rec->nr);
9519                         ret = -EINVAL;
9520                         goto out;
9521                 } else if (!entry) {
9522                         /*
9523                          * Ok our backrefs were broken, we'll assume this is the
9524                          * correct value and add an entry for this range.
9525                          */
9526                         entry = malloc(sizeof(struct extent_entry));
9527                         if (!entry) {
9528                                 ret = -ENOMEM;
9529                                 goto out;
9530                         }
9531                         memset(entry, 0, sizeof(*entry));
9532                         entry->bytenr = rec->start;
9533                         entry->bytes = rec->nr;
9534                         list_add_tail(&entry->list, &entries);
9535                         nr_entries++;
9536                 }
9537                 entry->count++;
9538                 best = find_most_right_entry(&entries);
9539                 if (!best) {
9540                         fprintf(stderr, "Backrefs and extent record evenly "
9541                                 "split on who is right, this is going to "
9542                                 "require user input to fix bytenr %Lu bytes "
9543                                 "%Lu\n", rec->start, rec->nr);
9544                         ret = -EINVAL;
9545                         goto out;
9546                 }
9547         }
9548
9549         /*
9550          * I don't think this can happen currently as we'll abort() if we catch
9551          * this case higher up, but in case somebody removes that we still can't
9552          * deal with it properly here yet, so just bail out of that's the case.
9553          */
9554         if (best->bytenr != rec->start) {
9555                 fprintf(stderr, "Extent start and backref starts don't match, "
9556                         "please use btrfs-image on this file system and send "
9557                         "it to a btrfs developer so they can make fsck fix "
9558                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
9559                         rec->start, rec->nr);
9560                 ret = -EINVAL;
9561                 goto out;
9562         }
9563
9564         /*
9565          * Ok great we all agreed on an extent record, let's go find the real
9566          * references and fix up the ones that don't match.
9567          */
9568         rbtree_postorder_for_each_entry_safe(back, tmp,
9569                                              &rec->backref_tree, node) {
9570                 if (back->full_backref || !back->is_data)
9571                         continue;
9572
9573                 dback = to_data_backref(back);
9574
9575                 /*
9576                  * Still ignoring backrefs that don't have a real ref attached
9577                  * to them.
9578                  */
9579                 if (dback->found_ref == 0)
9580                         continue;
9581
9582                 if (dback->bytes == best->bytes &&
9583                     dback->disk_bytenr == best->bytenr)
9584                         continue;
9585
9586                 ret = repair_ref(info, path, dback, best);
9587                 if (ret)
9588                         goto out;
9589         }
9590
9591         /*
9592          * Ok we messed with the actual refs, which means we need to drop our
9593          * entire cache and go back and rescan.  I know this is a huge pain and
9594          * adds a lot of extra work, but it's the only way to be safe.  Once all
9595          * the backrefs agree we may not need to do anything to the extent
9596          * record itself.
9597          */
9598         ret = -EAGAIN;
9599 out:
9600         while (!list_empty(&entries)) {
9601                 entry = list_entry(entries.next, struct extent_entry, list);
9602                 list_del_init(&entry->list);
9603                 free(entry);
9604         }
9605         return ret;
9606 }
9607
9608 static int process_duplicates(struct cache_tree *extent_cache,
9609                               struct extent_record *rec)
9610 {
9611         struct extent_record *good, *tmp;
9612         struct cache_extent *cache;
9613         int ret;
9614
9615         /*
9616          * If we found a extent record for this extent then return, or if we
9617          * have more than one duplicate we are likely going to need to delete
9618          * something.
9619          */
9620         if (rec->found_rec || rec->num_duplicates > 1)
9621                 return 0;
9622
9623         /* Shouldn't happen but just in case */
9624         BUG_ON(!rec->num_duplicates);
9625
9626         /*
9627          * So this happens if we end up with a backref that doesn't match the
9628          * actual extent entry.  So either the backref is bad or the extent
9629          * entry is bad.  Either way we want to have the extent_record actually
9630          * reflect what we found in the extent_tree, so we need to take the
9631          * duplicate out and use that as the extent_record since the only way we
9632          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9633          */
9634         remove_cache_extent(extent_cache, &rec->cache);
9635
9636         good = to_extent_record(rec->dups.next);
9637         list_del_init(&good->list);
9638         INIT_LIST_HEAD(&good->backrefs);
9639         INIT_LIST_HEAD(&good->dups);
9640         good->cache.start = good->start;
9641         good->cache.size = good->nr;
9642         good->content_checked = 0;
9643         good->owner_ref_checked = 0;
9644         good->num_duplicates = 0;
9645         good->refs = rec->refs;
9646         list_splice_init(&rec->backrefs, &good->backrefs);
9647         while (1) {
9648                 cache = lookup_cache_extent(extent_cache, good->start,
9649                                             good->nr);
9650                 if (!cache)
9651                         break;
9652                 tmp = container_of(cache, struct extent_record, cache);
9653
9654                 /*
9655                  * If we find another overlapping extent and it's found_rec is
9656                  * set then it's a duplicate and we need to try and delete
9657                  * something.
9658                  */
9659                 if (tmp->found_rec || tmp->num_duplicates > 0) {
9660                         if (list_empty(&good->list))
9661                                 list_add_tail(&good->list,
9662                                               &duplicate_extents);
9663                         good->num_duplicates += tmp->num_duplicates + 1;
9664                         list_splice_init(&tmp->dups, &good->dups);
9665                         list_del_init(&tmp->list);
9666                         list_add_tail(&tmp->list, &good->dups);
9667                         remove_cache_extent(extent_cache, &tmp->cache);
9668                         continue;
9669                 }
9670
9671                 /*
9672                  * Ok we have another non extent item backed extent rec, so lets
9673                  * just add it to this extent and carry on like we did above.
9674                  */
9675                 good->refs += tmp->refs;
9676                 list_splice_init(&tmp->backrefs, &good->backrefs);
9677                 remove_cache_extent(extent_cache, &tmp->cache);
9678                 free(tmp);
9679         }
9680         ret = insert_cache_extent(extent_cache, &good->cache);
9681         BUG_ON(ret);
9682         free(rec);
9683         return good->num_duplicates ? 0 : 1;
9684 }
9685
9686 static int delete_duplicate_records(struct btrfs_root *root,
9687                                     struct extent_record *rec)
9688 {
9689         struct btrfs_trans_handle *trans;
9690         LIST_HEAD(delete_list);
9691         struct btrfs_path path;
9692         struct extent_record *tmp, *good, *n;
9693         int nr_del = 0;
9694         int ret = 0, err;
9695         struct btrfs_key key;
9696
9697         btrfs_init_path(&path);
9698
9699         good = rec;
9700         /* Find the record that covers all of the duplicates. */
9701         list_for_each_entry(tmp, &rec->dups, list) {
9702                 if (good->start < tmp->start)
9703                         continue;
9704                 if (good->nr > tmp->nr)
9705                         continue;
9706
9707                 if (tmp->start + tmp->nr < good->start + good->nr) {
9708                         fprintf(stderr, "Ok we have overlapping extents that "
9709                                 "aren't completely covered by each other, this "
9710                                 "is going to require more careful thought.  "
9711                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9712                                 tmp->start, tmp->nr, good->start, good->nr);
9713                         abort();
9714                 }
9715                 good = tmp;
9716         }
9717
9718         if (good != rec)
9719                 list_add_tail(&rec->list, &delete_list);
9720
9721         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9722                 if (tmp == good)
9723                         continue;
9724                 list_move_tail(&tmp->list, &delete_list);
9725         }
9726
9727         root = root->fs_info->extent_root;
9728         trans = btrfs_start_transaction(root, 1);
9729         if (IS_ERR(trans)) {
9730                 ret = PTR_ERR(trans);
9731                 goto out;
9732         }
9733
9734         list_for_each_entry(tmp, &delete_list, list) {
9735                 if (tmp->found_rec == 0)
9736                         continue;
9737                 key.objectid = tmp->start;
9738                 key.type = BTRFS_EXTENT_ITEM_KEY;
9739                 key.offset = tmp->nr;
9740
9741                 /* Shouldn't happen but just in case */
9742                 if (tmp->metadata) {
9743                         fprintf(stderr, "Well this shouldn't happen, extent "
9744                                 "record overlaps but is metadata? "
9745                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9746                         abort();
9747                 }
9748
9749                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9750                 if (ret) {
9751                         if (ret > 0)
9752                                 ret = -EINVAL;
9753                         break;
9754                 }
9755                 ret = btrfs_del_item(trans, root, &path);
9756                 if (ret)
9757                         break;
9758                 btrfs_release_path(&path);
9759                 nr_del++;
9760         }
9761         err = btrfs_commit_transaction(trans, root);
9762         if (err && !ret)
9763                 ret = err;
9764 out:
9765         while (!list_empty(&delete_list)) {
9766                 tmp = to_extent_record(delete_list.next);
9767                 list_del_init(&tmp->list);
9768                 if (tmp == rec)
9769                         continue;
9770                 free(tmp);
9771         }
9772
9773         while (!list_empty(&rec->dups)) {
9774                 tmp = to_extent_record(rec->dups.next);
9775                 list_del_init(&tmp->list);
9776                 free(tmp);
9777         }
9778
9779         btrfs_release_path(&path);
9780
9781         if (!ret && !nr_del)
9782                 rec->num_duplicates = 0;
9783
9784         return ret ? ret : nr_del;
9785 }
9786
9787 static int find_possible_backrefs(struct btrfs_fs_info *info,
9788                                   struct btrfs_path *path,
9789                                   struct cache_tree *extent_cache,
9790                                   struct extent_record *rec)
9791 {
9792         struct btrfs_root *root;
9793         struct extent_backref *back, *tmp;
9794         struct data_backref *dback;
9795         struct cache_extent *cache;
9796         struct btrfs_file_extent_item *fi;
9797         struct btrfs_key key;
9798         u64 bytenr, bytes;
9799         int ret;
9800
9801         rbtree_postorder_for_each_entry_safe(back, tmp,
9802                                              &rec->backref_tree, node) {
9803                 /* Don't care about full backrefs (poor unloved backrefs) */
9804                 if (back->full_backref || !back->is_data)
9805                         continue;
9806
9807                 dback = to_data_backref(back);
9808
9809                 /* We found this one, we don't need to do a lookup */
9810                 if (dback->found_ref)
9811                         continue;
9812
9813                 key.objectid = dback->root;
9814                 key.type = BTRFS_ROOT_ITEM_KEY;
9815                 key.offset = (u64)-1;
9816
9817                 root = btrfs_read_fs_root(info, &key);
9818
9819                 /* No root, definitely a bad ref, skip */
9820                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9821                         continue;
9822                 /* Other err, exit */
9823                 if (IS_ERR(root))
9824                         return PTR_ERR(root);
9825
9826                 key.objectid = dback->owner;
9827                 key.type = BTRFS_EXTENT_DATA_KEY;
9828                 key.offset = dback->offset;
9829                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9830                 if (ret) {
9831                         btrfs_release_path(path);
9832                         if (ret < 0)
9833                                 return ret;
9834                         /* Didn't find it, we can carry on */
9835                         ret = 0;
9836                         continue;
9837                 }
9838
9839                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9840                                     struct btrfs_file_extent_item);
9841                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9842                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9843                 btrfs_release_path(path);
9844                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9845                 if (cache) {
9846                         struct extent_record *tmp;
9847                         tmp = container_of(cache, struct extent_record, cache);
9848
9849                         /*
9850                          * If we found an extent record for the bytenr for this
9851                          * particular backref then we can't add it to our
9852                          * current extent record.  We only want to add backrefs
9853                          * that don't have a corresponding extent item in the
9854                          * extent tree since they likely belong to this record
9855                          * and we need to fix it if it doesn't match bytenrs.
9856                          */
9857                         if  (tmp->found_rec)
9858                                 continue;
9859                 }
9860
9861                 dback->found_ref += 1;
9862                 dback->disk_bytenr = bytenr;
9863                 dback->bytes = bytes;
9864
9865                 /*
9866                  * Set this so the verify backref code knows not to trust the
9867                  * values in this backref.
9868                  */
9869                 back->broken = 1;
9870         }
9871
9872         return 0;
9873 }
9874
9875 /*
9876  * Record orphan data ref into corresponding root.
9877  *
9878  * Return 0 if the extent item contains data ref and recorded.
9879  * Return 1 if the extent item contains no useful data ref
9880  *   On that case, it may contains only shared_dataref or metadata backref
9881  *   or the file extent exists(this should be handled by the extent bytenr
9882  *   recovery routine)
9883  * Return <0 if something goes wrong.
9884  */
9885 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9886                                       struct extent_record *rec)
9887 {
9888         struct btrfs_key key;
9889         struct btrfs_root *dest_root;
9890         struct extent_backref *back, *tmp;
9891         struct data_backref *dback;
9892         struct orphan_data_extent *orphan;
9893         struct btrfs_path path;
9894         int recorded_data_ref = 0;
9895         int ret = 0;
9896
9897         if (rec->metadata)
9898                 return 1;
9899         btrfs_init_path(&path);
9900         rbtree_postorder_for_each_entry_safe(back, tmp,
9901                                              &rec->backref_tree, node) {
9902                 if (back->full_backref || !back->is_data ||
9903                     !back->found_extent_tree)
9904                         continue;
9905                 dback = to_data_backref(back);
9906                 if (dback->found_ref)
9907                         continue;
9908                 key.objectid = dback->root;
9909                 key.type = BTRFS_ROOT_ITEM_KEY;
9910                 key.offset = (u64)-1;
9911
9912                 dest_root = btrfs_read_fs_root(fs_info, &key);
9913
9914                 /* For non-exist root we just skip it */
9915                 if (IS_ERR(dest_root) || !dest_root)
9916                         continue;
9917
9918                 key.objectid = dback->owner;
9919                 key.type = BTRFS_EXTENT_DATA_KEY;
9920                 key.offset = dback->offset;
9921
9922                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9923                 btrfs_release_path(&path);
9924                 /*
9925                  * For ret < 0, it's OK since the fs-tree may be corrupted,
9926                  * we need to record it for inode/file extent rebuild.
9927                  * For ret > 0, we record it only for file extent rebuild.
9928                  * For ret == 0, the file extent exists but only bytenr
9929                  * mismatch, let the original bytenr fix routine to handle,
9930                  * don't record it.
9931                  */
9932                 if (ret == 0)
9933                         continue;
9934                 ret = 0;
9935                 orphan = malloc(sizeof(*orphan));
9936                 if (!orphan) {
9937                         ret = -ENOMEM;
9938                         goto out;
9939                 }
9940                 INIT_LIST_HEAD(&orphan->list);
9941                 orphan->root = dback->root;
9942                 orphan->objectid = dback->owner;
9943                 orphan->offset = dback->offset;
9944                 orphan->disk_bytenr = rec->cache.start;
9945                 orphan->disk_len = rec->cache.size;
9946                 list_add(&dest_root->orphan_data_extents, &orphan->list);
9947                 recorded_data_ref = 1;
9948         }
9949 out:
9950         btrfs_release_path(&path);
9951         if (!ret)
9952                 return !recorded_data_ref;
9953         else
9954                 return ret;
9955 }
9956
9957 /*
9958  * when an incorrect extent item is found, this will delete
9959  * all of the existing entries for it and recreate them
9960  * based on what the tree scan found.
9961  */
9962 static int fixup_extent_refs(struct btrfs_fs_info *info,
9963                              struct cache_tree *extent_cache,
9964                              struct extent_record *rec)
9965 {
9966         struct btrfs_trans_handle *trans = NULL;
9967         int ret;
9968         struct btrfs_path path;
9969         struct cache_extent *cache;
9970         struct extent_backref *back, *tmp;
9971         int allocated = 0;
9972         u64 flags = 0;
9973
9974         if (rec->flag_block_full_backref)
9975                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9976
9977         btrfs_init_path(&path);
9978         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9979                 /*
9980                  * Sometimes the backrefs themselves are so broken they don't
9981                  * get attached to any meaningful rec, so first go back and
9982                  * check any of our backrefs that we couldn't find and throw
9983                  * them into the list if we find the backref so that
9984                  * verify_backrefs can figure out what to do.
9985                  */
9986                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9987                 if (ret < 0)
9988                         goto out;
9989         }
9990
9991         /* step one, make sure all of the backrefs agree */
9992         ret = verify_backrefs(info, &path, rec);
9993         if (ret < 0)
9994                 goto out;
9995
9996         trans = btrfs_start_transaction(info->extent_root, 1);
9997         if (IS_ERR(trans)) {
9998                 ret = PTR_ERR(trans);
9999                 goto out;
10000         }
10001
10002         /* step two, delete all the existing records */
10003         ret = delete_extent_records(trans, info->extent_root, &path,
10004                                     rec->start);
10005
10006         if (ret < 0)
10007                 goto out;
10008
10009         /* was this block corrupt?  If so, don't add references to it */
10010         cache = lookup_cache_extent(info->corrupt_blocks,
10011                                     rec->start, rec->max_size);
10012         if (cache) {
10013                 ret = 0;
10014                 goto out;
10015         }
10016
10017         /* step three, recreate all the refs we did find */
10018         rbtree_postorder_for_each_entry_safe(back, tmp,
10019                                              &rec->backref_tree, node) {
10020                 /*
10021                  * if we didn't find any references, don't create a
10022                  * new extent record
10023                  */
10024                 if (!back->found_ref)
10025                         continue;
10026
10027                 rec->bad_full_backref = 0;
10028                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10029                 allocated = 1;
10030
10031                 if (ret)
10032                         goto out;
10033         }
10034 out:
10035         if (trans) {
10036                 int err = btrfs_commit_transaction(trans, info->extent_root);
10037                 if (!ret)
10038                         ret = err;
10039         }
10040
10041         if (!ret)
10042                 fprintf(stderr, "Repaired extent references for %llu\n",
10043                                 (unsigned long long)rec->start);
10044
10045         btrfs_release_path(&path);
10046         return ret;
10047 }
10048
10049 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10050                               struct extent_record *rec)
10051 {
10052         struct btrfs_trans_handle *trans;
10053         struct btrfs_root *root = fs_info->extent_root;
10054         struct btrfs_path path;
10055         struct btrfs_extent_item *ei;
10056         struct btrfs_key key;
10057         u64 flags;
10058         int ret = 0;
10059
10060         key.objectid = rec->start;
10061         if (rec->metadata) {
10062                 key.type = BTRFS_METADATA_ITEM_KEY;
10063                 key.offset = rec->info_level;
10064         } else {
10065                 key.type = BTRFS_EXTENT_ITEM_KEY;
10066                 key.offset = rec->max_size;
10067         }
10068
10069         trans = btrfs_start_transaction(root, 0);
10070         if (IS_ERR(trans))
10071                 return PTR_ERR(trans);
10072
10073         btrfs_init_path(&path);
10074         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10075         if (ret < 0) {
10076                 btrfs_release_path(&path);
10077                 btrfs_commit_transaction(trans, root);
10078                 return ret;
10079         } else if (ret) {
10080                 fprintf(stderr, "Didn't find extent for %llu\n",
10081                         (unsigned long long)rec->start);
10082                 btrfs_release_path(&path);
10083                 btrfs_commit_transaction(trans, root);
10084                 return -ENOENT;
10085         }
10086
10087         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10088                             struct btrfs_extent_item);
10089         flags = btrfs_extent_flags(path.nodes[0], ei);
10090         if (rec->flag_block_full_backref) {
10091                 fprintf(stderr, "setting full backref on %llu\n",
10092                         (unsigned long long)key.objectid);
10093                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10094         } else {
10095                 fprintf(stderr, "clearing full backref on %llu\n",
10096                         (unsigned long long)key.objectid);
10097                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10098         }
10099         btrfs_set_extent_flags(path.nodes[0], ei, flags);
10100         btrfs_mark_buffer_dirty(path.nodes[0]);
10101         btrfs_release_path(&path);
10102         ret = btrfs_commit_transaction(trans, root);
10103         if (!ret)
10104                 fprintf(stderr, "Repaired extent flags for %llu\n",
10105                                 (unsigned long long)rec->start);
10106
10107         return ret;
10108 }
10109
10110 /* right now we only prune from the extent allocation tree */
10111 static int prune_one_block(struct btrfs_trans_handle *trans,
10112                            struct btrfs_fs_info *info,
10113                            struct btrfs_corrupt_block *corrupt)
10114 {
10115         int ret;
10116         struct btrfs_path path;
10117         struct extent_buffer *eb;
10118         u64 found;
10119         int slot;
10120         int nritems;
10121         int level = corrupt->level + 1;
10122
10123         btrfs_init_path(&path);
10124 again:
10125         /* we want to stop at the parent to our busted block */
10126         path.lowest_level = level;
10127
10128         ret = btrfs_search_slot(trans, info->extent_root,
10129                                 &corrupt->key, &path, -1, 1);
10130
10131         if (ret < 0)
10132                 goto out;
10133
10134         eb = path.nodes[level];
10135         if (!eb) {
10136                 ret = -ENOENT;
10137                 goto out;
10138         }
10139
10140         /*
10141          * hopefully the search gave us the block we want to prune,
10142          * lets try that first
10143          */
10144         slot = path.slots[level];
10145         found =  btrfs_node_blockptr(eb, slot);
10146         if (found == corrupt->cache.start)
10147                 goto del_ptr;
10148
10149         nritems = btrfs_header_nritems(eb);
10150
10151         /* the search failed, lets scan this node and hope we find it */
10152         for (slot = 0; slot < nritems; slot++) {
10153                 found =  btrfs_node_blockptr(eb, slot);
10154                 if (found == corrupt->cache.start)
10155                         goto del_ptr;
10156         }
10157         /*
10158          * we couldn't find the bad block.  TODO, search all the nodes for pointers
10159          * to this block
10160          */
10161         if (eb == info->extent_root->node) {
10162                 ret = -ENOENT;
10163                 goto out;
10164         } else {
10165                 level++;
10166                 btrfs_release_path(&path);
10167                 goto again;
10168         }
10169
10170 del_ptr:
10171         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10172         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10173
10174 out:
10175         btrfs_release_path(&path);
10176         return ret;
10177 }
10178
10179 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10180 {
10181         struct btrfs_trans_handle *trans = NULL;
10182         struct cache_extent *cache;
10183         struct btrfs_corrupt_block *corrupt;
10184
10185         while (1) {
10186                 cache = search_cache_extent(info->corrupt_blocks, 0);
10187                 if (!cache)
10188                         break;
10189                 if (!trans) {
10190                         trans = btrfs_start_transaction(info->extent_root, 1);
10191                         if (IS_ERR(trans))
10192                                 return PTR_ERR(trans);
10193                 }
10194                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10195                 prune_one_block(trans, info, corrupt);
10196                 remove_cache_extent(info->corrupt_blocks, cache);
10197         }
10198         if (trans)
10199                 return btrfs_commit_transaction(trans, info->extent_root);
10200         return 0;
10201 }
10202
10203 static int check_extent_refs(struct btrfs_root *root,
10204                              struct cache_tree *extent_cache)
10205 {
10206         struct extent_record *rec;
10207         struct cache_extent *cache;
10208         int ret = 0;
10209         int had_dups = 0;
10210         int err = 0;
10211
10212         if (repair) {
10213                 /*
10214                  * if we're doing a repair, we have to make sure
10215                  * we don't allocate from the problem extents.
10216                  * In the worst case, this will be all the
10217                  * extents in the FS
10218                  */
10219                 cache = search_cache_extent(extent_cache, 0);
10220                 while(cache) {
10221                         rec = container_of(cache, struct extent_record, cache);
10222                         set_extent_dirty(root->fs_info->excluded_extents,
10223                                          rec->start,
10224                                          rec->start + rec->max_size - 1);
10225                         cache = next_cache_extent(cache);
10226                 }
10227
10228                 /* pin down all the corrupted blocks too */
10229                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10230                 while(cache) {
10231                         set_extent_dirty(root->fs_info->excluded_extents,
10232                                          cache->start,
10233                                          cache->start + cache->size - 1);
10234                         cache = next_cache_extent(cache);
10235                 }
10236                 prune_corrupt_blocks(root->fs_info);
10237                 reset_cached_block_groups(root->fs_info);
10238         }
10239
10240         reset_cached_block_groups(root->fs_info);
10241
10242         /*
10243          * We need to delete any duplicate entries we find first otherwise we
10244          * could mess up the extent tree when we have backrefs that actually
10245          * belong to a different extent item and not the weird duplicate one.
10246          */
10247         while (repair && !list_empty(&duplicate_extents)) {
10248                 rec = to_extent_record(duplicate_extents.next);
10249                 list_del_init(&rec->list);
10250
10251                 /* Sometimes we can find a backref before we find an actual
10252                  * extent, so we need to process it a little bit to see if there
10253                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10254                  * if this is a backref screwup.  If we need to delete stuff
10255                  * process_duplicates() will return 0, otherwise it will return
10256                  * 1 and we
10257                  */
10258                 if (process_duplicates(extent_cache, rec))
10259                         continue;
10260                 ret = delete_duplicate_records(root, rec);
10261                 if (ret < 0)
10262                         return ret;
10263                 /*
10264                  * delete_duplicate_records will return the number of entries
10265                  * deleted, so if it's greater than 0 then we know we actually
10266                  * did something and we need to remove.
10267                  */
10268                 if (ret)
10269                         had_dups = 1;
10270         }
10271
10272         if (had_dups)
10273                 return -EAGAIN;
10274
10275         while(1) {
10276                 int cur_err = 0;
10277                 int fix = 0;
10278
10279                 cache = search_cache_extent(extent_cache, 0);
10280                 if (!cache)
10281                         break;
10282                 rec = container_of(cache, struct extent_record, cache);
10283                 if (rec->num_duplicates) {
10284                         fprintf(stderr, "extent item %llu has multiple extent "
10285                                 "items\n", (unsigned long long)rec->start);
10286                         cur_err = 1;
10287                 }
10288
10289                 if (rec->refs != rec->extent_item_refs) {
10290                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
10291                                 (unsigned long long)rec->start,
10292                                 (unsigned long long)rec->nr);
10293                         fprintf(stderr, "extent item %llu, found %llu\n",
10294                                 (unsigned long long)rec->extent_item_refs,
10295                                 (unsigned long long)rec->refs);
10296                         ret = record_orphan_data_extents(root->fs_info, rec);
10297                         if (ret < 0)
10298                                 goto repair_abort;
10299                         fix = ret;
10300                         cur_err = 1;
10301                 }
10302                 if (all_backpointers_checked(rec, 1)) {
10303                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10304                                 (unsigned long long)rec->start,
10305                                 (unsigned long long)rec->nr);
10306                         fix = 1;
10307                         cur_err = 1;
10308                 }
10309                 if (!rec->owner_ref_checked) {
10310                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10311                                 (unsigned long long)rec->start,
10312                                 (unsigned long long)rec->nr);
10313                         fix = 1;
10314                         cur_err = 1;
10315                 }
10316
10317                 if (repair && fix) {
10318                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10319                         if (ret)
10320                                 goto repair_abort;
10321                 }
10322
10323
10324                 if (rec->bad_full_backref) {
10325                         fprintf(stderr, "bad full backref, on [%llu]\n",
10326                                 (unsigned long long)rec->start);
10327                         if (repair) {
10328                                 ret = fixup_extent_flags(root->fs_info, rec);
10329                                 if (ret)
10330                                         goto repair_abort;
10331                                 fix = 1;
10332                         }
10333                         cur_err = 1;
10334                 }
10335                 /*
10336                  * Although it's not a extent ref's problem, we reuse this
10337                  * routine for error reporting.
10338                  * No repair function yet.
10339                  */
10340                 if (rec->crossing_stripes) {
10341                         fprintf(stderr,
10342                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10343                                 rec->start, rec->start + rec->max_size);
10344                         cur_err = 1;
10345                 }
10346
10347                 if (rec->wrong_chunk_type) {
10348                         fprintf(stderr,
10349                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
10350                                 rec->start, rec->start + rec->max_size);
10351                         cur_err = 1;
10352                 }
10353
10354                 err = cur_err;
10355                 remove_cache_extent(extent_cache, cache);
10356                 free_all_extent_backrefs(rec);
10357                 if (!init_extent_tree && repair && (!cur_err || fix))
10358                         clear_extent_dirty(root->fs_info->excluded_extents,
10359                                            rec->start,
10360                                            rec->start + rec->max_size - 1);
10361                 free(rec);
10362         }
10363 repair_abort:
10364         if (repair) {
10365                 if (ret && ret != -EAGAIN) {
10366                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10367                         exit(1);
10368                 } else if (!ret) {
10369                         struct btrfs_trans_handle *trans;
10370
10371                         root = root->fs_info->extent_root;
10372                         trans = btrfs_start_transaction(root, 1);
10373                         if (IS_ERR(trans)) {
10374                                 ret = PTR_ERR(trans);
10375                                 goto repair_abort;
10376                         }
10377
10378                         ret = btrfs_fix_block_accounting(trans, root);
10379                         if (ret)
10380                                 goto repair_abort;
10381                         ret = btrfs_commit_transaction(trans, root);
10382                         if (ret)
10383                                 goto repair_abort;
10384                 }
10385                 return ret;
10386         }
10387
10388         if (err)
10389                 err = -EIO;
10390         return err;
10391 }
10392
10393 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10394 {
10395         u64 stripe_size;
10396
10397         if (type & BTRFS_BLOCK_GROUP_RAID0) {
10398                 stripe_size = length;
10399                 stripe_size /= num_stripes;
10400         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10401                 stripe_size = length * 2;
10402                 stripe_size /= num_stripes;
10403         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10404                 stripe_size = length;
10405                 stripe_size /= (num_stripes - 1);
10406         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10407                 stripe_size = length;
10408                 stripe_size /= (num_stripes - 2);
10409         } else {
10410                 stripe_size = length;
10411         }
10412         return stripe_size;
10413 }
10414
10415 /*
10416  * Check the chunk with its block group/dev list ref:
10417  * Return 0 if all refs seems valid.
10418  * Return 1 if part of refs seems valid, need later check for rebuild ref
10419  * like missing block group and needs to search extent tree to rebuild them.
10420  * Return -1 if essential refs are missing and unable to rebuild.
10421  */
10422 static int check_chunk_refs(struct chunk_record *chunk_rec,
10423                             struct block_group_tree *block_group_cache,
10424                             struct device_extent_tree *dev_extent_cache,
10425                             int silent)
10426 {
10427         struct cache_extent *block_group_item;
10428         struct block_group_record *block_group_rec;
10429         struct cache_extent *dev_extent_item;
10430         struct device_extent_record *dev_extent_rec;
10431         u64 devid;
10432         u64 offset;
10433         u64 length;
10434         int metadump_v2 = 0;
10435         int i;
10436         int ret = 0;
10437
10438         block_group_item = lookup_cache_extent(&block_group_cache->tree,
10439                                                chunk_rec->offset,
10440                                                chunk_rec->length);
10441         if (block_group_item) {
10442                 block_group_rec = container_of(block_group_item,
10443                                                struct block_group_record,
10444                                                cache);
10445                 if (chunk_rec->length != block_group_rec->offset ||
10446                     chunk_rec->offset != block_group_rec->objectid ||
10447                     (!metadump_v2 &&
10448                      chunk_rec->type_flags != block_group_rec->flags)) {
10449                         if (!silent)
10450                                 fprintf(stderr,
10451                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10452                                         chunk_rec->objectid,
10453                                         chunk_rec->type,
10454                                         chunk_rec->offset,
10455                                         chunk_rec->length,
10456                                         chunk_rec->offset,
10457                                         chunk_rec->type_flags,
10458                                         block_group_rec->objectid,
10459                                         block_group_rec->type,
10460                                         block_group_rec->offset,
10461                                         block_group_rec->offset,
10462                                         block_group_rec->objectid,
10463                                         block_group_rec->flags);
10464                         ret = -1;
10465                 } else {
10466                         list_del_init(&block_group_rec->list);
10467                         chunk_rec->bg_rec = block_group_rec;
10468                 }
10469         } else {
10470                 if (!silent)
10471                         fprintf(stderr,
10472                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10473                                 chunk_rec->objectid,
10474                                 chunk_rec->type,
10475                                 chunk_rec->offset,
10476                                 chunk_rec->length,
10477                                 chunk_rec->offset,
10478                                 chunk_rec->type_flags);
10479                 ret = 1;
10480         }
10481
10482         if (metadump_v2)
10483                 return ret;
10484
10485         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10486                                     chunk_rec->num_stripes);
10487         for (i = 0; i < chunk_rec->num_stripes; ++i) {
10488                 devid = chunk_rec->stripes[i].devid;
10489                 offset = chunk_rec->stripes[i].offset;
10490                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10491                                                        devid, offset, length);
10492                 if (dev_extent_item) {
10493                         dev_extent_rec = container_of(dev_extent_item,
10494                                                 struct device_extent_record,
10495                                                 cache);
10496                         if (dev_extent_rec->objectid != devid ||
10497                             dev_extent_rec->offset != offset ||
10498                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
10499                             dev_extent_rec->length != length) {
10500                                 if (!silent)
10501                                         fprintf(stderr,
10502                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10503                                                 chunk_rec->objectid,
10504                                                 chunk_rec->type,
10505                                                 chunk_rec->offset,
10506                                                 chunk_rec->stripes[i].devid,
10507                                                 chunk_rec->stripes[i].offset,
10508                                                 dev_extent_rec->objectid,
10509                                                 dev_extent_rec->offset,
10510                                                 dev_extent_rec->length);
10511                                 ret = -1;
10512                         } else {
10513                                 list_move(&dev_extent_rec->chunk_list,
10514                                           &chunk_rec->dextents);
10515                         }
10516                 } else {
10517                         if (!silent)
10518                                 fprintf(stderr,
10519                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10520                                         chunk_rec->objectid,
10521                                         chunk_rec->type,
10522                                         chunk_rec->offset,
10523                                         chunk_rec->stripes[i].devid,
10524                                         chunk_rec->stripes[i].offset);
10525                         ret = -1;
10526                 }
10527         }
10528         return ret;
10529 }
10530
10531 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
10532 int check_chunks(struct cache_tree *chunk_cache,
10533                  struct block_group_tree *block_group_cache,
10534                  struct device_extent_tree *dev_extent_cache,
10535                  struct list_head *good, struct list_head *bad,
10536                  struct list_head *rebuild, int silent)
10537 {
10538         struct cache_extent *chunk_item;
10539         struct chunk_record *chunk_rec;
10540         struct block_group_record *bg_rec;
10541         struct device_extent_record *dext_rec;
10542         int err;
10543         int ret = 0;
10544
10545         chunk_item = first_cache_extent(chunk_cache);
10546         while (chunk_item) {
10547                 chunk_rec = container_of(chunk_item, struct chunk_record,
10548                                          cache);
10549                 err = check_chunk_refs(chunk_rec, block_group_cache,
10550                                        dev_extent_cache, silent);
10551                 if (err < 0)
10552                         ret = err;
10553                 if (err == 0 && good)
10554                         list_add_tail(&chunk_rec->list, good);
10555                 if (err > 0 && rebuild)
10556                         list_add_tail(&chunk_rec->list, rebuild);
10557                 if (err < 0 && bad)
10558                         list_add_tail(&chunk_rec->list, bad);
10559                 chunk_item = next_cache_extent(chunk_item);
10560         }
10561
10562         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10563                 if (!silent)
10564                         fprintf(stderr,
10565                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10566                                 bg_rec->objectid,
10567                                 bg_rec->offset,
10568                                 bg_rec->flags);
10569                 if (!ret)
10570                         ret = 1;
10571         }
10572
10573         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10574                             chunk_list) {
10575                 if (!silent)
10576                         fprintf(stderr,
10577                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10578                                 dext_rec->objectid,
10579                                 dext_rec->offset,
10580                                 dext_rec->length);
10581                 if (!ret)
10582                         ret = 1;
10583         }
10584         return ret;
10585 }
10586
10587
10588 static int check_device_used(struct device_record *dev_rec,
10589                              struct device_extent_tree *dext_cache)
10590 {
10591         struct cache_extent *cache;
10592         struct device_extent_record *dev_extent_rec;
10593         u64 total_byte = 0;
10594
10595         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10596         while (cache) {
10597                 dev_extent_rec = container_of(cache,
10598                                               struct device_extent_record,
10599                                               cache);
10600                 if (dev_extent_rec->objectid != dev_rec->devid)
10601                         break;
10602
10603                 list_del_init(&dev_extent_rec->device_list);
10604                 total_byte += dev_extent_rec->length;
10605                 cache = next_cache_extent(cache);
10606         }
10607
10608         if (total_byte != dev_rec->byte_used) {
10609                 fprintf(stderr,
10610                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10611                         total_byte, dev_rec->byte_used, dev_rec->objectid,
10612                         dev_rec->type, dev_rec->offset);
10613                 return -1;
10614         } else {
10615                 return 0;
10616         }
10617 }
10618
10619 /*
10620  * Unlike device size alignment check above, some super total_bytes check
10621  * failure can lead to mount failure for newer kernel.
10622  *
10623  * So this function will return the error for a fatal super total_bytes problem.
10624  */
10625 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
10626 {
10627         struct btrfs_device *dev;
10628         struct list_head *dev_list = &fs_info->fs_devices->devices;
10629         u64 total_bytes = 0;
10630         u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
10631
10632         list_for_each_entry(dev, dev_list, dev_list)
10633                 total_bytes += dev->total_bytes;
10634
10635         /* Important check, which can cause unmountable fs */
10636         if (super_bytes < total_bytes) {
10637                 error("super total bytes %llu smaller than real device(s) size %llu",
10638                         super_bytes, total_bytes);
10639                 error("mounting this fs may fail for newer kernels");
10640                 error("this can be fixed by 'btrfs rescue fix-device-size'");
10641                 return false;
10642         }
10643
10644         /*
10645          * Optional check, just to make everything aligned and match with each
10646          * other.
10647          *
10648          * For a btrfs-image restored fs, we don't need to check it anyway.
10649          */
10650         if (btrfs_super_flags(fs_info->super_copy) &
10651             (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
10652                 return true;
10653         if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
10654             !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
10655             super_bytes != total_bytes) {
10656                 warning("minor unaligned/mismatch device size detected");
10657                 warning(
10658                 "recommended to use 'btrfs rescue fix-device-size' to fix it");
10659         }
10660         return true;
10661 }
10662
10663 /* check btrfs_dev_item -> btrfs_dev_extent */
10664 static int check_devices(struct rb_root *dev_cache,
10665                          struct device_extent_tree *dev_extent_cache)
10666 {
10667         struct rb_node *dev_node;
10668         struct device_record *dev_rec;
10669         struct device_extent_record *dext_rec;
10670         int err;
10671         int ret = 0;
10672
10673         dev_node = rb_first(dev_cache);
10674         while (dev_node) {
10675                 dev_rec = container_of(dev_node, struct device_record, node);
10676                 err = check_device_used(dev_rec, dev_extent_cache);
10677                 if (err)
10678                         ret = err;
10679
10680                 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
10681                                          global_info->sectorsize);
10682                 dev_node = rb_next(dev_node);
10683         }
10684         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10685                             device_list) {
10686                 fprintf(stderr,
10687                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10688                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
10689                 if (!ret)
10690                         ret = 1;
10691         }
10692         return ret;
10693 }
10694
10695 static int add_root_item_to_list(struct list_head *head,
10696                                   u64 objectid, u64 bytenr, u64 last_snapshot,
10697                                   u8 level, u8 drop_level,
10698                                   struct btrfs_key *drop_key)
10699 {
10700
10701         struct root_item_record *ri_rec;
10702         ri_rec = malloc(sizeof(*ri_rec));
10703         if (!ri_rec)
10704                 return -ENOMEM;
10705         ri_rec->bytenr = bytenr;
10706         ri_rec->objectid = objectid;
10707         ri_rec->level = level;
10708         ri_rec->drop_level = drop_level;
10709         ri_rec->last_snapshot = last_snapshot;
10710         if (drop_key)
10711                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10712         list_add_tail(&ri_rec->list, head);
10713
10714         return 0;
10715 }
10716
10717 static void free_root_item_list(struct list_head *list)
10718 {
10719         struct root_item_record *ri_rec;
10720
10721         while (!list_empty(list)) {
10722                 ri_rec = list_first_entry(list, struct root_item_record,
10723                                           list);
10724                 list_del_init(&ri_rec->list);
10725                 free(ri_rec);
10726         }
10727 }
10728
10729 static int deal_root_from_list(struct list_head *list,
10730                                struct btrfs_root *root,
10731                                struct block_info *bits,
10732                                int bits_nr,
10733                                struct cache_tree *pending,
10734                                struct cache_tree *seen,
10735                                struct cache_tree *reada,
10736                                struct cache_tree *nodes,
10737                                struct cache_tree *extent_cache,
10738                                struct cache_tree *chunk_cache,
10739                                struct rb_root *dev_cache,
10740                                struct block_group_tree *block_group_cache,
10741                                struct device_extent_tree *dev_extent_cache)
10742 {
10743         int ret = 0;
10744         u64 last;
10745
10746         while (!list_empty(list)) {
10747                 struct root_item_record *rec;
10748                 struct extent_buffer *buf;
10749                 rec = list_entry(list->next,
10750                                  struct root_item_record, list);
10751                 last = 0;
10752                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10753                 if (!extent_buffer_uptodate(buf)) {
10754                         free_extent_buffer(buf);
10755                         ret = -EIO;
10756                         break;
10757                 }
10758                 ret = add_root_to_pending(buf, extent_cache, pending,
10759                                     seen, nodes, rec->objectid);
10760                 if (ret < 0)
10761                         break;
10762                 /*
10763                  * To rebuild extent tree, we need deal with snapshot
10764                  * one by one, otherwise we deal with node firstly which
10765                  * can maximize readahead.
10766                  */
10767                 while (1) {
10768                         ret = run_next_block(root, bits, bits_nr, &last,
10769                                              pending, seen, reada, nodes,
10770                                              extent_cache, chunk_cache,
10771                                              dev_cache, block_group_cache,
10772                                              dev_extent_cache, rec);
10773                         if (ret != 0)
10774                                 break;
10775                 }
10776                 free_extent_buffer(buf);
10777                 list_del(&rec->list);
10778                 free(rec);
10779                 if (ret < 0)
10780                         break;
10781         }
10782         while (ret >= 0) {
10783                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10784                                      reada, nodes, extent_cache, chunk_cache,
10785                                      dev_cache, block_group_cache,
10786                                      dev_extent_cache, NULL);
10787                 if (ret != 0) {
10788                         if (ret > 0)
10789                                 ret = 0;
10790                         break;
10791                 }
10792         }
10793         return ret;
10794 }
10795
10796 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10797 {
10798         struct rb_root dev_cache;
10799         struct cache_tree chunk_cache;
10800         struct block_group_tree block_group_cache;
10801         struct device_extent_tree dev_extent_cache;
10802         struct cache_tree extent_cache;
10803         struct cache_tree seen;
10804         struct cache_tree pending;
10805         struct cache_tree reada;
10806         struct cache_tree nodes;
10807         struct extent_io_tree excluded_extents;
10808         struct cache_tree corrupt_blocks;
10809         struct btrfs_path path;
10810         struct btrfs_key key;
10811         struct btrfs_key found_key;
10812         int ret, err = 0;
10813         struct block_info *bits;
10814         int bits_nr;
10815         struct extent_buffer *leaf;
10816         int slot;
10817         struct btrfs_root_item ri;
10818         struct list_head dropping_trees;
10819         struct list_head normal_trees;
10820         struct btrfs_root *root1;
10821         struct btrfs_root *root;
10822         u64 objectid;
10823         u8 level;
10824
10825         root = fs_info->fs_root;
10826         dev_cache = RB_ROOT;
10827         cache_tree_init(&chunk_cache);
10828         block_group_tree_init(&block_group_cache);
10829         device_extent_tree_init(&dev_extent_cache);
10830
10831         cache_tree_init(&extent_cache);
10832         cache_tree_init(&seen);
10833         cache_tree_init(&pending);
10834         cache_tree_init(&nodes);
10835         cache_tree_init(&reada);
10836         cache_tree_init(&corrupt_blocks);
10837         extent_io_tree_init(&excluded_extents);
10838         INIT_LIST_HEAD(&dropping_trees);
10839         INIT_LIST_HEAD(&normal_trees);
10840
10841         if (repair) {
10842                 fs_info->excluded_extents = &excluded_extents;
10843                 fs_info->fsck_extent_cache = &extent_cache;
10844                 fs_info->free_extent_hook = free_extent_hook;
10845                 fs_info->corrupt_blocks = &corrupt_blocks;
10846         }
10847
10848         bits_nr = 1024;
10849         bits = malloc(bits_nr * sizeof(struct block_info));
10850         if (!bits) {
10851                 perror("malloc");
10852                 exit(1);
10853         }
10854
10855         if (ctx.progress_enabled) {
10856                 ctx.tp = TASK_EXTENTS;
10857                 task_start(ctx.info);
10858         }
10859
10860 again:
10861         root1 = fs_info->tree_root;
10862         level = btrfs_header_level(root1->node);
10863         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10864                                     root1->node->start, 0, level, 0, NULL);
10865         if (ret < 0)
10866                 goto out;
10867         root1 = fs_info->chunk_root;
10868         level = btrfs_header_level(root1->node);
10869         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10870                                     root1->node->start, 0, level, 0, NULL);
10871         if (ret < 0)
10872                 goto out;
10873         btrfs_init_path(&path);
10874         key.offset = 0;
10875         key.objectid = 0;
10876         key.type = BTRFS_ROOT_ITEM_KEY;
10877         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10878         if (ret < 0)
10879                 goto out;
10880         while(1) {
10881                 leaf = path.nodes[0];
10882                 slot = path.slots[0];
10883                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10884                         ret = btrfs_next_leaf(root, &path);
10885                         if (ret != 0)
10886                                 break;
10887                         leaf = path.nodes[0];
10888                         slot = path.slots[0];
10889                 }
10890                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10891                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10892                         unsigned long offset;
10893                         u64 last_snapshot;
10894
10895                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10896                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10897                         last_snapshot = btrfs_root_last_snapshot(&ri);
10898                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10899                                 level = btrfs_root_level(&ri);
10900                                 ret = add_root_item_to_list(&normal_trees,
10901                                                 found_key.objectid,
10902                                                 btrfs_root_bytenr(&ri),
10903                                                 last_snapshot, level,
10904                                                 0, NULL);
10905                                 if (ret < 0)
10906                                         goto out;
10907                         } else {
10908                                 level = btrfs_root_level(&ri);
10909                                 objectid = found_key.objectid;
10910                                 btrfs_disk_key_to_cpu(&found_key,
10911                                                       &ri.drop_progress);
10912                                 ret = add_root_item_to_list(&dropping_trees,
10913                                                 objectid,
10914                                                 btrfs_root_bytenr(&ri),
10915                                                 last_snapshot, level,
10916                                                 ri.drop_level, &found_key);
10917                                 if (ret < 0)
10918                                         goto out;
10919                         }
10920                 }
10921                 path.slots[0]++;
10922         }
10923         btrfs_release_path(&path);
10924
10925         /*
10926          * check_block can return -EAGAIN if it fixes something, please keep
10927          * this in mind when dealing with return values from these functions, if
10928          * we get -EAGAIN we want to fall through and restart the loop.
10929          */
10930         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
10931                                   &seen, &reada, &nodes, &extent_cache,
10932                                   &chunk_cache, &dev_cache, &block_group_cache,
10933                                   &dev_extent_cache);
10934         if (ret < 0) {
10935                 if (ret == -EAGAIN)
10936                         goto loop;
10937                 goto out;
10938         }
10939         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
10940                                   &pending, &seen, &reada, &nodes,
10941                                   &extent_cache, &chunk_cache, &dev_cache,
10942                                   &block_group_cache, &dev_extent_cache);
10943         if (ret < 0) {
10944                 if (ret == -EAGAIN)
10945                         goto loop;
10946                 goto out;
10947         }
10948
10949         ret = check_chunks(&chunk_cache, &block_group_cache,
10950                            &dev_extent_cache, NULL, NULL, NULL, 0);
10951         if (ret) {
10952                 if (ret == -EAGAIN)
10953                         goto loop;
10954                 err = ret;
10955         }
10956
10957         ret = check_extent_refs(root, &extent_cache);
10958         if (ret < 0) {
10959                 if (ret == -EAGAIN)
10960                         goto loop;
10961                 goto out;
10962         }
10963
10964         ret = check_devices(&dev_cache, &dev_extent_cache);
10965         if (ret && err)
10966                 ret = err;
10967
10968 out:
10969         task_stop(ctx.info);
10970         if (repair) {
10971                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10972                 extent_io_tree_cleanup(&excluded_extents);
10973                 fs_info->fsck_extent_cache = NULL;
10974                 fs_info->free_extent_hook = NULL;
10975                 fs_info->corrupt_blocks = NULL;
10976                 fs_info->excluded_extents = NULL;
10977         }
10978         free(bits);
10979         free_chunk_cache_tree(&chunk_cache);
10980         free_device_cache_tree(&dev_cache);
10981         free_block_group_tree(&block_group_cache);
10982         free_device_extent_tree(&dev_extent_cache);
10983         free_extent_cache_tree(&seen);
10984         free_extent_cache_tree(&pending);
10985         free_extent_cache_tree(&reada);
10986         free_extent_cache_tree(&nodes);
10987         free_root_item_list(&normal_trees);
10988         free_root_item_list(&dropping_trees);
10989         return ret;
10990 loop:
10991         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10992         free_extent_cache_tree(&seen);
10993         free_extent_cache_tree(&pending);
10994         free_extent_cache_tree(&reada);
10995         free_extent_cache_tree(&nodes);
10996         free_chunk_cache_tree(&chunk_cache);
10997         free_block_group_tree(&block_group_cache);
10998         free_device_cache_tree(&dev_cache);
10999         free_device_extent_tree(&dev_extent_cache);
11000         free_extent_record_cache(&extent_cache);
11001         free_root_item_list(&normal_trees);
11002         free_root_item_list(&dropping_trees);
11003         extent_io_tree_cleanup(&excluded_extents);
11004         goto again;
11005 }
11006
11007 static int check_extent_inline_ref(struct extent_buffer *eb,
11008                    struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11009 {
11010         int ret;
11011         u8 type = btrfs_extent_inline_ref_type(eb, iref);
11012
11013         switch (type) {
11014         case BTRFS_TREE_BLOCK_REF_KEY:
11015         case BTRFS_EXTENT_DATA_REF_KEY:
11016         case BTRFS_SHARED_BLOCK_REF_KEY:
11017         case BTRFS_SHARED_DATA_REF_KEY:
11018                 ret = 0;
11019                 break;
11020         default:
11021                 error("extent[%llu %u %llu] has unknown ref type: %d",
11022                       key->objectid, key->type, key->offset, type);
11023                 ret = UNKNOWN_TYPE;
11024                 break;
11025         }
11026
11027         return ret;
11028 }
11029
11030 /*
11031  * Check backrefs of a tree block given by @bytenr or @eb.
11032  *
11033  * @root:       the root containing the @bytenr or @eb
11034  * @eb:         tree block extent buffer, can be NULL
11035  * @bytenr:     bytenr of the tree block to search
11036  * @level:      tree level of the tree block
11037  * @owner:      owner of the tree block
11038  *
11039  * Return >0 for any error found and output error message
11040  * Return 0 for no error found
11041  */
11042 static int check_tree_block_ref(struct btrfs_root *root,
11043                                 struct extent_buffer *eb, u64 bytenr,
11044                                 int level, u64 owner, struct node_refs *nrefs)
11045 {
11046         struct btrfs_key key;
11047         struct btrfs_root *extent_root = root->fs_info->extent_root;
11048         struct btrfs_path path;
11049         struct btrfs_extent_item *ei;
11050         struct btrfs_extent_inline_ref *iref;
11051         struct extent_buffer *leaf;
11052         unsigned long end;
11053         unsigned long ptr;
11054         int slot;
11055         int skinny_level;
11056         int root_level = btrfs_header_level(root->node);
11057         int type;
11058         u32 nodesize = root->fs_info->nodesize;
11059         u32 item_size;
11060         u64 offset;
11061         int found_ref = 0;
11062         int err = 0;
11063         int ret;
11064         int strict = 1;
11065         int parent = 0;
11066
11067         btrfs_init_path(&path);
11068         key.objectid = bytenr;
11069         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11070                 key.type = BTRFS_METADATA_ITEM_KEY;
11071         else
11072                 key.type = BTRFS_EXTENT_ITEM_KEY;
11073         key.offset = (u64)-1;
11074
11075         /* Search for the backref in extent tree */
11076         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11077         if (ret < 0) {
11078                 err |= BACKREF_MISSING;
11079                 goto out;
11080         }
11081         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11082         if (ret) {
11083                 err |= BACKREF_MISSING;
11084                 goto out;
11085         }
11086
11087         leaf = path.nodes[0];
11088         slot = path.slots[0];
11089         btrfs_item_key_to_cpu(leaf, &key, slot);
11090
11091         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11092
11093         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11094                 skinny_level = (int)key.offset;
11095                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11096         } else {
11097                 struct btrfs_tree_block_info *info;
11098
11099                 info = (struct btrfs_tree_block_info *)(ei + 1);
11100                 skinny_level = btrfs_tree_block_level(leaf, info);
11101                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11102         }
11103
11104
11105         if (eb) {
11106                 u64 header_gen;
11107                 u64 extent_gen;
11108
11109                 /*
11110                  * Due to the feature of shared tree blocks, if the upper node
11111                  * is a fs root or shared node, the extent of checked node may
11112                  * not be updated until the next CoW.
11113                  */
11114                 if (nrefs)
11115                         strict = should_check_extent_strictly(root, nrefs,
11116                                         level);
11117                 if (!(btrfs_extent_flags(leaf, ei) &
11118                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11119                         error(
11120                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11121                                 key.objectid, nodesize,
11122                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11123                         err = BACKREF_MISMATCH;
11124                 }
11125                 header_gen = btrfs_header_generation(eb);
11126                 extent_gen = btrfs_extent_generation(leaf, ei);
11127                 if (header_gen != extent_gen) {
11128                         error(
11129         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11130                                 key.objectid, nodesize, header_gen,
11131                                 extent_gen);
11132                         err = BACKREF_MISMATCH;
11133                 }
11134                 if (level != skinny_level) {
11135                         error(
11136                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11137                                 key.objectid, nodesize, level, skinny_level);
11138                         err = BACKREF_MISMATCH;
11139                 }
11140                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11141                         error(
11142                         "extent[%llu %u] is referred by other roots than %llu",
11143                                 key.objectid, nodesize, root->objectid);
11144                         err = BACKREF_MISMATCH;
11145                 }
11146         }
11147
11148         /*
11149          * Iterate the extent/metadata item to find the exact backref
11150          */
11151         item_size = btrfs_item_size_nr(leaf, slot);
11152         ptr = (unsigned long)iref;
11153         end = (unsigned long)ei + item_size;
11154
11155         while (ptr < end) {
11156                 iref = (struct btrfs_extent_inline_ref *)ptr;
11157                 type = btrfs_extent_inline_ref_type(leaf, iref);
11158                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11159
11160                 ret = check_extent_inline_ref(leaf, &key, iref);
11161                 if (ret) {
11162                         err |= ret;
11163                         break;
11164                 }
11165                 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11166                         if (offset == root->objectid)
11167                                 found_ref = 1;
11168                         if (!strict && owner == offset)
11169                                 found_ref = 1;
11170                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11171                         /*
11172                          * Backref of tree reloc root points to itself, no need
11173                          * to check backref any more.
11174                          *
11175                          * This may be an error of loop backref, but extent tree
11176                          * checker should have already handled it.
11177                          * Here we only need to avoid infinite iteration.
11178                          */
11179                         if (offset == bytenr) {
11180                                 found_ref = 1;
11181                         } else {
11182                                 /*
11183                                  * Check if the backref points to valid
11184                                  * referencer
11185                                  */
11186                                 found_ref = !check_tree_block_ref( root, NULL,
11187                                                 offset, level + 1, owner,
11188                                                 NULL);
11189                         }
11190                 }
11191
11192                 if (found_ref)
11193                         break;
11194                 ptr += btrfs_extent_inline_ref_size(type);
11195         }
11196
11197         /*
11198          * Inlined extent item doesn't have what we need, check
11199          * TREE_BLOCK_REF_KEY
11200          */
11201         if (!found_ref) {
11202                 btrfs_release_path(&path);
11203                 key.objectid = bytenr;
11204                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11205                 key.offset = root->objectid;
11206
11207                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11208                 if (!ret)
11209                         found_ref = 1;
11210         }
11211         /*
11212          * Finally check SHARED BLOCK REF, any found will be good
11213          * Here we're not doing comprehensive extent backref checking,
11214          * only need to ensure there is some extent referring to this
11215          * tree block.
11216          */
11217         if (!found_ref) {
11218                 btrfs_release_path(&path);
11219                 key.objectid = bytenr;
11220                 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
11221                 key.offset = (u64)-1;
11222
11223                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11224                 if (ret < 0) {
11225                         err |= BACKREF_MISSING;
11226                         goto out;
11227                 }
11228                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11229                 if (ret) {
11230                         err |= BACKREF_MISSING;
11231                         goto out;
11232                 }
11233                 found_ref = 1;
11234         }
11235         if (!found_ref)
11236                 err |= BACKREF_MISSING;
11237 out:
11238         btrfs_release_path(&path);
11239         if (nrefs && strict &&
11240             level < root_level && nrefs->full_backref[level + 1])
11241                 parent = nrefs->bytenr[level + 1];
11242         if (eb && (err & BACKREF_MISSING))
11243                 error(
11244         "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11245                       bytenr, nodesize, owner, level,
11246                       parent ? "parent" : "root",
11247                       parent ? parent : root->objectid);
11248         return err;
11249 }
11250
/*
 * If @err contains BACKREF_MISSING then add extent of the
 * file_extent_data_item.
 *
 * Returns error bits after repair.
 */
11257 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11258                                    struct btrfs_root *root,
11259                                    struct btrfs_path *pathp,
11260                                    struct node_refs *nrefs,
11261                                    int err)
11262 {
11263         struct btrfs_file_extent_item *fi;
11264         struct btrfs_key fi_key;
11265         struct btrfs_key key;
11266         struct btrfs_extent_item *ei;
11267         struct btrfs_path path;
11268         struct btrfs_root *extent_root = root->fs_info->extent_root;
11269         struct extent_buffer *eb;
11270         u64 size;
11271         u64 disk_bytenr;
11272         u64 num_bytes;
11273         u64 parent;
11274         u64 offset;
11275         u64 extent_offset;
11276         u64 file_offset;
11277         int generation;
11278         int slot;
11279         int ret = 0;
11280
11281         eb = pathp->nodes[0];
11282         slot = pathp->slots[0];
11283         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11284         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11285
11286         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11287             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11288                 return err;
11289
11290         file_offset = fi_key.offset;
11291         generation = btrfs_file_extent_generation(eb, fi);
11292         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11293         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11294         extent_offset = btrfs_file_extent_offset(eb, fi);
11295         offset = file_offset - extent_offset;
11296
11297         /* now repair only adds backref */
11298         if ((err & BACKREF_MISSING) == 0)
11299                 return err;
11300
11301         /* search extent item */
11302         key.objectid = disk_bytenr;
11303         key.type = BTRFS_EXTENT_ITEM_KEY;
11304         key.offset = num_bytes;
11305
11306         btrfs_init_path(&path);
11307         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11308         if (ret < 0) {
11309                 ret = -EIO;
11310                 goto out;
11311         }
11312
11313         /* insert an extent item */
11314         if (ret > 0) {
11315                 key.objectid = disk_bytenr;
11316                 key.type = BTRFS_EXTENT_ITEM_KEY;
11317                 key.offset = num_bytes;
11318                 size = sizeof(*ei);
11319
11320                 btrfs_release_path(&path);
11321                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11322                                               size);
11323                 if (ret)
11324                         goto out;
11325                 eb = path.nodes[0];
11326                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
11327
11328                 btrfs_set_extent_refs(eb, ei, 0);
11329                 btrfs_set_extent_generation(eb, ei, generation);
11330                 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11331
11332                 btrfs_mark_buffer_dirty(eb);
11333                 ret = btrfs_update_block_group(extent_root, disk_bytenr,
11334                                                num_bytes, 1, 0);
11335                 btrfs_release_path(&path);
11336         }
11337
11338         if (nrefs->full_backref[0])
11339                 parent = btrfs_header_bytenr(eb);
11340         else
11341                 parent = 0;
11342
11343         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11344                                    root->objectid,
11345                    parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11346                                    offset);
11347         if (ret) {
11348                 error(
11349                 "failed to increase extent data backref[%llu %llu] root %llu",
11350                       disk_bytenr, num_bytes, root->objectid);
11351                 goto out;
11352         } else {
11353                 printf("Add one extent data backref [%llu %llu]\n",
11354                        disk_bytenr, num_bytes);
11355         }
11356
11357         err &= ~BACKREF_MISSING;
11358 out:
11359         if (ret)
11360                 error("can't repair root %llu extent data item[%llu %llu]",
11361                       root->objectid, disk_bytenr, num_bytes);
11362         return err;
11363 }
11364
11365 /*
11366  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11367  *
11368  * Return >0 any error found and output error message
11369  * Return 0 for no error found
11370  */
11371 static int check_extent_data_item(struct btrfs_root *root,
11372                                   struct btrfs_path *pathp,
11373                                   struct node_refs *nrefs,  int account_bytes)
11374 {
11375         struct btrfs_file_extent_item *fi;
11376         struct extent_buffer *eb = pathp->nodes[0];
11377         struct btrfs_path path;
11378         struct btrfs_root *extent_root = root->fs_info->extent_root;
11379         struct btrfs_key fi_key;
11380         struct btrfs_key dbref_key;
11381         struct extent_buffer *leaf;
11382         struct btrfs_extent_item *ei;
11383         struct btrfs_extent_inline_ref *iref;
11384         struct btrfs_extent_data_ref *dref;
11385         u64 owner;
11386         u64 disk_bytenr;
11387         u64 disk_num_bytes;
11388         u64 extent_num_bytes;
11389         u64 extent_flags;
11390         u64 offset;
11391         u32 item_size;
11392         unsigned long end;
11393         unsigned long ptr;
11394         int type;
11395         int found_dbackref = 0;
11396         int slot = pathp->slots[0];
11397         int err = 0;
11398         int ret;
11399         int strict;
11400
11401         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11402         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11403
11404         /* Nothing to check for hole and inline data extents */
11405         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11406             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11407                 return 0;
11408
11409         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11410         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11411         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11412         offset = btrfs_file_extent_offset(eb, fi);
11413
11414         /* Check unaligned disk_num_bytes and num_bytes */
11415         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11416                 error(
11417 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11418                         fi_key.objectid, fi_key.offset, disk_num_bytes,
11419                         root->fs_info->sectorsize);
11420                 err |= BYTES_UNALIGNED;
11421         } else if (account_bytes) {
11422                 data_bytes_allocated += disk_num_bytes;
11423         }
11424         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11425                 error(
11426 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11427                         fi_key.objectid, fi_key.offset, extent_num_bytes,
11428                         root->fs_info->sectorsize);
11429                 err |= BYTES_UNALIGNED;
11430         } else if (account_bytes) {
11431                 data_bytes_referenced += extent_num_bytes;
11432         }
11433         owner = btrfs_header_owner(eb);
11434
11435         /* Check the extent item of the file extent in extent tree */
11436         btrfs_init_path(&path);
11437         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11438         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11439         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11440
11441         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11442         if (ret)
11443                 goto out;
11444
11445         leaf = path.nodes[0];
11446         slot = path.slots[0];
11447         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11448
11449         extent_flags = btrfs_extent_flags(leaf, ei);
11450
11451         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11452                 error(
11453                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11454                     disk_bytenr, disk_num_bytes,
11455                     BTRFS_EXTENT_FLAG_DATA);
11456                 err |= BACKREF_MISMATCH;
11457         }
11458
11459         /* Check data backref inside that extent item */
11460         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11461         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11462         ptr = (unsigned long)iref;
11463         end = (unsigned long)ei + item_size;
11464         strict = should_check_extent_strictly(root, nrefs, -1);
11465
11466         while (ptr < end) {
11467                 u64 ref_root;
11468                 u64 ref_objectid;
11469                 u64 ref_offset;
11470                 bool match = false;
11471
11472                 iref = (struct btrfs_extent_inline_ref *)ptr;
11473                 type = btrfs_extent_inline_ref_type(leaf, iref);
11474                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11475
11476                 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
11477                 if (ret) {
11478                         err |= ret;
11479                         break;
11480                 }
11481                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11482                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
11483                         ref_objectid = btrfs_extent_data_ref_objectid(leaf, dref);
11484                         ref_offset = btrfs_extent_data_ref_offset(leaf, dref);
11485
11486                         if (ref_objectid == fi_key.objectid &&
11487                             ref_offset == fi_key.offset - offset)
11488                                 match = true;
11489                         if (ref_root == root->objectid && match)
11490                                 found_dbackref = 1;
11491                         else if (!strict && owner == ref_root && match)
11492                                 found_dbackref = 1;
11493                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
11494                         found_dbackref = !check_tree_block_ref(root, NULL,
11495                                 btrfs_extent_inline_ref_offset(leaf, iref),
11496                                 0, owner, NULL);
11497                 }
11498
11499                 if (found_dbackref)
11500                         break;
11501                 ptr += btrfs_extent_inline_ref_size(type);
11502         }
11503
11504         if (!found_dbackref) {
11505                 btrfs_release_path(&path);
11506
11507                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11508                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11509                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
11510                 dbref_key.offset = hash_extent_data_ref(root->objectid,
11511                                 fi_key.objectid, fi_key.offset - offset);
11512
11513                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11514                                         &dbref_key, &path, 0, 0);
11515                 if (!ret) {
11516                         found_dbackref = 1;
11517                         goto out;
11518                 }
11519
11520                 btrfs_release_path(&path);
11521
11522                 /*
11523                  * Neither inlined nor EXTENT_DATA_REF found, try
11524                  * SHARED_DATA_REF as last chance.
11525                  */
11526                 dbref_key.objectid = disk_bytenr;
11527                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11528                 dbref_key.offset = eb->start;
11529
11530                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11531                                         &dbref_key, &path, 0, 0);
11532                 if (!ret) {
11533                         found_dbackref = 1;
11534                         goto out;
11535                 }
11536         }
11537
11538 out:
11539         if (!found_dbackref)
11540                 err |= BACKREF_MISSING;
11541         btrfs_release_path(&path);
11542         if (err & BACKREF_MISSING) {
11543                 error("data extent[%llu %llu] backref lost",
11544                       disk_bytenr, disk_num_bytes);
11545         }
11546         return err;
11547 }
11548
11549 /*
11550  * Get real tree block level for the case like shared block
11551  * Return >= 0 as tree level
11552  * Return <0 for error
11553  */
11554 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11555 {
11556         struct extent_buffer *eb;
11557         struct btrfs_path path;
11558         struct btrfs_key key;
11559         struct btrfs_extent_item *ei;
11560         u64 flags;
11561         u64 transid;
11562         u8 backref_level;
11563         u8 header_level;
11564         int ret;
11565
11566         /* Search extent tree for extent generation and level */
11567         key.objectid = bytenr;
11568         key.type = BTRFS_METADATA_ITEM_KEY;
11569         key.offset = (u64)-1;
11570
11571         btrfs_init_path(&path);
11572         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11573         if (ret < 0)
11574                 goto release_out;
11575         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11576         if (ret < 0)
11577                 goto release_out;
11578         if (ret > 0) {
11579                 ret = -ENOENT;
11580                 goto release_out;
11581         }
11582
11583         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11584         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11585                             struct btrfs_extent_item);
11586         flags = btrfs_extent_flags(path.nodes[0], ei);
11587         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11588                 ret = -ENOENT;
11589                 goto release_out;
11590         }
11591
11592         /* Get transid for later read_tree_block() check */
11593         transid = btrfs_extent_generation(path.nodes[0], ei);
11594
11595         /* Get backref level as one source */
11596         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11597                 backref_level = key.offset;
11598         } else {
11599                 struct btrfs_tree_block_info *info;
11600
11601                 info = (struct btrfs_tree_block_info *)(ei + 1);
11602                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11603         }
11604         btrfs_release_path(&path);
11605
11606         /* Get level from tree block as an alternative source */
11607         eb = read_tree_block(fs_info, bytenr, transid);
11608         if (!extent_buffer_uptodate(eb)) {
11609                 free_extent_buffer(eb);
11610                 return -EIO;
11611         }
11612         header_level = btrfs_header_level(eb);
11613         free_extent_buffer(eb);
11614
11615         if (header_level != backref_level)
11616                 return -EIO;
11617         return header_level;
11618
11619 release_out:
11620         btrfs_release_path(&path);
11621         return ret;
11622 }
11623
11624 /*
11625  * Check if a tree block backref is valid (points to a valid tree block)
11626  * if level == -1, level will be resolved
11627  * Return >0 for any error found and print error message
11628  */
11629 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11630                                     u64 bytenr, int level)
11631 {
11632         struct btrfs_root *root;
11633         struct btrfs_key key;
11634         struct btrfs_path path;
11635         struct extent_buffer *eb;
11636         struct extent_buffer *node;
11637         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11638         int err = 0;
11639         int ret;
11640
11641         /* Query level for level == -1 special case */
11642         if (level == -1)
11643                 level = query_tree_block_level(fs_info, bytenr);
11644         if (level < 0) {
11645                 err |= REFERENCER_MISSING;
11646                 goto out;
11647         }
11648
11649         key.objectid = root_id;
11650         key.type = BTRFS_ROOT_ITEM_KEY;
11651         key.offset = (u64)-1;
11652
11653         root = btrfs_read_fs_root(fs_info, &key);
11654         if (IS_ERR(root)) {
11655                 err |= REFERENCER_MISSING;
11656                 goto out;
11657         }
11658
11659         /* Read out the tree block to get item/node key */
11660         eb = read_tree_block(fs_info, bytenr, 0);
11661         if (!extent_buffer_uptodate(eb)) {
11662                 err |= REFERENCER_MISSING;
11663                 free_extent_buffer(eb);
11664                 goto out;
11665         }
11666
11667         /* Empty tree, no need to check key */
11668         if (!btrfs_header_nritems(eb) && !level) {
11669                 free_extent_buffer(eb);
11670                 goto out;
11671         }
11672
11673         if (level)
11674                 btrfs_node_key_to_cpu(eb, &key, 0);
11675         else
11676                 btrfs_item_key_to_cpu(eb, &key, 0);
11677
11678         free_extent_buffer(eb);
11679
11680         btrfs_init_path(&path);
11681         path.lowest_level = level;
11682         /* Search with the first key, to ensure we can reach it */
11683         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11684         if (ret < 0) {
11685                 err |= REFERENCER_MISSING;
11686                 goto release_out;
11687         }
11688
11689         node = path.nodes[level];
11690         if (btrfs_header_bytenr(node) != bytenr) {
11691                 error(
11692         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11693                         bytenr, nodesize, bytenr,
11694                         btrfs_header_bytenr(node));
11695                 err |= REFERENCER_MISMATCH;
11696         }
11697         if (btrfs_header_level(node) != level) {
11698                 error(
11699         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11700                         bytenr, nodesize, level,
11701                         btrfs_header_level(node));
11702                 err |= REFERENCER_MISMATCH;
11703         }
11704
11705 release_out:
11706         btrfs_release_path(&path);
11707 out:
11708         if (err & REFERENCER_MISSING) {
11709                 if (level < 0)
11710                         error("extent [%llu %d] lost referencer (owner: %llu)",
11711                                 bytenr, nodesize, root_id);
11712                 else
11713                         error(
11714                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11715                                 bytenr, nodesize, root_id, level);
11716         }
11717
11718         return err;
11719 }
11720
11721 /*
11722  * Check if tree block @eb is tree reloc root.
11723  * Return 0 if it's not or any problem happens
11724  * Return 1 if it's a tree reloc root
11725  */
11726 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11727                                  struct extent_buffer *eb)
11728 {
11729         struct btrfs_root *tree_reloc_root;
11730         struct btrfs_key key;
11731         u64 bytenr = btrfs_header_bytenr(eb);
11732         u64 owner = btrfs_header_owner(eb);
11733         int ret = 0;
11734
11735         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11736         key.offset = owner;
11737         key.type = BTRFS_ROOT_ITEM_KEY;
11738
11739         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11740         if (IS_ERR(tree_reloc_root))
11741                 return 0;
11742
11743         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
11744                 ret = 1;
11745         btrfs_free_fs_root(tree_reloc_root);
11746         return ret;
11747 }
11748
11749 /*
11750  * Check referencer for shared block backref
11751  * If level == -1, this function will resolve the level.
11752  */
11753 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11754                                      u64 parent, u64 bytenr, int level)
11755 {
11756         struct extent_buffer *eb;
11757         u32 nr;
11758         int found_parent = 0;
11759         int i;
11760
11761         eb = read_tree_block(fs_info, parent, 0);
11762         if (!extent_buffer_uptodate(eb))
11763                 goto out;
11764
11765         if (level == -1)
11766                 level = query_tree_block_level(fs_info, bytenr);
11767         if (level < 0)
11768                 goto out;
11769
11770         /* It's possible it's a tree reloc root */
11771         if (parent == bytenr) {
11772                 if (is_tree_reloc_root(fs_info, eb))
11773                         found_parent = 1;
11774                 goto out;
11775         }
11776
11777         if (level + 1 != btrfs_header_level(eb))
11778                 goto out;
11779
11780         nr = btrfs_header_nritems(eb);
11781         for (i = 0; i < nr; i++) {
11782                 if (bytenr == btrfs_node_blockptr(eb, i)) {
11783                         found_parent = 1;
11784                         break;
11785                 }
11786         }
11787 out:
11788         free_extent_buffer(eb);
11789         if (!found_parent) {
11790                 error(
11791         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
11792                         bytenr, fs_info->nodesize, parent, level);
11793                 return REFERENCER_MISSING;
11794         }
11795         return 0;
11796 }
11797
11798 /*
11799  * Check referencer for normal (inlined) data ref
11800  * If len == 0, it will be resolved by searching in extent tree
11801  */
11802 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
11803                                      u64 root_id, u64 objectid, u64 offset,
11804                                      u64 bytenr, u64 len, u32 count)
11805 {
11806         struct btrfs_root *root;
11807         struct btrfs_root *extent_root = fs_info->extent_root;
11808         struct btrfs_key key;
11809         struct btrfs_path path;
11810         struct extent_buffer *leaf;
11811         struct btrfs_file_extent_item *fi;
11812         u32 found_count = 0;
11813         int slot;
11814         int ret = 0;
11815
11816         if (!len) {
11817                 key.objectid = bytenr;
11818                 key.type = BTRFS_EXTENT_ITEM_KEY;
11819                 key.offset = (u64)-1;
11820
11821                 btrfs_init_path(&path);
11822                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11823                 if (ret < 0)
11824                         goto out;
11825                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11826                 if (ret)
11827                         goto out;
11828                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11829                 if (key.objectid != bytenr ||
11830                     key.type != BTRFS_EXTENT_ITEM_KEY)
11831                         goto out;
11832                 len = key.offset;
11833                 btrfs_release_path(&path);
11834         }
11835         key.objectid = root_id;
11836         key.type = BTRFS_ROOT_ITEM_KEY;
11837         key.offset = (u64)-1;
11838         btrfs_init_path(&path);
11839
11840         root = btrfs_read_fs_root(fs_info, &key);
11841         if (IS_ERR(root))
11842                 goto out;
11843
11844         key.objectid = objectid;
11845         key.type = BTRFS_EXTENT_DATA_KEY;
11846         /*
11847          * It can be nasty as data backref offset is
11848          * file offset - file extent offset, which is smaller or
11849          * equal to original backref offset.  The only special case is
11850          * overflow.  So we need to special check and do further search.
11851          */
11852         key.offset = offset & (1ULL << 63) ? 0 : offset;
11853
11854         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11855         if (ret < 0)
11856                 goto out;
11857
11858         /*
11859          * Search afterwards to get correct one
11860          * NOTE: As we must do a comprehensive check on the data backref to
11861          * make sure the dref count also matches, we must iterate all file
11862          * extents for that inode.
11863          */
11864         while (1) {
11865                 leaf = path.nodes[0];
11866                 slot = path.slots[0];
11867
11868                 if (slot >= btrfs_header_nritems(leaf) ||
11869                     btrfs_header_owner(leaf) != root_id)
11870                         goto next;
11871                 btrfs_item_key_to_cpu(leaf, &key, slot);
11872                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11873                         break;
11874                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11875                 /*
11876                  * Except normal disk bytenr and disk num bytes, we still
11877                  * need to do extra check on dbackref offset as
11878                  * dbackref offset = file_offset - file_extent_offset
11879                  *
11880                  * Also, we must check the leaf owner.
11881                  * In case of shared tree blocks (snapshots) we can inherit
11882                  * leaves from source snapshot.
11883                  * In that case, reference from source snapshot should not
11884                  * count.
11885                  */
11886                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11887                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11888                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11889                     offset && btrfs_header_owner(leaf) == root_id)
11890                         found_count++;
11891
11892 next:
11893                 ret = btrfs_next_item(root, &path);
11894                 if (ret)
11895                         break;
11896         }
11897 out:
11898         btrfs_release_path(&path);
11899         if (found_count != count) {
11900                 error(
11901 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11902                         bytenr, len, root_id, objectid, offset, count, found_count);
11903                 return REFERENCER_MISSING;
11904         }
11905         return 0;
11906 }
11907
11908 /*
11909  * Check if the referencer of a shared data backref exists
11910  */
11911 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11912                                      u64 parent, u64 bytenr)
11913 {
11914         struct extent_buffer *eb;
11915         struct btrfs_key key;
11916         struct btrfs_file_extent_item *fi;
11917         u32 nr;
11918         int found_parent = 0;
11919         int i;
11920
11921         eb = read_tree_block(fs_info, parent, 0);
11922         if (!extent_buffer_uptodate(eb))
11923                 goto out;
11924
11925         nr = btrfs_header_nritems(eb);
11926         for (i = 0; i < nr; i++) {
11927                 btrfs_item_key_to_cpu(eb, &key, i);
11928                 if (key.type != BTRFS_EXTENT_DATA_KEY)
11929                         continue;
11930
11931                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
11932                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11933                         continue;
11934
11935                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11936                         found_parent = 1;
11937                         break;
11938                 }
11939         }
11940
11941 out:
11942         free_extent_buffer(eb);
11943         if (!found_parent) {
11944                 error("shared extent %llu referencer lost (parent: %llu)",
11945                         bytenr, parent);
11946                 return REFERENCER_MISSING;
11947         }
11948         return 0;
11949 }
11950
11951 /*
11952  * Only delete backref if REFERENCER_MISSING now
11953  *
11954  * Returns <0   the extent was deleted
11955  * Returns >0   the backref was deleted but extent still exists, returned value
11956  *               means error after repair
11957  * Returns  0   nothing happened
11958  */
11959 static int repair_extent_item(struct btrfs_trans_handle *trans,
11960                       struct btrfs_root *root, struct btrfs_path *path,
11961                       u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
11962                       u64 owner, u64 offset, int err)
11963 {
11964         struct btrfs_key old_key;
11965         int freed = 0;
11966         int ret;
11967
11968         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
11969
11970         if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
11971                 /* delete the backref */
11972                 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
11973                           num_bytes, parent, root_objectid, owner, offset);
11974                 if (!ret) {
11975                         freed = 1;
11976                         err &= ~REFERENCER_MISSING;
11977                         printf("Delete backref in extent [%llu %llu]\n",
11978                                bytenr, num_bytes);
11979                 } else {
11980                         error("fail to delete backref in extent [%llu %llu]",
11981                                bytenr, num_bytes);
11982                 }
11983         }
11984
11985         /* btrfs_free_extent may delete the extent */
11986         btrfs_release_path(path);
11987         ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
11988
11989         if (ret)
11990                 ret = -ENOENT;
11991         else if (freed)
11992                 ret = err;
11993         return ret;
11994 }
11995
11996 /*
11997  * This function will check a given extent item, including its backref and
11998  * itself (like crossing stripe boundary and type)
11999  *
12000  * Since we don't use extent_record anymore, introduce new error bit
12001  */
12002 static int check_extent_item(struct btrfs_trans_handle *trans,
12003                              struct btrfs_fs_info *fs_info,
12004                              struct btrfs_path *path)
12005 {
12006         struct btrfs_extent_item *ei;
12007         struct btrfs_extent_inline_ref *iref;
12008         struct btrfs_extent_data_ref *dref;
12009         struct extent_buffer *eb = path->nodes[0];
12010         unsigned long end;
12011         unsigned long ptr;
12012         int slot = path->slots[0];
12013         int type;
12014         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12015         u32 item_size = btrfs_item_size_nr(eb, slot);
12016         u64 flags;
12017         u64 offset;
12018         u64 parent;
12019         u64 num_bytes;
12020         u64 root_objectid;
12021         u64 owner;
12022         u64 owner_offset;
12023         int metadata = 0;
12024         int level;
12025         struct btrfs_key key;
12026         int ret;
12027         int err = 0;
12028
12029         btrfs_item_key_to_cpu(eb, &key, slot);
12030         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12031                 bytes_used += key.offset;
12032                 num_bytes = key.offset;
12033         } else {
12034                 bytes_used += nodesize;
12035                 num_bytes = nodesize;
12036         }
12037
12038         if (item_size < sizeof(*ei)) {
12039                 /*
12040                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12041                  * old thing when on disk format is still un-determined.
12042                  * No need to care about it anymore
12043                  */
12044                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12045                 return -ENOTTY;
12046         }
12047
12048         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12049         flags = btrfs_extent_flags(eb, ei);
12050
12051         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12052                 metadata = 1;
12053         if (metadata && check_crossing_stripes(global_info, key.objectid,
12054                                                eb->len)) {
12055                 error("bad metadata [%llu, %llu) crossing stripe boundary",
12056                       key.objectid, key.objectid + nodesize);
12057                 err |= CROSSING_STRIPE_BOUNDARY;
12058         }
12059
12060         ptr = (unsigned long)(ei + 1);
12061
12062         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12063                 /* Old EXTENT_ITEM metadata */
12064                 struct btrfs_tree_block_info *info;
12065
12066                 info = (struct btrfs_tree_block_info *)ptr;
12067                 level = btrfs_tree_block_level(eb, info);
12068                 ptr += sizeof(struct btrfs_tree_block_info);
12069         } else {
12070                 /* New METADATA_ITEM */
12071                 level = key.offset;
12072         }
12073         end = (unsigned long)ei + item_size;
12074
12075 next:
12076         /* Reached extent item end normally */
12077         if (ptr == end)
12078                 goto out;
12079
12080         /* Beyond extent item end, wrong item size */
12081         if (ptr > end) {
12082                 err |= ITEM_SIZE_MISMATCH;
12083                 error("extent item at bytenr %llu slot %d has wrong size",
12084                         eb->start, slot);
12085                 goto out;
12086         }
12087
12088         parent = 0;
12089         root_objectid = 0;
12090         owner = 0;
12091         owner_offset = 0;
12092         /* Now check every backref in this extent item */
12093         iref = (struct btrfs_extent_inline_ref *)ptr;
12094         type = btrfs_extent_inline_ref_type(eb, iref);
12095         offset = btrfs_extent_inline_ref_offset(eb, iref);
12096         switch (type) {
12097         case BTRFS_TREE_BLOCK_REF_KEY:
12098                 root_objectid = offset;
12099                 owner = level;
12100                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12101                                                level);
12102                 err |= ret;
12103                 break;
12104         case BTRFS_SHARED_BLOCK_REF_KEY:
12105                 parent = offset;
12106                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12107                                                  level);
12108                 err |= ret;
12109                 break;
12110         case BTRFS_EXTENT_DATA_REF_KEY:
12111                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12112                 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12113                 owner = btrfs_extent_data_ref_objectid(eb, dref);
12114                 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12115                 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12116                                         owner_offset, key.objectid, key.offset,
12117                                         btrfs_extent_data_ref_count(eb, dref));
12118                 err |= ret;
12119                 break;
12120         case BTRFS_SHARED_DATA_REF_KEY:
12121                 parent = offset;
12122                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12123                 err |= ret;
12124                 break;
12125         default:
12126                 error("extent[%llu %d %llu] has unknown ref type: %d",
12127                         key.objectid, key.type, key.offset, type);
12128                 ret = UNKNOWN_TYPE;
12129                 err |= ret;
12130                 goto out;
12131         }
12132
12133         if (err && repair) {
12134                 ret = repair_extent_item(trans, fs_info->extent_root, path,
12135                          key.objectid, num_bytes, parent, root_objectid,
12136                          owner, owner_offset, ret);
12137                 if (ret < 0)
12138                         goto out;
12139                 if (ret) {
12140                         goto next;
12141                         err = ret;
12142                 }
12143         }
12144
12145         ptr += btrfs_extent_inline_ref_size(type);
12146         goto next;
12147
12148 out:
12149         return err;
12150 }
12151
12152 /*
12153  * Check if a dev extent item is referred correctly by its chunk
12154  */
12155 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12156                                  struct extent_buffer *eb, int slot)
12157 {
12158         struct btrfs_root *chunk_root = fs_info->chunk_root;
12159         struct btrfs_dev_extent *ptr;
12160         struct btrfs_path path;
12161         struct btrfs_key chunk_key;
12162         struct btrfs_key devext_key;
12163         struct btrfs_chunk *chunk;
12164         struct extent_buffer *l;
12165         int num_stripes;
12166         u64 length;
12167         int i;
12168         int found_chunk = 0;
12169         int ret;
12170
12171         btrfs_item_key_to_cpu(eb, &devext_key, slot);
12172         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12173         length = btrfs_dev_extent_length(eb, ptr);
12174
12175         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12176         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12177         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12178
12179         btrfs_init_path(&path);
12180         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12181         if (ret)
12182                 goto out;
12183
12184         l = path.nodes[0];
12185         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12186         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12187                                       chunk_key.offset);
12188         if (ret < 0)
12189                 goto out;
12190
12191         if (btrfs_stripe_length(fs_info, l, chunk) != length)
12192                 goto out;
12193
12194         num_stripes = btrfs_chunk_num_stripes(l, chunk);
12195         for (i = 0; i < num_stripes; i++) {
12196                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12197                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12198
12199                 if (devid == devext_key.objectid &&
12200                     offset == devext_key.offset) {
12201                         found_chunk = 1;
12202                         break;
12203                 }
12204         }
12205 out:
12206         btrfs_release_path(&path);
12207         if (!found_chunk) {
12208                 error(
12209                 "device extent[%llu, %llu, %llu] did not find the related chunk",
12210                         devext_key.objectid, devext_key.offset, length);
12211                 return REFERENCER_MISSING;
12212         }
12213         return 0;
12214 }
12215
12216 /*
12217  * Check if the used space is correct with the dev item
12218  */
12219 static int check_dev_item(struct btrfs_fs_info *fs_info,
12220                           struct extent_buffer *eb, int slot)
12221 {
12222         struct btrfs_root *dev_root = fs_info->dev_root;
12223         struct btrfs_dev_item *dev_item;
12224         struct btrfs_path path;
12225         struct btrfs_key key;
12226         struct btrfs_dev_extent *ptr;
12227         u64 total_bytes;
12228         u64 dev_id;
12229         u64 used;
12230         u64 total = 0;
12231         int ret;
12232
12233         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12234         dev_id = btrfs_device_id(eb, dev_item);
12235         used = btrfs_device_bytes_used(eb, dev_item);
12236         total_bytes = btrfs_device_total_bytes(eb, dev_item);
12237
12238         key.objectid = dev_id;
12239         key.type = BTRFS_DEV_EXTENT_KEY;
12240         key.offset = 0;
12241
12242         btrfs_init_path(&path);
12243         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
12244         if (ret < 0) {
12245                 btrfs_item_key_to_cpu(eb, &key, slot);
12246                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12247                         key.objectid, key.type, key.offset);
12248                 btrfs_release_path(&path);
12249                 return REFERENCER_MISSING;
12250         }
12251
12252         /* Iterate dev_extents to calculate the used space of a device */
12253         while (1) {
12254                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12255                         goto next;
12256
12257                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12258                 if (key.objectid > dev_id)
12259                         break;
12260                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12261                         goto next;
12262
12263                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12264                                      struct btrfs_dev_extent);
12265                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12266 next:
12267                 ret = btrfs_next_item(dev_root, &path);
12268                 if (ret)
12269                         break;
12270         }
12271         btrfs_release_path(&path);
12272
12273         if (used != total) {
12274                 btrfs_item_key_to_cpu(eb, &key, slot);
12275                 error(
12276 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12277                         total, used, BTRFS_ROOT_TREE_OBJECTID,
12278                         BTRFS_DEV_EXTENT_KEY, dev_id);
12279                 return ACCOUNTING_MISMATCH;
12280         }
12281         check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize);
12282
12283         return 0;
12284 }
12285
12286 /*
12287  * Check a block group item with its referener (chunk) and its used space
12288  * with extent/metadata item
12289  */
12290 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12291                                   struct extent_buffer *eb, int slot)
12292 {
12293         struct btrfs_root *extent_root = fs_info->extent_root;
12294         struct btrfs_root *chunk_root = fs_info->chunk_root;
12295         struct btrfs_block_group_item *bi;
12296         struct btrfs_block_group_item bg_item;
12297         struct btrfs_path path;
12298         struct btrfs_key bg_key;
12299         struct btrfs_key chunk_key;
12300         struct btrfs_key extent_key;
12301         struct btrfs_chunk *chunk;
12302         struct extent_buffer *leaf;
12303         struct btrfs_extent_item *ei;
12304         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12305         u64 flags;
12306         u64 bg_flags;
12307         u64 used;
12308         u64 total = 0;
12309         int ret;
12310         int err = 0;
12311
12312         btrfs_item_key_to_cpu(eb, &bg_key, slot);
12313         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
12314         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12315         used = btrfs_block_group_used(&bg_item);
12316         bg_flags = btrfs_block_group_flags(&bg_item);
12317
12318         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12319         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12320         chunk_key.offset = bg_key.objectid;
12321
12322         btrfs_init_path(&path);
12323         /* Search for the referencer chunk */
12324         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12325         if (ret) {
12326                 error(
12327                 "block group[%llu %llu] did not find the related chunk item",
12328                         bg_key.objectid, bg_key.offset);
12329                 err |= REFERENCER_MISSING;
12330         } else {
12331                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12332                                         struct btrfs_chunk);
12333                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12334                                                 bg_key.offset) {
12335                         error(
12336         "block group[%llu %llu] related chunk item length does not match",
12337                                 bg_key.objectid, bg_key.offset);
12338                         err |= REFERENCER_MISMATCH;
12339                 }
12340         }
12341         btrfs_release_path(&path);
12342
12343         /* Search from the block group bytenr */
12344         extent_key.objectid = bg_key.objectid;
12345         extent_key.type = 0;
12346         extent_key.offset = 0;
12347
12348         btrfs_init_path(&path);
12349         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12350         if (ret < 0)
12351                 goto out;
12352
12353         /* Iterate extent tree to account used space */
12354         while (1) {
12355                 leaf = path.nodes[0];
12356
12357                 /* Search slot can point to the last item beyond leaf nritems */
12358                 if (path.slots[0] >= btrfs_header_nritems(leaf))
12359                         goto next;
12360
12361                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
12362                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12363                         break;
12364
12365                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12366                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12367                         goto next;
12368                 if (extent_key.objectid < bg_key.objectid)
12369                         goto next;
12370
12371                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12372                         total += nodesize;
12373                 else
12374                         total += extent_key.offset;
12375
12376                 ei = btrfs_item_ptr(leaf, path.slots[0],
12377                                     struct btrfs_extent_item);
12378                 flags = btrfs_extent_flags(leaf, ei);
12379                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12380                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12381                                 error(
12382                         "bad extent[%llu, %llu) type mismatch with chunk",
12383                                         extent_key.objectid,
12384                                         extent_key.objectid + extent_key.offset);
12385                                 err |= CHUNK_TYPE_MISMATCH;
12386                         }
12387                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12388                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12389                                     BTRFS_BLOCK_GROUP_METADATA))) {
12390                                 error(
12391                         "bad extent[%llu, %llu) type mismatch with chunk",
12392                                         extent_key.objectid,
12393                                         extent_key.objectid + nodesize);
12394                                 err |= CHUNK_TYPE_MISMATCH;
12395                         }
12396                 }
12397 next:
12398                 ret = btrfs_next_item(extent_root, &path);
12399                 if (ret)
12400                         break;
12401         }
12402
12403 out:
12404         btrfs_release_path(&path);
12405
12406         if (total != used) {
12407                 error(
12408                 "block group[%llu %llu] used %llu but extent items used %llu",
12409                         bg_key.objectid, bg_key.offset, used, total);
12410                 err |= BG_ACCOUNTING_ERROR;
12411         }
12412         return err;
12413 }
12414
12415 /*
12416  * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
12417  * FIXME: We still need to repair error of dev_item.
12418  *
12419  * Returns error after repair.
12420  */
12421 static int repair_chunk_item(struct btrfs_trans_handle *trans,
12422                              struct btrfs_root *chunk_root,
12423                              struct btrfs_path *path, int err)
12424 {
12425         struct btrfs_chunk *chunk;
12426         struct btrfs_key chunk_key;
12427         struct extent_buffer *eb = path->nodes[0];
12428         u64 length;
12429         int slot = path->slots[0];
12430         u64 type;
12431         int ret = 0;
12432
12433         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12434         if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
12435                 return err;
12436         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12437         type = btrfs_chunk_type(path->nodes[0], chunk);
12438         length = btrfs_chunk_length(eb, chunk);
12439
12440         if (err & REFERENCER_MISSING) {
12441                 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
12442                                              type, chunk_key.offset, length);
12443                 if (ret) {
12444                         error("fail to add block group item[%llu %llu]",
12445                               chunk_key.offset, length);
12446                         goto out;
12447                 } else {
12448                         err &= ~REFERENCER_MISSING;
12449                         printf("Added block group item[%llu %llu]\n",
12450                                chunk_key.offset, length);
12451                 }
12452         }
12453
12454 out:
12455         return err;
12456 }
12457
12458 /*
12459  * Check a chunk item.
12460  * Including checking all referred dev_extents and block group
12461  */
12462 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12463                             struct extent_buffer *eb, int slot)
12464 {
12465         struct btrfs_root *extent_root = fs_info->extent_root;
12466         struct btrfs_root *dev_root = fs_info->dev_root;
12467         struct btrfs_path path;
12468         struct btrfs_key chunk_key;
12469         struct btrfs_key bg_key;
12470         struct btrfs_key devext_key;
12471         struct btrfs_chunk *chunk;
12472         struct extent_buffer *leaf;
12473         struct btrfs_block_group_item *bi;
12474         struct btrfs_block_group_item bg_item;
12475         struct btrfs_dev_extent *ptr;
12476         u64 length;
12477         u64 chunk_end;
12478         u64 stripe_len;
12479         u64 type;
12480         int num_stripes;
12481         u64 offset;
12482         u64 objectid;
12483         int i;
12484         int ret;
12485         int err = 0;
12486
12487         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12488         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12489         length = btrfs_chunk_length(eb, chunk);
12490         chunk_end = chunk_key.offset + length;
12491         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12492                                       chunk_key.offset);
12493         if (ret < 0) {
12494                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12495                         chunk_end);
12496                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12497                 goto out;
12498         }
12499         type = btrfs_chunk_type(eb, chunk);
12500
12501         bg_key.objectid = chunk_key.offset;
12502         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12503         bg_key.offset = length;
12504
12505         btrfs_init_path(&path);
12506         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12507         if (ret) {
12508                 error(
12509                 "chunk[%llu %llu) did not find the related block group item",
12510                         chunk_key.offset, chunk_end);
12511                 err |= REFERENCER_MISSING;
12512         } else{
12513                 leaf = path.nodes[0];
12514                 bi = btrfs_item_ptr(leaf, path.slots[0],
12515                                     struct btrfs_block_group_item);
12516                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
12517                                    sizeof(bg_item));
12518                 if (btrfs_block_group_flags(&bg_item) != type) {
12519                         error(
12520 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12521                                 chunk_key.offset, chunk_end, type,
12522                                 btrfs_block_group_flags(&bg_item));
12523                         err |= REFERENCER_MISSING;
12524                 }
12525         }
12526
12527         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12528         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
12529         for (i = 0; i < num_stripes; i++) {
12530                 btrfs_release_path(&path);
12531                 btrfs_init_path(&path);
12532                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12533                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12534                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12535
12536                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12537                                         0, 0);
12538                 if (ret)
12539                         goto not_match_dev;
12540
12541                 leaf = path.nodes[0];
12542                 ptr = btrfs_item_ptr(leaf, path.slots[0],
12543                                      struct btrfs_dev_extent);
12544                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12545                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
12546                 if (objectid != chunk_key.objectid ||
12547                     offset != chunk_key.offset ||
12548                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12549                         goto not_match_dev;
12550                 continue;
12551 not_match_dev:
12552                 err |= BACKREF_MISSING;
12553                 error(
12554                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12555                         chunk_key.objectid, chunk_end, i);
12556                 continue;
12557         }
12558         btrfs_release_path(&path);
12559 out:
12560         return err;
12561 }
12562
12563 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
12564                                    struct btrfs_root *root,
12565                                    struct btrfs_path *path)
12566 {
12567         struct btrfs_key key;
12568         int ret = 0;
12569
12570         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
12571         btrfs_release_path(path);
12572         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
12573         if (ret) {
12574                 ret = -ENOENT;
12575                 goto out;
12576         }
12577
12578         ret = btrfs_del_item(trans, root, path);
12579         if (ret)
12580                 goto out;
12581
12582         if (path->slots[0] == 0)
12583                 btrfs_prev_leaf(root, path);
12584         else
12585                 path->slots[0]--;
12586 out:
12587         if (ret)
12588                 error("failed to delete root %llu item[%llu, %u, %llu]",
12589                       root->objectid, key.objectid, key.type, key.offset);
12590         else
12591                 printf("Deleted root %llu item[%llu, %u, %llu]\n",
12592                        root->objectid, key.objectid, key.type, key.offset);
12593         return ret;
12594 }
12595
12596 /*
12597  * Main entry function to check known items and update related accounting info
12598  */
12599 static int check_leaf_items(struct btrfs_trans_handle *trans,
12600                             struct btrfs_root *root, struct btrfs_path *path,
12601                             struct node_refs *nrefs, int account_bytes)
12602 {
12603         struct btrfs_fs_info *fs_info = root->fs_info;
12604         struct btrfs_key key;
12605         struct extent_buffer *eb;
12606         int slot;
12607         int type;
12608         struct btrfs_extent_data_ref *dref;
12609         int ret = 0;
12610         int err = 0;
12611
12612 again:
12613         eb = path->nodes[0];
12614         slot = path->slots[0];
12615         if (slot >= btrfs_header_nritems(eb)) {
12616                 if (slot == 0) {
12617                         error("empty leaf [%llu %u] root %llu", eb->start,
12618                                 root->fs_info->nodesize, root->objectid);
12619                         err |= EIO;
12620                 }
12621                 goto out;
12622         }
12623
12624         btrfs_item_key_to_cpu(eb, &key, slot);
12625         type = key.type;
12626
12627         switch (type) {
12628         case BTRFS_EXTENT_DATA_KEY:
12629                 ret = check_extent_data_item(root, path, nrefs, account_bytes);
12630                 if (repair && ret)
12631                         ret = repair_extent_data_item(trans, root, path, nrefs,
12632                                                       ret);
12633                 err |= ret;
12634                 break;
12635         case BTRFS_BLOCK_GROUP_ITEM_KEY:
12636                 ret = check_block_group_item(fs_info, eb, slot);
12637                 if (repair &&
12638                     ret & REFERENCER_MISSING)
12639                         ret = delete_extent_tree_item(trans, root, path);
12640                 err |= ret;
12641                 break;
12642         case BTRFS_DEV_ITEM_KEY:
12643                 ret = check_dev_item(fs_info, eb, slot);
12644                 err |= ret;
12645                 break;
12646         case BTRFS_CHUNK_ITEM_KEY:
12647                 ret = check_chunk_item(fs_info, eb, slot);
12648                 if (repair && ret)
12649                         ret = repair_chunk_item(trans, root, path, ret);
12650                 err |= ret;
12651                 break;
12652         case BTRFS_DEV_EXTENT_KEY:
12653                 ret = check_dev_extent_item(fs_info, eb, slot);
12654                 err |= ret;
12655                 break;
12656         case BTRFS_EXTENT_ITEM_KEY:
12657         case BTRFS_METADATA_ITEM_KEY:
12658                 ret = check_extent_item(trans, fs_info, path);
12659                 err |= ret;
12660                 break;
12661         case BTRFS_EXTENT_CSUM_KEY:
12662                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
12663                 err |= ret;
12664                 break;
12665         case BTRFS_TREE_BLOCK_REF_KEY:
12666                 ret = check_tree_block_backref(fs_info, key.offset,
12667                                                key.objectid, -1);
12668                 if (repair &&
12669                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12670                         ret = delete_extent_tree_item(trans, root, path);
12671                 err |= ret;
12672                 break;
12673         case BTRFS_EXTENT_DATA_REF_KEY:
12674                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
12675                 ret = check_extent_data_backref(fs_info,
12676                                 btrfs_extent_data_ref_root(eb, dref),
12677                                 btrfs_extent_data_ref_objectid(eb, dref),
12678                                 btrfs_extent_data_ref_offset(eb, dref),
12679                                 key.objectid, 0,
12680                                 btrfs_extent_data_ref_count(eb, dref));
12681                 if (repair &&
12682                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12683                         ret = delete_extent_tree_item(trans, root, path);
12684                 err |= ret;
12685                 break;
12686         case BTRFS_SHARED_BLOCK_REF_KEY:
12687                 ret = check_shared_block_backref(fs_info, key.offset,
12688                                                  key.objectid, -1);
12689                 if (repair &&
12690                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12691                         ret = delete_extent_tree_item(trans, root, path);
12692                 err |= ret;
12693                 break;
12694         case BTRFS_SHARED_DATA_REF_KEY:
12695                 ret = check_shared_data_backref(fs_info, key.offset,
12696                                                 key.objectid);
12697                 if (repair &&
12698                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12699                         ret = delete_extent_tree_item(trans, root, path);
12700                 err |= ret;
12701                 break;
12702         default:
12703                 break;
12704         }
12705
12706         ++path->slots[0];
12707         goto again;
12708 out:
12709         return err;
12710 }
12711
12712 /*
12713  * Low memory usage version check_chunks_and_extents.
12714  */
12715 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12716 {
12717         struct btrfs_trans_handle *trans = NULL;
12718         struct btrfs_path path;
12719         struct btrfs_key old_key;
12720         struct btrfs_key key;
12721         struct btrfs_root *root1;
12722         struct btrfs_root *root;
12723         struct btrfs_root *cur_root;
12724         int err = 0;
12725         int ret;
12726
12727         root = fs_info->fs_root;
12728
12729         if (repair) {
12730                 trans = btrfs_start_transaction(fs_info->extent_root, 1);
12731                 if (IS_ERR(trans)) {
12732                         error("failed to start transaction before check");
12733                         return PTR_ERR(trans);
12734                 }
12735         }
12736
12737         root1 = root->fs_info->chunk_root;
12738         ret = check_btrfs_root(trans, root1, 0, 1);
12739         err |= ret;
12740
12741         root1 = root->fs_info->tree_root;
12742         ret = check_btrfs_root(trans, root1, 0, 1);
12743         err |= ret;
12744
12745         btrfs_init_path(&path);
12746         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12747         key.offset = 0;
12748         key.type = BTRFS_ROOT_ITEM_KEY;
12749
12750         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
12751         if (ret) {
12752                 error("cannot find extent tree in tree_root");
12753                 goto out;
12754         }
12755
12756         while (1) {
12757                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12758                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12759                         goto next;
12760                 old_key = key;
12761                 key.offset = (u64)-1;
12762
12763                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12764                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
12765                                         &key);
12766                 else
12767                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
12768                 if (IS_ERR(cur_root) || !cur_root) {
12769                         error("failed to read tree: %lld", key.objectid);
12770                         goto next;
12771                 }
12772
12773                 ret = check_btrfs_root(trans, cur_root, 0, 1);
12774                 err |= ret;
12775
12776                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12777                         btrfs_free_fs_root(cur_root);
12778
12779                 btrfs_release_path(&path);
12780                 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
12781                                         &old_key, &path, 0, 0);
12782                 if (ret)
12783                         goto out;
12784 next:
12785                 ret = btrfs_next_item(root1, &path);
12786                 if (ret)
12787                         goto out;
12788         }
12789 out:
12790
12791         /* if repair, update block accounting */
12792         if (repair) {
12793                 ret = btrfs_fix_block_accounting(trans, root);
12794                 if (ret)
12795                         err |= ret;
12796                 else
12797                         err &= ~BG_ACCOUNTING_ERROR;
12798         }
12799
12800         if (trans)
12801                 btrfs_commit_transaction(trans, root->fs_info->extent_root);
12802
12803         btrfs_release_path(&path);
12804
12805         return err;
12806 }
12807
12808 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
12809 {
12810         int ret;
12811
12812         if (!ctx.progress_enabled)
12813                 fprintf(stderr, "checking extents\n");
12814         if (check_mode == CHECK_MODE_LOWMEM)
12815                 ret = check_chunks_and_extents_v2(fs_info);
12816         else
12817                 ret = check_chunks_and_extents(fs_info);
12818
12819         /* Also repair device size related problems */
12820         if (repair && !ret) {
12821                 ret = btrfs_fix_device_and_super_size(fs_info);
12822                 if (ret > 0)
12823                         ret = 0;
12824         }
12825         return ret;
12826 }
12827
12828 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
12829                            struct btrfs_root *root, int overwrite)
12830 {
12831         struct extent_buffer *c;
12832         struct extent_buffer *old = root->node;
12833         int level;
12834         int ret;
12835         struct btrfs_disk_key disk_key = {0,0,0};
12836
12837         level = 0;
12838
12839         if (overwrite) {
12840                 c = old;
12841                 extent_buffer_get(c);
12842                 goto init;
12843         }
12844         c = btrfs_alloc_free_block(trans, root,
12845                                    root->fs_info->nodesize,
12846                                    root->root_key.objectid,
12847                                    &disk_key, level, 0, 0);
12848         if (IS_ERR(c)) {
12849                 c = old;
12850                 extent_buffer_get(c);
12851                 overwrite = 1;
12852         }
12853 init:
12854         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
12855         btrfs_set_header_level(c, level);
12856         btrfs_set_header_bytenr(c, c->start);
12857         btrfs_set_header_generation(c, trans->transid);
12858         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
12859         btrfs_set_header_owner(c, root->root_key.objectid);
12860
12861         write_extent_buffer(c, root->fs_info->fsid,
12862                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
12863
12864         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
12865                             btrfs_header_chunk_tree_uuid(c),
12866                             BTRFS_UUID_SIZE);
12867
12868         btrfs_mark_buffer_dirty(c);
12869         /*
12870          * this case can happen in the following case:
12871          *
12872          * 1.overwrite previous root.
12873          *
12874          * 2.reinit reloc data root, this is because we skip pin
12875          * down reloc data tree before which means we can allocate
12876          * same block bytenr here.
12877          */
12878         if (old->start == c->start) {
12879                 btrfs_set_root_generation(&root->root_item,
12880                                           trans->transid);
12881                 root->root_item.level = btrfs_header_level(root->node);
12882                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12883                                         &root->root_key, &root->root_item);
12884                 if (ret) {
12885                         free_extent_buffer(c);
12886                         return ret;
12887                 }
12888         }
12889         free_extent_buffer(old);
12890         root->node = c;
12891         add_root_to_dirty_list(root);
12892         return 0;
12893 }
12894
12895 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12896                                 struct extent_buffer *eb, int tree_root)
12897 {
12898         struct extent_buffer *tmp;
12899         struct btrfs_root_item *ri;
12900         struct btrfs_key key;
12901         u64 bytenr;
12902         int level = btrfs_header_level(eb);
12903         int nritems;
12904         int ret;
12905         int i;
12906
12907         /*
12908          * If we have pinned this block before, don't pin it again.
12909          * This can not only avoid forever loop with broken filesystem
12910          * but also give us some speedups.
12911          */
12912         if (test_range_bit(&fs_info->pinned_extents, eb->start,
12913                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12914                 return 0;
12915
12916         btrfs_pin_extent(fs_info, eb->start, eb->len);
12917
12918         nritems = btrfs_header_nritems(eb);
12919         for (i = 0; i < nritems; i++) {
12920                 if (level == 0) {
12921                         btrfs_item_key_to_cpu(eb, &key, i);
12922                         if (key.type != BTRFS_ROOT_ITEM_KEY)
12923                                 continue;
12924                         /* Skip the extent root and reloc roots */
12925                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
12926                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12927                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12928                                 continue;
12929                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12930                         bytenr = btrfs_disk_root_bytenr(eb, ri);
12931
12932                         /*
12933                          * If at any point we start needing the real root we
12934                          * will have to build a stump root for the root we are
12935                          * in, but for now this doesn't actually use the root so
12936                          * just pass in extent_root.
12937                          */
12938                         tmp = read_tree_block(fs_info, bytenr, 0);
12939                         if (!extent_buffer_uptodate(tmp)) {
12940                                 fprintf(stderr, "Error reading root block\n");
12941                                 return -EIO;
12942                         }
12943                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
12944                         free_extent_buffer(tmp);
12945                         if (ret)
12946                                 return ret;
12947                 } else {
12948                         bytenr = btrfs_node_blockptr(eb, i);
12949
12950                         /* If we aren't the tree root don't read the block */
12951                         if (level == 1 && !tree_root) {
12952                                 btrfs_pin_extent(fs_info, bytenr,
12953                                                 fs_info->nodesize);
12954                                 continue;
12955                         }
12956
12957                         tmp = read_tree_block(fs_info, bytenr, 0);
12958                         if (!extent_buffer_uptodate(tmp)) {
12959                                 fprintf(stderr, "Error reading tree block\n");
12960                                 return -EIO;
12961                         }
12962                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
12963                         free_extent_buffer(tmp);
12964                         if (ret)
12965                                 return ret;
12966                 }
12967         }
12968
12969         return 0;
12970 }
12971
12972 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
12973 {
12974         int ret;
12975
12976         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
12977         if (ret)
12978                 return ret;
12979
12980         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
12981 }
12982
12983 static int reset_block_groups(struct btrfs_fs_info *fs_info)
12984 {
12985         struct btrfs_block_group_cache *cache;
12986         struct btrfs_path path;
12987         struct extent_buffer *leaf;
12988         struct btrfs_chunk *chunk;
12989         struct btrfs_key key;
12990         int ret;
12991         u64 start;
12992
12993         btrfs_init_path(&path);
12994         key.objectid = 0;
12995         key.type = BTRFS_CHUNK_ITEM_KEY;
12996         key.offset = 0;
12997         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
12998         if (ret < 0) {
12999                 btrfs_release_path(&path);
13000                 return ret;
13001         }
13002
13003         /*
13004          * We do this in case the block groups were screwed up and had alloc
13005          * bits that aren't actually set on the chunks.  This happens with
13006          * restored images every time and could happen in real life I guess.
13007          */
13008         fs_info->avail_data_alloc_bits = 0;
13009         fs_info->avail_metadata_alloc_bits = 0;
13010         fs_info->avail_system_alloc_bits = 0;
13011
13012         /* First we need to create the in-memory block groups */
13013         while (1) {
13014                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13015                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13016                         if (ret < 0) {
13017                                 btrfs_release_path(&path);
13018                                 return ret;
13019                         }
13020                         if (ret) {
13021                                 ret = 0;
13022                                 break;
13023                         }
13024                 }
13025                 leaf = path.nodes[0];
13026                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13027                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13028                         path.slots[0]++;
13029                         continue;
13030                 }
13031
13032                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13033                 btrfs_add_block_group(fs_info, 0,
13034                                       btrfs_chunk_type(leaf, chunk), key.offset,
13035                                       btrfs_chunk_length(leaf, chunk));
13036                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13037                                  key.offset + btrfs_chunk_length(leaf, chunk));
13038                 path.slots[0]++;
13039         }
13040         start = 0;
13041         while (1) {
13042                 cache = btrfs_lookup_first_block_group(fs_info, start);
13043                 if (!cache)
13044                         break;
13045                 cache->cached = 1;
13046                 start = cache->key.objectid + cache->key.offset;
13047         }
13048
13049         btrfs_release_path(&path);
13050         return 0;
13051 }
13052
13053 static int reset_balance(struct btrfs_trans_handle *trans,
13054                          struct btrfs_fs_info *fs_info)
13055 {
13056         struct btrfs_root *root = fs_info->tree_root;
13057         struct btrfs_path path;
13058         struct extent_buffer *leaf;
13059         struct btrfs_key key;
13060         int del_slot, del_nr = 0;
13061         int ret;
13062         int found = 0;
13063
13064         btrfs_init_path(&path);
13065         key.objectid = BTRFS_BALANCE_OBJECTID;
13066         key.type = BTRFS_BALANCE_ITEM_KEY;
13067         key.offset = 0;
13068         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13069         if (ret) {
13070                 if (ret > 0)
13071                         ret = 0;
13072                 if (!ret)
13073                         goto reinit_data_reloc;
13074                 else
13075                         goto out;
13076         }
13077
13078         ret = btrfs_del_item(trans, root, &path);
13079         if (ret)
13080                 goto out;
13081         btrfs_release_path(&path);
13082
13083         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13084         key.type = BTRFS_ROOT_ITEM_KEY;
13085         key.offset = 0;
13086         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13087         if (ret < 0)
13088                 goto out;
13089         while (1) {
13090                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13091                         if (!found)
13092                                 break;
13093
13094                         if (del_nr) {
13095                                 ret = btrfs_del_items(trans, root, &path,
13096                                                       del_slot, del_nr);
13097                                 del_nr = 0;
13098                                 if (ret)
13099                                         goto out;
13100                         }
13101                         key.offset++;
13102                         btrfs_release_path(&path);
13103
13104                         found = 0;
13105                         ret = btrfs_search_slot(trans, root, &key, &path,
13106                                                 -1, 1);
13107                         if (ret < 0)
13108                                 goto out;
13109                         continue;
13110                 }
13111                 found = 1;
13112                 leaf = path.nodes[0];
13113                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13114                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13115                         break;
13116                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13117                         path.slots[0]++;
13118                         continue;
13119                 }
13120                 if (!del_nr) {
13121                         del_slot = path.slots[0];
13122                         del_nr = 1;
13123                 } else {
13124                         del_nr++;
13125                 }
13126                 path.slots[0]++;
13127         }
13128
13129         if (del_nr) {
13130                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13131                 if (ret)
13132                         goto out;
13133         }
13134         btrfs_release_path(&path);
13135
13136 reinit_data_reloc:
13137         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13138         key.type = BTRFS_ROOT_ITEM_KEY;
13139         key.offset = (u64)-1;
13140         root = btrfs_read_fs_root(fs_info, &key);
13141         if (IS_ERR(root)) {
13142                 fprintf(stderr, "Error reading data reloc tree\n");
13143                 ret = PTR_ERR(root);
13144                 goto out;
13145         }
13146         record_root_in_trans(trans, root);
13147         ret = btrfs_fsck_reinit_root(trans, root, 0);
13148         if (ret)
13149                 goto out;
13150         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13151 out:
13152         btrfs_release_path(&path);
13153         return ret;
13154 }
13155
13156 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13157                               struct btrfs_fs_info *fs_info)
13158 {
13159         u64 start = 0;
13160         int ret;
13161
13162         /*
13163          * The only reason we don't do this is because right now we're just
13164          * walking the trees we find and pinning down their bytes, we don't look
13165          * at any of the leaves.  In order to do mixed groups we'd have to check
13166          * the leaves of any fs roots and pin down the bytes for any file
13167          * extents we find.  Not hard but why do it if we don't have to?
13168          */
13169         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13170                 fprintf(stderr, "We don't support re-initing the extent tree "
13171                         "for mixed block groups yet, please notify a btrfs "
13172                         "developer you want to do this so they can add this "
13173                         "functionality.\n");
13174                 return -EINVAL;
13175         }
13176
13177         /*
13178          * first we need to walk all of the trees except the extent tree and pin
13179          * down the bytes that are in use so we don't overwrite any existing
13180          * metadata.
13181          */
13182         ret = pin_metadata_blocks(fs_info);
13183         if (ret) {
13184                 fprintf(stderr, "error pinning down used bytes\n");
13185                 return ret;
13186         }
13187
13188         /*
13189          * Need to drop all the block groups since we're going to recreate all
13190          * of them again.
13191          */
13192         btrfs_free_block_groups(fs_info);
13193         ret = reset_block_groups(fs_info);
13194         if (ret) {
13195                 fprintf(stderr, "error resetting the block groups\n");
13196                 return ret;
13197         }
13198
13199         /* Ok we can allocate now, reinit the extent root */
13200         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13201         if (ret) {
13202                 fprintf(stderr, "extent root initialization failed\n");
13203                 /*
13204                  * When the transaction code is updated we should end the
13205                  * transaction, but for now progs only knows about commit so
13206                  * just return an error.
13207                  */
13208                 return ret;
13209         }
13210
13211         /*
13212          * Now we have all the in-memory block groups setup so we can make
13213          * allocations properly, and the metadata we care about is safe since we
13214          * pinned all of it above.
13215          */
13216         while (1) {
13217                 struct btrfs_block_group_cache *cache;
13218
13219                 cache = btrfs_lookup_first_block_group(fs_info, start);
13220                 if (!cache)
13221                         break;
13222                 start = cache->key.objectid + cache->key.offset;
13223                 ret = btrfs_insert_item(trans, fs_info->extent_root,
13224                                         &cache->key, &cache->item,
13225                                         sizeof(cache->item));
13226                 if (ret) {
13227                         fprintf(stderr, "Error adding block group\n");
13228                         return ret;
13229                 }
13230                 btrfs_extent_post_op(trans, fs_info->extent_root);
13231         }
13232
13233         ret = reset_balance(trans, fs_info);
13234         if (ret)
13235                 fprintf(stderr, "error resetting the pending balance\n");
13236
13237         return ret;
13238 }
13239
13240 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13241 {
13242         struct btrfs_path path;
13243         struct btrfs_trans_handle *trans;
13244         struct btrfs_key key;
13245         int ret;
13246
13247         printf("Recowing metadata block %llu\n", eb->start);
13248         key.objectid = btrfs_header_owner(eb);
13249         key.type = BTRFS_ROOT_ITEM_KEY;
13250         key.offset = (u64)-1;
13251
13252         root = btrfs_read_fs_root(root->fs_info, &key);
13253         if (IS_ERR(root)) {
13254                 fprintf(stderr, "Couldn't find owner root %llu\n",
13255                         key.objectid);
13256                 return PTR_ERR(root);
13257         }
13258
13259         trans = btrfs_start_transaction(root, 1);
13260         if (IS_ERR(trans))
13261                 return PTR_ERR(trans);
13262
13263         btrfs_init_path(&path);
13264         path.lowest_level = btrfs_header_level(eb);
13265         if (path.lowest_level)
13266                 btrfs_node_key_to_cpu(eb, &key, 0);
13267         else
13268                 btrfs_item_key_to_cpu(eb, &key, 0);
13269
13270         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13271         btrfs_commit_transaction(trans, root);
13272         btrfs_release_path(&path);
13273         return ret;
13274 }
13275
13276 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13277 {
13278         struct btrfs_path path;
13279         struct btrfs_trans_handle *trans;
13280         struct btrfs_key key;
13281         int ret;
13282
13283         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13284                bad->key.type, bad->key.offset);
13285         key.objectid = bad->root_id;
13286         key.type = BTRFS_ROOT_ITEM_KEY;
13287         key.offset = (u64)-1;
13288
13289         root = btrfs_read_fs_root(root->fs_info, &key);
13290         if (IS_ERR(root)) {
13291                 fprintf(stderr, "Couldn't find owner root %llu\n",
13292                         key.objectid);
13293                 return PTR_ERR(root);
13294         }
13295
13296         trans = btrfs_start_transaction(root, 1);
13297         if (IS_ERR(trans))
13298                 return PTR_ERR(trans);
13299
13300         btrfs_init_path(&path);
13301         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13302         if (ret) {
13303                 if (ret > 0)
13304                         ret = 0;
13305                 goto out;
13306         }
13307         ret = btrfs_del_item(trans, root, &path);
13308 out:
13309         btrfs_commit_transaction(trans, root);
13310         btrfs_release_path(&path);
13311         return ret;
13312 }
13313
13314 static int zero_log_tree(struct btrfs_root *root)
13315 {
13316         struct btrfs_trans_handle *trans;
13317         int ret;
13318
13319         trans = btrfs_start_transaction(root, 1);
13320         if (IS_ERR(trans)) {
13321                 ret = PTR_ERR(trans);
13322                 return ret;
13323         }
13324         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13325         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13326         ret = btrfs_commit_transaction(trans, root);
13327         return ret;
13328 }
13329
13330 static int populate_csum(struct btrfs_trans_handle *trans,
13331                          struct btrfs_root *csum_root, char *buf, u64 start,
13332                          u64 len)
13333 {
13334         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13335         u64 offset = 0;
13336         u64 sectorsize;
13337         int ret = 0;
13338
13339         while (offset < len) {
13340                 sectorsize = fs_info->sectorsize;
13341                 ret = read_extent_data(fs_info, buf, start + offset,
13342                                        &sectorsize, 0);
13343                 if (ret)
13344                         break;
13345                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13346                                             start + offset, buf, sectorsize);
13347                 if (ret)
13348                         break;
13349                 offset += sectorsize;
13350         }
13351         return ret;
13352 }
13353
13354 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13355                                       struct btrfs_root *csum_root,
13356                                       struct btrfs_root *cur_root)
13357 {
13358         struct btrfs_path path;
13359         struct btrfs_key key;
13360         struct extent_buffer *node;
13361         struct btrfs_file_extent_item *fi;
13362         char *buf = NULL;
13363         u64 start = 0;
13364         u64 len = 0;
13365         int slot = 0;
13366         int ret = 0;
13367
13368         buf = malloc(cur_root->fs_info->sectorsize);
13369         if (!buf)
13370                 return -ENOMEM;
13371
13372         btrfs_init_path(&path);
13373         key.objectid = 0;
13374         key.offset = 0;
13375         key.type = 0;
13376         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13377         if (ret < 0)
13378                 goto out;
13379         /* Iterate all regular file extents and fill its csum */
13380         while (1) {
13381                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13382
13383                 if (key.type != BTRFS_EXTENT_DATA_KEY)
13384                         goto next;
13385                 node = path.nodes[0];
13386                 slot = path.slots[0];
13387                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13388                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
13389                         goto next;
13390                 start = btrfs_file_extent_disk_bytenr(node, fi);
13391                 len = btrfs_file_extent_disk_num_bytes(node, fi);
13392
13393                 ret = populate_csum(trans, csum_root, buf, start, len);
13394                 if (ret == -EEXIST)
13395                         ret = 0;
13396                 if (ret < 0)
13397                         goto out;
13398 next:
13399                 /*
13400                  * TODO: if next leaf is corrupted, jump to nearest next valid
13401                  * leaf.
13402                  */
13403                 ret = btrfs_next_item(cur_root, &path);
13404                 if (ret < 0)
13405                         goto out;
13406                 if (ret > 0) {
13407                         ret = 0;
13408                         goto out;
13409                 }
13410         }
13411
13412 out:
13413         btrfs_release_path(&path);
13414         free(buf);
13415         return ret;
13416 }
13417
13418 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13419                                   struct btrfs_root *csum_root)
13420 {
13421         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13422         struct btrfs_path path;
13423         struct btrfs_root *tree_root = fs_info->tree_root;
13424         struct btrfs_root *cur_root;
13425         struct extent_buffer *node;
13426         struct btrfs_key key;
13427         int slot = 0;
13428         int ret = 0;
13429
13430         btrfs_init_path(&path);
13431         key.objectid = BTRFS_FS_TREE_OBJECTID;
13432         key.offset = 0;
13433         key.type = BTRFS_ROOT_ITEM_KEY;
13434         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13435         if (ret < 0)
13436                 goto out;
13437         if (ret > 0) {
13438                 ret = -ENOENT;
13439                 goto out;
13440         }
13441
13442         while (1) {
13443                 node = path.nodes[0];
13444                 slot = path.slots[0];
13445                 btrfs_item_key_to_cpu(node, &key, slot);
13446                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13447                         goto out;
13448                 if (key.type != BTRFS_ROOT_ITEM_KEY)
13449                         goto next;
13450                 if (!is_fstree(key.objectid))
13451                         goto next;
13452                 key.offset = (u64)-1;
13453
13454                 cur_root = btrfs_read_fs_root(fs_info, &key);
13455                 if (IS_ERR(cur_root) || !cur_root) {
13456                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13457                                 key.objectid);
13458                         goto out;
13459                 }
13460                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13461                                 cur_root);
13462                 if (ret < 0)
13463                         goto out;
13464 next:
13465                 ret = btrfs_next_item(tree_root, &path);
13466                 if (ret > 0) {
13467                         ret = 0;
13468                         goto out;
13469                 }
13470                 if (ret < 0)
13471                         goto out;
13472         }
13473
13474 out:
13475         btrfs_release_path(&path);
13476         return ret;
13477 }
13478
13479 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13480                                       struct btrfs_root *csum_root)
13481 {
13482         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13483         struct btrfs_path path;
13484         struct btrfs_extent_item *ei;
13485         struct extent_buffer *leaf;
13486         char *buf;
13487         struct btrfs_key key;
13488         int ret;
13489
13490         btrfs_init_path(&path);
13491         key.objectid = 0;
13492         key.type = BTRFS_EXTENT_ITEM_KEY;
13493         key.offset = 0;
13494         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13495         if (ret < 0) {
13496                 btrfs_release_path(&path);
13497                 return ret;
13498         }
13499
13500         buf = malloc(csum_root->fs_info->sectorsize);
13501         if (!buf) {
13502                 btrfs_release_path(&path);
13503                 return -ENOMEM;
13504         }
13505
13506         while (1) {
13507                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13508                         ret = btrfs_next_leaf(extent_root, &path);
13509                         if (ret < 0)
13510                                 break;
13511                         if (ret) {
13512                                 ret = 0;
13513                                 break;
13514                         }
13515                 }
13516                 leaf = path.nodes[0];
13517
13518                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13519                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13520                         path.slots[0]++;
13521                         continue;
13522                 }
13523
13524                 ei = btrfs_item_ptr(leaf, path.slots[0],
13525                                     struct btrfs_extent_item);
13526                 if (!(btrfs_extent_flags(leaf, ei) &
13527                       BTRFS_EXTENT_FLAG_DATA)) {
13528                         path.slots[0]++;
13529                         continue;
13530                 }
13531
13532                 ret = populate_csum(trans, csum_root, buf, key.objectid,
13533                                     key.offset);
13534                 if (ret)
13535                         break;
13536                 path.slots[0]++;
13537         }
13538
13539         btrfs_release_path(&path);
13540         free(buf);
13541         return ret;
13542 }
13543
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * An extent tree init wipes all extent info, so in that case the extent
 * tree cannot serve as the source.  When @search_fs_tree is set the
 * fs/subvolume trees are walked instead; otherwise the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        if (!search_fs_tree)
                return fill_csum_tree_from_extent(trans, csum_root);

        return fill_csum_tree_from_fs(trans, csum_root);
}
13560
13561 static void free_roots_info_cache(void)
13562 {
13563         if (!roots_info_cache)
13564                 return;
13565
13566         while (!cache_tree_empty(roots_info_cache)) {
13567                 struct cache_extent *entry;
13568                 struct root_item_info *rii;
13569
13570                 entry = first_cache_extent(roots_info_cache);
13571                 if (!entry)
13572                         break;
13573                 remove_cache_extent(roots_info_cache, entry);
13574                 rii = container_of(entry, struct root_item_info, cache_extent);
13575                 free(rii);
13576         }
13577
13578         free(roots_info_cache);
13579         roots_info_cache = NULL;
13580 }
13581
13582 static int build_roots_info_cache(struct btrfs_fs_info *info)
13583 {
13584         int ret = 0;
13585         struct btrfs_key key;
13586         struct extent_buffer *leaf;
13587         struct btrfs_path path;
13588
13589         if (!roots_info_cache) {
13590                 roots_info_cache = malloc(sizeof(*roots_info_cache));
13591                 if (!roots_info_cache)
13592                         return -ENOMEM;
13593                 cache_tree_init(roots_info_cache);
13594         }
13595
13596         btrfs_init_path(&path);
13597         key.objectid = 0;
13598         key.type = BTRFS_EXTENT_ITEM_KEY;
13599         key.offset = 0;
13600         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13601         if (ret < 0)
13602                 goto out;
13603         leaf = path.nodes[0];
13604
13605         while (1) {
13606                 struct btrfs_key found_key;
13607                 struct btrfs_extent_item *ei;
13608                 struct btrfs_extent_inline_ref *iref;
13609                 int slot = path.slots[0];
13610                 int type;
13611                 u64 flags;
13612                 u64 root_id;
13613                 u8 level;
13614                 struct cache_extent *entry;
13615                 struct root_item_info *rii;
13616
13617                 if (slot >= btrfs_header_nritems(leaf)) {
13618                         ret = btrfs_next_leaf(info->extent_root, &path);
13619                         if (ret < 0) {
13620                                 break;
13621                         } else if (ret) {
13622                                 ret = 0;
13623                                 break;
13624                         }
13625                         leaf = path.nodes[0];
13626                         slot = path.slots[0];
13627                 }
13628
13629                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13630
13631                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13632                     found_key.type != BTRFS_METADATA_ITEM_KEY)
13633                         goto next;
13634
13635                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13636                 flags = btrfs_extent_flags(leaf, ei);
13637
13638                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13639                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
13640                         goto next;
13641
13642                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13643                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13644                         level = found_key.offset;
13645                 } else {
13646                         struct btrfs_tree_block_info *binfo;
13647
13648                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
13649                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13650                         level = btrfs_tree_block_level(leaf, binfo);
13651                 }
13652
13653                 /*
13654                  * For a root extent, it must be of the following type and the
13655                  * first (and only one) iref in the item.
13656                  */
13657                 type = btrfs_extent_inline_ref_type(leaf, iref);
13658                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
13659                         goto next;
13660
13661                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13662                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13663                 if (!entry) {
13664                         rii = malloc(sizeof(struct root_item_info));
13665                         if (!rii) {
13666                                 ret = -ENOMEM;
13667                                 goto out;
13668                         }
13669                         rii->cache_extent.start = root_id;
13670                         rii->cache_extent.size = 1;
13671                         rii->level = (u8)-1;
13672                         entry = &rii->cache_extent;
13673                         ret = insert_cache_extent(roots_info_cache, entry);
13674                         ASSERT(ret == 0);
13675                 } else {
13676                         rii = container_of(entry, struct root_item_info,
13677                                            cache_extent);
13678                 }
13679
13680                 ASSERT(rii->cache_extent.start == root_id);
13681                 ASSERT(rii->cache_extent.size == 1);
13682
13683                 if (level > rii->level || rii->level == (u8)-1) {
13684                         rii->level = level;
13685                         rii->bytenr = found_key.objectid;
13686                         rii->gen = btrfs_extent_generation(leaf, ei);
13687                         rii->node_count = 1;
13688                 } else if (level == rii->level) {
13689                         rii->node_count++;
13690                 }
13691 next:
13692                 path.slots[0]++;
13693         }
13694
13695 out:
13696         btrfs_release_path(&path);
13697
13698         return ret;
13699 }
13700
13701 static int maybe_repair_root_item(struct btrfs_path *path,
13702                                   const struct btrfs_key *root_key,
13703                                   const int read_only_mode)
13704 {
13705         const u64 root_id = root_key->objectid;
13706         struct cache_extent *entry;
13707         struct root_item_info *rii;
13708         struct btrfs_root_item ri;
13709         unsigned long offset;
13710
13711         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13712         if (!entry) {
13713                 fprintf(stderr,
13714                         "Error: could not find extent items for root %llu\n",
13715                         root_key->objectid);
13716                 return -ENOENT;
13717         }
13718
13719         rii = container_of(entry, struct root_item_info, cache_extent);
13720         ASSERT(rii->cache_extent.start == root_id);
13721         ASSERT(rii->cache_extent.size == 1);
13722
13723         if (rii->node_count != 1) {
13724                 fprintf(stderr,
13725                         "Error: could not find btree root extent for root %llu\n",
13726                         root_id);
13727                 return -ENOENT;
13728         }
13729
13730         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13731         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
13732
13733         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13734             btrfs_root_level(&ri) != rii->level ||
13735             btrfs_root_generation(&ri) != rii->gen) {
13736
13737                 /*
13738                  * If we're in repair mode but our caller told us to not update
13739                  * the root item, i.e. just check if it needs to be updated, don't
13740                  * print this message, since the caller will call us again shortly
13741                  * for the same root item without read only mode (the caller will
13742                  * open a transaction first).
13743                  */
13744                 if (!(read_only_mode && repair))
13745                         fprintf(stderr,
13746                                 "%sroot item for root %llu,"
13747                                 " current bytenr %llu, current gen %llu, current level %u,"
13748                                 " new bytenr %llu, new gen %llu, new level %u\n",
13749                                 (read_only_mode ? "" : "fixing "),
13750                                 root_id,
13751                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13752                                 btrfs_root_level(&ri),
13753                                 rii->bytenr, rii->gen, rii->level);
13754
13755                 if (btrfs_root_generation(&ri) > rii->gen) {
13756                         fprintf(stderr,
13757                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13758                                 root_id, btrfs_root_generation(&ri), rii->gen);
13759                         return -EINVAL;
13760                 }
13761
13762                 if (!read_only_mode) {
13763                         btrfs_set_root_bytenr(&ri, rii->bytenr);
13764                         btrfs_set_root_level(&ri, rii->level);
13765                         btrfs_set_root_generation(&ri, rii->gen);
13766                         write_extent_buffer(path->nodes[0], &ri,
13767                                             offset, sizeof(ri));
13768                 }
13769
13770                 return 1;
13771         }
13772
13773         return 0;
13774 }
13775
13776 /*
13777  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
13778  * caused read-only snapshots to be corrupted if they were created at a moment
13779  * when the source subvolume/snapshot had orphan items. The issue was that the
13780  * on-disk root items became incorrect, referring to the pre orphan cleanup root
13781  * node instead of the post orphan cleanup root node.
13782  * So this function, and its callees, just detects and fixes those cases. Even
13783  * though the regression was for read-only snapshots, this function applies to
13784  * any snapshot/subvolume root.
13785  * This must be run before any other repair code - not doing it so, makes other
13786  * repair code delete or modify backrefs in the extent tree for example, which
13787  * will result in an inconsistent fs after repairing the root items.
13788  */
13789 static int repair_root_items(struct btrfs_fs_info *info)
13790 {
13791         struct btrfs_path path;
13792         struct btrfs_key key;
13793         struct extent_buffer *leaf;
13794         struct btrfs_trans_handle *trans = NULL;
13795         int ret = 0;
13796         int bad_roots = 0;
13797         int need_trans = 0;
13798
13799         btrfs_init_path(&path);
13800
13801         ret = build_roots_info_cache(info);
13802         if (ret)
13803                 goto out;
13804
13805         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
13806         key.type = BTRFS_ROOT_ITEM_KEY;
13807         key.offset = 0;
13808
13809 again:
13810         /*
13811          * Avoid opening and committing transactions if a leaf doesn't have
13812          * any root items that need to be fixed, so that we avoid rotating
13813          * backup roots unnecessarily.
13814          */
13815         if (need_trans) {
13816                 trans = btrfs_start_transaction(info->tree_root, 1);
13817                 if (IS_ERR(trans)) {
13818                         ret = PTR_ERR(trans);
13819                         goto out;
13820                 }
13821         }
13822
13823         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
13824                                 0, trans ? 1 : 0);
13825         if (ret < 0)
13826                 goto out;
13827         leaf = path.nodes[0];
13828
13829         while (1) {
13830                 struct btrfs_key found_key;
13831
13832                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
13833                         int no_more_keys = find_next_key(&path, &key);
13834
13835                         btrfs_release_path(&path);
13836                         if (trans) {
13837                                 ret = btrfs_commit_transaction(trans,
13838                                                                info->tree_root);
13839                                 trans = NULL;
13840                                 if (ret < 0)
13841                                         goto out;
13842                         }
13843                         need_trans = 0;
13844                         if (no_more_keys)
13845                                 break;
13846                         goto again;
13847                 }
13848
13849                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13850
13851                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
13852                         goto next;
13853                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13854                         goto next;
13855
13856                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
13857                 if (ret < 0)
13858                         goto out;
13859                 if (ret) {
13860                         if (!trans && repair) {
13861                                 need_trans = 1;
13862                                 key = found_key;
13863                                 btrfs_release_path(&path);
13864                                 goto again;
13865                         }
13866                         bad_roots++;
13867                 }
13868 next:
13869                 path.slots[0]++;
13870         }
13871         ret = 0;
13872 out:
13873         free_roots_info_cache();
13874         btrfs_release_path(&path);
13875         if (trans)
13876                 btrfs_commit_transaction(trans, info->tree_root);
13877         if (ret < 0)
13878                 return ret;
13879
13880         return bad_roots;
13881 }
13882
13883 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13884 {
13885         struct btrfs_trans_handle *trans;
13886         struct btrfs_block_group_cache *bg_cache;
13887         u64 current = 0;
13888         int ret = 0;
13889
13890         /* Clear all free space cache inodes and its extent data */
13891         while (1) {
13892                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13893                 if (!bg_cache)
13894                         break;
13895                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
13896                 if (ret < 0)
13897                         return ret;
13898                 current = bg_cache->key.objectid + bg_cache->key.offset;
13899         }
13900
13901         /* Don't forget to set cache_generation to -1 */
13902         trans = btrfs_start_transaction(fs_info->tree_root, 0);
13903         if (IS_ERR(trans)) {
13904                 error("failed to update super block cache generation");
13905                 return PTR_ERR(trans);
13906         }
13907         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
13908         btrfs_commit_transaction(trans, fs_info->tree_root);
13909
13910         return ret;
13911 }
13912
13913 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13914                 int clear_version)
13915 {
13916         int ret = 0;
13917
13918         if (clear_version == 1) {
13919                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13920                         error(
13921                 "free space cache v2 detected, use --clear-space-cache v2");
13922                         ret = 1;
13923                         goto close_out;
13924                 }
13925                 printf("Clearing free space cache\n");
13926                 ret = clear_free_space_cache(fs_info);
13927                 if (ret) {
13928                         error("failed to clear free space cache");
13929                         ret = 1;
13930                 } else {
13931                         printf("Free space cache cleared\n");
13932                 }
13933         } else if (clear_version == 2) {
13934                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13935                         printf("no free space cache v2 to clear\n");
13936                         ret = 0;
13937                         goto close_out;
13938                 }
13939                 printf("Clear free space cache v2\n");
13940                 ret = btrfs_clear_free_space_tree(fs_info);
13941                 if (ret) {
13942                         error("failed to clear free space cache v2: %d", ret);
13943                         ret = 1;
13944                 } else {
13945                         printf("free space cache v2 cleared\n");
13946                 }
13947         }
13948 close_out:
13949         return ret;
13950 }
13951
/*
 * Help text for "btrfs check", stored as a NULL-terminated array of lines.
 * cmd_check() passes it to usage() when option parsing reaches its '?'/'h'
 * case or when the number of positional arguments is not exactly one.
 * The option list below should be kept in sync with the long_options table
 * in cmd_check().
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--force                     skip mount checks, repair is not possible",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
13983
13984 int cmd_check(int argc, char **argv)
13985 {
13986         struct cache_tree root_cache;
13987         struct btrfs_root *root;
13988         struct btrfs_fs_info *info;
13989         u64 bytenr = 0;
13990         u64 subvolid = 0;
13991         u64 tree_root_bytenr = 0;
13992         u64 chunk_root_bytenr = 0;
13993         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
13994         int ret = 0;
13995         int err = 0;
13996         u64 num;
13997         int init_csum_tree = 0;
13998         int readonly = 0;
13999         int clear_space_cache = 0;
14000         int qgroup_report = 0;
14001         int qgroups_repaired = 0;
14002         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
14003         int force = 0;
14004
14005         while(1) {
14006                 int c;
14007                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14008                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14009                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14010                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14011                         GETOPT_VAL_FORCE };
14012                 static const struct option long_options[] = {
14013                         { "super", required_argument, NULL, 's' },
14014                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14015                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14016                         { "init-csum-tree", no_argument, NULL,
14017                                 GETOPT_VAL_INIT_CSUM },
14018                         { "init-extent-tree", no_argument, NULL,
14019                                 GETOPT_VAL_INIT_EXTENT },
14020                         { "check-data-csum", no_argument, NULL,
14021                                 GETOPT_VAL_CHECK_CSUM },
14022                         { "backup", no_argument, NULL, 'b' },
14023                         { "subvol-extents", required_argument, NULL, 'E' },
14024                         { "qgroup-report", no_argument, NULL, 'Q' },
14025                         { "tree-root", required_argument, NULL, 'r' },
14026                         { "chunk-root", required_argument, NULL,
14027                                 GETOPT_VAL_CHUNK_TREE },
14028                         { "progress", no_argument, NULL, 'p' },
14029                         { "mode", required_argument, NULL,
14030                                 GETOPT_VAL_MODE },
14031                         { "clear-space-cache", required_argument, NULL,
14032                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
14033                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14034                         { NULL, 0, NULL, 0}
14035                 };
14036
14037                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14038                 if (c < 0)
14039                         break;
14040                 switch(c) {
14041                         case 'a': /* ignored */ break;
14042                         case 'b':
14043                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14044                                 break;
14045                         case 's':
14046                                 num = arg_strtou64(optarg);
14047                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14048                                         error(
14049                                         "super mirror should be less than %d",
14050                                                 BTRFS_SUPER_MIRROR_MAX);
14051                                         exit(1);
14052                                 }
14053                                 bytenr = btrfs_sb_offset(((int)num));
14054                                 printf("using SB copy %llu, bytenr %llu\n", num,
14055                                        (unsigned long long)bytenr);
14056                                 break;
14057                         case 'Q':
14058                                 qgroup_report = 1;
14059                                 break;
14060                         case 'E':
14061                                 subvolid = arg_strtou64(optarg);
14062                                 break;
14063                         case 'r':
14064                                 tree_root_bytenr = arg_strtou64(optarg);
14065                                 break;
14066                         case GETOPT_VAL_CHUNK_TREE:
14067                                 chunk_root_bytenr = arg_strtou64(optarg);
14068                                 break;
14069                         case 'p':
14070                                 ctx.progress_enabled = true;
14071                                 break;
14072                         case '?':
14073                         case 'h':
14074                                 usage(cmd_check_usage);
14075                         case GETOPT_VAL_REPAIR:
14076                                 printf("enabling repair mode\n");
14077                                 repair = 1;
14078                                 ctree_flags |= OPEN_CTREE_WRITES;
14079                                 break;
14080                         case GETOPT_VAL_READONLY:
14081                                 readonly = 1;
14082                                 break;
14083                         case GETOPT_VAL_INIT_CSUM:
14084                                 printf("Creating a new CRC tree\n");
14085                                 init_csum_tree = 1;
14086                                 repair = 1;
14087                                 ctree_flags |= OPEN_CTREE_WRITES;
14088                                 break;
14089                         case GETOPT_VAL_INIT_EXTENT:
14090                                 init_extent_tree = 1;
14091                                 ctree_flags |= (OPEN_CTREE_WRITES |
14092                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
14093                                 repair = 1;
14094                                 break;
14095                         case GETOPT_VAL_CHECK_CSUM:
14096                                 check_data_csum = 1;
14097                                 break;
14098                         case GETOPT_VAL_MODE:
14099                                 check_mode = parse_check_mode(optarg);
14100                                 if (check_mode == CHECK_MODE_UNKNOWN) {
14101                                         error("unknown mode: %s", optarg);
14102                                         exit(1);
14103                                 }
14104                                 break;
14105                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
14106                                 if (strcmp(optarg, "v1") == 0) {
14107                                         clear_space_cache = 1;
14108                                 } else if (strcmp(optarg, "v2") == 0) {
14109                                         clear_space_cache = 2;
14110                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14111                                 } else {
14112                                         error(
14113                 "invalid argument to --clear-space-cache, must be v1 or v2");
14114                                         exit(1);
14115                                 }
14116                                 ctree_flags |= OPEN_CTREE_WRITES;
14117                                 break;
14118                         case GETOPT_VAL_FORCE:
14119                                 force = 1;
14120                                 break;
14121                 }
14122         }
14123
14124         if (check_argc_exact(argc - optind, 1))
14125                 usage(cmd_check_usage);
14126
14127         if (ctx.progress_enabled) {
14128                 ctx.tp = TASK_NOTHING;
14129                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14130         }
14131
14132         /* This check is the only reason for --readonly to exist */
14133         if (readonly && repair) {
14134                 error("repair options are not compatible with --readonly");
14135                 exit(1);
14136         }
14137
14138         /*
14139          * experimental and dangerous
14140          */
14141         if (repair && check_mode == CHECK_MODE_LOWMEM)
14142                 warning("low-memory mode repair support is only partial");
14143
14144         radix_tree_init();
14145         cache_tree_init(&root_cache);
14146
14147         ret = check_mounted(argv[optind]);
14148         if (!force) {
14149                 if (ret < 0) {
14150                         error("could not check mount status: %s",
14151                                         strerror(-ret));
14152                         err |= !!ret;
14153                         goto err_out;
14154                 } else if (ret) {
14155                         error(
14156 "%s is currently mounted, use --force if you really intend to check the filesystem",
14157                                 argv[optind]);
14158                         ret = -EBUSY;
14159                         err |= !!ret;
14160                         goto err_out;
14161                 }
14162         } else {
14163                 if (repair) {
14164                         error("repair and --force is not yet supported");
14165                         ret = 1;
14166                         err |= !!ret;
14167                         goto err_out;
14168                 }
14169                 if (ret < 0) {
14170                         warning(
14171 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14172                                 argv[optind]);
14173                 } else if (ret) {
14174                         warning(
14175                         "filesystem mounted, continuing because of --force");
14176                 }
14177                 /* A block device is mounted in exclusive mode by kernel */
14178                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14179         }
14180
14181         /* only allow partial opening under repair mode */
14182         if (repair)
14183                 ctree_flags |= OPEN_CTREE_PARTIAL;
14184
14185         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14186                                   chunk_root_bytenr, ctree_flags);
14187         if (!info) {
14188                 error("cannot open file system");
14189                 ret = -EIO;
14190                 err |= !!ret;
14191                 goto err_out;
14192         }
14193
14194         global_info = info;
14195         root = info->fs_root;
14196         uuid_unparse(info->super_copy->fsid, uuidbuf);
14197
14198         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14199
14200         /*
14201          * Check the bare minimum before starting anything else that could rely
14202          * on it, namely the tree roots, any local consistency checks
14203          */
14204         if (!extent_buffer_uptodate(info->tree_root->node) ||
14205             !extent_buffer_uptodate(info->dev_root->node) ||
14206             !extent_buffer_uptodate(info->chunk_root->node)) {
14207                 error("critical roots corrupted, unable to check the filesystem");
14208                 err |= !!ret;
14209                 ret = -EIO;
14210                 goto close_out;
14211         }
14212
14213         if (clear_space_cache) {
14214                 ret = do_clear_free_space_cache(info, clear_space_cache);
14215                 err |= !!ret;
14216                 goto close_out;
14217         }
14218
14219         /*
14220          * repair mode will force us to commit transaction which
14221          * will make us fail to load log tree when mounting.
14222          */
14223         if (repair && btrfs_super_log_root(info->super_copy)) {
14224                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14225                 if (!ret) {
14226                         ret = 1;
14227                         err |= !!ret;
14228                         goto close_out;
14229                 }
14230                 ret = zero_log_tree(root);
14231                 err |= !!ret;
14232                 if (ret) {
14233                         error("failed to zero log tree: %d", ret);
14234                         goto close_out;
14235                 }
14236         }
14237
14238         if (qgroup_report) {
14239                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14240                        uuidbuf);
14241                 ret = qgroup_verify_all(info);
14242                 err |= !!ret;
14243                 if (ret == 0)
14244                         report_qgroups(1);
14245                 goto close_out;
14246         }
14247         if (subvolid) {
14248                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14249                        subvolid, argv[optind], uuidbuf);
14250                 ret = print_extent_state(info, subvolid);
14251                 err |= !!ret;
14252                 goto close_out;
14253         }
14254
14255         if (init_extent_tree || init_csum_tree) {
14256                 struct btrfs_trans_handle *trans;
14257
14258                 trans = btrfs_start_transaction(info->extent_root, 0);
14259                 if (IS_ERR(trans)) {
14260                         error("error starting transaction");
14261                         ret = PTR_ERR(trans);
14262                         err |= !!ret;
14263                         goto close_out;
14264                 }
14265
14266                 if (init_extent_tree) {
14267                         printf("Creating a new extent tree\n");
14268                         ret = reinit_extent_tree(trans, info);
14269                         err |= !!ret;
14270                         if (ret)
14271                                 goto close_out;
14272                 }
14273
14274                 if (init_csum_tree) {
14275                         printf("Reinitialize checksum tree\n");
14276                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14277                         if (ret) {
14278                                 error("checksum tree initialization failed: %d",
14279                                                 ret);
14280                                 ret = -EIO;
14281                                 err |= !!ret;
14282                                 goto close_out;
14283                         }
14284
14285                         ret = fill_csum_tree(trans, info->csum_root,
14286                                              init_extent_tree);
14287                         err |= !!ret;
14288                         if (ret) {
14289                                 error("checksum tree refilling failed: %d", ret);
14290                                 return -EIO;
14291                         }
14292                 }
14293                 /*
14294                  * Ok now we commit and run the normal fsck, which will add
14295                  * extent entries for all of the items it finds.
14296                  */
14297                 ret = btrfs_commit_transaction(trans, info->extent_root);
14298                 err |= !!ret;
14299                 if (ret)
14300                         goto close_out;
14301         }
14302         if (!extent_buffer_uptodate(info->extent_root->node)) {
14303                 error("critical: extent_root, unable to check the filesystem");
14304                 ret = -EIO;
14305                 err |= !!ret;
14306                 goto close_out;
14307         }
14308         if (!extent_buffer_uptodate(info->csum_root->node)) {
14309                 error("critical: csum_root, unable to check the filesystem");
14310                 ret = -EIO;
14311                 err |= !!ret;
14312                 goto close_out;
14313         }
14314
14315         if (!init_extent_tree) {
14316                 ret = repair_root_items(info);
14317                 if (ret < 0) {
14318                         err = !!ret;
14319                         error("failed to repair root items: %s", strerror(-ret));
14320                         goto close_out;
14321                 }
14322                 if (repair) {
14323                         fprintf(stderr, "Fixed %d roots.\n", ret);
14324                         ret = 0;
14325                 } else if (ret > 0) {
14326                         fprintf(stderr,
14327                                 "Found %d roots with an outdated root item.\n",
14328                                 ret);
14329                         fprintf(stderr,
14330         "Please run a filesystem check with the option --repair to fix them.\n");
14331                         ret = 1;
14332                         err |= ret;
14333                         goto close_out;
14334                 }
14335         }
14336
14337         ret = do_check_chunks_and_extents(info);
14338         err |= !!ret;
14339         if (ret)
14340                 error(
14341                 "errors found in extent allocation tree or chunk allocation");
14342
14343         /* Only re-check super size after we checked and repaired the fs */
14344         err |= !is_super_size_valid(info);
14345
14346         if (!ctx.progress_enabled) {
14347                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14348                         fprintf(stderr, "checking free space tree\n");
14349                 else
14350                         fprintf(stderr, "checking free space cache\n");
14351         }
14352         ret = check_space_cache(root);
14353         err |= !!ret;
14354         if (ret) {
14355                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14356                         error("errors found in free space tree");
14357                 else
14358                         error("errors found in free space cache");
14359                 goto out;
14360         }
14361
14362         /*
14363          * We used to have to have these hole extents in between our real
14364          * extents so if we don't have this flag set we need to make sure there
14365          * are no gaps in the file extents for inodes, otherwise we can just
14366          * ignore it when this happens.
14367          */
14368         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14369         ret = do_check_fs_roots(info, &root_cache);
14370         err |= !!ret;
14371         if (ret) {
14372                 error("errors found in fs roots");
14373                 goto out;
14374         }
14375
14376         fprintf(stderr, "checking csums\n");
14377         ret = check_csums(root);
14378         err |= !!ret;
14379         if (ret) {
14380                 error("errors found in csum tree");
14381                 goto out;
14382         }
14383
14384         fprintf(stderr, "checking root refs\n");
14385         /* For low memory mode, check_fs_roots_v2 handles root refs */
14386         if (check_mode != CHECK_MODE_LOWMEM) {
14387                 ret = check_root_refs(root, &root_cache);
14388                 err |= !!ret;
14389                 if (ret) {
14390                         error("errors found in root refs");
14391                         goto out;
14392                 }
14393         }
14394
14395         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14396                 struct extent_buffer *eb;
14397
14398                 eb = list_first_entry(&root->fs_info->recow_ebs,
14399                                       struct extent_buffer, recow);
14400                 list_del_init(&eb->recow);
14401                 ret = recow_extent_buffer(root, eb);
14402                 err |= !!ret;
14403                 if (ret) {
14404                         error("fails to fix transid errors");
14405                         break;
14406                 }
14407         }
14408
14409         while (!list_empty(&delete_items)) {
14410                 struct bad_item *bad;
14411
14412                 bad = list_first_entry(&delete_items, struct bad_item, list);
14413                 list_del_init(&bad->list);
14414                 if (repair) {
14415                         ret = delete_bad_item(root, bad);
14416                         err |= !!ret;
14417                 }
14418                 free(bad);
14419         }
14420
14421         if (info->quota_enabled) {
14422                 fprintf(stderr, "checking quota groups\n");
14423                 ret = qgroup_verify_all(info);
14424                 err |= !!ret;
14425                 if (ret) {
14426                         error("failed to check quota groups");
14427                         goto out;
14428                 }
14429                 report_qgroups(0);
14430                 ret = repair_qgroups(info, &qgroups_repaired);
14431                 err |= !!ret;
14432                 if (err) {
14433                         error("failed to repair quota groups");
14434                         goto out;
14435                 }
14436                 ret = 0;
14437         }
14438
14439         if (!list_empty(&root->fs_info->recow_ebs)) {
14440                 error("transid errors in file system");
14441                 ret = 1;
14442                 err |= !!ret;
14443         }
14444 out:
14445         printf("found %llu bytes used, ",
14446                (unsigned long long)bytes_used);
14447         if (err)
14448                 printf("error(s) found\n");
14449         else
14450                 printf("no error found\n");
14451         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14452         printf("total tree bytes: %llu\n",
14453                (unsigned long long)total_btree_bytes);
14454         printf("total fs tree bytes: %llu\n",
14455                (unsigned long long)total_fs_tree_bytes);
14456         printf("total extent tree bytes: %llu\n",
14457                (unsigned long long)total_extent_tree_bytes);
14458         printf("btree space waste bytes: %llu\n",
14459                (unsigned long long)btree_space_waste);
14460         printf("file data blocks allocated: %llu\n referenced %llu\n",
14461                 (unsigned long long)data_bytes_allocated,
14462                 (unsigned long long)data_bytes_referenced);
14463
14464         free_qgroup_counts();
14465         free_root_recs_tree(&root_cache);
14466 close_out:
14467         close_ctree(root);
14468 err_out:
14469         if (ctx.progress_enabled)
14470                 task_deinit(ctx.info);
14471
14472         return err;
14473 }